2019-08-27 02:13:30 +00:00
|
|
|
<?php
|
|
|
|
/** @license MIT
|
|
|
|
* Copyright 2017 J. King, Dustin Wilson et al.
|
|
|
|
* See LICENSE and AUTHORS files for details */
|
|
|
|
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace JKingWeb\Arsse\Misc;
|
|
|
|
|
2019-08-27 15:08:13 +00:00
|
|
|
/**
 * A collection of functions for manipulating URLs
 */
class URL {
    /** User component */
    const P_USER = 1;
    /** Password component */
    const P_PASS = 2;
    /** Path segment component */
    const P_PATH = 3;
    /** Full query component */
    const P_QUERY = 4;

    /** Normalizes an absolute URL
     *
     * Normalizations performed are:
     *
     * - Lowercasing scheme
     * - Lowercasing ASCII host names
     * - IDN normalization
     * - Resolution of relative path segments
     * - Discarding empty path segments
     * - Discarding empty queries
     * - %-encoding normalization (delegated to normalizePart(), which is currently a stub)
     * - Fragment discarding
     *
     * It does NOT perform IPv6 address normalization, nor does it drop trailing slashes from paths
     *
     * URLs which cannot be parsed at all, as well as URLs lacking a scheme or a non-empty host, are returned unchanged
     *
     * @param string $url The URL to normalize. Relative URLs are returned unchanged
     * @param string $u Username to add to the URL, replacing any existing credentials
     * @param string $p Password to add to the URL, if a username is specified
     * @return string The normalized URL, or the input verbatim if it is not a parseable absolute URL
     */
    public static function normalize(string $url, string $u = null, string $p = null): string {
        $parts = parse_url($url);
        // parse_url() returns false for seriously malformed URLs; treat these like relative URLs
        if (!is_array($parts)) {
            return $url;
        }
        if (!isset($parts['scheme']) || !isset($parts['host']) || !strlen($parts['host'])) {
            return $url;
        }
        $out = strtolower($parts['scheme'])."://";
        // explicitly supplied credentials take precedence over (and entirely replace) those in the URL
        if (strlen($u ?? "")) {
            // supplied credentials are raw text and must be encoded; those from the URL are already encoded
            $user = rawurlencode($u);
            $pass = strlen($p ?? "") ? rawurlencode($p) : "";
        } else {
            $user = $parts['user'] ?? "";
            $pass = $parts['pass'] ?? "";
        }
        // a password is only ever emitted alongside a non-empty username
        if (strlen($user)) {
            $out .= self::normalizePart($user, self::P_USER);
            if (strlen($pass)) {
                $out .= ":".self::normalizePart($pass, self::P_PASS);
            }
            $out .= "@";
        }
        $out .= self::normalizeHost($parts['host']);
        $out .= isset($parts['port']) ? ":".$parts['port'] : "";
        $out .= self::normalizePath($parts['path'] ?? "");
        // an empty query is discarded along with its "?" delimiter
        if (isset($parts['query']) && strlen($parts['query'])) {
            $out .= "?".self::normalizePart($parts['query'], self::P_QUERY);
        }
        // the fragment, if any, is intentionally never appended
        return $out;
    }

    /** Perform percent-encoding normalization for a given URL component
     *
     * @param string $part The already-encoded component text
     * @param int $type One of the P_* constants identifying the kind of component
     * @return string The component; currently returned unmodified
     */
    protected static function normalizePart(string $part, int $type): string {
        // stub
        return $part;
    }

    /** Normalizes a hostname per IDNA:2008
     *
     * The host is converted to its ASCII form and back to Unicode; if either conversion fails the host is returned unchanged
     *
     * @param string $host The host name to normalize; must not be empty
     */
    protected static function normalizeHost(string $host): string {
        $idn = idn_to_ascii($host, \IDNA_NONTRANSITIONAL_TO_ASCII, \INTL_IDNA_VARIANT_UTS46);
        if ($idn === false) {
            return $host;
        }
        $utf8 = idn_to_utf8($idn, \IDNA_NONTRANSITIONAL_TO_UNICODE, \INTL_IDNA_VARIANT_UTS46);
        // idn_to_utf8() may also fail; returning false here would violate the declared return type
        return $utf8 !== false ? $utf8 : $host;
    }

    /** Normalizes the whole path segment to remove empty segments and relative segments
     *
     * The result always starts with a slash; a trailing slash is kept only if the input path itself ends with one
     *
     * @param string $path The path component of a URL; may be an empty string
     */
    protected static function normalizePath(string $path): string {
        $out = [];
        foreach (explode("/", $path) as $segment) {
            switch ($segment) {
                case "":
                case ".":
                    // empty and "current directory" segments contribute nothing
                    break;
                case "..":
                    // step up one level; popping an already-empty stack is harmless
                    array_pop($out);
                    break;
                default:
                    $out[] = self::normalizePart($segment, self::P_PATH);
            }
        }
        $trailing = substr($path, -1) === "/" ? "/" : "";
        // when no segments remain the leading and trailing slashes would double up, hence the replacement
        return str_replace("//", "/", "/".implode("/", $out).$trailing);
    }
}
|