mirror of
https://code.mensbeam.com/MensBeam/Arsse.git
synced 2024-12-22 21:22:40 +00:00
Handle IDNs
While IPv6 address normalization was originally planned, it was deemed too much effort for such a niche feature; IPv6 addresses are instead passed through unmodified
This commit is contained in:
parent
dc750acf07
commit
d4802bcdb6
2 changed files with 10 additions and 15 deletions
|
@ -24,15 +24,16 @@ class URL {
|
||||||
* Normalizations performed are:
|
* Normalizations performed are:
|
||||||
*
|
*
|
||||||
* - Lowercasing scheme
|
* - Lowercasing scheme
|
||||||
* - Lowercasing host names
|
* - Lowercasing ASCII host names
|
||||||
* - IDN normalization (IDN rather than punycode is returned)
|
* - IDN normalization
|
||||||
* - IPv6 address normalization
|
|
||||||
* - Resolution of relative path segments
|
* - Resolution of relative path segments
|
||||||
* - Discarding empty path segments
|
* - Discarding empty path segments
|
||||||
* - Discarding empty queries
|
* - Discarding empty queries
|
||||||
* - %-encoding normalization
|
* - %-encoding normalization
|
||||||
* - Fragment discarding
|
* - Fragment discarding
|
||||||
*
|
*
|
||||||
|
* It does NOT perform IPv6 address normalization, nor does it drop trailing slashes from paths
|
||||||
|
*
|
||||||
* @param string $url The URL to normalize. Relative URLs are returned unchanged
|
* @param string $url The URL to normalize. Relative URLs are returned unchanged
|
||||||
* @param string $u Username to add to the URL, replacing any existing credentials
|
* @param string $u Username to add to the URL, replacing any existing credentials
|
||||||
* @param string $p Password to add to the URL, if a username is specified
|
* @param string $p Password to add to the URL, if a username is specified
|
||||||
|
@ -56,11 +57,7 @@ class URL {
|
||||||
}
|
}
|
||||||
$out .= "@";
|
$out .= "@";
|
||||||
}
|
}
|
||||||
if ($host[0] === "[") {
|
|
||||||
$out .= self::normalizeIPv6($host);
|
|
||||||
} else {
|
|
||||||
$out .= self::normalizeHost($host);
|
$out .= self::normalizeHost($host);
|
||||||
}
|
|
||||||
$out .= isset($port) ? ":$port" : "";
|
$out .= isset($port) ? ":$port" : "";
|
||||||
$out .= self::normalizePath($path ?? "");
|
$out .= self::normalizePath($path ?? "");
|
||||||
if (isset($query) && strlen($query)) {
|
if (isset($query) && strlen($query)) {
|
||||||
|
@ -75,13 +72,8 @@ class URL {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static function normalizeHost(string $host): string {
|
protected static function normalizeHost(string $host): string {
|
||||||
// stub
|
$idn = idn_to_ascii($host, \IDNA_NONTRANSITIONAL_TO_ASCII, \INTL_IDNA_VARIANT_UTS46);
|
||||||
return $host;
|
return $idn !== false ? idn_to_utf8($idn, \IDNA_NONTRANSITIONAL_TO_UNICODE, \INTL_IDNA_VARIANT_UTS46) : $host;
|
||||||
}
|
|
||||||
|
|
||||||
protected static function normalizeIPv6(string $addr): string {
|
|
||||||
// stub
|
|
||||||
return $addr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Normalizes the whole path segment to remove empty segments and relative segments */
|
/** Normalizes the whole path segment to remove empty segments and relative segments */
|
||||||
|
|
|
@ -58,6 +58,9 @@ class TestURL extends \JKingWeb\Arsse\Test\AbstractTest {
|
||||||
["http://example.com/a/../b/", "http://example.com/b/"],
|
["http://example.com/a/../b/", "http://example.com/b/"],
|
||||||
["http://example.com/.a/", "http://example.com/.a/"],
|
["http://example.com/.a/", "http://example.com/.a/"],
|
||||||
["http://example.com/..a/", "http://example.com/..a/"],
|
["http://example.com/..a/", "http://example.com/..a/"],
|
||||||
|
["http://日本.example.com/", "http://日本.example.com/"],
|
||||||
|
["http://EXAMPLE.COM/", "http://example.com/"],
|
||||||
|
["http://É.example.com/", "http://é.example.com/"],
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue