mirror of
https://code.mensbeam.com/MensBeam/Arsse.git
synced 2025-01-08 17:02:41 +00:00
Percent-encoding and IPv6 normalization
This commit is contained in:
parent
12fe786a2f
commit
bd71ddb929
2 changed files with 59 additions and 19 deletions
|
@ -10,14 +10,6 @@ namespace JKingWeb\Arsse\Misc;
|
||||||
* A collection of functions for manipulating URLs
|
* A collection of functions for manipulating URLs
|
||||||
*/
|
*/
|
||||||
class URL {
|
class URL {
|
||||||
/** User component */
|
|
||||||
const P_USER = 1;
|
|
||||||
/** Password component */
|
|
||||||
const P_PASS = 2;
|
|
||||||
/** Path segment component */
|
|
||||||
const P_PATH = 3;
|
|
||||||
/** Full query component */
|
|
||||||
const P_QUERY = 4;
|
|
||||||
|
|
||||||
/** Normalizes an absolute URL
|
/** Normalizes an absolute URL
|
||||||
*
|
*
|
||||||
|
@ -45,15 +37,15 @@ class URL {
|
||||||
}
|
}
|
||||||
$out = strtolower($scheme)."://";
|
$out = strtolower($scheme)."://";
|
||||||
if (strlen($u ?? "")) {
|
if (strlen($u ?? "")) {
|
||||||
$out .= self::normalizePart(rawurlencode($u), self::P_USER);
|
$out .= self::normalizeEncoding(rawurlencode($u));
|
||||||
if (strlen($p ?? "")) {
|
if (strlen($p ?? "")) {
|
||||||
$out .= ":".self::normalizePart(rawurlencode($p), self::P_PASS);
|
$out .= ":".self::normalizeEncoding(rawurlencode($p));
|
||||||
}
|
}
|
||||||
$out .= "@";
|
$out .= "@";
|
||||||
} elseif (strlen($user ?? "")) {
|
} elseif (strlen($user ?? "")) {
|
||||||
$out .= self::normalizePart($user, self::P_USER);
|
$out .= self::normalizeEncoding($user);
|
||||||
if (strlen($pass ?? "")) {
|
if (strlen($pass ?? "")) {
|
||||||
$out .= ":".self::normalizePart($pass, self::P_PASS);
|
$out .= ":".self::normalizeEncoding($pass);
|
||||||
}
|
}
|
||||||
$out .= "@";
|
$out .= "@";
|
||||||
}
|
}
|
||||||
|
@ -61,26 +53,68 @@ class URL {
|
||||||
$out .= isset($port) ? ":$port" : "";
|
$out .= isset($port) ? ":$port" : "";
|
||||||
$out .= self::normalizePath($path ?? "");
|
$out .= self::normalizePath($path ?? "");
|
||||||
if (isset($query) && strlen($query)) {
|
if (isset($query) && strlen($query)) {
|
||||||
$out .= "?".self::normalizePart($query, self::P_QUERY);
|
$out .= "?".self::normalizeEncoding($query);
|
||||||
}
|
}
|
||||||
return $out;
|
return $out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Perform percent-encoding normalization for a given URL component
 *
 * The component is scanned one byte at a time and rewritten as follows:
 *
 * - A "%" followed by two hexadecimal digits is read as a single encoded byte; a "%" which is not followed by two hexadecimal digits is treated as a literal percent sign
 * - Bytes below 0x21 or above 0x7E, as well as the percent sign itself, are always output percent-encoded
 * - ASCII letters, digits, ".", "_", "~", and "-" are always output literally, even if they were encoded in the input
 * - Any other byte keeps the form (literal or encoded) it had in the input
 *
 * Percent-encoded output always uses exactly two upper-case hexadecimal digits
 *
 * @param string $part The URL component to normalize
 * @return string The normalized component
 */
protected static function normalizeEncoding(string $part): string {
    $pos = 0;
    $end = strlen($part);
    $out = "";
    // process each byte in sequence
    while ($pos < $end) {
        $c = $part[$pos];
        // unless a valid escape sequence is found below, the byte stands for itself
        $d = ord($c);
        $encoded = false;
        if ($c === "%") {
            // the % character signals an encoded byte...
            $hex = substr($part, $pos + 1, 2);
            if (preg_match("/^[0-9a-fA-F]{2}$/", $hex)) {
                $d = hexdec($hex);
                $encoded = true;
                // skip over the two hex digits which were just consumed
                $pos += 2;
            }
            // ... unless there are fewer than two characters left in the string or the two characters are not hex digits, in which case the % is literal
        }
        if ($d < 0x21 || $d > 0x7E || $d == 0x25) {
            // these bytes are always encoded; the value is zero-padded because dechex() alone would yield a single digit for bytes below 0x10 (e.g. "%A" instead of "%0A")
            $out .= sprintf("%%%02X", $d);
        } elseif (preg_match("/[a-zA-Z0-9\._~-]/", chr($d))) {
            // these characters are never encoded
            $out .= chr($d);
        } elseif ($encoded) {
            // other characters which were encoded in the input stay encoded, with upper-case hex digits
            $out .= sprintf("%%%02X", $d);
        } else {
            // other characters which were literal in the input are passed through as-is
            $out .= $c;
        }
        $pos++;
    }
    return $out;
}
|
||||||
|
|
||||||
/** Normalizes a hostname per IDNA:2008
 *
 * A host enclosed in square brackets whose content inet_pton() can parse is returned in the textual form produced by inet_ntop(), still enclosed in brackets
 *
 * Any other host is converted to ASCII and back to Unicode with the UTS #46 non-transitional IDNA functions; if the conversion to ASCII fails the host is returned unchanged
 *
 * @param string $host The host component of a URL
 * @return string The normalized host
 */
protected static function normalizeHost(string $host): string {
    if ($host === "") {
        // there is nothing to normalize, and returning early avoids reading offset 0 of an empty string below
        return $host;
    }
    if ($host[0] === "[" && substr($host, -1) === "]") {
        // normalize IPv6 addresses; the warning inet_pton() raises for unparseable input is suppressed because such hosts simply fall through to the IDNA handling below
        $addr = @inet_pton(substr($host, 1, strlen($host) - 2));
        if ($addr !== false) {
            return "[".inet_ntop($addr)."]";
        }
    }
    $idn = idn_to_ascii($host, \IDNA_NONTRANSITIONAL_TO_ASCII, \INTL_IDNA_VARIANT_UTS46);
    return $idn !== false ? idn_to_utf8($idn, \IDNA_NONTRANSITIONAL_TO_UNICODE, \INTL_IDNA_VARIANT_UTS46) : $host;
}
|
||||||
|
|
||||||
/** Normalizes the whole path segment to remove empty segments and relative segments */
|
/** Normalizes the whole path segment to remove empty segments and relative segments */
|
||||||
protected static function normalizePath(string $path): string {
|
protected static function normalizePath(string $path): string {
|
||||||
$parts = explode("/", $path);
|
$parts = explode("/", self::normalizeEncoding($path));
|
||||||
$out = [];
|
$out = [];
|
||||||
foreach($parts as $p) {
|
foreach($parts as $p) {
|
||||||
switch ($p) {
|
switch ($p) {
|
||||||
|
@ -91,7 +125,7 @@ class URL {
|
||||||
array_pop($out);
|
array_pop($out);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
$out[] = self::normalizePart($p, self::P_PATH);
|
$out[] = $p;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return str_replace("//", "/", "/".implode("/", $out).(substr($path, -1) === "/" ? "/" : ""));
|
return str_replace("//", "/", "/".implode("/", $out).(substr($path, -1) === "/" ? "/" : ""));
|
||||||
|
|
|
@ -23,7 +23,6 @@ class TestURL extends \JKingWeb\Arsse\Test\AbstractTest {
|
||||||
return [
|
return [
|
||||||
["/", "/"],
|
["/", "/"],
|
||||||
["//example.com/", "//example.com/"],
|
["//example.com/", "//example.com/"],
|
||||||
["http://[::1]/", "http://[::1]/"],
|
|
||||||
["http://example.com/", "http://example.com/"],
|
["http://example.com/", "http://example.com/"],
|
||||||
["HTTP://example.com/", "http://example.com/"],
|
["HTTP://example.com/", "http://example.com/"],
|
||||||
["http://example.com", "http://example.com/"],
|
["http://example.com", "http://example.com/"],
|
||||||
|
@ -61,6 +60,13 @@ class TestURL extends \JKingWeb\Arsse\Test\AbstractTest {
|
||||||
["http://日本.example.com/", "http://日本.example.com/"],
|
["http://日本.example.com/", "http://日本.example.com/"],
|
||||||
["http://EXAMPLE.COM/", "http://example.com/"],
|
["http://EXAMPLE.COM/", "http://example.com/"],
|
||||||
["http://É.example.com/", "http://é.example.com/"],
|
["http://É.example.com/", "http://é.example.com/"],
|
||||||
|
["http://[::1]/", "http://[::1]/"],
|
||||||
|
["http://[0::1]/", "http://[::1]/"],
|
||||||
|
["http://[Z]/", "http://[z]/"],
|
||||||
|
["http://example.com/ ?%61=%3d", "http://example.com/%20?a=%3D"],
|
||||||
|
["http://example.com/%", "http://example.com/%25"],
|
||||||
|
["http://example.com/%a", "http://example.com/%25a"],
|
||||||
|
["http://example.com/%za", "http://example.com/%25za"],
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue