From dc750acf079f4041483d7910a9d8cf5ddf91e93f Mon Sep 17 00:00:00 2001 From: "J. King" Date: Tue, 27 Aug 2019 11:08:13 -0400 Subject: [PATCH] Handle ports, paths, and credentials correctly --- lib/Misc/URL.php | 69 ++++++++++++++++++++++++++++-------- tests/cases/Misc/TestURL.php | 46 ++++++++++++++++++++---- 2 files changed, 93 insertions(+), 22 deletions(-) diff --git a/lib/Misc/URL.php b/lib/Misc/URL.php index e32b648e..c8589b03 100644 --- a/lib/Misc/URL.php +++ b/lib/Misc/URL.php @@ -6,7 +6,37 @@ declare(strict_types=1); namespace JKingWeb\Arsse\Misc; +/** + * A collection of functions for manipulating URLs + */ class URL { + /** User component */ + const P_USER = 1; + /** Password component */ + const P_PASS = 2; + /** Path segment component */ + const P_PATH = 3; + /** Full query component */ + const P_QUERY = 4; + + /** Normalizes an absolute URL + * + * Normalizations performed are: + * + * - Lowercasing scheme + * - Lowercasing host names + * - IDN normalization (IDN rather than punycode is returned) + * - IPv6 address normalization + * - Resolution of relative path segments + * - Discarding empty path segments + * - Discarding empty queries + * - %-encoding normalization + * - Fragment discarding + * + * @param string $url The URL to normalize. Relative URLs are returned unchanged + * @param string $u Username to add to the URL, replacing any existing credentials + * @param string $p Password to add to the URL, if a username is specified + */ public static function normalize(string $url, string $u = null, string $p = null): string { extract(parse_url($url)); if (!isset($scheme) || !isset($host) || !strlen($host)) { @@ -14,15 +44,15 @@ class URL { } $out = strtolower($scheme)."://"; if (strlen($u ?? "")) { - $out .= self::normalizePart($u, self::P_USER, false); + $out .= self::normalizePart(rawurlencode($u), self::P_USER, false); if (strlen($p ?? "")) { - $out .= ":".self::normalizePart($p, self::P_PASS, false); + $out .= ":".self::normalizePart(rawurlencode($p), self::P_PASS, false); } $out .= "@"; - } elseif (strlen($username ?? "")) { - $out .= self::normalizePart($username, self::P_USER); - if (strlen($password ?? "")) { - $out .= ":".self::normalizePart($username, self::P_PASS); + } elseif (strlen($user ?? "")) { + $out .= self::normalizePart($user, self::P_USER); + if (strlen($pass ?? "")) { + $out .= ":".self::normalizePart($pass, self::P_PASS); } $out .= "@"; } @@ -31,11 +61,8 @@ class URL { } else { $out .= self::normalizeHost($host); } - if (isset($path)) { - $out .= self::normalizePath($path); - } else { - $out .= "/"; - } + $out .= isset($port) ? ":$port" : ""; + $out .= self::normalizePath($path ?? ""); if (isset($query) && strlen($query)) { $out .= "?".self::normalizePart($query, self::P_QUERY); } @@ -57,10 +84,22 @@ class URL { return $addr; } + /** Normalizes the whole path segment to remove empty segments and relative segments */ protected static function normalizePath(string $path): string { - // stub - return $path; + $parts = explode("/", $path); + $out = []; + foreach($parts as $p) { + switch ($p) { + case "": + case ".": + break; + case "..": + array_pop($out); + break; + default: + $out[] = self::normalizePart($p, self::P_PATH); + } + } + return str_replace("//", "/", "/".implode("/", $out).(substr($path, -1) === "/" ? "/" : "")); } - - } diff --git a/tests/cases/Misc/TestURL.php b/tests/cases/Misc/TestURL.php index 0aa86a90..b6c95b26 100644 --- a/tests/cases/Misc/TestURL.php +++ b/tests/cases/Misc/TestURL.php @@ -15,17 +15,49 @@ class TestURL extends \JKingWeb\Arsse\Test\AbstractTest { } /** @dataProvider provideNormalizations */ - public function testNormalizeAUrl(string $in, string $exp) { - $this->assertSame($exp, URL::normalize($in)); + public function testNormalizeAUrl(string $url, string $exp, string $user = null, string $pass = null) { + $this->assertSame($exp, URL::normalize($url, $user, $pass)); } public function provideNormalizations() { return [ - ["/", "/"], - ["//example.com/", "//example.com/"], - ["http://example.com/", "http://example.com/"], - ["http://[::1]/", "http://[::1]/"], - ["HTTP://example.com/", "http://example.com/"], + ["/", "/"], + ["//example.com/", "//example.com/"], + ["http://[::1]/", "http://[::1]/"], + ["http://example.com/", "http://example.com/"], + ["HTTP://example.com/", "http://example.com/"], + ["http://example.com", "http://example.com/"], + ["http://example.com:/", "http://example.com/"], + ["HTTP://example.com:80/", "http://example.com:80/"], + ["HTTP://example.com:80", "http://example.com:80/"], + ["http://example.com/?", "http://example.com/"], + ["http://example.com?", "http://example.com/"], + ["http://example.com/#fragment", "http://example.com/"], + ["http://example.com#fragment", "http://example.com/"], + ["http://example.com?#", "http://example.com/"], + ["http://example.com/?key=value", "http://example.com/?key=value"], + ["http://example.com/", "http://user:pass@example.com/", "user", "pass"], + ["http://example.com/", "http://user@example.com/", "user"], + ["http://user:pass@example.com/", "http://user:pass@example.com/"], + ["http://user@example.com/", "http://user@example.com/"], + ["http://user:pass@example.com/", "http://u:p@example.com/", "u", "p"], + ["http://user:pass@example.com/", "http://u@example.com/", "u"], + ["http://user:pass@example.com/", "http://user:pass@example.com/", "", "p"], + ["http://example.com/", "http://example.com/", "", "p"], + ["http://example.com/path", "http://example.com/path"], + ["http://example.com/path/", "http://example.com/path/"], + ["http://example.com/path/.", "http://example.com/path"], + ["http://example.com/path/./", "http://example.com/path/"], + ["http://example.com/path/..", "http://example.com/"], + ["http://example.com/path/../", "http://example.com/"], + ["http://example.com/a/b/..", "http://example.com/a"], + ["http://example.com/a/b/../", "http://example.com/a/"], + ["http://example.com/../", "http://example.com/"], + ["http://example.com////", "http://example.com/"], + ["http://example.com/a/./b/", "http://example.com/a/b/"], + ["http://example.com/a/../b/", "http://example.com/b/"], + ["http://example.com/.a/", "http://example.com/.a/"], + ["http://example.com/..a/", "http://example.com/..a/"], ]; } }