<?php
/** @license MIT
 * Copyright 2017 J. King, Dustin Wilson et al.
 * See LICENSE and AUTHORS files for details */

declare(strict_types=1);
namespace JKingWeb\Arsse\Misc;

/**
 * A collection of functions for manipulating URLs
 */
class URL {

    /** Returns whether a URL is absolute i.e. has a scheme
     *
     * URLs which cannot be parsed at all are reported as not absolute
     *
     * @param string $url The URL to examine
     */
    public static function absolute(string $url): bool {
        // parse_url() yields null when there is no scheme and false on failure; both cast to an empty string
        $scheme = (string) parse_url($url, \PHP_URL_SCHEME);
        return $scheme !== "";
    }

    /** Normalizes a URL
     *
     * Normalizations performed are:
     *
     * - Lowercasing scheme
     * - Lowercasing ASCII host names
     * - IDN normalization
     * - IPv6 address normalization
     * - Resolution of relative path segments
     * - Discarding empty path segments
     * - Discarding empty queries
     * - Generic percent-encoding normalization
     * - Fragment discarding
     *
     * It does NOT drop trailing slashes from paths, nor does it perform Unicode normalization or context-aware percent-encoding normalization
     *
     * Credentials (whether supplied or already present in the URL) are only emitted when the URL has a host.
     * If the URL is so malformed that it cannot be parsed, it is returned unchanged
     *
     * @param string $url The URL to normalize
     * @param string|null $u Username to add to the URL, replacing any existing credentials
     * @param string|null $p Password to add to the URL, if a username is specified
     * @return string The normalized URL
     */
    public static function normalize(string $url, string $u = null, string $p = null): string {
        $parts = parse_url($url);
        if (!is_array($parts)) {
            // parse_url() returns false for seriously malformed URLs; there is nothing we can safely normalize
            return $url;
        }
        $out = "";
        if (isset($parts['scheme'])) {
            $out .= strtolower($parts['scheme']).":";
        }
        $hasHost = isset($parts['host']);
        if ($hasHost) {
            $out .= "//";
            if (strlen($u ?? "")) {
                // explicitly supplied credentials are raw strings and must be encoded first
                $out .= self::normalizeEncoding(rawurlencode($u));
                if (strlen($p ?? "")) {
                    $out .= ":".self::normalizeEncoding(rawurlencode($p));
                }
                $out .= "@";
            } elseif (strlen($parts['user'] ?? "")) {
                // credentials from the URL itself are already encoded and only need normalizing
                $out .= self::normalizeEncoding($parts['user']);
                if (strlen($parts['pass'] ?? "")) {
                    $out .= ":".self::normalizeEncoding($parts['pass']);
                }
                $out .= "@";
            }
            $out .= self::normalizeHost($parts['host']);
            if (isset($parts['port'])) {
                $out .= ":".$parts['port'];
            }
        }
        $out .= self::normalizePath($parts['path'] ?? "", $hasHost);
        if (strlen($parts['query'] ?? "")) {
            $out .= "?".self::normalizeEncoding($parts['query']);
        }
        // the fragment, if any, is deliberately never appended
        return $out;
    }

    /** Perform percent-encoding normalization for a given URL component
     *
     * - Bytes outside the printable ASCII range, spaces, and stray percent signs are always encoded
     * - Unreserved characters (letters, digits, "-", ".", "_", "~") are always decoded
     * - Any other character is left in whichever form (literal or encoded) it had in the input
     *
     * Percent-encoded sequences in the output always use two uppercase hexadecimal digits
     *
     * @param string $part The URL component to normalize
     */
    protected static function normalizeEncoding(string $part): string {
        $pos = 0;
        $end = strlen($part);
        $out = "";
        // process each byte in sequence
        while ($pos < $end) {
            $c = $part[$pos];
            $d = ord($c);
            if ($c === "%") {
                // the % character signals an encoded byte, unless it is not followed by exactly two hex digits
                $hex = substr($part, $pos + 1, 2);
                if (preg_match("/^[0-9a-fA-F]{2}$/", $hex)) {
                    $d = hexdec($hex);
                    $pos += 2;
                }
                // otherwise $d remains 0x25 and the stray percent sign is itself encoded below
            }
            if ($d < 0x21 || $d > 0x7E || $d === 0x25) {
                // these bytes are always encoded; the hex value must be zero-padded to two digits
                $out .= sprintf("%%%02X", $d);
            } elseif (preg_match("/[a-zA-Z0-9\._~-]/", chr($d))) {
                // these characters are never encoded
                $out .= chr($d);
            } elseif ($c === "%") {
                // a reserved character which was encoded in the input stays encoded
                $out .= sprintf("%%%02X", $d);
            } else {
                // a reserved character which was literal in the input stays literal
                $out .= $c;
            }
            $pos++;
        }
        return $out;
    }

    /** Normalizes a hostname per IDNA:2008
     *
     * Bracketed IPv6 addresses are rewritten in their canonical textual form; other hosts are
     * round-tripped through IDN conversion. If any conversion fails the host is returned as given
     *
     * @param string $host The host component of a URL
     */
    protected static function normalizeHost(string $host): string {
        if ($host === "") {
            return "";
        }
        if ($host[0] === "[" && substr($host, -1) === "]") {
            // normalize IPv6 addresses; inet_pton() warns on invalid input, which we treat as "not an address"
            $addr = @inet_pton(substr($host, 1, strlen($host) - 2));
            if ($addr !== false) {
                return "[".inet_ntop($addr)."]";
            }
        }
        $ascii = idn_to_ascii($host, \IDNA_NONTRANSITIONAL_TO_ASCII, \INTL_IDNA_VARIANT_UTS46);
        if ($ascii === false) {
            return $host;
        }
        $unicode = idn_to_utf8($ascii, \IDNA_NONTRANSITIONAL_TO_UNICODE, \INTL_IDNA_VARIANT_UTS46);
        // the reverse conversion can also fail, and false would violate the return type
        return $unicode !== false ? $unicode : $host;
    }

    /** Normalizes the whole path segment to remove empty segments and relative segments
     *
     * Percent-encoding is normalized before segments are examined, so encoded dot segments are resolved as well.
     * Parent references which would climb above the first segment are discarded.
     * A trailing slash is emitted only if the input path ended with one
     *
     * @param string $path The path component of a URL; may be an empty string
     * @param bool $hasHost Whether the URL has a host, in which case the path is always treated as absolute
     */
    protected static function normalizePath(string $path, bool $hasHost): string {
        $parts = explode("/", self::normalizeEncoding($path));
        // the null-coalescing guard avoids reading an uninitialized offset when the path is empty
        $absolute = ($hasHost || ($path[0] ?? "") === "/");
        $index = (substr($path, -1) === "/");
        $out = [];
        foreach ($parts as $p) {
            switch ($p) {
                case "":
                case ".":
                    // empty and self-referential segments are dropped
                    break;
                case "..":
                    // popping an empty array is a no-op, so excess parent references vanish
                    array_pop($out);
                    break;
                default:
                    $out[] = $p;
            }
        }
        $out = implode("/", $out);
        $out = ($absolute ? "/" : "").$out.($index ? "/" : "");
        // an absolute path with no remaining segments and a trailing slash would otherwise come out as "//"
        return str_replace("//", "/", $out);
    }

    /** Appends data to a URL's query component
     *
     * The data is inserted before the fragment, if there is one. A "?" appearing only within
     * the fragment is not considered to start a query. If the existing query is empty or
     * already ends with the glue, no further glue is inserted
     *
     * @param string $url The input URL
     * @param string $data The data to append. This should already be escaped where necessary and not start with any delimiter
     * @param string $glue The query subcomponent delimiter, usually "&". If the URL has no query, "?" will be prepended instead
     * @return string The URL with the data appended, or the input URL if the data is an empty string
     */
    public static function queryAppend(string $url, string $data, string $glue = "&"): string {
        if (!strlen($data)) {
            return $url;
        }
        // split the URL into the part before the fragment and the fragment itself (including its "#")
        $insPos = strpos($url, "#");
        $insPos = $insPos === false ? strlen($url) : $insPos;
        $base = substr($url, 0, $insPos);
        $fragment = substr($url, $insPos);
        // only a "?" before the fragment denotes a query
        $qPos = strpos($base, "?");
        if ($qPos === false) {
            $glue = "?";
        } elseif ($qPos === $insPos - 1 || ($glue !== "" && substr($base, -strlen($glue)) === $glue)) {
            // if the URL already has excess glue (or an empty query), use it
            $glue = "";
        }
        return $base.$glue.$data.$fragment;
    }
}