mirror of
https://code.mensbeam.com/MensBeam/Arsse.git
synced 2025-01-25 12:30:34 +00:00
367 lines
16 KiB
PHP
367 lines
16 KiB
PHP
<?php
|
|
/** @license MIT
|
|
* Copyright 2017 J. King, Dustin Wilson et al.
|
|
* See LICENSE and AUTHORS files for details */
|
|
|
|
declare(strict_types=1);
|
|
namespace JKingWeb\Arsse\REST\TinyTinyRSS;
|
|
|
|
use JKingWeb\Arsse\Context\Context;
|
|
use JKingWeb\Arsse\Misc\Date;
|
|
|
|
/**
 * Parser for search strings of the Tiny Tiny RSS protocol implementation.
 *
 * A search string is a space-separated list of tokens. As implemented by parse():
 *
 * - a token may be wrapped in double quotes so that it can contain spaces;
 *   inside quotes both `""` and `\"` produce a literal double quote
 * - a leading `-` negates the token
 * - a leading `@` marks the token as a date
 * - `tag:value` applies the value to the field named by the tag; tags which
 *   are not listed in FIELDS_BOOLEAN or FIELDS_TEXT are searched for literally
 *
 * The result of parsing is expressed as constraints set on a Context object.
 */
class Search {
    // Parser states. The *_QUOTED variants are the same states while inside double quotes.
    /** Between tokens: nothing but an optional leading "-" has been consumed */
    const STATE_BEFORE_TOKEN = 0;
    const STATE_BEFORE_TOKEN_QUOTED = 1;
    /** After a leading "@": the rest of the token is a date */
    const STATE_IN_DATE = 2;
    const STATE_IN_DATE_QUOTED = 3;
    /** Reading text which becomes a tag if a ":" follows, or a plain term otherwise */
    const STATE_IN_TOKEN_OR_TAG = 4;
    const STATE_IN_TOKEN_OR_TAG_QUOTED = 5;
    /** Reading the value which follows a ":" */
    const STATE_IN_TOKEN = 6;
    const STATE_IN_TOKEN_QUOTED = 7;

    /** Tags which accept a "true" or "false" value, mapped to the Context member they set */
    const FIELDS_BOOLEAN = [
        "unread" => "unread",
        "star"   => "starred",
        "note"   => "annotated",
        "pub"    => "published", // TODO: not implemented
    ];
    /** Tags which accept text, mapped to the Context member collecting the terms; the empty tag is the general search */
    const FIELDS_TEXT = [
        "title"  => "titleTerms",
        "author" => "authorTerms",
        "note"   => "annotationTerms",
        ""       => "searchTerms",
    ];

    /**
     * Parses a search string and applies the resulting constraints to a context
     *
     * The input is normalized first: runs of whitespace are collapsed to a single
     * space, the string is trimmed, and it is converted to lower case.
     *
     * @param string $search The search string to parse
     * @param Context $context The context to add constraints to; a new Context is created when null
     * @return Context|null The context, or null if an Exception (unqualified, and so resolved
     *         within this namespace) was thrown while processing a token; setDate() and
     *         setBoolean() throw it for contradictory or unsatisfiable criteria
     */
    public static function parse(string $search, Context $context = null) {
        // normalize the input
        $search = strtolower(trim(preg_replace("<\s+>", " ", $search)));
        // set initial state
        $pos = -1;
        $stop = strlen($search);
        $state = self::STATE_BEFORE_TOKEN;
        $buffer = "";
        $tag = "";
        $flag_negative = false;
        $context = $context ?? new Context;
        // process
        try {
            // the loop deliberately runs one position past the end of the string:
            // on that pass $char is the empty string, which lets every state flush its pending token
            while (++$pos <= $stop) {
                $char = $search[$pos] ?? "";
                // NOTE: "continue 3" (from an inner switch) and "continue 2" both advance the while loop
                switch ($state) {
                    case self::STATE_BEFORE_TOKEN:
                        switch ($char) {
                            case "":
                            case " ":
                                continue 3;
                            case '"':
                                if ($flag_negative) {
                                    // a quote after "-" is kept as a literal character of an unquoted token
                                    $buffer .= $char;
                                    $state = self::STATE_IN_TOKEN_OR_TAG;
                                } else {
                                    $state = self::STATE_BEFORE_TOKEN_QUOTED;
                                }
                                continue 3;
                            case "-":
                                if (!$flag_negative) {
                                    $flag_negative = true;
                                } else {
                                    // only the first "-" negates; a second one is part of the token
                                    $buffer .= $char;
                                    $state = self::STATE_IN_TOKEN_OR_TAG;
                                }
                                continue 3;
                            case "@":
                                $state = self::STATE_IN_DATE;
                                continue 3;
                            case ":":
                                $state = self::STATE_IN_TOKEN;
                                continue 3;
                            default:
                                $buffer .= $char;
                                $state = self::STATE_IN_TOKEN_OR_TAG;
                                continue 3;
                        }
                        // no break
                    case self::STATE_BEFORE_TOKEN_QUOTED:
                        switch ($char) {
                            case "":
                                continue 3;
                            case '"':
                                if (($pos + 1 === $stop) || $search[$pos + 1] === " ") {
                                    // closing quote at the end of the token
                                    $context = self::processToken($context, $buffer, $tag, $flag_negative, false);
                                    $state = self::STATE_BEFORE_TOKEN;
                                    $flag_negative = false;
                                    $buffer = $tag = "";
                                } elseif ($search[$pos + 1] === '"') {
                                    // doubled quote: literal quote character
                                    $buffer .= '"';
                                    $pos++;
                                    $state = self::STATE_IN_TOKEN_OR_TAG_QUOTED;
                                } else {
                                    // closing quote followed by more text: carry on unquoted
                                    $state = self::STATE_IN_TOKEN_OR_TAG;
                                }
                                continue 3;
                            case "\\":
                                if ($pos + 1 < $stop && $search[$pos + 1] === '"') {
                                    // escaped quote: literal quote character
                                    $buffer .= '"';
                                    $pos++;
                                } else {
                                    // any other backslash is kept as-is
                                    $buffer .= $char;
                                }
                                $state = self::STATE_IN_TOKEN_OR_TAG_QUOTED;
                                continue 3;
                            case "-":
                                if (!$flag_negative) {
                                    $flag_negative = true;
                                } else {
                                    $buffer .= $char;
                                    $state = self::STATE_IN_TOKEN_OR_TAG_QUOTED;
                                }
                                continue 3;
                            case "@":
                                $state = self::STATE_IN_DATE_QUOTED;
                                continue 3;
                            case ":":
                                $state = self::STATE_IN_TOKEN_QUOTED;
                                continue 3;
                            default:
                                $buffer .= $char;
                                $state = self::STATE_IN_TOKEN_OR_TAG_QUOTED;
                                continue 3;
                        }
                        // no break
                    case self::STATE_IN_DATE:
                        // consume everything up to the next space (or the end of input) in one go
                        while ($pos < $stop && $search[$pos] !== " ") {
                            $buffer .= $search[$pos++];
                        }
                        $context = self::processToken($context, $buffer, $tag, $flag_negative, true);
                        $state = self::STATE_BEFORE_TOKEN;
                        $flag_negative = false;
                        $buffer = $tag = "";
                        continue 2;
                    case self::STATE_IN_DATE_QUOTED:
                        switch ($char) {
                            case "":
                            case '"':
                                if (($pos + 1 >= $stop) || $search[$pos + 1] === " ") {
                                    $context = self::processToken($context, $buffer, $tag, $flag_negative, true);
                                    $state = self::STATE_BEFORE_TOKEN;
                                    $flag_negative = false;
                                    $buffer = $tag = "";
                                } elseif ($search[$pos + 1] === '"') {
                                    $buffer .= '"';
                                    $pos++;
                                } else {
                                    $state = self::STATE_IN_DATE;
                                }
                                continue 3;
                            case "\\":
                                if ($pos + 1 < $stop && $search[$pos + 1] === '"') {
                                    $buffer .= '"';
                                    $pos++;
                                } else {
                                    $buffer .= $char;
                                }
                                continue 3;
                            default:
                                $buffer .= $char;
                                continue 3;
                        }
                        // no break
                    case self::STATE_IN_TOKEN:
                        // consume everything up to the next space (or the end of input) in one go
                        while ($pos < $stop && $search[$pos] !== " ") {
                            $buffer .= $search[$pos++];
                        }
                        if (!strlen($tag)) {
                            // a colon without a tag before it is part of the term itself
                            $buffer = ":".$buffer;
                        }
                        $context = self::processToken($context, $buffer, $tag, $flag_negative, false);
                        $state = self::STATE_BEFORE_TOKEN;
                        $flag_negative = false;
                        $buffer = $tag = "";
                        continue 2;
                    case self::STATE_IN_TOKEN_QUOTED:
                        switch ($char) {
                            case "":
                            case '"':
                                if (($pos + 1 >= $stop) || $search[$pos + 1] === " ") {
                                    if (!strlen($tag)) {
                                        // a colon without a tag before it is part of the term itself
                                        $buffer = ":".$buffer;
                                    }
                                    $context = self::processToken($context, $buffer, $tag, $flag_negative, false);
                                    $state = self::STATE_BEFORE_TOKEN;
                                    $flag_negative = false;
                                    $buffer = $tag = "";
                                } elseif ($search[$pos + 1] === '"') {
                                    $buffer .= '"';
                                    $pos++;
                                } else {
                                    $state = self::STATE_IN_TOKEN;
                                }
                                continue 3;
                            case "\\":
                                if ($pos + 1 < $stop && $search[$pos + 1] === '"') {
                                    $buffer .= '"';
                                    $pos++;
                                } else {
                                    $buffer .= $char;
                                }
                                continue 3;
                            default:
                                $buffer .= $char;
                                continue 3;
                        }
                        // no break
                    case self::STATE_IN_TOKEN_OR_TAG:
                        switch ($char) {
                            case "":
                            case " ":
                                $context = self::processToken($context, $buffer, $tag, $flag_negative, false);
                                $state = self::STATE_BEFORE_TOKEN;
                                $flag_negative = false;
                                $buffer = $tag = "";
                                continue 3;
                            case ":":
                                // what has been read so far was a tag; the value follows
                                $tag = $buffer;
                                $buffer = "";
                                $state = self::STATE_IN_TOKEN;
                                continue 3;
                            default:
                                $buffer .= $char;
                                continue 3;
                        }
                        // no break
                    case self::STATE_IN_TOKEN_OR_TAG_QUOTED:
                        switch ($char) {
                            case "":
                            case '"':
                                if (($pos + 1 >= $stop) || $search[$pos + 1] === " ") {
                                    $context = self::processToken($context, $buffer, $tag, $flag_negative, false);
                                    $state = self::STATE_BEFORE_TOKEN;
                                    $flag_negative = false;
                                    $buffer = $tag = "";
                                } elseif ($search[$pos + 1] === '"') {
                                    $buffer .= '"';
                                    $pos++;
                                } else {
                                    $state = self::STATE_IN_TOKEN_OR_TAG;
                                }
                                continue 3;
                            case "\\":
                                if ($pos + 1 < $stop && $search[$pos + 1] === '"') {
                                    $buffer .= '"';
                                    $pos++;
                                } else {
                                    $buffer .= $char;
                                }
                                continue 3;
                            case ":":
                                // what has been read so far was a tag; the value follows
                                $tag = $buffer;
                                $buffer = "";
                                $state = self::STATE_IN_TOKEN_QUOTED;
                                continue 3;
                            default:
                                $buffer .= $char;
                                continue 3;
                        }
                        // no break
                    default:
                        throw new \Exception; // @codeCoverageIgnore
                }
            }
        } catch (Exception $e) {
            // NOTE(review): "Exception" is unqualified and therefore resolves within the
            // current namespace, not to the global \Exception thrown just above
            return null;
        }
        return $context;
    }

    /**
     * Applies a single parsed token to the context
     *
     * @param Context $c The context to modify
     * @param string $value The value of the token
     * @param string $tag The tag which preceded the value, or an empty string
     * @param bool $neg Whether the token was negated
     * @param bool $date Whether the token was marked as a date
     * @return Context The value returned by setDate(), setBoolean() or addTerm(), or $c itself for an empty token
     */
    protected static function processToken(Context $c, string $value, string $tag, bool $neg, bool $date): Context {
        if (!strlen($value)) {
            if (!strlen($tag)) {
                // nothing to process
                return $c;
            }
            // if a tag has an empty value, the tag is treated as a search term instead
            $value = "$tag:";
            $tag = "";
        }
        if ($date) {
            return self::setDate($value, $c, $neg);
        }
        if (isset(self::FIELDS_BOOLEAN[$tag])) {
            return self::setBoolean($tag, $value, $c, $neg);
        }
        return self::addTerm($tag, $value, $c, $neg);
    }

    /**
     * Appends a text term to the list of terms for the field named by a tag
     *
     * Tags not listed in FIELDS_TEXT are folded back into the value as "tag:value"
     * and added to the general search terms.
     *
     * @param string $tag The tag of the token, or an empty string
     * @param string $value The term to add
     * @param Context $c The context to modify; if $neg is true its "not" member is modified instead
     * @param bool $neg Whether the term is negated
     * @return Context Whatever the context's setter returns
     */
    protected static function addTerm(string $tag, string $value, Context $c, bool $neg): Context {
        $c = $neg ? $c->not : $c;
        $type = self::FIELDS_TEXT[$tag] ?? "";
        if (!$type) {
            // unknown tag: search for the whole token literally
            $value = "$tag:$value";
            $type = self::FIELDS_TEXT[""];
        }
        $terms = array_merge($c->$type ?? [], [$value]);
        return $c->$type($terms);
    }

    /**
     * Restricts the context to a single day
     *
     * The day is expressed as a modifiedSince/notModifiedSince pair running from
     * 00:00:00 to 23:59:59 with a +00:00 offset.
     *
     * @param string $value The date as written in the search string; passed to Date::normalize()
     * @param Context $c The context to modify; if $neg is true the bounds are set on its "not" member
     * @param bool $neg Whether the date is negated
     * @return Context The context $c
     * @throws Exception If a different date range has already been set
     */
    protected static function setDate(string $value, Context $c, bool $neg): Context {
        $spec = Date::normalize($value);
        // TTRSS treats invalid dates as the start of the Unix epoch; we ignore them instead
        if (!$spec) {
            return $c;
        }
        $day = $spec->format("Y-m-d");
        $start = $day."T00:00:00+00:00";
        $end = $day."T23:59:59+00:00";
        // if a date is already set, the same date is a no-op; anything else is a contradiction
        $cc = $neg ? $c->not : $c;
        if ($cc->modifiedSince() || $cc->notModifiedSince()) {
            if (!$cc->modifiedSince() || !$cc->notModifiedSince() || $cc->modifiedSince->format("c") !== $start || $cc->notModifiedSince->format("c") !== $end) {
                // FIXME: multiple negative dates should be allowed, but the design of the Context class does not support this
                throw new Exception;
            }
            return $c;
        }
        $cc->modifiedSince($start);
        $cc->notModifiedSince($end);
        return $c;
    }

    /**
     * Sets a boolean field of the context from a "true" or "false" value
     *
     * Any other value causes the token to be handled as a text term by addTerm().
     * Negation inverts the value rather than targeting the context's "not" member.
     *
     * @param string $tag The tag of the token; expected to be a key of FIELDS_BOOLEAN
     * @param string $value The value of the token
     * @param Context $c The context to modify
     * @param bool $neg Whether the token is negated
     * @return Context Whatever the context's setter returns, or $c itself when nothing needs to change
     * @throws Exception If the field is already set to the opposite value, or if "pub" is effectively true
     */
    protected static function setBoolean(string $tag, string $value, Context $c, bool $neg): Context {
        $set = ["true" => true, "false" => false][$value] ?? null;
        if ($set === null) {
            // not a boolean value after all
            return self::addTerm($tag, $value, $c, $neg);
        }
        // apply negation
        $set = $neg ? !$set : $set;
        if ($tag === "pub") {
            // TODO: this needs to be implemented correctly if the Published feed is implemented
            // currently specifying true will always yield an empty result (nothing is ever published), and specifying false is a no-op (matches everything)
            if ($set) {
                throw new Exception;
            }
            return $c;
        }
        $field = (self::FIELDS_BOOLEAN[$tag] ?? "");
        if (!$c->$field()) {
            // field has not yet been set; set it
            return $c->$field($set);
        } elseif ($c->$field == $set) {
            // field is already set to same value; do nothing
            return $c;
        }
        // contradiction: query would return no results
        throw new Exception;
    }
}
|