1
1
Fork 0
mirror of https://code.mensbeam.com/MensBeam/Arsse.git synced 2024-12-31 21:12:41 +00:00

Add parser for TTRSS search strings

This commit is contained in:
J. King 2019-02-28 15:31:33 -05:00
parent 95de375e0b
commit 85307bc90a
3 changed files with 488 additions and 0 deletions

View file

@ -0,0 +1,361 @@
<?php
/** @license MIT
* Copyright 2017 J. King, Dustin Wilson et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace JKingWeb\Arsse\REST\TinyTinyRSS;
use JKingWeb\Arsse\Context\Context;
use JKingWeb\Arsse\Misc\Date;
/**
 * Parser for Tiny Tiny RSS search strings
 *
 * A search string is read one character at a time by a small state machine;
 * each completed token is applied to a Context as either a search term,
 * a boolean field, or a date range.
 */
class Search {
    // Parser states. The "QUOTED" variants are the same states while inside a double-quoted token
    /** At the start of a token: leading "-", "@", ":" and '"' have special meaning here */
    const STATE_BEFORE_TOKEN = 0;
    const STATE_BEFORE_TOKEN_QUOTED = 1;
    /** After a leading "@": the rest of the token is a date */
    const STATE_IN_DATE = 2;
    const STATE_IN_DATE_QUOTED = 3;
    /** Reading text which becomes a tag if a ":" is encountered, or a plain token otherwise */
    const STATE_IN_TOKEN_OR_TAG = 4;
    const STATE_IN_TOKEN_OR_TAG_QUOTED = 5;
    /** After a ":": the rest of the token is the value, and further colons are not special */
    const STATE_IN_TOKEN = 6;
    const STATE_IN_TOKEN_QUOTED = 7;

    /** Maps search tags which accept "true" or "false" to the name of the matching Context member */
    const FIELDS_BOOLEAN = [
        "unread" => "unread",
        "star"   => "starred",
        "note"   => "annotated",
        "pub"    => "published", // TODO: not implemented
    ];
    /** Maps search tags which accept free text to the name of the matching Context member; the empty tag is the general search */
    const FIELDS_TEXT = [
        "title"  => "titleTerms",
        "author" => "authorTerms",
        "note"   => "annotationTerms",
        ""       => "searchTerms",
    ];

    /**
     * Parses a search string and applies its tokens to a context
     *
     * The input is trimmed, lower-cased, and has runs of whitespace collapsed to single spaces before parsing.
     *
     * NOTE(review): the unqualified "Exception" caught here (and thrown by setDate() and setBoolean()) is not
     * imported, so it resolves to a class named Exception in this file's own namespace; presumably that class
     * exists elsewhere in the project -- confirm.
     *
     * @param string $search The search string to parse
     * @param Context $context The context to apply the search to; if omitted a new Context is created
     * @return Context|null The resulting context, or null if a token handler threw an Exception (the handlers do this for contradictory tokens)
     */
    public static function parse(string $search, Context $context = null) {
        // normalize the input
        $search = strtolower(trim(preg_replace("<\s+>", " ", $search)));
        // set initial state
        $pos = -1;
        $stop = strlen($search);
        $state = self::STATE_BEFORE_TOKEN;
        $buffer = "";
        $tag = "";
        $flag_negative = false;
        $context = $context ?? new Context;
        // process
        try {
            // the loop deliberately runs one position past the end of the string:
            // at that position the character is the empty string, which serves as the end-of-input marker
            while (++$pos <= $stop) {
                $char = ($pos < $stop) ? $search[$pos] : "";
                switch ($state) {
                    case self::STATE_BEFORE_TOKEN:
                        switch ($char) {
                            case "":
                                continue 3;
                            case " ":
                                continue 3;
                            case '"':
                                if ($flag_negative) {
                                    // a quote after a negation marker is an ordinary character
                                    $buffer .= $char;
                                    $state = self::STATE_IN_TOKEN_OR_TAG;
                                } else {
                                    $state = self::STATE_BEFORE_TOKEN_QUOTED;
                                }
                                continue 3;
                            case "-":
                                if (!$flag_negative) {
                                    $flag_negative = true;
                                } else {
                                    // only the first hyphen negates; a second one starts the token proper
                                    $buffer .= $char;
                                    $state = self::STATE_IN_TOKEN_OR_TAG;
                                }
                                continue 3;
                            case "@":
                                $state = self::STATE_IN_DATE;
                                continue 3;
                            case ":":
                                $state = self::STATE_IN_TOKEN;
                                continue 3;
                            default:
                                $buffer .= $char;
                                $state = self::STATE_IN_TOKEN_OR_TAG;
                                continue 3;
                        }
                    case self::STATE_BEFORE_TOKEN_QUOTED:
                        switch ($char) {
                            case "":
                                continue 3;
                            case '"':
                                if (($pos + 1 == $stop) || $search[$pos + 1] === " ") {
                                    // closing quote at the end of the token
                                    $context = self::processToken($context, $buffer, $tag, $flag_negative, false);
                                    $state = self::STATE_BEFORE_TOKEN;
                                    $flag_negative = false;
                                    $buffer = $tag = "";
                                } elseif ($search[$pos + 1] === '"') {
                                    // a doubled quote is a literal quote
                                    $buffer .= '"';
                                    $pos++;
                                    $state = self::STATE_IN_TOKEN_OR_TAG_QUOTED;
                                } else {
                                    // a quote followed by anything else is dropped and the token continues unquoted
                                    $state = self::STATE_IN_TOKEN_OR_TAG;
                                }
                                continue 3;
                            case "\\":
                                if ($pos + 1 == $stop) {
                                    $buffer .= $char;
                                } elseif ($search[$pos + 1] === '"') {
                                    // a backslash-escaped quote is a literal quote
                                    $buffer .= '"';
                                    $pos++;
                                } else {
                                    $buffer .= $char;
                                }
                                $state = self::STATE_IN_TOKEN_OR_TAG_QUOTED;
                                continue 3;
                            case "-":
                                if (!$flag_negative) {
                                    $flag_negative = true;
                                } else {
                                    $buffer .= $char;
                                    $state = self::STATE_IN_TOKEN_OR_TAG_QUOTED;
                                }
                                continue 3;
                            case "@":
                                $state = self::STATE_IN_DATE_QUOTED;
                                continue 3;
                            case ":":
                                $state = self::STATE_IN_TOKEN_QUOTED;
                                continue 3;
                            default:
                                $buffer .= $char;
                                $state = self::STATE_IN_TOKEN_OR_TAG_QUOTED;
                                continue 3;
                        }
                    case self::STATE_IN_DATE:
                        // consume everything up to the next space (or the end of input) in one go
                        while ($pos < $stop && $search[$pos] !== " ") {
                            $buffer .= $search[$pos++];
                        }
                        $context = self::processToken($context, $buffer, $tag, $flag_negative, true);
                        $state = self::STATE_BEFORE_TOKEN;
                        $flag_negative = false;
                        $buffer = $tag = "";
                        continue 2;
                    case self::STATE_IN_DATE_QUOTED:
                        switch ($char) {
                            case "":
                            case '"':
                                if (($pos + 1 >= $stop) || $search[$pos + 1] === " ") {
                                    $context = self::processToken($context, $buffer, $tag, $flag_negative, true);
                                    $state = self::STATE_BEFORE_TOKEN;
                                    $flag_negative = false;
                                    $buffer = $tag = "";
                                } elseif ($search[$pos + 1] === '"') {
                                    $buffer .= '"';
                                    $pos++;
                                } else {
                                    $state = self::STATE_IN_DATE;
                                }
                                continue 3;
                            case "\\":
                                if ($pos + 1 == $stop) {
                                    $buffer .= $char;
                                } elseif ($search[$pos + 1] === '"') {
                                    $buffer .= '"';
                                    $pos++;
                                } else {
                                    $buffer .= $char;
                                }
                                continue 3;
                            default:
                                $buffer .= $char;
                                continue 3;
                        }
                    case self::STATE_IN_TOKEN:
                        // consume everything up to the next space (or the end of input) in one go
                        while ($pos < $stop && $search[$pos] !== " ") {
                            $buffer .= $search[$pos++];
                        }
                        if (!strlen($tag)) {
                            // the colon was not preceded by a tag, so it is part of the term itself
                            $buffer = ":".$buffer;
                        }
                        $context = self::processToken($context, $buffer, $tag, $flag_negative, false);
                        $state = self::STATE_BEFORE_TOKEN;
                        $flag_negative = false;
                        $buffer = $tag = "";
                        continue 2;
                    case self::STATE_IN_TOKEN_QUOTED:
                        switch ($char) {
                            case "":
                            case '"':
                                if (($pos + 1 >= $stop) || $search[$pos + 1] === " ") {
                                    if (!strlen($tag)) {
                                        // the colon was not preceded by a tag, so it is part of the term itself
                                        $buffer = ":".$buffer;
                                    }
                                    $context = self::processToken($context, $buffer, $tag, $flag_negative, false);
                                    $state = self::STATE_BEFORE_TOKEN;
                                    $flag_negative = false;
                                    $buffer = $tag = "";
                                } elseif ($search[$pos + 1] === '"') {
                                    $buffer .= '"';
                                    $pos++;
                                } else {
                                    $state = self::STATE_IN_TOKEN;
                                }
                                continue 3;
                            case "\\":
                                if ($pos + 1 == $stop) {
                                    $buffer .= $char;
                                } elseif ($search[$pos + 1] === '"') {
                                    $buffer .= '"';
                                    $pos++;
                                } else {
                                    $buffer .= $char;
                                }
                                continue 3;
                            default:
                                $buffer .= $char;
                                continue 3;
                        }
                    case self::STATE_IN_TOKEN_OR_TAG:
                        switch ($char) {
                            case "":
                            case " ":
                                $context = self::processToken($context, $buffer, $tag, $flag_negative, false);
                                $state = self::STATE_BEFORE_TOKEN;
                                $flag_negative = false;
                                $buffer = $tag = "";
                                continue 3;
                            case ":":
                                // what has been read so far is a tag; what follows is its value
                                $tag = $buffer;
                                $buffer = "";
                                $state = self::STATE_IN_TOKEN;
                                continue 3;
                            default:
                                $buffer .= $char;
                                continue 3;
                        }
                    case self::STATE_IN_TOKEN_OR_TAG_QUOTED:
                        switch ($char) {
                            case "":
                            case '"':
                                if (($pos + 1 >= $stop) || $search[$pos + 1] === " ") {
                                    $context = self::processToken($context, $buffer, $tag, $flag_negative, false);
                                    $state = self::STATE_BEFORE_TOKEN;
                                    $flag_negative = false;
                                    $buffer = $tag = "";
                                } elseif ($search[$pos + 1] === '"') {
                                    $buffer .= '"';
                                    $pos++;
                                } else {
                                    $state = self::STATE_IN_TOKEN_OR_TAG;
                                }
                                continue 3;
                            case "\\":
                                if ($pos + 1 == $stop) {
                                    $buffer .= $char;
                                } elseif ($search[$pos + 1] === '"') {
                                    $buffer .= '"';
                                    $pos++;
                                } else {
                                    $buffer .= $char;
                                }
                                continue 3;
                            case ":":
                                // what has been read so far is a tag; what follows is its value
                                $tag = $buffer;
                                $buffer = "";
                                $state = self::STATE_IN_TOKEN_QUOTED;
                                continue 3;
                            default:
                                $buffer .= $char;
                                continue 3;
                        }
                    default:
                        throw new \Exception; // @codeCoverageIgnore
                }
            }
        } catch (Exception $e) {
            return null;
        }
        return $context;
    }

    /**
     * Applies a single parsed token to a context
     *
     * @param Context $c The context to modify
     * @param string $value The token's value
     * @param string $tag The token's tag, or an empty string if it has none
     * @param bool $neg Whether the token was negated
     * @param bool $date Whether the token is a date
     * @return Context The result of the handler the token was dispatched to, or $c itself for an entirely empty token
     */
    protected static function processToken(Context $c, string $value, string $tag, bool $neg, bool $date): Context {
        if (!strlen($value) && !strlen($tag)) {
            return $c;
        } elseif (!strlen($value)) {
            // if a tag has an empty value, the tag is treated as a search term instead
            $value = "$tag:";
            $tag = "";
        }
        if ($date) {
            return self::setDate($value, $c, $neg);
        } elseif (isset(self::FIELDS_BOOLEAN[$tag])) {
            return self::setBoolean($tag, $value, $c, $neg);
        } else {
            return self::addTerm($tag, $value, $c, $neg);
        }
    }

    /**
     * Appends a text term to the list of terms for the field matching a tag
     *
     * A tag which is not a key of FIELDS_TEXT is folded back into the value as "tag:value" and added to the general search terms.
     *
     * @param string $tag The token's tag, or an empty string for a general search term
     * @param string $value The term to add
     * @param Context $c The context to modify
     * @param bool $neg Whether to add the term to the context's "not" member instead of the context itself
     * @return Context Whatever the context's setter returns
     */
    protected static function addTerm(string $tag, string $value, Context $c, bool $neg): Context {
        $c = $neg ? $c->not : $c;
        $type = self::FIELDS_TEXT[$tag] ?? "";
        if (!$type) {
            $value = "$tag:$value";
            $type = self::FIELDS_TEXT[""];
        }
        // presumably a setter called on the "not" member hands back the parent context; verify against the Context class
        return $c->$type(array_merge($c->$type ?? [], [$value]));
    }

    /**
     * Restricts a context to a single calendar day
     *
     * The value is passed through Date::normalize(); the bounds are set to 00:00:00 and 23:59:59 (at a +00:00 offset) of the resulting date.
     *
     * @param string $value The date as written in the search string
     * @param Context $c The context to modify
     * @param bool $neg Whether to set the range on the context's "not" member instead of the context itself
     * @return Context The context passed in as $c
     * @throws Exception If a date range is already set and differs from the one requested
     */
    protected static function setDate(string $value, Context $c, bool $neg): Context {
        $spec = Date::normalize($value);
        // TTRSS treats invalid dates as the start of the Unix epoch; we ignore them instead
        if (!$spec) {
            return $c;
        }
        $day = $spec->format("Y-m-d");
        $start = $day."T00:00:00+00:00";
        $end = $day."T23:59:59+00:00";
        // if a date is already set, the same date is a no-op; anything else is a contradiction
        $cc = $neg ? $c->not : $c;
        if ($cc->modifiedSince() || $cc->notModifiedSince()) {
            if (!$cc->modifiedSince() || !$cc->notModifiedSince() || $cc->modifiedSince->format("c") !== $start || $cc->notModifiedSince->format("c") !== $end) {
                // FIXME: multiple negative dates should be allowed, but the design of the Context class does not support this
                throw new Exception;
            } else {
                return $c;
            }
        }
        $cc->modifiedSince($start);
        $cc->notModifiedSince($end);
        return $c;
    }

    /**
     * Sets a boolean field of a context from a "true" or "false" value
     *
     * Any other value is not boolean, and the token is handed to addTerm() instead.
     * Negation inverts the value rather than targeting the context's "not" member.
     *
     * @param string $tag The token's tag; expected to be a key of FIELDS_BOOLEAN
     * @param string $value The token's value
     * @param Context $c The context to modify
     * @param bool $neg Whether the token was negated
     * @return Context The context with the field applied
     * @throws Exception If the field is already set to the opposite value, or if published articles are requested
     */
    protected static function setBoolean(string $tag, string $value, Context $c, bool $neg): Context {
        $set = ["true" => true, "false" => false][$value] ?? null;
        if (is_null($set)) {
            return self::addTerm($tag, $value, $c, $neg);
        } else {
            // apply negation
            $set = $neg ? !$set : $set;
            if ($tag === "pub") {
                // TODO: this needs to be implemented correctly if the Published feed is implemented
                // currently specifying true will always yield an empty result (nothing is ever published), and specifying false is a no-op (matches everything)
                if ($set) {
                    throw new Exception;
                } else {
                    return $c;
                }
            } else {
                $field = (self::FIELDS_BOOLEAN[$tag] ?? "");
                if (!$c->$field()) {
                    // field has not yet been set; set it
                    return $c->$field($set);
                } elseif ($c->$field == $set) {
                    // field is already set to same value; do nothing
                    return $c;
                } else {
                    // contradiction: query would return no results
                    throw new Exception;
                }
            }
        }
    }
}

View file

@ -0,0 +1,126 @@
<?php
/** @license MIT
* Copyright 2017 J. King, Dustin Wilson et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace JKingWeb\Arsse\TestCase\REST\TinyTinyRSS;
use JKingWeb\Arsse\Context\Context;
use JKingWeb\Arsse\REST\TinyTinyRSS\Search;
/** @covers \JKingWeb\Arsse\REST\TinyTinyRSS\Search */
class TestSearch extends \JKingWeb\Arsse\Test\AbstractTest {
    /**
     * Supplies search strings along with the result Search::parse() is expected to produce for each
     *
     * Each dataset is a pair of [search string, expected result]; an expected result of null
     * means the parser is expected to return null rather than a Context.
     */
    public static function provideSearchStrings() {
        return [
            'Blank string' => ["", new Context],
            'Whitespace only' => [" \n \t", new Context],
            'Simple bare token' => ['OOK', (new Context)->searchTerms(["ook"])],
            'Simple negative bare token' => ['-OOK', (new Context)->not->searchTerms(["ook"])],
            'Simple quoted token' => ['"OOK eek"', (new Context)->searchTerms(["ook eek"])],
            'Simple negative quoted token' => ['"-OOK eek"', (new Context)->not->searchTerms(["ook eek"])],
            'Simple bare tokens' => ['OOK eek', (new Context)->searchTerms(["ook", "eek"])],
            'Simple mixed bare tokens' => ['-OOK eek', (new Context)->not->searchTerms(["ook"])->searchTerms(["eek"])],
            'Unclosed quoted token' => ['"OOK eek', (new Context)->searchTerms(["ook eek"])],
            'Unclosed quoted token 2' => ['"OOK eek" "', (new Context)->searchTerms(["ook eek"])],
            'Broken quoted token 1' => ['"-OOK"eek"', (new Context)->not->searchTerms(["ookeek\""])],
            'Broken quoted token 2' => ['""eek"', (new Context)->searchTerms(["eek\""])],
            'Broken quoted token 3' => ['"-"eek"', (new Context)->not->searchTerms(["eek\""])],
            'Empty quoted token' => ['""', new Context],
            'Simple quoted tokens' => ['"OOK eek" "eek ack"', (new Context)->searchTerms(["ook eek", "eek ack"])],
            'Bare blank tag' => [':ook', (new Context)->searchTerms([":ook"])],
            'Quoted blank tag' => ['":ook"', (new Context)->searchTerms([":ook"])],
            'Bare negative blank tag' => ['-:ook', (new Context)->not->searchTerms([":ook"])],
            'Quoted negative blank tag' => ['"-:ook"', (new Context)->not->searchTerms([":ook"])],
            'Bare valueless blank tag' => [':', (new Context)->searchTerms([":"])],
            'Quoted valueless blank tag' => ['":"', (new Context)->searchTerms([":"])],
            'Bare negative valueless blank tag' => ['-:', (new Context)->not->searchTerms([":"])],
            'Quoted negative valueless blank tag' => ['"-:"', (new Context)->not->searchTerms([":"])],
            'Double negative' => ['--eek', (new Context)->not->searchTerms(["-eek"])],
            'Double negative 2' => ['--@eek', (new Context)->not->searchTerms(["-@eek"])],
            'Double negative 3' => ['"--@eek"', (new Context)->not->searchTerms(["-@eek"])],
            'Double negative 4' => ['"--eek"', (new Context)->not->searchTerms(["-eek"])],
            'Negative before quote' => ['-"ook"', (new Context)->not->searchTerms(["\"ook\""])],
            'Bare unread tag true' => ['UNREAD:true', (new Context)->unread(true)],
            'Bare unread tag false' => ['UNREAD:false', (new Context)->unread(false)],
            'Bare negative unread tag true' => ['-unread:true', (new Context)->unread(false)],
            'Bare negative unread tag false' => ['-unread:false', (new Context)->unread(true)],
            'Quoted unread tag true' => ['"UNREAD:true"', (new Context)->unread(true)],
            'Quoted unread tag false' => ['"UNREAD:false"', (new Context)->unread(false)],
            'Quoted negative unread tag true' => ['"-unread:true"', (new Context)->unread(false)],
            'Quoted negative unread tag false' => ['"-unread:false"', (new Context)->unread(true)],
            'Bare star tag true' => ['STAR:true', (new Context)->starred(true)],
            'Bare star tag false' => ['STAR:false', (new Context)->starred(false)],
            'Bare negative star tag true' => ['-star:true', (new Context)->starred(false)],
            'Bare negative star tag false' => ['-star:false', (new Context)->starred(true)],
            'Quoted star tag true' => ['"STAR:true"', (new Context)->starred(true)],
            'Quoted star tag false' => ['"STAR:false"', (new Context)->starred(false)],
            'Quoted negative star tag true' => ['"-star:true"', (new Context)->starred(false)],
            'Quoted negative star tag false' => ['"-star:false"', (new Context)->starred(true)],
            'Bare note tag true' => ['NOTE:true', (new Context)->annotated(true)],
            'Bare note tag false' => ['NOTE:false', (new Context)->annotated(false)],
            'Bare negative note tag true' => ['-note:true', (new Context)->annotated(false)],
            'Bare negative note tag false' => ['-note:false', (new Context)->annotated(true)],
            'Quoted note tag true' => ['"NOTE:true"', (new Context)->annotated(true)],
            'Quoted note tag false' => ['"NOTE:false"', (new Context)->annotated(false)],
            'Quoted negative note tag true' => ['"-note:true"', (new Context)->annotated(false)],
            'Quoted negative note tag false' => ['"-note:false"', (new Context)->annotated(true)],
            'Bare pub tag true' => ['PUB:true', null],
            'Bare pub tag false' => ['PUB:false', new Context],
            'Bare negative pub tag true' => ['-pub:true', new Context],
            'Bare negative pub tag false' => ['-pub:false', null],
            'Quoted pub tag true' => ['"PUB:true"', null],
            'Quoted pub tag false' => ['"PUB:false"', new Context],
            'Quoted negative pub tag true' => ['"-pub:true"', new Context],
            'Quoted negative pub tag false' => ['"-pub:false"', null],
            'Non-boolean unread tag' => ['unread:maybe', (new Context)->searchTerms(["unread:maybe"])],
            'Non-boolean star tag' => ['star:maybe', (new Context)->searchTerms(["star:maybe"])],
            'Non-boolean pub tag' => ['pub:maybe', (new Context)->searchTerms(["pub:maybe"])],
            'Non-boolean note tag' => ['note:maybe', (new Context)->annotationTerms(["maybe"])],
            'Valueless unread tag' => ['unread:', (new Context)->searchTerms(["unread:"])],
            'Valueless star tag' => ['star:', (new Context)->searchTerms(["star:"])],
            'Valueless pub tag' => ['pub:', (new Context)->searchTerms(["pub:"])],
            'Valueless note tag' => ['note:', (new Context)->searchTerms(["note:"])],
            'Valueless title tag' => ['title:', (new Context)->searchTerms(["title:"])],
            'Valueless author tag' => ['author:', (new Context)->searchTerms(["author:"])],
            'Escaped quote 1' => ['"""I say, Jeeves!"""', (new Context)->searchTerms(["\"i say, jeeves!\""])],
            'Escaped quote 2' => ['"\\"I say, Jeeves!\\""', (new Context)->searchTerms(["\"i say, jeeves!\""])],
            'Escaped quote 3' => ['\\"I say, Jeeves!\\"', (new Context)->searchTerms(["\\\"i", "say,", "jeeves!\\\""])],
            'Escaped quote 4' => ['"\\"\\I say, Jeeves!\\""', (new Context)->searchTerms(["\"\\i say, jeeves!\""])],
            'Escaped quote 5' => ['"\\I say, Jeeves!"', (new Context)->searchTerms(["\\i say, jeeves!"])],
            'Escaped quote 6' => ['"\\"I say, Jeeves!\\', (new Context)->searchTerms(["\"i say, jeeves!\\"])],
            'Escaped quote 7' => ['"\\', (new Context)->searchTerms(["\\"])],
            'Quoted author tag 1' => ['"author:Neal Stephenson"', (new Context)->authorTerms(["neal stephenson"])],
            'Quoted author tag 2' => ['"author:Jo ""Cap\'n Tripps"" Ashburn"', (new Context)->authorTerms(["jo \"cap'n tripps\" ashburn"])],
            'Quoted author tag 3' => ['"author:Jo \\"Cap\'n Tripps\\" Ashburn"', (new Context)->authorTerms(["jo \"cap'n tripps\" ashburn"])],
            'Quoted author tag 4' => ['"author:Jo ""Cap\'n Tripps"Ashburn"', (new Context)->authorTerms(["jo \"cap'n trippsashburn\""])],
            'Quoted author tag 5' => ['"author:Jo ""Cap\'n Tripps\ Ashburn"', (new Context)->authorTerms(["jo \"cap'n tripps\\ ashburn"])],
            'Quoted author tag 6' => ['"author:Neal Stephenson\\', (new Context)->authorTerms(["neal stephenson\\"])],
            'Quoted title tag' => ['"title:Generic title"', (new Context)->titleTerms(["generic title"])],
            'Contradictory booleans' => ['unread:true -unread:true', null],
            'Doubled boolean' => ['unread:true unread:true', (new Context)->unread(true)],
            'Bare blank date' => ['@', new Context],
            'Quoted blank date' => ['"@"', new Context],
            'Bare ISO date' => ['@2019-03-01', (new Context)->modifiedSince("2019-03-01T00:00:00Z")->notModifiedSince("2019-03-01T23:59:59Z")],
            'Quoted English date' => ['"@March 1st, 2019"', (new Context)->modifiedSince("2019-03-01T00:00:00Z")->notModifiedSince("2019-03-01T23:59:59Z")],
            'Bare negative ISO date' => ['-@2019-03-01', (new Context)->not->modifiedSince("2019-03-01T00:00:00Z")->not->notModifiedSince("2019-03-01T23:59:59Z")],
            'Quoted negative English date' => ['"-@March 1st, 2019"', (new Context)->not->modifiedSince("2019-03-01T00:00:00Z")->not->notModifiedSince("2019-03-01T23:59:59Z")],
            'Invalid date' => ['@Bugaboo', new Context],
            'Escaped quoted date 1' => ['"@""Yesterday" and today', (new Context)->searchTerms(["and", "today"])],
            'Escaped quoted date 2' => ['"@\\"Yesterday" and today', (new Context)->searchTerms(["and", "today"])],
            'Escaped quoted date 3' => ['"@Yesterday\\', new Context],
            'Escaped quoted date 4' => ['"@Yesterday\\and today', new Context],
            'Escaped quoted date 5' => ['"@Yesterday"and today', (new Context)->searchTerms(["today"])],
            'Contradictory dates' => ['@Yesterday @Today', null],
            'Doubled date' => ['"@March 1st, 2019" @2019-03-01', (new Context)->modifiedSince("2019-03-01T00:00:00Z")->notModifiedSince("2019-03-01T23:59:59Z")],
            'Doubled negative date' => ['"-@March 1st, 2019" -@2019-03-01', (new Context)->not->modifiedSince("2019-03-01T00:00:00Z")->not->notModifiedSince("2019-03-01T23:59:59Z")],
        ];
    }

    /**
     * Checks that parsing a search string yields the expected context (or null)
     *
     * @dataProvider provideSearchStrings
     */
    public function testApplySearchToContext(string $search, $exp) {
        $act = Search::parse($search);
        $this->assertEquals($exp, $act);
    }
}

View file

@ -99,6 +99,7 @@
<file>cases/REST/NextCloudNews/PDO/TestV1_2.php</file>
</testsuite>
<testsuite name="TTRSS">
<file>cases/REST/TinyTinyRSS/TestSearch.php</file>
<file>cases/REST/TinyTinyRSS/TestAPI.php</file>
<file>cases/REST/TinyTinyRSS/TestIcon.php</file>
<file>cases/REST/TinyTinyRSS/PDO/TestAPI.php</file>