1
1
Fork 0
mirror of https://code.mensbeam.com/MensBeam/Arsse.git synced 2024-12-22 21:22:40 +00:00

Rule refactoring

- The Database class is now responsible for preparing rules
- Rules are now returned in an array keyed by user
- Empty strings are now passed through during rule preparation
This commit is contained in:
J. King 2021-01-08 14:17:46 -05:00
parent c1eff8479c
commit 4f34b4ff29
6 changed files with 57 additions and 43 deletions

View file

@ -13,6 +13,8 @@ use JKingWeb\Arsse\Context\Context;
use JKingWeb\Arsse\Misc\Date; use JKingWeb\Arsse\Misc\Date;
use JKingWeb\Arsse\Misc\ValueInfo as V; use JKingWeb\Arsse\Misc\ValueInfo as V;
use JKingWeb\Arsse\Misc\URL; use JKingWeb\Arsse\Misc\URL;
use JKingWeb\Arsse\Rule\Rule;
use JKingWeb\Arsse\Rule\Exception as RuleException;
/** The high-level interface with the database /** The high-level interface with the database
* *
@ -1205,14 +1207,26 @@ class Database {
/** Retrieves the set of filters users have applied to a given feed /** Retrieves the set of filters users have applied to a given feed
* *
* Each record includes the following keys: * The result is an associative array whose keys are usernames, values
* * being an array in turn with the following keys:
* - "owner": The user for whom to apply the filters *
* - "keep": The "keep" rule; any articles which fail to match this rule are hidden * - "keep": The "keep" rule as a prepared pattern; any articles which fail to match this rule are hidden
* - "block": The block rule; any article which matches this rule are hidden * - "block": The block rule as a prepared pattern; any articles which match this rule are hidden
*/ */
public function feedRulesGet(int $feedID): Db\Result { public function feedRulesGet(int $feedID): array {
return $this->db->prepare("SELECT owner, coalesce(keep_rule, '') as keep, coalesce(block_rule, '') as block from arsse_subscriptions where feed = ? and (coalesce(keep_rule, '') || coalesce(block_rule, '')) <> '' order by owner", "int")->run($feedID); $out = [];
$result = $this->db->prepare("SELECT owner, coalesce(keep_rule, '') as keep, coalesce(block_rule, '') as block from arsse_subscriptions where feed = ? and (coalesce(keep_rule, '') || coalesce(block_rule, '')) <> '' order by owner", "int")->run($feedID);
foreach ($result as $row) {
try {
$keep = Rule::prep($row['keep']);
$block = Rule::prep($row['block']);
} catch (RuleException $e) {
// invalid rules should not normally appear in the database, but it's possible
continue;
}
$out[$row['owner']] = ['keep' => $keep, 'block' => $block];
}
return $out;
} }
/** Retrieves various identifiers for the latest $count articles in the given newsfeed. The identifiers are: /** Retrieves various identifiers for the latest $count articles in the given newsfeed. The identifiers are:

View file

@ -455,7 +455,7 @@ class Feed {
protected function computeFilterRules(int $feedID): void { protected function computeFilterRules(int $feedID): void {
$rules = Arsse::$db->feedRulesGet($feedID); $rules = Arsse::$db->feedRulesGet($feedID);
foreach ($rules as $r) { foreach ($rules as $user => $r) {
$stats = ['new' => [], 'changed' => []]; $stats = ['new' => [], 'changed' => []];
foreach ($this->newItems as $index => $item) { foreach ($this->newItems as $index => $item) {
$stats['new'][$index] = Rule::apply($r['keep'], $r['block'], $item->title, $item->categories); $stats['new'][$index] = Rule::apply($r['keep'], $r['block'], $item->title, $item->categories);
@ -463,7 +463,7 @@ class Feed {
foreach ($this->changedItems as $index => $item) { foreach ($this->changedItems as $index => $item) {
$stats['changed'][$index] = Rule::apply($r['keep'], $r['block'], $item->title, $item->categories); $stats['changed'][$index] = Rule::apply($r['keep'], $r['block'], $item->title, $item->categories);
} }
$this->filteredItems[$r['owner']] = $stats; $this->filteredItems[$user] = $stats;
} }
} }
} }

View file

@ -8,6 +8,9 @@ namespace JKingWeb\Arsse\Rule;
abstract class Rule { abstract class Rule {
public static function prep(string $pattern): string { public static function prep(string $pattern): string {
if (!strlen($pattern)) {
return "";
}
if (preg_match_all("<`>", $pattern, $m, \PREG_OFFSET_CAPTURE)) { if (preg_match_all("<`>", $pattern, $m, \PREG_OFFSET_CAPTURE)) {
// where necessary escape our chosen delimiter (backtick) in reverse order // where necessary escape our chosen delimiter (backtick) in reverse order
foreach (array_reverse($m[0]) as [,$pos]) { foreach (array_reverse($m[0]) as [,$pos]) {
@ -42,7 +45,13 @@ abstract class Rule {
* *
* Returns true if the article is to be kept, and false if it is to be suppressed * Returns true if the article is to be kept, and false if it is to be suppressed
*/ */
public static function apply(string $keepRule, string $blockRule, string $title, array $categories = []): bool { public static function apply(string $keepPattern, string $blockPattern, string $title, array $categories = []): bool {
// ensure input is valid
assert(!strlen($keepPattern) || @preg_match($keepPattern, "") !== false, new \Exception("Keep pattern is invalid"));
assert(!strlen($blockPattern) || @preg_match($blockPattern, "") !== false, new \Exception("Block pattern is invalid"));
assert(sizeof(array_filter($categories, function($v) {
return !is_string($v);
})) === 0, new \Exception("All categories must be strings"));
// if neither rule is processed we should keep // if neither rule is processed we should keep
$keep = true; $keep = true;
// merge and clean the data to match // merge and clean the data to match
@ -50,38 +59,24 @@ abstract class Rule {
return preg_replace('/\s+/', " ", $str); return preg_replace('/\s+/', " ", $str);
}, array_merge([$title], $categories)); }, array_merge([$title], $categories));
// process the keep rule if it exists // process the keep rule if it exists
if (strlen($keepRule)) { if (strlen($keepPattern)) {
try {
$rule = static::prep($keepRule);
} catch (Exception $e) {
return true;
}
// if a keep rule is specified the default state is now not to keep // if a keep rule is specified the default state is now not to keep
$keep = false; $keep = false;
foreach ($data as $str) { foreach ($data as $str) {
if (is_string($str)) { if (preg_match($keepPattern, $str)) {
if (preg_match($rule, $str)) { // keep if the keep-rule matches one of the strings
// keep if the keep-rule matches one of the strings $keep = true;
$keep = true; break;
break;
}
} }
} }
} }
// process the block rule if the keep rule was matched // process the block rule if the keep rule was matched
if ($keep && strlen($blockRule)) { if ($keep && strlen($blockPattern)) {
try {
$rule = static::prep($blockRule);
} catch (Exception $e) {
return true;
}
foreach ($data as $str) { foreach ($data as $str) {
if (is_string($str)) { if (preg_match($blockPattern, $str)) {
if (preg_match($rule, $str)) { // do not keep if the block-rule matches one of the strings
// do not keep if the block-rule matches one of the strings $keep = false;
$keep = false; break;
break;
}
} }
} }
} }

View file

@ -76,9 +76,9 @@ trait SeriesFeed {
], ],
'rows' => [ 'rows' => [
[1,'john.doe@example.com',1,null,'^Sport$'], [1,'john.doe@example.com',1,null,'^Sport$'],
[2,'john.doe@example.com',2,null,null], [2,'john.doe@example.com',2,"",null],
[3,'john.doe@example.com',3,'\w+',null], [3,'john.doe@example.com',3,'\w+',null],
[4,'john.doe@example.com',4,null,null], [4,'john.doe@example.com',4,'\w+',"["], // invalid rule leads to both rules being ignored
[5,'john.doe@example.com',5,null,'and/or'], [5,'john.doe@example.com',5,null,'and/or'],
[6,'jane.doe@example.com',1,'^(?i)[a-z]+','bluberry'], [6,'jane.doe@example.com',1,'^(?i)[a-z]+','bluberry'],
], ],
@ -205,16 +205,16 @@ trait SeriesFeed {
/** @dataProvider provideFilterRules */ /** @dataProvider provideFilterRules */
public function testGetRules(int $in, array $exp): void { public function testGetRules(int $in, array $exp): void {
$this->assertResult($exp, Arsse::$db->feedRulesGet($in)); $this->assertSame($exp, Arsse::$db->feedRulesGet($in));
} }
public function provideFilterRules(): iterable { public function provideFilterRules(): iterable {
return [ return [
[1, [['owner' => "john.doe@example.com", 'keep' => "", 'block' => "^Sport$"], ['owner' => "jane.doe@example.com", 'keep' => "^(?i)[a-z]+", 'block' => "bluberry"]]], [1, ['jane.doe@example.com' => ['keep' => "`^(?i)[a-z]+`u", 'block' => "`bluberry`u"], 'john.doe@example.com' => ['keep' => "", 'block' => "`^Sport$`u"]]],
[2, []], [2, []],
[3, [['owner' => "john.doe@example.com", 'keep' => '\w+', 'block' => ""]]], [3, ['john.doe@example.com' => ['keep' => '`\w+`u', 'block' => ""]]],
[4, []], [4, []],
[5, [['owner' => "john.doe@example.com", 'keep' => "", 'block' => "and/or"]]], [5, ['john.doe@example.com' => ['keep' => "", 'block' => "`and/or`u"]]],
]; ];
} }

View file

@ -95,7 +95,7 @@ class TestFeed extends \JKingWeb\Arsse\Test\AbstractTest {
self::clearData(); self::clearData();
self::setConf(); self::setConf();
Arsse::$db = \Phake::mock(Database::class); Arsse::$db = \Phake::mock(Database::class);
\Phake::when(Arsse::$db)->feedRulesGet->thenReturn(new Result([])); \Phake::when(Arsse::$db)->feedRulesGet->thenReturn([]);
} }
public function testParseAFeed(): void { public function testParseAFeed(): void {

View file

@ -23,8 +23,15 @@ class TestRule extends \JKingWeb\Arsse\Test\AbstractTest {
Rule::prep("["); Rule::prep("[");
} }
public function testPrepareAnEmptyPattern(): void {
$this->assertTrue(Rule::validate(""));
$this->assertSame("", Rule::prep(""));
}
/** @dataProvider provideApplications */ /** @dataProvider provideApplications */
public function testApplyRules(string $keepRule, string $blockRule, string $title, array $categories, $exp): void { public function testApplyRules(string $keepRule, string $blockRule, string $title, array $categories, $exp): void {
$keepRule = Rule::prep($keepRule);
$blockRule = Rule::prep($blockRule);
if ($exp instanceof \Exception) { if ($exp instanceof \Exception) {
$this->assertException($exp); $this->assertException($exp);
Rule::apply($keepRule, $blockRule, $title, $categories); Rule::apply($keepRule, $blockRule, $title, $categories);
@ -43,8 +50,6 @@ class TestRule extends \JKingWeb\Arsse\Test\AbstractTest {
["", "^Category$", "Title", ["Dummy", "Category"], false], ["", "^Category$", "Title", ["Dummy", "Category"], false],
["", "^Naught$", "Title", ["Dummy", "Category"], true], ["", "^Naught$", "Title", ["Dummy", "Category"], true],
["^Category$", "^Category$", "Title", ["Dummy", "Category"], false], ["^Category$", "^Category$", "Title", ["Dummy", "Category"], false],
["[", "", "Title", ["Dummy", "Category"], true],
["", "[", "Title", ["Dummy", "Category"], true],
["", "^A B C$", "A B\nC", ["X\n Y \t \r Z"], false], ["", "^A B C$", "A B\nC", ["X\n Y \t \r Z"], false],
["", "^X Y Z$", "A B\nC", ["X\n Y \t \r Z"], false], ["", "^X Y Z$", "A B\nC", ["X\n Y \t \r Z"], false],
]; ];