1
1
Fork 0
mirror of https://code.mensbeam.com/MensBeam/Arsse.git synced 2024-12-22 21:22:40 +00:00

Partial implementation of filter rule handling

This commit is contained in:
J. King 2021-01-03 16:41:15 -05:00
parent 7e17332714
commit ffc5579a7a
7 changed files with 90 additions and 11 deletions

View file

@ -100,6 +100,7 @@ abstract class AbstractException extends \Exception {
"ImportExport/Exception.invalidFolderName" => 10613, "ImportExport/Exception.invalidFolderName" => 10613,
"ImportExport/Exception.invalidFolderCopy" => 10614, "ImportExport/Exception.invalidFolderCopy" => 10614,
"ImportExport/Exception.invalidTagName" => 10615, "ImportExport/Exception.invalidTagName" => 10615,
"Rule/Exception.invalidPattern" => 10701,
]; ];
public function __construct(string $msgID = "", $vars = null, \Throwable $e = null) { public function __construct(string $msgID = "", $vars = null, \Throwable $e = null) {

View file

@ -79,12 +79,16 @@ class Feed {
// we only really care if articles have been modified; if there are no new articles, act as if the feed is unchanged // we only really care if articles have been modified; if there are no new articles, act as if the feed is unchanged
if (!sizeof($this->newItems) && !sizeof($this->changedItems)) { if (!sizeof($this->newItems) && !sizeof($this->changedItems)) {
$this->modified = false; $this->modified = false;
} else {
if ($feedID) {
$this->computeFilterRules($feedID);
} }
// if requested, scrape full content for any new and changed items // if requested, scrape full content for any new and changed items
if ($scrape) { if ($scrape) {
$this->scrape(); $this->scrape();
} }
} }
}
// compute the time at which the feed should next be fetched // compute the time at which the feed should next be fetched
$this->nextFetch = $this->computeNextFetch(); $this->nextFetch = $this->computeNextFetch();
} }
@ -119,7 +123,7 @@ class Feed {
} }
} }
protected function parse(): bool { protected function parse(): void {
try { try {
$feed = $this->resource->reader->getParser( $feed = $this->resource->reader->getParser(
$this->resource->getUrl(), $this->resource->getUrl(),
@ -222,7 +226,6 @@ class Feed {
sort($f->categories); sort($f->categories);
} }
$this->data = $feed; $this->data = $feed;
return true;
} }
protected function deduplicateItems(array $items): array { protected function deduplicateItems(array $items): array {
@ -269,13 +272,13 @@ class Feed {
return $out; return $out;
} }
protected function matchToDatabase(int $feedID = null): bool { protected function matchToDatabase(int $feedID = null): void {
// first perform deduplication on items // first perform deduplication on items
$items = $this->deduplicateItems($this->data->items); $items = $this->deduplicateItems($this->data->items);
// if we haven't been given a database feed ID to check against, all items are new // if we haven't been given a database feed ID to check against, all items are new
if (is_null($feedID)) { if (is_null($feedID)) {
$this->newItems = $items; $this->newItems = $items;
return true; return;
} }
// get as many of the latest articles in the database as there are in the feed // get as many of the latest articles in the database as there are in the feed
$articles = Arsse::$db->feedMatchLatest($feedID, sizeof($items))->getAll(); $articles = Arsse::$db->feedMatchLatest($feedID, sizeof($items))->getAll();
@ -303,7 +306,6 @@ class Feed {
// merge the two change-lists, preserving keys // merge the two change-lists, preserving keys
$this->changedItems = array_combine(array_merge(array_keys($this->changedItems), array_keys($changed)), array_merge($this->changedItems, $changed)); $this->changedItems = array_combine(array_merge(array_keys($this->changedItems), array_keys($changed)), array_merge($this->changedItems, $changed));
} }
return true;
} }
protected function matchItems(array $items, array $articles): array { protected function matchItems(array $items, array $articles): array {
@ -438,7 +440,7 @@ class Feed {
return $dates; return $dates;
} }
protected function scrape(): bool { protected function scrape(): void {
$scraper = new Scraper(self::configure()); $scraper = new Scraper(self::configure());
foreach (array_merge($this->newItems, $this->changedItems) as $item) { foreach (array_merge($this->newItems, $this->changedItems) as $item) {
$scraper->setUrl($item->url); $scraper->setUrl($item->url);
@ -447,6 +449,17 @@ class Feed {
$item->content = $scraper->getFilteredContent(); $item->content = $scraper->getFilteredContent();
} }
} }
return true; }
protected function computeFilterRules(int $feedID): void {
return;
$rules = Arsse::$db->feedRulesGet($feedID);
foreach ($rules as $r) {
$keep = "";
$block = "";
if (strlen($r['keep'])) {
}
}
} }
} }

10
lib/Rule/Exception.php Normal file
View file

@ -0,0 +1,10 @@
<?php
/** @license MIT
* Copyright 2017 J. King, Dustin Wilson et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace JKingWeb\Arsse\Rule;
/**
 * Exception type for failures related to rules.
 *
 * It adds no behaviour of its own; everything is inherited from the
 * application's AbstractException. It is raised with the "invalidPattern"
 * message identifier when a rule pattern cannot be used.
 */
class Exception extends \JKingWeb\Arsse\AbstractException {
}

31
lib/Rule/Rule.php Normal file
View file

@ -0,0 +1,31 @@
<?php
/** @license MIT
* Copyright 2017 J. King, Dustin Wilson et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace JKingWeb\Arsse\Rule;
abstract class Rule {
    /**
     * Turns a bare regular expression into a delimited PCRE pattern.
     *
     * The backtick is used as the delimiter, so any backtick in the input
     * which is not already escaped (i.e. is preceded by an even number of
     * backslashes, including none) gets a backslash inserted before it. The
     * result is wrapped in backticks, given the "u" modifier, and compiled
     * once against an empty subject to verify that it is usable.
     *
     * @param string $pattern The regular expression, without delimiters or modifiers
     * @return string The delimited pattern, ready to pass to the preg_* functions
     * @throws Exception With the "invalidPattern" identifier if the pattern does not compile
     */
    public static function prep(string $pattern): string {
        $escaped = "";
        // length of the run of backslashes immediately before the current byte
        $slashes = 0;
        $length = strlen($pattern);
        for ($i = 0; $i < $length; $i++) {
            $char = $pattern[$i];
            // an even run (zero included) means the delimiter itself is unescaped
            if ($char === "`" && $slashes % 2 === 0) {
                $escaped .= "\\";
            }
            $escaped .= $char;
            // any byte other than a backslash ends the current run
            $slashes = ($char === "\\") ? $slashes + 1 : 0;
        }
        // wrap in delimiters, then compile against an empty subject as a validity test
        $delimited = "`".$escaped."`u";
        if (@preg_match($delimited, "") === false) {
            throw new Exception("invalidPattern");
        }
        return $delimited;
    }
}

View file

@ -194,4 +194,5 @@ return [
'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidFolderName' => 'Input data contains an invalid folder name', 'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidFolderName' => 'Input data contains an invalid folder name',
'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidFolderCopy' => 'Input data contains multiple folders of the same name under the same parent', 'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidFolderCopy' => 'Input data contains multiple folders of the same name under the same parent',
'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidTagName' => 'Input data contains an invalid tag name', 'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidTagName' => 'Input data contains an invalid tag name',
'Exception.JKingWeb/Arsse/Rule/Exception.invalidPattern' => 'Specified rule pattern is invalid'
]; ];

View file

@ -0,0 +1,22 @@
<?php
/** @license MIT
* Copyright 2017 J. King, Dustin Wilson et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace JKingWeb\Arsse\TestCase\Misc;
use JKingWeb\Arsse\Rule\Rule;
/** @covers \JKingWeb\Arsse\Rule\Rule */
class TestRule extends \JKingWeb\Arsse\Test\AbstractTest {
    public function testPrepareAPattern(): void {
        // the input mixes backticks preceded by zero, one, and two backslashes;
        // only those preceded by an even number should gain an extra backslash
        $input = "`..`..\\`..\\\\`..";
        $expected = "`\\`..\\`..\\`..\\\\\\`..`u";
        $actual = Rule::prep($input);
        $this->assertSame($expected, $actual);
    }

    public function testPrepareAnInvalidPattern(): void {
        // the expectation is registered first; the call below is what should throw
        $this->assertException("invalidPattern", "Rule");
        // an unterminated character class cannot be compiled
        Rule::prep("[");
    }
}

View file

@ -51,6 +51,7 @@
<file>cases/Misc/TestContext.php</file> <file>cases/Misc/TestContext.php</file>
<file>cases/Misc/TestURL.php</file> <file>cases/Misc/TestURL.php</file>
<file>cases/Misc/TestHTTP.php</file> <file>cases/Misc/TestHTTP.php</file>
<file>cases/Misc/TestRule.php</file>
</testsuite> </testsuite>
<testsuite name="User management"> <testsuite name="User management">
<file>cases/User/TestInternal.php</file> <file>cases/User/TestInternal.php</file>