1
1
Fork 0
mirror of https://code.mensbeam.com/MensBeam/Arsse.git synced 2024-12-22 13:12:41 +00:00

Partial implementation of filter rule handling

This commit is contained in:
J. King 2021-01-03 16:41:15 -05:00
parent 7e17332714
commit ffc5579a7a
7 changed files with 90 additions and 11 deletions

View file

@ -100,6 +100,7 @@ abstract class AbstractException extends \Exception {
"ImportExport/Exception.invalidFolderName" => 10613,
"ImportExport/Exception.invalidFolderCopy" => 10614,
"ImportExport/Exception.invalidTagName" => 10615,
"Rule/Exception.invalidPattern" => 10701,
];
public function __construct(string $msgID = "", $vars = null, \Throwable $e = null) {

View file

@ -79,12 +79,16 @@ class Feed {
// we only really care if articles have been modified; if there are no new articles, act as if the feed is unchanged
if (!sizeof($this->newItems) && !sizeof($this->changedItems)) {
$this->modified = false;
} else {
if ($feedID) {
$this->computeFilterRules($feedID);
}
// if requested, scrape full content for any new and changed items
if ($scrape) {
$this->scrape();
}
}
}
// compute the time at which the feed should next be fetched
$this->nextFetch = $this->computeNextFetch();
}
@ -119,7 +123,7 @@ class Feed {
}
}
protected function parse(): bool {
protected function parse(): void {
try {
$feed = $this->resource->reader->getParser(
$this->resource->getUrl(),
@ -222,7 +226,6 @@ class Feed {
sort($f->categories);
}
$this->data = $feed;
return true;
}
protected function deduplicateItems(array $items): array {
@ -269,13 +272,13 @@ class Feed {
return $out;
}
protected function matchToDatabase(int $feedID = null): bool {
protected function matchToDatabase(int $feedID = null): void {
// first perform deduplication on items
$items = $this->deduplicateItems($this->data->items);
// if we haven't been given a database feed ID to check against, all items are new
if (is_null($feedID)) {
$this->newItems = $items;
return true;
return;
}
// get as many of the latest articles in the database as there are in the feed
$articles = Arsse::$db->feedMatchLatest($feedID, sizeof($items))->getAll();
@ -303,7 +306,6 @@ class Feed {
// merge the two change-lists, preserving keys
$this->changedItems = array_combine(array_merge(array_keys($this->changedItems), array_keys($changed)), array_merge($this->changedItems, $changed));
}
return true;
}
protected function matchItems(array $items, array $articles): array {
@ -438,7 +440,7 @@ class Feed {
return $dates;
}
protected function scrape(): bool {
protected function scrape(): void {
$scraper = new Scraper(self::configure());
foreach (array_merge($this->newItems, $this->changedItems) as $item) {
$scraper->setUrl($item->url);
@ -447,6 +449,17 @@ class Feed {
$item->content = $scraper->getFilteredContent();
}
}
return true;
}
/**
 * Placeholder for computing the filter rules which apply to a feed
 *
 * This method is not yet functional: it returns immediately, so calling it
 * has no effect. The statements after the early return are unreachable
 * scaffolding for the eventual implementation.
 *
 * @param int $feedID The feed identifier handed to feedRulesGet() in the disabled code below
 */
protected function computeFilterRules(int $feedID): void {
// deliberate short-circuit: nothing below this line is executed
return;
// the disabled code retrieves a set of rules for the feed from the database object
$rules = Arsse::$db->feedRulesGet($feedID);
foreach ($rules as $r) {
// accumulators which are initialized but not yet used anywhere
$keep = "";
$block = "";
// only a non-empty 'keep' entry is singled out; the handling itself is still unwritten
// NOTE(review): presumably a 'block' entry is meant to be handled similarly -- confirm with the author
if (strlen($r['keep'])) {
}
}
}
}

10
lib/Rule/Exception.php Normal file
View file

@ -0,0 +1,10 @@
<?php
/** @license MIT
* Copyright 2017 J. King, Dustin Wilson et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace JKingWeb\Arsse\Rule;
/** Exception type for the Rule namespace; Rule::prep() throws it with the "invalidPattern" message identifier when a pattern fails its test match */
class Exception extends \JKingWeb\Arsse\AbstractException {
}

31
lib/Rule/Rule.php Normal file
View file

@ -0,0 +1,31 @@
<?php
/** @license MIT
* Copyright 2017 J. King, Dustin Wilson et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace JKingWeb\Arsse\Rule;
abstract class Rule {
    /** Turns a bare regular-expression body into a complete, delimited PCRE pattern
     *
     * The backtick is used as the pattern delimiter, so any backtick in the
     * input which is not already escaped gets a backslash put in front of it.
     * The result is wrapped in backticks, given the "u" modifier, and tested
     * against an empty string to make sure it is usable.
     *
     * @param string $pattern The pattern body, without delimiters or modifiers
     * @return string The delimited pattern
     * @throws Exception With the message identifier "invalidPattern" if the test match fails
     */
    public static function prep(string $pattern): string {
        // scan from the end towards the start so that an inserted backslash never moves a position which is still to be examined
        for ($offset = strlen($pattern) - 1; $offset >= 0; $offset--) {
            if ($pattern[$offset] !== "`") {
                continue;
            }
            // measure the unbroken run of backslashes sitting directly before this backtick
            $slashes = 0;
            for ($i = $offset - 1; $i >= 0 && $pattern[$i] === "\\"; $i--) {
                $slashes++;
            }
            // an odd run means the backtick is already escaped; an even run (or no run) leaves it bare
            if ($slashes % 2 === 0) {
                $pattern = substr_replace($pattern, "\\", $offset, 0);
            }
        }
        // wrap the body in its delimiters and try it out; a false result signals an unusable pattern
        $delimited = "`".$pattern."`u";
        if (@preg_match($delimited, "") === false) {
            throw new Exception("invalidPattern");
        }
        return $delimited;
    }
}

View file

@ -194,4 +194,5 @@ return [
'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidFolderName' => 'Input data contains an invalid folder name',
'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidFolderCopy' => 'Input data contains multiple folders of the same name under the same parent',
'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidTagName' => 'Input data contains an invalid tag name',
'Exception.JKingWeb/Arsse/Rule/Exception.invalidPattern' => 'Specified rule pattern is invalid'
];

View file

@ -0,0 +1,22 @@
<?php
/** @license MIT
* Copyright 2017 J. King, Dustin Wilson et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace JKingWeb\Arsse\TestCase\Misc;
use JKingWeb\Arsse\Rule\Rule;
/** Tests for the preparation of rule patterns
 *
 * @covers \JKingWeb\Arsse\Rule\Rule */
class TestRule extends \JKingWeb\Arsse\Test\AbstractTest {
    /** Bare backticks must gain a backslash, while backticks already preceded by an odd number of backslashes are left alone */
    public function testPrepareAPattern(): void {
        $input = "`..`..\\`..\\\\`..";
        $expected = "`\\`..\\`..\\`..\\\\\\`..`u";
        $actual = Rule::prep($input);
        $this->assertSame($expected, $actual);
    }

    /** A pattern which cannot be compiled must be reported as an invalid pattern */
    public function testPrepareAnInvalidPattern(): void {
        // the expectation has to be registered before the call which throws
        $this->assertException("invalidPattern", "Rule");
        $input = "[";
        Rule::prep($input);
    }
}

View file

@ -51,6 +51,7 @@
<file>cases/Misc/TestContext.php</file>
<file>cases/Misc/TestURL.php</file>
<file>cases/Misc/TestHTTP.php</file>
<file>cases/Misc/TestRule.php</file>
</testsuite>
<testsuite name="User management">
<file>cases/User/TestInternal.php</file>