mirror of
https://code.mensbeam.com/MensBeam/Arsse.git
synced 2024-12-22 13:12:41 +00:00
Partial implementation of filter rule handling
This commit is contained in:
parent
7e17332714
commit
ffc5579a7a
7 changed files with 90 additions and 11 deletions
|
@ -100,6 +100,7 @@ abstract class AbstractException extends \Exception {
|
||||||
"ImportExport/Exception.invalidFolderName" => 10613,
|
"ImportExport/Exception.invalidFolderName" => 10613,
|
||||||
"ImportExport/Exception.invalidFolderCopy" => 10614,
|
"ImportExport/Exception.invalidFolderCopy" => 10614,
|
||||||
"ImportExport/Exception.invalidTagName" => 10615,
|
"ImportExport/Exception.invalidTagName" => 10615,
|
||||||
|
"Rule/Exception.invalidPattern" => 10701,
|
||||||
];
|
];
|
||||||
|
|
||||||
public function __construct(string $msgID = "", $vars = null, \Throwable $e = null) {
|
public function __construct(string $msgID = "", $vars = null, \Throwable $e = null) {
|
||||||
|
|
35
lib/Feed.php
35
lib/Feed.php
|
@ -79,10 +79,14 @@ class Feed {
|
||||||
// we only really care if articles have been modified; if there are no new articles, act as if the feed is unchanged
|
// we only really care if articles have been modified; if there are no new articles, act as if the feed is unchanged
|
||||||
if (!sizeof($this->newItems) && !sizeof($this->changedItems)) {
|
if (!sizeof($this->newItems) && !sizeof($this->changedItems)) {
|
||||||
$this->modified = false;
|
$this->modified = false;
|
||||||
}
|
} else {
|
||||||
// if requested, scrape full content for any new and changed items
|
if ($feedID) {
|
||||||
if ($scrape) {
|
$this->computeFilterRules($feedID);
|
||||||
$this->scrape();
|
}
|
||||||
|
// if requested, scrape full content for any new and changed items
|
||||||
|
if ($scrape) {
|
||||||
|
$this->scrape();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// compute the time at which the feed should next be fetched
|
// compute the time at which the feed should next be fetched
|
||||||
|
@ -119,7 +123,7 @@ class Feed {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parse(): bool {
|
protected function parse(): void {
|
||||||
try {
|
try {
|
||||||
$feed = $this->resource->reader->getParser(
|
$feed = $this->resource->reader->getParser(
|
||||||
$this->resource->getUrl(),
|
$this->resource->getUrl(),
|
||||||
|
@ -222,7 +226,6 @@ class Feed {
|
||||||
sort($f->categories);
|
sort($f->categories);
|
||||||
}
|
}
|
||||||
$this->data = $feed;
|
$this->data = $feed;
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function deduplicateItems(array $items): array {
|
protected function deduplicateItems(array $items): array {
|
||||||
|
@ -269,13 +272,13 @@ class Feed {
|
||||||
return $out;
|
return $out;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function matchToDatabase(int $feedID = null): bool {
|
protected function matchToDatabase(int $feedID = null): void {
|
||||||
// first perform deduplication on items
|
// first perform deduplication on items
|
||||||
$items = $this->deduplicateItems($this->data->items);
|
$items = $this->deduplicateItems($this->data->items);
|
||||||
// if we haven't been given a database feed ID to check against, all items are new
|
// if we haven't been given a database feed ID to check against, all items are new
|
||||||
if (is_null($feedID)) {
|
if (is_null($feedID)) {
|
||||||
$this->newItems = $items;
|
$this->newItems = $items;
|
||||||
return true;
|
return;
|
||||||
}
|
}
|
||||||
// get as many of the latest articles in the database as there are in the feed
|
// get as many of the latest articles in the database as there are in the feed
|
||||||
$articles = Arsse::$db->feedMatchLatest($feedID, sizeof($items))->getAll();
|
$articles = Arsse::$db->feedMatchLatest($feedID, sizeof($items))->getAll();
|
||||||
|
@ -303,7 +306,6 @@ class Feed {
|
||||||
// merge the two change-lists, preserving keys
|
// merge the two change-lists, preserving keys
|
||||||
$this->changedItems = array_combine(array_merge(array_keys($this->changedItems), array_keys($changed)), array_merge($this->changedItems, $changed));
|
$this->changedItems = array_combine(array_merge(array_keys($this->changedItems), array_keys($changed)), array_merge($this->changedItems, $changed));
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function matchItems(array $items, array $articles): array {
|
protected function matchItems(array $items, array $articles): array {
|
||||||
|
@ -438,7 +440,7 @@ class Feed {
|
||||||
return $dates;
|
return $dates;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function scrape(): bool {
|
protected function scrape(): void {
|
||||||
$scraper = new Scraper(self::configure());
|
$scraper = new Scraper(self::configure());
|
||||||
foreach (array_merge($this->newItems, $this->changedItems) as $item) {
|
foreach (array_merge($this->newItems, $this->changedItems) as $item) {
|
||||||
$scraper->setUrl($item->url);
|
$scraper->setUrl($item->url);
|
||||||
|
@ -447,6 +449,17 @@ class Feed {
|
||||||
$item->content = $scraper->getFilteredContent();
|
$item->content = $scraper->getFilteredContent();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
}
|
||||||
|
|
||||||
|
protected function computeFilterRules(int $feedID): void {
|
||||||
|
return;
|
||||||
|
$rules = Arsse::$db->feedRulesGet($feedID);
|
||||||
|
foreach ($rules as $r) {
|
||||||
|
$keep = "";
|
||||||
|
$block = "";
|
||||||
|
if (strlen($r['keep'])) {
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
10
lib/Rule/Exception.php
Normal file
10
lib/Rule/Exception.php
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
<?php
|
||||||
|
/** @license MIT
|
||||||
|
* Copyright 2017 J. King, Dustin Wilson et al.
|
||||||
|
* See LICENSE and AUTHORS files for details */
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
namespace JKingWeb\Arsse\Rule;
|
||||||
|
|
||||||
|
/** Exception type for filter rule errors
 *
 * Thrown by Rule::prep() with the message identifier "invalidPattern"
 * when a supplied pattern cannot be compiled as a regular expression
 */
class Exception extends \JKingWeb\Arsse\AbstractException {
|
||||||
|
}
|
31
lib/Rule/Rule.php
Normal file
31
lib/Rule/Rule.php
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
<?php
|
||||||
|
/** @license MIT
|
||||||
|
* Copyright 2017 J. King, Dustin Wilson et al.
|
||||||
|
* See LICENSE and AUTHORS files for details */
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
namespace JKingWeb\Arsse\Rule;
|
||||||
|
|
||||||
|
/** Helpers for turning bare filter-rule patterns into usable PCRE patterns */
abstract class Rule {
    /** Wraps a bare regular expression in backtick delimiters and validates it
     *
     * Any backtick in the input which is not already escaped (that is, one
     * preceded by an even number of backslashes, including none) is escaped so
     * that it cannot terminate the pattern early. The "u" (UTF-8) flag is
     * appended after the closing delimiter.
     *
     * @param string $pattern The regular expression, without delimiters or flags
     * @return string The delimited pattern, ready to be passed to the preg_* functions
     * @throws Exception With message identifier "invalidPattern" if the delimited pattern does not compile
     */
    public static function prep(string $pattern): string {
        if (preg_match_all("<`>", $pattern, $m, \PREG_OFFSET_CAPTURE)) {
            // where necessary escape our chosen delimiter (backtick); working in
            // reverse order keeps the offsets of earlier matches valid as we insert
            foreach (array_reverse($m[0]) as [, $pos]) {
                // count the number of backslashes immediately preceding the delimiter character
                $count = 0;
                for ($p = $pos - 1; $p >= 0 && $pattern[$p] === "\\"; $p--) {
                    $count++;
                }
                // if the number is even (including zero) the backtick is unescaped: add a backslash
                if ($count % 2 === 0) {
                    $pattern = substr($pattern, 0, $pos)."\\".substr($pattern, $pos);
                }
            }
        }
        // add the delimiters and test the pattern
        $pattern = "`$pattern`u";
        // a compile failure emits a warning; the "@" operator would still invoke
        // any registered error handler (which may throw), so swallow the warning
        // with a scoped handler instead and rely on the return value alone
        set_error_handler(static function(): bool {
            return true;
        });
        try {
            $valid = preg_match($pattern, "") !== false;
        } finally {
            restore_error_handler();
        }
        if (!$valid) {
            throw new Exception("invalidPattern");
        }
        return $pattern;
    }
}
|
|
@ -194,4 +194,5 @@ return [
|
||||||
'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidFolderName' => 'Input data contains an invalid folder name',
|
'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidFolderName' => 'Input data contains an invalid folder name',
|
||||||
'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidFolderCopy' => 'Input data contains multiple folders of the same name under the same parent',
|
'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidFolderCopy' => 'Input data contains multiple folders of the same name under the same parent',
|
||||||
'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidTagName' => 'Input data contains an invalid tag name',
|
'Exception.JKingWeb/Arsse/ImportExport/Exception.invalidTagName' => 'Input data contains an invalid tag name',
|
||||||
|
'Exception.JKingWeb/Arsse/Rule/Exception.invalidPattern' => 'Specified rule pattern is invalid'
|
||||||
];
|
];
|
||||||
|
|
22
tests/cases/Misc/TestRule.php
Normal file
22
tests/cases/Misc/TestRule.php
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
<?php
|
||||||
|
/** @license MIT
|
||||||
|
* Copyright 2017 J. King, Dustin Wilson et al.
|
||||||
|
* See LICENSE and AUTHORS files for details */
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
namespace JKingWeb\Arsse\TestCase\Misc;
|
||||||
|
|
||||||
|
use JKingWeb\Arsse\Rule\Rule;
|
||||||
|
|
||||||
|
/** @covers \JKingWeb\Arsse\Rule\Rule */
class TestRule extends \JKingWeb\Arsse\Test\AbstractTest {
    public function testPrepareAPattern(): void {
        // the input mixes bare backticks, a backtick behind one backslash,
        // and a backtick behind two backslashes
        $input = "`..`..\\`..\\\\`..";
        $expected = "`\\`..\\`..\\`..\\\\\\`..`u";
        $this->assertSame($expected, Rule::prep($input));
    }

    public function testPrepareAnInvalidPattern(): void {
        // an unterminated character class cannot compile
        $this->assertException("invalidPattern", "Rule");
        Rule::prep("[");
    }
}
|
|
@ -51,6 +51,7 @@
|
||||||
<file>cases/Misc/TestContext.php</file>
|
<file>cases/Misc/TestContext.php</file>
|
||||||
<file>cases/Misc/TestURL.php</file>
|
<file>cases/Misc/TestURL.php</file>
|
||||||
<file>cases/Misc/TestHTTP.php</file>
|
<file>cases/Misc/TestHTTP.php</file>
|
||||||
|
<file>cases/Misc/TestRule.php</file>
|
||||||
</testsuite>
|
</testsuite>
|
||||||
<testsuite name="User management">
|
<testsuite name="User management">
|
||||||
<file>cases/User/TestInternal.php</file>
|
<file>cases/User/TestInternal.php</file>
|
||||||
|
|
Loading…
Reference in a new issue