1
1
Fork 0
mirror of https://code.mensbeam.com/MensBeam/Arsse.git synced 2024-12-22 21:22:40 +00:00

Implemented cleanup of orphaned feeds; fixes #25

This commit is contained in:
J. King 2017-08-02 18:27:04 -04:00
parent 0773e21034
commit 3b018c89d1
7 changed files with 121 additions and 49 deletions

View file

@ -74,9 +74,13 @@ class Conf {
public $fetchSizeLimit = 2 * 1024 * 1024; public $fetchSizeLimit = 2 * 1024 * 1024;
/** @var boolean Whether to allow the possibility of fetching full article contents using an item's URL. Whether fetching will actually happen is also governed by a per-feed setting */ /** @var boolean Whether to allow the possibility of fetching full article contents using an item's URL. Whether fetching will actually happen is also governed by a per-feed setting */
public $fetchEnableScraping = true; public $fetchEnableScraping = true;
/** @var string User-Agent string to use when fetching feeds from foreign servers */ /** @var string|null User-Agent string to use when fetching feeds from foreign servers */
public $fetchUserAgentString; public $fetchUserAgentString;
/** @var string Amount of time to keep a feed's articles in the database after all its subscriptions have been deleted, as an ISO 8601 duration (default: 24 hours)
* An empty value disables retention: Database::feedCleanup() then deletes orphaned feeds as soon as it runs
* @see https://en.wikipedia.org/wiki/ISO_8601#Durations */
public $retainFeeds = "PT24H";
/** Creates a new configuration object /** Creates a new configuration object
* @param string $import_file Optional file to read configuration data from * @param string $import_file Optional file to read configuration data from
* @see self::importFile() */ * @see self::importFile() */

View file

@ -147,7 +147,7 @@ class Database {
if(!Arsse::$user->authorize("", __FUNCTION__)) { if(!Arsse::$user->authorize("", __FUNCTION__)) {
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => "global"]); throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => "global"]);
} }
foreach($this->db->prepare("SELECT id from arsse_users")->run() as $user) { foreach($this->db->query("SELECT id from arsse_users") as $user) {
$out[] = $user['id']; $out[] = $user['id'];
} }
} }
@ -501,7 +501,7 @@ class Database {
} }
public function feedListStale(): array { public function feedListStale(): array {
$feeds = $this->db->prepare("SELECT id from arsse_feeds where next_fetch <= CURRENT_TIMESTAMP")->run()->getAll(); $feeds = $this->db->query("SELECT id from arsse_feeds where next_fetch <= CURRENT_TIMESTAMP")->getAll();
return array_column($feeds,'id'); return array_column($feeds,'id');
} }
@ -624,6 +624,24 @@ class Database {
return true; return true;
} }
/** Reconciles the "orphaned" marker of every feed and deletes feeds orphaned for longer than the retention period
 *
 * A feed counts as orphaned when no row of arsse_subscriptions refers to it. The three
 * statements below are issued between begin() and commit() so they are applied together.
 *
 * The retention period is read from Arsse::$conf->retainFeeds (an ISO 8601 duration); when
 * that setting is empty the cutoff is the current time, so orphaned feeds are deleted immediately.
 *
 * @return bool The result of running the deletion statement, cast to a boolean
 */
public function feedCleanup(): bool {
    $tr = $this->begin();
    // first unmark any feeds which are no longer orphaned
    $this->db->query("UPDATE arsse_feeds set orphaned = null where exists(SELECT id from arsse_subscriptions where feed is arsse_feeds.id)");
    // next mark any newly orphaned feeds with the current date and time
    $this->db->query("UPDATE arsse_feeds set orphaned = CURRENT_TIMESTAMP where orphaned is null and not exists(SELECT id from arsse_subscriptions where feed is arsse_feeds.id)");
    // finally delete feeds that have been orphaned longer than the retention period
    $limit = Date::normalize("now");
    if(Arsse::$conf->retainFeeds) {
        // if a retention period is specified, subtract it from the current time; otherwise feeds are deleted immediately
        // the result of sub() is re-assigned so the cutoff is correct whether the date object is mutable or immutable
        $limit = $limit->sub(new \DateInterval(Arsse::$conf->retainFeeds));
    }
    $out = (bool) $this->db->prepare("DELETE from arsse_feeds where orphaned <= ?", "datetime")->run($limit);
    // commit changes and return
    $tr->commit();
    return $out;
}
public function feedMatchLatest(int $feedID, int $count): Db\Result { public function feedMatchLatest(int $feedID, int $count): Db\Result {
return $this->db->prepare( return $this->db->prepare(
"SELECT id, edited, guid, url_title_hash, url_content_hash, title_content_hash FROM arsse_articles WHERE feed is ? ORDER BY modified desc, id desc limit ?", "SELECT id, edited, guid, url_title_hash, url_content_hash, title_content_hash FROM arsse_articles WHERE feed is ? ORDER BY modified desc, id desc limit ?",
@ -644,43 +662,6 @@ class Database {
)->run($feedID, $ids, $hashesUT, $hashesUC, $hashesTC); )->run($feedID, $ids, $hashesUT, $hashesUC, $hashesTC);
} }
/** Counts the articles a user has starred, considering only feeds the user is subscribed to
 *
 * @param string $user The user whose starred articles are counted; also the subject of the authorization check
 * @param array $context Accepted for interface compatibility; not read by this method
 * @return int The number of matching rows in arsse_marks
 * @throws User\ExceptionAuthz if the authorization check for $user fails
 */
public function articleStarredCount(string $user, array $context = []): int {
    if(!Arsse::$user->authorize($user, __FUNCTION__)) {
        throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
    }
    // cast explicitly, as editionLatest() does, so the declared int return type holds
    // regardless of the scalar type the database driver yields for count(*)
    // (the continuation lines of the query are kept flush left so the SQL text is unchanged)
    return (int) $this->db->prepare(
        "WITH RECURSIVE
user(user) as (SELECT ?),
subscribed_feeds(id,sub) as (SELECT feed,id from arsse_subscriptions join user on user is owner) ".
        "SELECT count(*) from arsse_marks
join user on user is owner
join arsse_articles on arsse_marks.article is arsse_articles.id
join subscribed_feeds on arsse_articles.feed is subscribed_feeds.id
where starred is 1",
        "str"
    )->run($user)->getValue();
}
/** Returns the highest edition identifier (max of arsse_editions.id) available to a user
 *
 * When the context names a subscription, the lookup is limited to that subscription's feed;
 * otherwise it spans every feed the user is subscribed to.
 *
 * @param string $user The user on whose behalf the lookup is made; also the subject of the authorization check
 * @param Context|null $context Optional search context; an empty Context is used when omitted
 * @return int The query result cast to an integer
 * @throws User\ExceptionAuthz if the authorization check for $user fails
 */
public function editionLatest(string $user, Context $context = null): int {
    $authorized = Arsse::$user->authorize($user, __FUNCTION__);
    if(!$authorized) {
        throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
    }
    $context = $context ?? new Context;
    $query = new Query("SELECT max(arsse_editions.id) from arsse_editions left join arsse_articles on article is arsse_articles.id left join arsse_feeds on arsse_articles.feed is arsse_feeds.id");
    if(!$context->subscription()) {
        // no subscription requested: restrict the search to the user's subscribed feeds by way of CTEs
        $query->setCTE("user(user)", "SELECT ?", "str", $user);
        $query->setCTE("feeds(feed)", "SELECT feed from arsse_subscriptions join user on user is owner", [], [], "join feeds on arsse_articles.feed is feeds.feed");
    } else {
        // a subscription was requested: validate it and filter on its feed with a plain WHERE clause
        $feed = $this->subscriptionValidateId($user, $context->subscription)['feed'];
        $query->setWhere("arsse_feeds.id is ?", "int", $feed);
    }
    $statement = $this->db->prepare($query->getQuery(), $query->getTypes());
    return (int) $statement->run($query->getValues())->getValue();
}
public function articleList(string $user, Context $context = null): Db\Result { public function articleList(string $user, Context $context = null): Db\Result {
if(!Arsse::$user->authorize($user, __FUNCTION__)) { if(!Arsse::$user->authorize($user, __FUNCTION__)) {
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]); throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
@ -897,6 +878,23 @@ class Database {
return (bool) $out; return (bool) $out;
} }
/** Counts the articles a user has starred, considering only feeds the user is subscribed to
 *
 * @param string $user The user whose starred articles are counted; also the subject of the authorization check
 * @param array $context Accepted for interface compatibility; not read by this method
 * @return int The number of matching rows in arsse_marks
 * @throws User\ExceptionAuthz if the authorization check for $user fails
 */
public function articleStarredCount(string $user, array $context = []): int {
    if(!Arsse::$user->authorize($user, __FUNCTION__)) {
        throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
    }
    // cast explicitly, as editionLatest() does, so the declared int return type holds
    // regardless of the scalar type the database driver yields for count(*)
    // (the continuation lines of the query are kept flush left so the SQL text is unchanged)
    return (int) $this->db->prepare(
        "WITH RECURSIVE
user(user) as (SELECT ?),
subscribed_feeds(id,sub) as (SELECT feed,id from arsse_subscriptions join user on user is owner) ".
        "SELECT count(*) from arsse_marks
join user on user is owner
join arsse_articles on arsse_marks.article is arsse_articles.id
join subscribed_feeds on arsse_articles.feed is subscribed_feeds.id
where starred is 1",
        "str"
    )->run($user)->getValue();
}
protected function articleValidateId(string $user, int $id): array { protected function articleValidateId(string $user, int $id): array {
$out = $this->db->prepare( $out = $this->db->prepare(
"SELECT "SELECT
@ -934,4 +932,24 @@ class Database {
} }
return $out; return $out;
} }
/** Returns the highest edition identifier (max of arsse_editions.id) available to a user
 *
 * When the context names a subscription, the lookup is limited to that subscription's feed;
 * otherwise it spans every feed the user is subscribed to.
 *
 * @param string $user The user on whose behalf the lookup is made; also the subject of the authorization check
 * @param Context|null $context Optional search context; an empty Context is used when omitted
 * @return int The query result cast to an integer
 * @throws User\ExceptionAuthz if the authorization check for $user fails
 */
public function editionLatest(string $user, Context $context = null): int {
    $authorized = Arsse::$user->authorize($user, __FUNCTION__);
    if(!$authorized) {
        throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
    }
    $context = $context ?? new Context;
    $query = new Query("SELECT max(arsse_editions.id) from arsse_editions left join arsse_articles on article is arsse_articles.id left join arsse_feeds on arsse_articles.feed is arsse_feeds.id");
    if(!$context->subscription()) {
        // no subscription requested: restrict the search to the user's subscribed feeds by way of CTEs
        $query->setCTE("user(user)", "SELECT ?", "str", $user);
        $query->setCTE("feeds(feed)", "SELECT feed from arsse_subscriptions join user on user is owner", [], [], "join feeds on arsse_articles.feed is feeds.feed");
    } else {
        // a subscription was requested: validate it and filter on its feed with a plain WHERE clause
        $feed = $this->subscriptionValidateId($user, $context->subscription)['feed'];
        $query->setWhere("arsse_feeds.id is ?", "int", $feed);
    }
    $statement = $this->db->prepare($query->getQuery(), $query->getTypes());
    return (int) $statement->run($query->getValues())->getValue();
}
} }

View file

@ -3,6 +3,7 @@ declare(strict_types=1);
namespace JKingWeb\Arsse\REST\NextCloudNews; namespace JKingWeb\Arsse\REST\NextCloudNews;
use JKingWeb\Arsse\Arsse; use JKingWeb\Arsse\Arsse;
use JKingWeb\Arsse\User; use JKingWeb\Arsse\User;
use JKingWeb\Arsse\Service;
use JKingWeb\Arsse\Misc\Context; use JKingWeb\Arsse\Misc\Context;
use JKingWeb\Arsse\AbstractException; use JKingWeb\Arsse\AbstractException;
use JKingWeb\Arsse\Db\ExceptionInput; use JKingWeb\Arsse\Db\ExceptionInput;
@ -658,7 +659,7 @@ class V1_2 extends \JKingWeb\Arsse\REST\AbstractHandler {
if(Arsse::$user->rightsGet(Arsse::$user->id)==User::RIGHTS_NONE) { if(Arsse::$user->rightsGet(Arsse::$user->id)==User::RIGHTS_NONE) {
return new Response(403); return new Response(403);
} }
// FIXME: stub Service::cleanupPre();
return new Response(204); return new Response(204);
} }
@ -684,7 +685,7 @@ class V1_2 extends \JKingWeb\Arsse\REST\AbstractHandler {
'version' => self::VERSION, 'version' => self::VERSION,
'arsse_version' => \JKingWeb\Arsse\VERSION, 'arsse_version' => \JKingWeb\Arsse\VERSION,
'warnings' => [ 'warnings' => [
'improperlyConfiguredCron' => !\JKingWeb\Arsse\Service::hasCheckedIn(), 'improperlyConfiguredCron' => !Service::hasCheckedIn(),
] ]
]); ]);
} }

View file

@ -82,8 +82,8 @@ class Service {
} }
static function cleanupPre(): bool { static function cleanupPre(): bool {
// TODO: stub // mark unsubscribed feeds as orphaned and delete orphaned feeds that are beyond their retention period
return true; return Arsse::$db->feedCleanup();
} }
static function cleanupPost():bool { static function cleanupPost():bool {

View file

@ -43,6 +43,7 @@ create table arsse_feeds(
updated text, -- time at which the feed was last fetched updated text, -- time at which the feed was last fetched
modified text, -- time at which the feed last actually changed modified text, -- time at which the feed last actually changed
next_fetch text, -- time at which the feed should next be fetched next_fetch text, -- time at which the feed should next be fetched
orphaned text, -- time at which the feed last had no subscriptions
etag text not null default '', -- HTTP ETag hash used for cache validation, changes each time the content changes etag text not null default '', -- HTTP ETag hash used for cache validation, changes each time the content changes
err_count integer not null default 0, -- count of successive times update resulted in error since last successful update err_count integer not null default 0, -- count of successive times update resulted in error since last successful update
err_msg text, -- last error message err_msg text, -- last error message

View file

@ -799,4 +799,15 @@ class TestNCNV1_2 extends Test\AbstractTest {
$exp = new Response(200, $arr1); $exp = new Response(200, $arr1);
$this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/status"))); $this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/status")));
} }
/** Checks that GET /cleanup/before-update runs the feed cleanup and answers 204, and that it answers 403 once the user's rights are stubbed to 0 */
function testCleanUpBeforeUpdate() {
    // with the default fixture the request is accepted and the database cleanup is invoked
    Phake::when(Arsse::$db)->feedCleanup()->thenReturn(true);
    $granted = $this->h->dispatch(new Request("GET", "/cleanup/before-update"));
    $this->assertEquals(new Response(204), $granted);
    Phake::verify(Arsse::$db)->feedCleanup();
    // performing a cleanup when not an admin fails
    Phake::when(Arsse::$user)->rightsGet->thenReturn(0);
    $denied = $this->h->dispatch(new Request("GET", "/cleanup/before-update"));
    $this->assertEquals(new Response(403), $denied);
}
} }

View file

@ -33,6 +33,8 @@ trait SeriesFeed {
$past = gmdate("Y-m-d H:i:s",strtotime("now - 1 minute")); $past = gmdate("Y-m-d H:i:s",strtotime("now - 1 minute"));
$future = gmdate("Y-m-d H:i:s",strtotime("now + 1 minute")); $future = gmdate("Y-m-d H:i:s",strtotime("now + 1 minute"));
$now = gmdate("Y-m-d H:i:s",strtotime("now")); $now = gmdate("Y-m-d H:i:s",strtotime("now"));
$yesterday = gmdate("Y-m-d H:i:s",strtotime("now - 1 day"));
$longago = gmdate("Y-m-d H:i:s",strtotime("now - 2 days"));
$this->data = [ $this->data = [
'arsse_users' => [ 'arsse_users' => [
'columns' => [ 'columns' => [
@ -55,13 +57,36 @@ trait SeriesFeed {
'err_msg' => "str", 'err_msg' => "str",
'modified' => "datetime", 'modified' => "datetime",
'next_fetch' => "datetime", 'next_fetch' => "datetime",
'orphaned' => "datetime",
], ],
'rows' => [ 'rows' => [
[1,"http://localhost:8000/Feed/Matching/3","Ook",0,"",$past,$past], // feeds for update testing
[2,"http://localhost:8000/Feed/Matching/1","Eek",5,"There was an error last time",$past,$future], [1,"http://localhost:8000/Feed/Matching/3","Ook",0,"",$past,$past,null],
[3,"http://localhost:8000/Feed/Fetching/Error?code=404","Ack",0,"",$past,$now], [2,"http://localhost:8000/Feed/Matching/1","Eek",5,"There was an error last time",$past,$future,null],
[4,"http://localhost:8000/Feed/NextFetch/NotModified?t=".time(),"Ooook",0,"",$past,$past], [3,"http://localhost:8000/Feed/Fetching/Error?code=404","Ack",0,"",$past,$now,null],
[5,"http://localhost:8000/Feed/Parsing/Valid","Ooook",0,"",$past,$future], [4,"http://localhost:8000/Feed/NextFetch/NotModified?t=".time(),"Ooook",0,"",$past,$past,null],
[5,"http://localhost:8000/Feed/Parsing/Valid","Ooook",0,"",$past,$future,null],
// feeds for cleanup testing
[6,"http://example.com/1","",0,"",$now,$future,$longago],
[7,"http://example.com/2","",0,"",$now,$future,$yesterday],
[8,"http://example.com/3","",0,"",$now,$future,null],
[9,"http://example.com/4","",0,"",$now,$future,$past],
]
],
'arsse_subscriptions' => [
'columns' => [
'owner' => "str",
'feed' => "int",
],
'rows' => [
// the first five feeds need at least one subscription so they are not involved in the cleanup test
['john.doe@example.com',1],
['john.doe@example.com',2],
['john.doe@example.com',3],
['john.doe@example.com',4],
['john.doe@example.com',5],
// one feed previously marked for deletion has a subscription again, and so should not be deleted
['jane.doe@example.com',6],
] ]
], ],
'arsse_articles' => [ 'arsse_articles' => [
@ -235,4 +260,16 @@ trait SeriesFeed {
Arsse::$db->feedUpdate(4); Arsse::$db->feedUpdate(4);
$this->assertSame([1], Arsse::$db->feedListStale()); $this->assertSame([1], Arsse::$db->feedListStale());
} }
/** Runs the feed cleanup against the fixture and compares the resulting id/orphaned columns of arsse_feeds with the expected state */
function testHandleOrphanedFeeds() {
    Arsse::$db->feedCleanup();
    $now = gmdate("Y-m-d H:i:s");
    $expected = $this->primeExpectations($this->data, [
        'arsse_feeds' => ["id","orphaned"]
    ]);
    // NOTE(review): the row offsets below assume primeExpectations() keeps the fixture's row order - confirm
    // offset 6 (feed 7) was orphaned a day ago and has no subscription, so it should have been deleted
    unset($expected['arsse_feeds']['rows'][6]);
    // offset 5 (feed 6) has a subscription again, so its marker should have been cleared
    $expected['arsse_feeds']['rows'][5][1] = null;
    // offset 7 (feed 8) had no marker and no subscription, so it should have been marked just now
    $expected['arsse_feeds']['rows'][7][1] = $now;
    $this->compareExpectations($expected);
}
} }