mirror of
https://code.mensbeam.com/MensBeam/Arsse.git
synced 2025-01-08 17:02:41 +00:00
Implemented cleanup of orphaned feeds; fixes #25
This commit is contained in:
parent
0773e21034
commit
3b018c89d1
7 changed files with 121 additions and 49 deletions
|
@ -74,9 +74,13 @@ class Conf {
|
||||||
public $fetchSizeLimit = 2 * 1024 * 1024;
|
public $fetchSizeLimit = 2 * 1024 * 1024;
|
||||||
/** @var boolean Whether to allow the possibility of fetching full article contents using an item's URL. Whether fetching will actually happen is also governed by a per-feed setting */
|
/** @var boolean Whether to allow the possibility of fetching full article contents using an item's URL. Whether fetching will actually happen is also governed by a per-feed setting */
|
||||||
public $fetchEnableScraping = true;
|
public $fetchEnableScraping = true;
|
||||||
/** @var string User-Agent string to use when fetching feeds from foreign servers */
|
/** @var string|null User-Agent string to use when fetching feeds from foreign servers */
|
||||||
public $fetchUserAgentString;
|
public $fetchUserAgentString;
|
||||||
|
|
||||||
|
/** @var string Amount of time to keep a feed's articles in the database after all its subscriptions have been deleted, as an ISO 8601 duration (default: 24 hours)
|
||||||
|
* @see https://en.wikipedia.org/wiki/ISO_8601#Durations */
|
||||||
|
public $retainFeeds = "PT24H";
|
||||||
|
|
||||||
/** Creates a new configuration object
|
/** Creates a new configuration object
|
||||||
* @param string $import_file Optional file to read configuration data from
|
* @param string $import_file Optional file to read configuration data from
|
||||||
* @see self::importFile() */
|
* @see self::importFile() */
|
||||||
|
|
|
@ -147,7 +147,7 @@ class Database {
|
||||||
if(!Arsse::$user->authorize("", __FUNCTION__)) {
|
if(!Arsse::$user->authorize("", __FUNCTION__)) {
|
||||||
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => "global"]);
|
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => "global"]);
|
||||||
}
|
}
|
||||||
foreach($this->db->prepare("SELECT id from arsse_users")->run() as $user) {
|
foreach($this->db->query("SELECT id from arsse_users") as $user) {
|
||||||
$out[] = $user['id'];
|
$out[] = $user['id'];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -501,7 +501,7 @@ class Database {
|
||||||
}
|
}
|
||||||
|
|
||||||
public function feedListStale(): array {
|
public function feedListStale(): array {
|
||||||
$feeds = $this->db->prepare("SELECT id from arsse_feeds where next_fetch <= CURRENT_TIMESTAMP")->run()->getAll();
|
$feeds = $this->db->query("SELECT id from arsse_feeds where next_fetch <= CURRENT_TIMESTAMP")->getAll();
|
||||||
return array_column($feeds,'id');
|
return array_column($feeds,'id');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -624,6 +624,24 @@ class Database {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function feedCleanup(): bool {
|
||||||
|
$tr = $this->begin();
|
||||||
|
// first unmark any feeds which are no longer orphaned
|
||||||
|
$this->db->query("UPDATE arsse_feeds set orphaned = null where exists(SELECT id from arsse_subscriptions where feed is arsse_feeds.id)");
|
||||||
|
// next mark any newly orphaned feeds with the current date and time
|
||||||
|
$this->db->query("UPDATE arsse_feeds set orphaned = CURRENT_TIMESTAMP where orphaned is null and not exists(SELECT id from arsse_subscriptions where feed is arsse_feeds.id)");
|
||||||
|
// finally delete feeds that have been orphaned longer than the retention period
|
||||||
|
$limit = Date::normalize("now");
|
||||||
|
if(Arsse::$conf->retainFeeds) {
|
||||||
|
// if there is a retention period specified, compute it; otherwise feed are deleted immediatelty
|
||||||
|
$limit->sub(new \DateInterval(Arsse::$conf->retainFeeds));
|
||||||
|
}
|
||||||
|
$out = (bool) $this->db->prepare("DELETE from arsse_feeds where orphaned <= ?", "datetime")->run($limit);
|
||||||
|
// commit changes and return
|
||||||
|
$tr->commit();
|
||||||
|
return $out;
|
||||||
|
}
|
||||||
|
|
||||||
public function feedMatchLatest(int $feedID, int $count): Db\Result {
|
public function feedMatchLatest(int $feedID, int $count): Db\Result {
|
||||||
return $this->db->prepare(
|
return $this->db->prepare(
|
||||||
"SELECT id, edited, guid, url_title_hash, url_content_hash, title_content_hash FROM arsse_articles WHERE feed is ? ORDER BY modified desc, id desc limit ?",
|
"SELECT id, edited, guid, url_title_hash, url_content_hash, title_content_hash FROM arsse_articles WHERE feed is ? ORDER BY modified desc, id desc limit ?",
|
||||||
|
@ -644,43 +662,6 @@ class Database {
|
||||||
)->run($feedID, $ids, $hashesUT, $hashesUC, $hashesTC);
|
)->run($feedID, $ids, $hashesUT, $hashesUC, $hashesTC);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function articleStarredCount(string $user, array $context = []): int {
|
|
||||||
if(!Arsse::$user->authorize($user, __FUNCTION__)) {
|
|
||||||
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
|
|
||||||
}
|
|
||||||
return $this->db->prepare(
|
|
||||||
"WITH RECURSIVE
|
|
||||||
user(user) as (SELECT ?),
|
|
||||||
subscribed_feeds(id,sub) as (SELECT feed,id from arsse_subscriptions join user on user is owner) ".
|
|
||||||
"SELECT count(*) from arsse_marks
|
|
||||||
join user on user is owner
|
|
||||||
join arsse_articles on arsse_marks.article is arsse_articles.id
|
|
||||||
join subscribed_feeds on arsse_articles.feed is subscribed_feeds.id
|
|
||||||
where starred is 1",
|
|
||||||
"str"
|
|
||||||
)->run($user)->getValue();
|
|
||||||
}
|
|
||||||
|
|
||||||
public function editionLatest(string $user, Context $context = null): int {
|
|
||||||
if(!Arsse::$user->authorize($user, __FUNCTION__)) {
|
|
||||||
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
|
|
||||||
}
|
|
||||||
if(!$context) {
|
|
||||||
$context = new Context;
|
|
||||||
}
|
|
||||||
$q = new Query("SELECT max(arsse_editions.id) from arsse_editions left join arsse_articles on article is arsse_articles.id left join arsse_feeds on arsse_articles.feed is arsse_feeds.id");
|
|
||||||
if($context->subscription()) {
|
|
||||||
// if a subscription is specified, make sure it exists
|
|
||||||
$id = $this->subscriptionValidateId($user, $context->subscription)['feed'];
|
|
||||||
// a simple WHERE clause is required here
|
|
||||||
$q->setWhere("arsse_feeds.id is ?", "int", $id);
|
|
||||||
} else {
|
|
||||||
$q->setCTE("user(user)", "SELECT ?", "str", $user);
|
|
||||||
$q->setCTE("feeds(feed)", "SELECT feed from arsse_subscriptions join user on user is owner", [], [], "join feeds on arsse_articles.feed is feeds.feed");
|
|
||||||
}
|
|
||||||
return (int) $this->db->prepare($q->getQuery(), $q->getTypes())->run($q->getValues())->getValue();
|
|
||||||
}
|
|
||||||
|
|
||||||
public function articleList(string $user, Context $context = null): Db\Result {
|
public function articleList(string $user, Context $context = null): Db\Result {
|
||||||
if(!Arsse::$user->authorize($user, __FUNCTION__)) {
|
if(!Arsse::$user->authorize($user, __FUNCTION__)) {
|
||||||
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
|
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
|
||||||
|
@ -897,6 +878,23 @@ class Database {
|
||||||
return (bool) $out;
|
return (bool) $out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function articleStarredCount(string $user, array $context = []): int {
|
||||||
|
if(!Arsse::$user->authorize($user, __FUNCTION__)) {
|
||||||
|
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
|
||||||
|
}
|
||||||
|
return $this->db->prepare(
|
||||||
|
"WITH RECURSIVE
|
||||||
|
user(user) as (SELECT ?),
|
||||||
|
subscribed_feeds(id,sub) as (SELECT feed,id from arsse_subscriptions join user on user is owner) ".
|
||||||
|
"SELECT count(*) from arsse_marks
|
||||||
|
join user on user is owner
|
||||||
|
join arsse_articles on arsse_marks.article is arsse_articles.id
|
||||||
|
join subscribed_feeds on arsse_articles.feed is subscribed_feeds.id
|
||||||
|
where starred is 1",
|
||||||
|
"str"
|
||||||
|
)->run($user)->getValue();
|
||||||
|
}
|
||||||
|
|
||||||
protected function articleValidateId(string $user, int $id): array {
|
protected function articleValidateId(string $user, int $id): array {
|
||||||
$out = $this->db->prepare(
|
$out = $this->db->prepare(
|
||||||
"SELECT
|
"SELECT
|
||||||
|
@ -934,4 +932,24 @@ class Database {
|
||||||
}
|
}
|
||||||
return $out;
|
return $out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function editionLatest(string $user, Context $context = null): int {
|
||||||
|
if(!Arsse::$user->authorize($user, __FUNCTION__)) {
|
||||||
|
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
|
||||||
|
}
|
||||||
|
if(!$context) {
|
||||||
|
$context = new Context;
|
||||||
|
}
|
||||||
|
$q = new Query("SELECT max(arsse_editions.id) from arsse_editions left join arsse_articles on article is arsse_articles.id left join arsse_feeds on arsse_articles.feed is arsse_feeds.id");
|
||||||
|
if($context->subscription()) {
|
||||||
|
// if a subscription is specified, make sure it exists
|
||||||
|
$id = $this->subscriptionValidateId($user, $context->subscription)['feed'];
|
||||||
|
// a simple WHERE clause is required here
|
||||||
|
$q->setWhere("arsse_feeds.id is ?", "int", $id);
|
||||||
|
} else {
|
||||||
|
$q->setCTE("user(user)", "SELECT ?", "str", $user);
|
||||||
|
$q->setCTE("feeds(feed)", "SELECT feed from arsse_subscriptions join user on user is owner", [], [], "join feeds on arsse_articles.feed is feeds.feed");
|
||||||
|
}
|
||||||
|
return (int) $this->db->prepare($q->getQuery(), $q->getTypes())->run($q->getValues())->getValue();
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -3,6 +3,7 @@ declare(strict_types=1);
|
||||||
namespace JKingWeb\Arsse\REST\NextCloudNews;
|
namespace JKingWeb\Arsse\REST\NextCloudNews;
|
||||||
use JKingWeb\Arsse\Arsse;
|
use JKingWeb\Arsse\Arsse;
|
||||||
use JKingWeb\Arsse\User;
|
use JKingWeb\Arsse\User;
|
||||||
|
use JKingWeb\Arsse\Service;
|
||||||
use JKingWeb\Arsse\Misc\Context;
|
use JKingWeb\Arsse\Misc\Context;
|
||||||
use JKingWeb\Arsse\AbstractException;
|
use JKingWeb\Arsse\AbstractException;
|
||||||
use JKingWeb\Arsse\Db\ExceptionInput;
|
use JKingWeb\Arsse\Db\ExceptionInput;
|
||||||
|
@ -658,7 +659,7 @@ class V1_2 extends \JKingWeb\Arsse\REST\AbstractHandler {
|
||||||
if(Arsse::$user->rightsGet(Arsse::$user->id)==User::RIGHTS_NONE) {
|
if(Arsse::$user->rightsGet(Arsse::$user->id)==User::RIGHTS_NONE) {
|
||||||
return new Response(403);
|
return new Response(403);
|
||||||
}
|
}
|
||||||
// FIXME: stub
|
Service::cleanupPre();
|
||||||
return new Response(204);
|
return new Response(204);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -684,7 +685,7 @@ class V1_2 extends \JKingWeb\Arsse\REST\AbstractHandler {
|
||||||
'version' => self::VERSION,
|
'version' => self::VERSION,
|
||||||
'arsse_version' => \JKingWeb\Arsse\VERSION,
|
'arsse_version' => \JKingWeb\Arsse\VERSION,
|
||||||
'warnings' => [
|
'warnings' => [
|
||||||
'improperlyConfiguredCron' => !\JKingWeb\Arsse\Service::hasCheckedIn(),
|
'improperlyConfiguredCron' => !Service::hasCheckedIn(),
|
||||||
]
|
]
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
|
@ -82,8 +82,8 @@ class Service {
|
||||||
}
|
}
|
||||||
|
|
||||||
static function cleanupPre(): bool {
|
static function cleanupPre(): bool {
|
||||||
// TODO: stub
|
// mark unsubscribed feeds as orphaned and delete orphaned feeds that are beyond their retention period
|
||||||
return true;
|
return Arsse::$db->feedCleanup();
|
||||||
}
|
}
|
||||||
|
|
||||||
static function cleanupPost():bool {
|
static function cleanupPost():bool {
|
||||||
|
|
|
@ -43,6 +43,7 @@ create table arsse_feeds(
|
||||||
updated text, -- time at which the feed was last fetched
|
updated text, -- time at which the feed was last fetched
|
||||||
modified text, -- time at which the feed last actually changed
|
modified text, -- time at which the feed last actually changed
|
||||||
next_fetch text, -- time at which the feed should next be fetched
|
next_fetch text, -- time at which the feed should next be fetched
|
||||||
|
orphaned text, -- time at which the feed last had no subscriptions
|
||||||
etag text not null default '', -- HTTP ETag hash used for cache validation, changes each time the content changes
|
etag text not null default '', -- HTTP ETag hash used for cache validation, changes each time the content changes
|
||||||
err_count integer not null default 0, -- count of successive times update resulted in error since last successful update
|
err_count integer not null default 0, -- count of successive times update resulted in error since last successful update
|
||||||
err_msg text, -- last error message
|
err_msg text, -- last error message
|
||||||
|
|
|
@ -799,4 +799,15 @@ class TestNCNV1_2 extends Test\AbstractTest {
|
||||||
$exp = new Response(200, $arr1);
|
$exp = new Response(200, $arr1);
|
||||||
$this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/status")));
|
$this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/status")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function testCleanUpBeforeUpdate() {
|
||||||
|
Phake::when(Arsse::$db)->feedCleanup()->thenReturn(true);
|
||||||
|
$exp = new Response(204);
|
||||||
|
$this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/cleanup/before-update")));
|
||||||
|
Phake::verify(Arsse::$db)->feedCleanup();
|
||||||
|
// performing a cleanup when not an admin fails
|
||||||
|
Phake::when(Arsse::$user)->rightsGet->thenReturn(0);
|
||||||
|
$exp = new Response(403);
|
||||||
|
$this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/cleanup/before-update")));
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -33,6 +33,8 @@ trait SeriesFeed {
|
||||||
$past = gmdate("Y-m-d H:i:s",strtotime("now - 1 minute"));
|
$past = gmdate("Y-m-d H:i:s",strtotime("now - 1 minute"));
|
||||||
$future = gmdate("Y-m-d H:i:s",strtotime("now + 1 minute"));
|
$future = gmdate("Y-m-d H:i:s",strtotime("now + 1 minute"));
|
||||||
$now = gmdate("Y-m-d H:i:s",strtotime("now"));
|
$now = gmdate("Y-m-d H:i:s",strtotime("now"));
|
||||||
|
$yesterday = gmdate("Y-m-d H:i:s",strtotime("now - 1 day"));
|
||||||
|
$longago = gmdate("Y-m-d H:i:s",strtotime("now - 2 days"));
|
||||||
$this->data = [
|
$this->data = [
|
||||||
'arsse_users' => [
|
'arsse_users' => [
|
||||||
'columns' => [
|
'columns' => [
|
||||||
|
@ -55,13 +57,36 @@ trait SeriesFeed {
|
||||||
'err_msg' => "str",
|
'err_msg' => "str",
|
||||||
'modified' => "datetime",
|
'modified' => "datetime",
|
||||||
'next_fetch' => "datetime",
|
'next_fetch' => "datetime",
|
||||||
|
'orphaned' => "datetime",
|
||||||
],
|
],
|
||||||
'rows' => [
|
'rows' => [
|
||||||
[1,"http://localhost:8000/Feed/Matching/3","Ook",0,"",$past,$past],
|
// feeds for update testing
|
||||||
[2,"http://localhost:8000/Feed/Matching/1","Eek",5,"There was an error last time",$past,$future],
|
[1,"http://localhost:8000/Feed/Matching/3","Ook",0,"",$past,$past,null],
|
||||||
[3,"http://localhost:8000/Feed/Fetching/Error?code=404","Ack",0,"",$past,$now],
|
[2,"http://localhost:8000/Feed/Matching/1","Eek",5,"There was an error last time",$past,$future,null],
|
||||||
[4,"http://localhost:8000/Feed/NextFetch/NotModified?t=".time(),"Ooook",0,"",$past,$past],
|
[3,"http://localhost:8000/Feed/Fetching/Error?code=404","Ack",0,"",$past,$now,null],
|
||||||
[5,"http://localhost:8000/Feed/Parsing/Valid","Ooook",0,"",$past,$future],
|
[4,"http://localhost:8000/Feed/NextFetch/NotModified?t=".time(),"Ooook",0,"",$past,$past,null],
|
||||||
|
[5,"http://localhost:8000/Feed/Parsing/Valid","Ooook",0,"",$past,$future,null],
|
||||||
|
// feeds for cleanup testing
|
||||||
|
[6,"http://example.com/1","",0,"",$now,$future,$longago],
|
||||||
|
[7,"http://example.com/2","",0,"",$now,$future,$yesterday],
|
||||||
|
[8,"http://example.com/3","",0,"",$now,$future,null],
|
||||||
|
[9,"http://example.com/4","",0,"",$now,$future,$past],
|
||||||
|
]
|
||||||
|
],
|
||||||
|
'arsse_subscriptions' => [
|
||||||
|
'columns' => [
|
||||||
|
'owner' => "str",
|
||||||
|
'feed' => "int",
|
||||||
|
],
|
||||||
|
'rows' => [
|
||||||
|
// the first five feeds need at least one subscription so they are not involved in the cleanup test
|
||||||
|
['john.doe@example.com',1],
|
||||||
|
['john.doe@example.com',2],
|
||||||
|
['john.doe@example.com',3],
|
||||||
|
['john.doe@example.com',4],
|
||||||
|
['john.doe@example.com',5],
|
||||||
|
// one feed previously marked for deletion has a subscription again, and so should not be deleted
|
||||||
|
['jane.doe@example.com',6],
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
'arsse_articles' => [
|
'arsse_articles' => [
|
||||||
|
@ -235,4 +260,16 @@ trait SeriesFeed {
|
||||||
Arsse::$db->feedUpdate(4);
|
Arsse::$db->feedUpdate(4);
|
||||||
$this->assertSame([1], Arsse::$db->feedListStale());
|
$this->assertSame([1], Arsse::$db->feedListStale());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function testHandleOrphanedFeeds() {
|
||||||
|
Arsse::$db->feedCleanup();
|
||||||
|
$now = gmdate("Y-m-d H:i:s");
|
||||||
|
$state = $this->primeExpectations($this->data, [
|
||||||
|
'arsse_feeds' => ["id","orphaned"]
|
||||||
|
]);
|
||||||
|
$state['arsse_feeds']['rows'][5][1] = null;
|
||||||
|
unset($state['arsse_feeds']['rows'][6]);
|
||||||
|
$state['arsse_feeds']['rows'][7][1] = $now;
|
||||||
|
$this->compareExpectations($state);
|
||||||
|
}
|
||||||
}
|
}
|
Loading…
Reference in a new issue