From 52104fb6476336017976c32773a6ee0e2cda3589 Mon Sep 17 00:00:00 2001 From: "J. King" Date: Thu, 17 Aug 2017 22:36:15 -0400 Subject: [PATCH] Implement article cleanup; fixes #28 --- lib/Conf.php | 8 +- lib/Database.php | 38 ++++ lib/Misc/Date.php | 18 ++ lib/REST/NextCloudNews/V1_2.php | 2 +- lib/Service.php | 8 +- .../Database/TestDatabaseCleanupSQLite3.php | 10 ++ tests/REST/NextCloudNews/TestNCNV1_2.php | 11 ++ tests/lib/Database/SeriesArticle.php | 13 +- tests/lib/Database/SeriesCleanup.php | 168 ++++++++++++++++++ tests/lib/Database/SeriesFeed.php | 42 +---- tests/lib/Database/SeriesFolder.php | 6 +- tests/lib/Database/SeriesSubscription.php | 6 +- tests/phpunit.xml | 1 + 13 files changed, 273 insertions(+), 58 deletions(-) create mode 100644 tests/Db/SQLite3/Database/TestDatabaseCleanupSQLite3.php create mode 100644 tests/lib/Database/SeriesCleanup.php diff --git a/lib/Conf.php b/lib/Conf.php index caf195af..0bbf9bcc 100644 --- a/lib/Conf.php +++ b/lib/Conf.php @@ -77,9 +77,15 @@ class Conf { /** @var string|null User-Agent string to use when fetching feeds from foreign servers */ public $fetchUserAgentString; - /** @var string Amount of time to keep a feed's articles in the database after all its subscriptions have been deleted, as an ISO 8601 duration (default: 24 hours) + /** @var string Amount of time to keep a feed's articles in the database after all its subscriptions have been deleted, as an ISO 8601 duration (default: 24 hours; empty string for forever) * @see https://en.wikipedia.org/wiki/ISO_8601#Durations */ public $retainFeeds = "PT24H"; + /** @var string Amount of time to keep an unstarred article in the database after it has been marked read by all users, as an ISO 8601 duration (default: 7 days; empty string for forever) + * @see https://en.wikipedia.org/wiki/ISO_8601#Durations */ + public $retainArticlesRead = "P7D"; + /** @var string Amount of time to keep an unstarred article in the database regardless of its read state, as an ISO 8601 duration (default: 21 days; empty string for forever) + * @see https://en.wikipedia.org/wiki/ISO_8601#Durations */ + public $retainArticlesUnread = "P21D"; /** Creates a new configuration object * @param string $import_file Optional file to read configuration data from diff --git a/lib/Database.php b/lib/Database.php index dc45b56a..71782e80 100644 --- a/lib/Database.php +++ b/lib/Database.php @@ -888,6 +888,44 @@ class Database { return $this->db->prepare("SELECT count(*) from arsse_marks where starred is 1 and subscription in (select id from arsse_subscriptions where owner is ?)", "str")->run($user)->getValue(); } + public function articleCleanup(): bool { + $query = $this->db->prepare( + "WITH target_feed(id,subs) as (". + "SELECT + id, (select count(*) from arsse_subscriptions where feed is arsse_feeds.id) as subs + from arsse_feeds where id is ?". + "), excepted_articles(id,edition) as (". + "SELECT + arsse_articles.id, (select max(id) from arsse_editions where article is arsse_articles.id) as edition + from arsse_articles + join target_feed on arsse_articles.feed is target_feed.id + order by edition desc limit ?". + ") ". + "DELETE from arsse_articles where + feed is (select max(id) from target_feed) + and id not in (select id from excepted_articles) + and (select count(*) from arsse_marks where article is arsse_articles.id and starred is 1) is 0 + and ( + coalesce((select max(modified) from arsse_marks where article is arsse_articles.id),modified) <= ? + or ((select max(subs) from target_feed) is (select count(*) from arsse_marks where article is arsse_articles.id and read is 1) and coalesce((select max(modified) from arsse_marks where article is arsse_articles.id),modified) <= ?) + ) + ", "int", "int", "datetime", "datetime" + ); + $limitRead = null; + $limitUnread = null; + if(Arsse::$conf->retainArticlesRead) { + $limitRead = Date::sub(Arsse::$conf->retainArticlesRead); + } + if(Arsse::$conf->retainArticlesUnread) { + $limitUnread = Date::sub(Arsse::$conf->retainArticlesUnread); + } + $feeds = $this->db->query("SELECT id, size from arsse_feeds")->getAll(); + foreach($feeds as $feed) { + $query->run($feed['id'], $feed['size'], $limitUnread, $limitRead); + } + return true; + } + protected function articleValidateId(string $user, int $id): array { $out = $this->db->prepare( "SELECT diff --git a/lib/Misc/Date.php b/lib/Misc/Date.php index 1bb20443..eaf70b5b 100644 --- a/lib/Misc/Date.php +++ b/lib/Misc/Date.php @@ -60,4 +60,22 @@ class Date { $d->setTimestamp($time); return $d; } + + static function add(string $interval, $date = null): \DateTimeInterface { + return self::modify("add", $interval, $date); + } + + static function sub(string $interval, $date = null): \DateTimeInterface { + return self::modify("sub", $interval, $date); + } + + static protected function modify(string $func, string $interval, $date = null): \DateTimeInterface { + $date = self::normalize($date ?? time()); + if($date instanceof \DateTimeImmutable) { + return $date->$func(new \DateInterval($interval)); + } else { + $date->$func(new \DateInterval($interval)); + return $date; + } + } } \ No newline at end of file diff --git a/lib/REST/NextCloudNews/V1_2.php b/lib/REST/NextCloudNews/V1_2.php index 9f1c72be..a9fa7456 100644 --- a/lib/REST/NextCloudNews/V1_2.php +++ b/lib/REST/NextCloudNews/V1_2.php @@ -668,7 +668,7 @@ class V1_2 extends \JKingWeb\Arsse\REST\AbstractHandler { if(Arsse::$user->rightsGet(Arsse::$user->id)==User::RIGHTS_NONE) { return new Response(403); } - // FIXME: stub + Service::cleanupPost(); return new Response(204); } diff --git a/lib/Service.php b/lib/Service.php index e4876578..83f7f3ce 100644 --- a/lib/Service.php +++ b/lib/Service.php @@ -46,9 +46,9 @@ class Service { $this->drv->queue(...$list); $this->drv->exec(); $this->drv->clean(); - static::cleanupPost(); unset($list); } + static::cleanupPost(); $t->add($this->interval); if($loop) { do { @@ -86,8 +86,8 @@ class Service { return Arsse::$db->feedCleanup(); } - static function cleanupPost():bool { - // TODO: stub - return true; + static function cleanupPost(): bool { + // delete old articles, according to configured threasholds + return Arsse::$db->articleCleanup(); } } \ No newline at end of file diff --git a/tests/Db/SQLite3/Database/TestDatabaseCleanupSQLite3.php b/tests/Db/SQLite3/Database/TestDatabaseCleanupSQLite3.php new file mode 100644 index 00000000..b548c48d --- /dev/null +++ b/tests/Db/SQLite3/Database/TestDatabaseCleanupSQLite3.php @@ -0,0 +1,10 @@ + */ +class TestDatabaseCleanupSQLite3 extends Test\AbstractTest { + use Test\Database\Setup; + use Test\Database\DriverSQLite3; + use Test\Database\SeriesCleanup; +} \ No newline at end of file diff --git a/tests/REST/NextCloudNews/TestNCNV1_2.php b/tests/REST/NextCloudNews/TestNCNV1_2.php index bf9edb61..6ff2f3bd 100644 --- a/tests/REST/NextCloudNews/TestNCNV1_2.php +++ b/tests/REST/NextCloudNews/TestNCNV1_2.php @@ -810,4 +810,15 @@ class TestNCNV1_2 extends Test\AbstractTest { $exp = new Response(403); $this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/cleanup/before-update"))); } + + function testCleanUpAfterUpdate() { + Phake::when(Arsse::$db)->articleCleanup()->thenReturn(true); + $exp = new Response(204); + $this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/cleanup/after-update"))); + Phake::verify(Arsse::$db)->articleCleanup(); + // performing a cleanup when not an admin fails + Phake::when(Arsse::$user)->rightsGet->thenReturn(0); + $exp = new Response(403); + $this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/cleanup/after-update"))); + } } \ No newline at end of file diff --git a/tests/lib/Database/SeriesArticle.php b/tests/lib/Database/SeriesArticle.php index 41486371..7a566ded 100644 --- a/tests/lib/Database/SeriesArticle.php +++ b/tests/lib/Database/SeriesArticle.php @@ -2,10 +2,6 @@ declare(strict_types=1); namespace JKingWeb\Arsse\Test\Database; use JKingWeb\Arsse\Arsse; -use JKingWeb\Arsse\Feed; -use JKingWeb\Arsse\Test\Database; -use JKingWeb\Arsse\User\Driver as UserDriver; -use JKingWeb\Arsse\Feed\Exception as FeedException; use JKingWeb\Arsse\Misc\Context; use JKingWeb\Arsse\Misc\Date; use Phake; @@ -17,13 +13,12 @@ trait SeriesArticle { 'id' => 'str', 'password' => 'str', 'name' => 'str', - 'rights' => 'int', ], 'rows' => [ - ["jane.doe@example.com", "", "Jane Doe", UserDriver::RIGHTS_NONE], - ["john.doe@example.com", "", "John Doe", UserDriver::RIGHTS_NONE], - ["john.doe@example.org", "", "John Doe", UserDriver::RIGHTS_NONE], - ["john.doe@example.net", "", "John Doe", UserDriver::RIGHTS_NONE], + ["jane.doe@example.com", "", "Jane Doe"], + ["john.doe@example.com", "", "John Doe"], + ["john.doe@example.org", "", "John Doe"], + ["john.doe@example.net", "", "John Doe"], ], ], 'arsse_folders' => [ diff --git a/tests/lib/Database/SeriesCleanup.php b/tests/lib/Database/SeriesCleanup.php new file mode 100644 index 00000000..9db8b3ef --- /dev/null +++ b/tests/lib/Database/SeriesCleanup.php @@ -0,0 +1,168 @@ +data = [ + 'arsse_users' => [ + 'columns' => [ + 'id' => 'str', + 'password' => 'str', + 'name' => 'str', + ], + 'rows' => [ + ["jane.doe@example.com", "", "Jane Doe"], + ["john.doe@example.com", "", "John Doe"], + ], + ], + 'arsse_feeds' => [ + 'columns' => [ + 'id' => "int", + 'url' => "str", + 'title' => "str", + 'orphaned' => "datetime", + 'size' => "int", + ], + 'rows' => [ + [1,"http://example.com/1","",$daybefore,2], //latest two articles should be kept + [2,"http://example.com/2","",$yesterday,0], + [3,"http://example.com/3","",null,0], + [4,"http://example.com/4","",$nowish,0], + ] + ], + 'arsse_subscriptions' => [ + 'columns' => [ + 'id' => "int", + 'owner' => "str", + 'feed' => "int", + ], + 'rows' => [ + // one feed previously marked for deletion has a subscription again, and so should not be deleted + [1,'jane.doe@example.com',1], + // other subscriptions exist for article cleanup tests + [2,'john.doe@example.com',1], + ] + ], + 'arsse_articles' => [ + 'columns' => [ + 'id' => "int", + 'feed' => "int", + 'url_title_hash' => "str", + 'url_content_hash' => "str", + 'title_content_hash' => "str", + 'modified' => "datetime", + ], + 'rows' => [ + [1,1,"","","",$weeksago], // is the latest article, thus is kept + [2,1,"","","",$weeksago], // is the second latest article, thus is kept + [3,1,"","","",$weeksago], // is starred by one user, thus is kept + [4,1,"","","",$weeksago], // does not meet the unread threshold due to a recent mark, thus is kept + [5,1,"","","",$daysago], // does not meet the unread threshold due to age, thus is kept + [6,1,"","","",$weeksago], // does not meet the read threshold due to a recent mark, thus is kept + [7,1,"","","",$weeksago], // meets the unread threshold without marks, thus is deleted + [8,1,"","","",$weeksago], // meets the unread threshold even with marks, thus is deleted + [9,1,"","","",$weeksago], // meets the read threshold, thus is deleted + ] + ], + 'arsse_editions' => [ + 'columns' => [ + 'id' => "int", + 'article' => "int", + ], + 'rows' => [ + [1,1], + [2,2], + [3,3], + [4,4], + [201,1], + [102,2], + ] + ], + 'arsse_marks' => [ + 'columns' => [ + 'article' => "int", + 'subscription' => "int", + 'read' => "bool", + 'starred' => "bool", + 'modified' => "datetime", + ], + 'rows' => [ + [3,1,0,1,$weeksago], + [4,1,1,0,$daysago], + [6,1,1,0,$nowish], + [6,2,1,0,$weeksago], + [8,1,1,0,$weeksago], + [9,1,1,0,$daysago], + [9,2,1,0,$daysago], + ] + ], + ]; + } + + function testCleanUpOrphanedFeeds() { + Arsse::$db->feedCleanup(); + $now = gmdate("Y-m-d H:i:s"); + $state = $this->primeExpectations($this->data, [ + 'arsse_feeds' => ["id","orphaned"] + ]); + $state['arsse_feeds']['rows'][0][1] = null; + unset($state['arsse_feeds']['rows'][1]); + $state['arsse_feeds']['rows'][2][1] = $now; + $this->compareExpectations($state); + } + + function testCleanUpOldArticlesWithStandardRetention() { + Arsse::$db->articleCleanup(); + $state = $this->primeExpectations($this->data, [ + 'arsse_articles' => ["id"] + ]); + foreach([7,8,9] as $id) { + unset($state['arsse_articles']['rows'][$id - 1]); + } + $this->compareExpectations($state); + } + + function testCleanUpOldArticlesWithUnlimitedReadRetention() { + Arsse::$conf->retainArticlesRead = ""; + Arsse::$db->articleCleanup(); + $state = $this->primeExpectations($this->data, [ + 'arsse_articles' => ["id"] + ]); + foreach([7,8] as $id) { + unset($state['arsse_articles']['rows'][$id - 1]); + } + $this->compareExpectations($state); + } + + function testCleanUpOldArticlesWithUnlimitedUnreadRetention() { + Arsse::$conf->retainArticlesUnread = ""; + Arsse::$db->articleCleanup(); + $state = $this->primeExpectations($this->data, [ + 'arsse_articles' => ["id"] + ]); + foreach([9] as $id) { + unset($state['arsse_articles']['rows'][$id - 1]); + } + $this->compareExpectations($state); + } + + function testCleanUpOldArticlesWithUnlimitedRetention() { + Arsse::$conf->retainArticlesRead = ""; + Arsse::$conf->retainArticlesUnread = ""; + Arsse::$db->articleCleanup(); + $state = $this->primeExpectations($this->data, [ + 'arsse_articles' => ["id"] + ]); + $this->compareExpectations($state); + } +} \ No newline at end of file diff --git a/tests/lib/Database/SeriesFeed.php b/tests/lib/Database/SeriesFeed.php index 40b1a4b3..532e00e6 100644 --- a/tests/lib/Database/SeriesFeed.php +++ b/tests/lib/Database/SeriesFeed.php @@ -3,8 +3,6 @@ declare(strict_types=1); namespace JKingWeb\Arsse\Test\Database; use JKingWeb\Arsse\Arsse; use JKingWeb\Arsse\Feed; -use JKingWeb\Arsse\Test\Database; -use JKingWeb\Arsse\User\Driver as UserDriver; use JKingWeb\Arsse\Feed\Exception as FeedException; use Phake; @@ -33,19 +31,16 @@ trait SeriesFeed { $past = gmdate("Y-m-d H:i:s",strtotime("now - 1 minute")); $future = gmdate("Y-m-d H:i:s",strtotime("now + 1 minute")); $now = gmdate("Y-m-d H:i:s",strtotime("now")); - $yesterday = gmdate("Y-m-d H:i:s",strtotime("now - 1 day")); - $longago = gmdate("Y-m-d H:i:s",strtotime("now - 2 days")); $this->data = [ 'arsse_users' => [ 'columns' => [ 'id' => 'str', 'password' => 'str', 'name' => 'str', - 'rights' => 'int', ], 'rows' => [ - ["jane.doe@example.com", "", "Jane Doe", UserDriver::RIGHTS_NONE], - ["john.doe@example.com", "", "John Doe", UserDriver::RIGHTS_NONE], + ["jane.doe@example.com", "", "Jane Doe"], + ["john.doe@example.com", "", "John Doe"], ], ], 'arsse_feeds' => [ @@ -57,21 +52,14 @@ trait SeriesFeed { 'err_msg' => "str", 'modified' => "datetime", 'next_fetch' => "datetime", - 'orphaned' => "datetime", 'size' => "int", ], 'rows' => [ - // feeds for update testing - [1,"http://localhost:8000/Feed/Matching/3","Ook",0,"",$past,$past,null,0], - [2,"http://localhost:8000/Feed/Matching/1","Eek",5,"There was an error last time",$past,$future,null,0], - [3,"http://localhost:8000/Feed/Fetching/Error?code=404","Ack",0,"",$past,$now,null,0], - [4,"http://localhost:8000/Feed/NextFetch/NotModified?t=".time(),"Ooook",0,"",$past,$past,null,0], - [5,"http://localhost:8000/Feed/Parsing/Valid","Ooook",0,"",$past,$future,null,0], - // feeds for cleanup testing - [6,"http://example.com/1","",0,"",$now,$future,$longago,0], - [7,"http://example.com/2","",0,"",$now,$future,$yesterday,0], - [8,"http://example.com/3","",0,"",$now,$future,null,0], - [9,"http://example.com/4","",0,"",$now,$future,$past,0], + [1,"http://localhost:8000/Feed/Matching/3","Ook",0,"",$past,$past,0], + [2,"http://localhost:8000/Feed/Matching/1","Eek",5,"There was an error last time",$past,$future,0], + [3,"http://localhost:8000/Feed/Fetching/Error?code=404","Ack",0,"",$past,$now,0], + [4,"http://localhost:8000/Feed/NextFetch/NotModified?t=".time(),"Ooook",0,"",$past,$past,0], + [5,"http://localhost:8000/Feed/Parsing/Valid","Ooook",0,"",$past,$future,0], ] ], 'arsse_subscriptions' => [ @@ -81,16 +69,12 @@ trait SeriesFeed { 'feed' => "int", ], 'rows' => [ - // the first five feeds need at least one subscription so they are not involved in the cleanup test [1,'john.doe@example.com',1], [2,'john.doe@example.com',2], [3,'john.doe@example.com',3], [4,'john.doe@example.com',4], [5,'john.doe@example.com',5], - // Jane also needs a subscription to the first feed, for marks [6,'jane.doe@example.com',1], - // one feed previously marked for deletion has a subscription again, and so should not be deleted - [7,'jane.doe@example.com',6], ] ], 'arsse_articles' => [ @@ -267,16 +251,4 @@ trait SeriesFeed { Arsse::$db->feedUpdate(4); $this->assertSame([1], Arsse::$db->feedListStale()); } - - function testHandleOrphanedFeeds() { - Arsse::$db->feedCleanup(); - $now = gmdate("Y-m-d H:i:s"); - $state = $this->primeExpectations($this->data, [ - 'arsse_feeds' => ["id","orphaned"] - ]); - $state['arsse_feeds']['rows'][5][1] = null; - unset($state['arsse_feeds']['rows'][6]); - $state['arsse_feeds']['rows'][7][1] = $now; - $this->compareExpectations($state); - } } \ No newline at end of file diff --git a/tests/lib/Database/SeriesFolder.php b/tests/lib/Database/SeriesFolder.php index 4c5be19f..ed993347 100644 --- a/tests/lib/Database/SeriesFolder.php +++ b/tests/lib/Database/SeriesFolder.php @@ -2,7 +2,6 @@ declare(strict_types=1); namespace JKingWeb\Arsse\Test\Database; use JKingWeb\Arsse\Arsse; -use JKingWeb\Arsse\User\Driver as UserDriver; use Phake; trait SeriesFolder { @@ -12,11 +11,10 @@ trait SeriesFolder { 'id' => 'str', 'password' => 'str', 'name' => 'str', - 'rights' => 'int', ], 'rows' => [ - ["jane.doe@example.com", "", "Jane Doe", UserDriver::RIGHTS_NONE], - ["john.doe@example.com", "", "John Doe", UserDriver::RIGHTS_NONE], + ["jane.doe@example.com", "", "Jane Doe"], + ["john.doe@example.com", "", "John Doe"], ], ], 'arsse_folders' => [ diff --git a/tests/lib/Database/SeriesSubscription.php b/tests/lib/Database/SeriesSubscription.php index e51cc0ff..39925d29 100644 --- a/tests/lib/Database/SeriesSubscription.php +++ b/tests/lib/Database/SeriesSubscription.php @@ -3,7 +3,6 @@ declare(strict_types=1); namespace JKingWeb\Arsse\Test\Database; use JKingWeb\Arsse\Arsse; use JKingWeb\Arsse\Test\Database; -use JKingWeb\Arsse\User\Driver as UserDriver; use JKingWeb\Arsse\Feed\Exception as FeedException; use Phake; @@ -14,11 +13,10 @@ trait SeriesSubscription { 'id' => 'str', 'password' => 'str', 'name' => 'str', - 'rights' => 'int', ], 'rows' => [ - ["jane.doe@example.com", "", "Jane Doe", UserDriver::RIGHTS_NONE], - ["john.doe@example.com", "", "John Doe", UserDriver::RIGHTS_NONE], + ["jane.doe@example.com", "", "Jane Doe"], + ["john.doe@example.com", "", "John Doe"], ], ], 'arsse_folders' => [ diff --git a/tests/phpunit.xml b/tests/phpunit.xml index c17cfd88..2a2b4434 100644 --- a/tests/phpunit.xml +++ b/tests/phpunit.xml @@ -60,6 +60,7 @@ Db/SQLite3/Database/TestDatabaseFeedSQLite3.php Db/SQLite3/Database/TestDatabaseSubscriptionSQLite3.php Db/SQLite3/Database/TestDatabaseArticleSQLite3.php + Db/SQLite3/Database/TestDatabaseCleanupSQLite3.php REST/NextCloudNews/TestNCNVersionDiscovery.php