1
1
Fork 0
mirror of https://code.mensbeam.com/MensBeam/Arsse.git synced 2025-01-03 14:32:40 +00:00

Implement article cleanup; fixes #28

This commit is contained in:
J. King 2017-08-17 22:36:15 -04:00
parent eeb834fe4c
commit 52104fb647
13 changed files with 273 additions and 58 deletions

View file

@ -77,9 +77,15 @@ class Conf {
/** @var string|null User-Agent string to use when fetching feeds from foreign servers */ /** @var string|null User-Agent string to use when fetching feeds from foreign servers */
public $fetchUserAgentString; public $fetchUserAgentString;
/** @var string Amount of time to keep a feed's articles in the database after all its subscriptions have been deleted, as an ISO 8601 duration (default: 24 hours) /** @var string Amount of time to keep a feed's articles in the database after all its subscriptions have been deleted, as an ISO 8601 duration (default: 24 hours; empty string for forever)
* @see https://en.wikipedia.org/wiki/ISO_8601#Durations */ * @see https://en.wikipedia.org/wiki/ISO_8601#Durations */
public $retainFeeds = "PT24H"; public $retainFeeds = "PT24H";
/** @var string Amount of time to keep an unstarred article in the database after it has been marked read by all users, as an ISO 8601 duration (default: 7 days; empty string for forever)
* @see https://en.wikipedia.org/wiki/ISO_8601#Durations */
public $retainArticlesRead = "P7D";
/** @var string Amount of time to keep an unstarred article in the database regardless of its read state, as an ISO 8601 duration (default: 21 days; empty string for forever)
* @see https://en.wikipedia.org/wiki/ISO_8601#Durations */
public $retainArticlesUnread = "P21D";
/** Creates a new configuration object /** Creates a new configuration object
* @param string $import_file Optional file to read configuration data from * @param string $import_file Optional file to read configuration data from

View file

@ -888,6 +888,44 @@ class Database {
return $this->db->prepare("SELECT count(*) from arsse_marks where starred is 1 and subscription in (select id from arsse_subscriptions where owner is ?)", "str")->run($user)->getValue(); return $this->db->prepare("SELECT count(*) from arsse_marks where starred is 1 and subscription in (select id from arsse_subscriptions where owner is ?)", "str")->run($user)->getValue();
} }
/** Deletes articles which have outlived the configured retention thresholds
 *
 * Every feed is processed in turn. Within a feed an article is deleted only if all of the following hold:
 *
 * - it is not one of the feed's most recent articles, ranked by latest edition; as many articles as the feed's recorded size are always kept
 * - no mark exists which stars it
 * - its last activity (the latest modification of any of its marks or, without marks, its own modification time) is
 *   at or before the unread threshold, or is at or before the read threshold while the number of marks flagging it
 *   as read equals the number of subscriptions to the feed
 *
 * The thresholds are the current time minus the "retainArticlesUnread" and "retainArticlesRead" settings respectively.
 * An empty setting leaves its threshold null, which can never satisfy the age comparison.
 *
 * @return bool Always true
 */
public function articleCleanup(): bool {
    // work out the cut-off times first; an empty setting means the corresponding rule is switched off
    $limitRead = null;
    $limitUnread = null;
    if (Arsse::$conf->retainArticlesRead) {
        $limitRead = Date::sub(Arsse::$conf->retainArticlesRead);
    }
    if (Arsse::$conf->retainArticlesUnread) {
        $limitUnread = Date::sub(Arsse::$conf->retainArticlesUnread);
    }
    if ($limitRead === null && $limitUnread === null) {
        // with both thresholds null neither age comparison below can ever match,
        // so preparing the statement and running it once per feed would be wasted work
        return true;
    }
    // parameters, in order: feed ID, number of newest articles to exempt, unread threshold, read threshold
    // (the continuation lines of the SQL literals are deliberately left as written so the statement text is unchanged)
    $query = $this->db->prepare(
        "WITH target_feed(id,subs) as (".
"SELECT
id, (select count(*) from arsse_subscriptions where feed is arsse_feeds.id) as subs
from arsse_feeds where id is ?".
        "), excepted_articles(id,edition) as (".
"SELECT
arsse_articles.id, (select max(id) from arsse_editions where article is arsse_articles.id) as edition
from arsse_articles
join target_feed on arsse_articles.feed is target_feed.id
order by edition desc limit ?".
        ") ".
"DELETE from arsse_articles where
feed is (select max(id) from target_feed)
and id not in (select id from excepted_articles)
and (select count(*) from arsse_marks where article is arsse_articles.id and starred is 1) is 0
and (
coalesce((select max(modified) from arsse_marks where article is arsse_articles.id),modified) <= ?
or ((select max(subs) from target_feed) is (select count(*) from arsse_marks where article is arsse_articles.id and read is 1) and coalesce((select max(modified) from arsse_marks where article is arsse_articles.id),modified) <= ?)
)
", "int", "int", "datetime", "datetime"
    );
    // the feed's size is the number of its newest articles which must survive regardless of age
    $feeds = $this->db->query("SELECT id, size from arsse_feeds")->getAll();
    foreach ($feeds as $feed) {
        $query->run($feed['id'], $feed['size'], $limitUnread, $limitRead);
    }
    return true;
}
protected function articleValidateId(string $user, int $id): array { protected function articleValidateId(string $user, int $id): array {
$out = $this->db->prepare( $out = $this->db->prepare(
"SELECT "SELECT

View file

@ -60,4 +60,22 @@ class Date {
$d->setTimestamp($time); $d->setTimestamp($time);
return $d; return $d;
} }
/** Returns a date moved forward by the given interval
 *
 * @param string $interval An interval specification understood by the \DateInterval constructor
 * @param mixed $date The date to start from; the current time is used when null
 */
public static function add(string $interval, $date = null): \DateTimeInterface {
    return self::modify("add", $interval, $date);
}
/** Returns a date moved backward by the given interval
 *
 * @param string $interval An interval specification understood by the \DateInterval constructor
 * @param mixed $date The date to start from; the current time is used when null
 */
public static function sub(string $interval, $date = null): \DateTimeInterface {
    return self::modify("sub", $interval, $date);
}
/** Applies an interval to a date using the named date method
 *
 * @param string $func The name of the method to call on the date object; callers in this class pass "add" or "sub"
 * @param string $interval An interval specification understood by the \DateInterval constructor
 * @param mixed $date The date to start from; the current time is used when null
 */
protected static function modify(string $func, string $interval, $date = null): \DateTimeInterface {
    // NOTE(review): this relies on normalize() yielding a date object for the given input; confirm it cannot return null here
    $target = self::normalize($date ?? time());
    if ($target instanceof \DateTimeImmutable) {
        // immutable dates hand back a new instance carrying the change
        return $target->$func(new \DateInterval($interval));
    }
    // any other date object is changed in place and then returned
    $target->$func(new \DateInterval($interval));
    return $target;
}
} }

View file

@ -668,7 +668,7 @@ class V1_2 extends \JKingWeb\Arsse\REST\AbstractHandler {
if(Arsse::$user->rightsGet(Arsse::$user->id)==User::RIGHTS_NONE) { if(Arsse::$user->rightsGet(Arsse::$user->id)==User::RIGHTS_NONE) {
return new Response(403); return new Response(403);
} }
// FIXME: stub Service::cleanupPost();
return new Response(204); return new Response(204);
} }

View file

@ -46,9 +46,9 @@ class Service {
$this->drv->queue(...$list); $this->drv->queue(...$list);
$this->drv->exec(); $this->drv->exec();
$this->drv->clean(); $this->drv->clean();
static::cleanupPost();
unset($list); unset($list);
} }
static::cleanupPost();
$t->add($this->interval); $t->add($this->interval);
if($loop) { if($loop) {
do { do {
@ -86,8 +86,8 @@ class Service {
return Arsse::$db->feedCleanup(); return Arsse::$db->feedCleanup();
} }
static function cleanupPost():bool { static function cleanupPost(): bool {
// TODO: stub // delete old articles, according to configured thresholds
return true; return Arsse::$db->articleCleanup();
} }
} }

View file

@ -0,0 +1,10 @@
<?php
declare(strict_types=1);
namespace JKingWeb\Arsse;
/**
 * Test case composed of the Setup, DriverSQLite3 and SeriesCleanup database test traits
 *
 * @covers \JKingWeb\Arsse\Database<extended>
 */
class TestDatabaseCleanupSQLite3 extends Test\AbstractTest {
    use Test\Database\Setup, Test\Database\DriverSQLite3, Test\Database\SeriesCleanup;
}

View file

@ -810,4 +810,15 @@ class TestNCNV1_2 extends Test\AbstractTest {
$exp = new Response(403); $exp = new Response(403);
$this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/cleanup/before-update"))); $this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/cleanup/before-update")));
} }
/** Exercises the after-update clean-up endpoint: success with a database clean-up, then refusal without rights */
public function testCleanUpAfterUpdate() {
    $path = "/cleanup/after-update";
    Phake::when(Arsse::$db)->articleCleanup()->thenReturn(true);
    // the first request is expected to yield 204 and to have invoked the article clean-up
    $result = $this->h->dispatch(new Request("GET", $path));
    $this->assertEquals(new Response(204), $result);
    Phake::verify(Arsse::$db)->articleCleanup();
    // performing a cleanup when not an admin fails
    Phake::when(Arsse::$user)->rightsGet->thenReturn(0);
    $result = $this->h->dispatch(new Request("GET", $path));
    $this->assertEquals(new Response(403), $result);
}
} }

View file

@ -2,10 +2,6 @@
declare(strict_types=1); declare(strict_types=1);
namespace JKingWeb\Arsse\Test\Database; namespace JKingWeb\Arsse\Test\Database;
use JKingWeb\Arsse\Arsse; use JKingWeb\Arsse\Arsse;
use JKingWeb\Arsse\Feed;
use JKingWeb\Arsse\Test\Database;
use JKingWeb\Arsse\User\Driver as UserDriver;
use JKingWeb\Arsse\Feed\Exception as FeedException;
use JKingWeb\Arsse\Misc\Context; use JKingWeb\Arsse\Misc\Context;
use JKingWeb\Arsse\Misc\Date; use JKingWeb\Arsse\Misc\Date;
use Phake; use Phake;
@ -17,13 +13,12 @@ trait SeriesArticle {
'id' => 'str', 'id' => 'str',
'password' => 'str', 'password' => 'str',
'name' => 'str', 'name' => 'str',
'rights' => 'int',
], ],
'rows' => [ 'rows' => [
["jane.doe@example.com", "", "Jane Doe", UserDriver::RIGHTS_NONE], ["jane.doe@example.com", "", "Jane Doe"],
["john.doe@example.com", "", "John Doe", UserDriver::RIGHTS_NONE], ["john.doe@example.com", "", "John Doe"],
["john.doe@example.org", "", "John Doe", UserDriver::RIGHTS_NONE], ["john.doe@example.org", "", "John Doe"],
["john.doe@example.net", "", "John Doe", UserDriver::RIGHTS_NONE], ["john.doe@example.net", "", "John Doe"],
], ],
], ],
'arsse_folders' => [ 'arsse_folders' => [

View file

@ -0,0 +1,168 @@
<?php
declare(strict_types=1);
namespace JKingWeb\Arsse\Test\Database;
use JKingWeb\Arsse\Arsse;
use Phake;
/** Test series covering the clean-up of orphaned feeds and of old articles */
trait SeriesCleanup {
    /** Fills $this->data with the users, feeds, subscriptions, articles, editions and marks the clean-up tests rely on */
    public function setUpSeries() {
        // renders a relative time expression as a GMT timestamp string
        $stamp = function(string $when) {
            return gmdate("Y-m-d H:i:s", strtotime($when));
        };
        $nowish = $stamp("now - 1 minute");
        $yesterday = $stamp("now - 1 day");
        $daybefore = $stamp("now - 2 days");
        $daysago = $stamp("now - 7 days");
        $weeksago = $stamp("now - 21 days");
        $users = [
            'columns' => [
                'id' => 'str',
                'password' => 'str',
                'name' => 'str',
            ],
            'rows' => [
                ["jane.doe@example.com", "", "Jane Doe"],
                ["john.doe@example.com", "", "John Doe"],
            ],
        ];
        $feeds = [
            'columns' => [
                'id' => "int",
                'url' => "str",
                'title' => "str",
                'orphaned' => "datetime",
                'size' => "int",
            ],
            'rows' => [
                [1,"http://example.com/1","",$daybefore,2], // the two latest articles of this feed should be kept
                [2,"http://example.com/2","",$yesterday,0],
                [3,"http://example.com/3","",null,0],
                [4,"http://example.com/4","",$nowish,0],
            ]
        ];
        $subscriptions = [
            'columns' => [
                'id' => "int",
                'owner' => "str",
                'feed' => "int",
            ],
            'rows' => [
                // a feed previously marked for deletion is subscribed to again, and so should not be deleted
                [1,'jane.doe@example.com',1],
                // a second subscription to the same feed, needed by the article clean-up tests
                [2,'john.doe@example.com',1],
            ]
        ];
        $articles = [
            'columns' => [
                'id' => "int",
                'feed' => "int",
                'url_title_hash' => "str",
                'url_content_hash' => "str",
                'title_content_hash' => "str",
                'modified' => "datetime",
            ],
            'rows' => [
                [1,1,"","","",$weeksago], // kept: latest article
                [2,1,"","","",$weeksago], // kept: second latest article
                [3,1,"","","",$weeksago], // kept: starred by one user
                [4,1,"","","",$weeksago], // kept: a recent mark puts it short of the unread threshold
                [5,1,"","","",$daysago], // kept: too young for the unread threshold
                [6,1,"","","",$weeksago], // kept: a recent mark puts it short of the read threshold
                [7,1,"","","",$weeksago], // deleted: meets the unread threshold and has no marks
                [8,1,"","","",$weeksago], // deleted: meets the unread threshold even with marks
                [9,1,"","","",$weeksago], // deleted: meets the read threshold
            ]
        ];
        $editions = [
            'columns' => [
                'id' => "int",
                'article' => "int",
            ],
            'rows' => [
                [1,1],
                [2,2],
                [3,3],
                [4,4],
                [201,1],
                [102,2],
            ]
        ];
        $marks = [
            'columns' => [
                'article' => "int",
                'subscription' => "int",
                'read' => "bool",
                'starred' => "bool",
                'modified' => "datetime",
            ],
            'rows' => [
                [3,1,0,1,$weeksago],
                [4,1,1,0,$daysago],
                [6,1,1,0,$nowish],
                [6,2,1,0,$weeksago],
                [8,1,1,0,$weeksago],
                [9,1,1,0,$daysago],
                [9,2,1,0,$daysago],
            ]
        ];
        $this->data = [
            'arsse_users' => $users,
            'arsse_feeds' => $feeds,
            'arsse_subscriptions' => $subscriptions,
            'arsse_articles' => $articles,
            'arsse_editions' => $editions,
            'arsse_marks' => $marks,
        ];
    }

    /** Checks the "orphaned" column and the surviving rows of the feeds table after a feed clean-up */
    public function testCleanUpOrphanedFeeds() {
        Arsse::$db->feedCleanup();
        $now = gmdate("Y-m-d H:i:s");
        $expected = $this->primeExpectations($this->data, [
            'arsse_feeds' => ["id","orphaned"]
        ]);
        // feed 1 is expected to lose its orphan mark
        $expected['arsse_feeds']['rows'][0][1] = null;
        // feed 2 is expected to be gone
        unset($expected['arsse_feeds']['rows'][1]);
        // feed 3 is expected to be marked as orphaned as of now
        $expected['arsse_feeds']['rows'][2][1] = $now;
        $this->compareExpectations($expected);
    }

    /** With the default settings articles 7, 8 and 9 are expected to be deleted */
    public function testCleanUpOldArticlesWithStandardRetention() {
        Arsse::$db->articleCleanup();
        $this->assertArticlesDeleted([7,8,9]);
    }

    /** With the read threshold switched off only articles 7 and 8 are expected to be deleted */
    public function testCleanUpOldArticlesWithUnlimitedReadRetention() {
        Arsse::$conf->retainArticlesRead = "";
        Arsse::$db->articleCleanup();
        $this->assertArticlesDeleted([7,8]);
    }

    /** With the unread threshold switched off only article 9 is expected to be deleted */
    public function testCleanUpOldArticlesWithUnlimitedUnreadRetention() {
        Arsse::$conf->retainArticlesUnread = "";
        Arsse::$db->articleCleanup();
        $this->assertArticlesDeleted([9]);
    }

    /** With both thresholds switched off no article is expected to be deleted */
    public function testCleanUpOldArticlesWithUnlimitedRetention() {
        Arsse::$conf->retainArticlesRead = "";
        Arsse::$conf->retainArticlesUnread = "";
        Arsse::$db->articleCleanup();
        $this->assertArticlesDeleted([]);
    }

    /** Compares the IDs in the articles table against the fixture with the listed article IDs removed
     *
     * @param int[] $ids The IDs of the articles expected to have been deleted
     */
    private function assertArticlesDeleted(array $ids) {
        $expected = $this->primeExpectations($this->data, [
            'arsse_articles' => ["id"]
        ]);
        foreach ($ids as $id) {
            // the fixture lists articles in ID order starting from 1, so a row's index is its ID less one
            unset($expected['arsse_articles']['rows'][$id - 1]);
        }
        $this->compareExpectations($expected);
    }
}

View file

@ -3,8 +3,6 @@ declare(strict_types=1);
namespace JKingWeb\Arsse\Test\Database; namespace JKingWeb\Arsse\Test\Database;
use JKingWeb\Arsse\Arsse; use JKingWeb\Arsse\Arsse;
use JKingWeb\Arsse\Feed; use JKingWeb\Arsse\Feed;
use JKingWeb\Arsse\Test\Database;
use JKingWeb\Arsse\User\Driver as UserDriver;
use JKingWeb\Arsse\Feed\Exception as FeedException; use JKingWeb\Arsse\Feed\Exception as FeedException;
use Phake; use Phake;
@ -33,19 +31,16 @@ trait SeriesFeed {
$past = gmdate("Y-m-d H:i:s",strtotime("now - 1 minute")); $past = gmdate("Y-m-d H:i:s",strtotime("now - 1 minute"));
$future = gmdate("Y-m-d H:i:s",strtotime("now + 1 minute")); $future = gmdate("Y-m-d H:i:s",strtotime("now + 1 minute"));
$now = gmdate("Y-m-d H:i:s",strtotime("now")); $now = gmdate("Y-m-d H:i:s",strtotime("now"));
$yesterday = gmdate("Y-m-d H:i:s",strtotime("now - 1 day"));
$longago = gmdate("Y-m-d H:i:s",strtotime("now - 2 days"));
$this->data = [ $this->data = [
'arsse_users' => [ 'arsse_users' => [
'columns' => [ 'columns' => [
'id' => 'str', 'id' => 'str',
'password' => 'str', 'password' => 'str',
'name' => 'str', 'name' => 'str',
'rights' => 'int',
], ],
'rows' => [ 'rows' => [
["jane.doe@example.com", "", "Jane Doe", UserDriver::RIGHTS_NONE], ["jane.doe@example.com", "", "Jane Doe"],
["john.doe@example.com", "", "John Doe", UserDriver::RIGHTS_NONE], ["john.doe@example.com", "", "John Doe"],
], ],
], ],
'arsse_feeds' => [ 'arsse_feeds' => [
@ -57,21 +52,14 @@ trait SeriesFeed {
'err_msg' => "str", 'err_msg' => "str",
'modified' => "datetime", 'modified' => "datetime",
'next_fetch' => "datetime", 'next_fetch' => "datetime",
'orphaned' => "datetime",
'size' => "int", 'size' => "int",
], ],
'rows' => [ 'rows' => [
// feeds for update testing [1,"http://localhost:8000/Feed/Matching/3","Ook",0,"",$past,$past,0],
[1,"http://localhost:8000/Feed/Matching/3","Ook",0,"",$past,$past,null,0], [2,"http://localhost:8000/Feed/Matching/1","Eek",5,"There was an error last time",$past,$future,0],
[2,"http://localhost:8000/Feed/Matching/1","Eek",5,"There was an error last time",$past,$future,null,0], [3,"http://localhost:8000/Feed/Fetching/Error?code=404","Ack",0,"",$past,$now,0],
[3,"http://localhost:8000/Feed/Fetching/Error?code=404","Ack",0,"",$past,$now,null,0], [4,"http://localhost:8000/Feed/NextFetch/NotModified?t=".time(),"Ooook",0,"",$past,$past,0],
[4,"http://localhost:8000/Feed/NextFetch/NotModified?t=".time(),"Ooook",0,"",$past,$past,null,0], [5,"http://localhost:8000/Feed/Parsing/Valid","Ooook",0,"",$past,$future,0],
[5,"http://localhost:8000/Feed/Parsing/Valid","Ooook",0,"",$past,$future,null,0],
// feeds for cleanup testing
[6,"http://example.com/1","",0,"",$now,$future,$longago,0],
[7,"http://example.com/2","",0,"",$now,$future,$yesterday,0],
[8,"http://example.com/3","",0,"",$now,$future,null,0],
[9,"http://example.com/4","",0,"",$now,$future,$past,0],
] ]
], ],
'arsse_subscriptions' => [ 'arsse_subscriptions' => [
@ -81,16 +69,12 @@ trait SeriesFeed {
'feed' => "int", 'feed' => "int",
], ],
'rows' => [ 'rows' => [
// the first five feeds need at least one subscription so they are not involved in the cleanup test
[1,'john.doe@example.com',1], [1,'john.doe@example.com',1],
[2,'john.doe@example.com',2], [2,'john.doe@example.com',2],
[3,'john.doe@example.com',3], [3,'john.doe@example.com',3],
[4,'john.doe@example.com',4], [4,'john.doe@example.com',4],
[5,'john.doe@example.com',5], [5,'john.doe@example.com',5],
// Jane also needs a subscription to the first feed, for marks
[6,'jane.doe@example.com',1], [6,'jane.doe@example.com',1],
// one feed previously marked for deletion has a subscription again, and so should not be deleted
[7,'jane.doe@example.com',6],
] ]
], ],
'arsse_articles' => [ 'arsse_articles' => [
@ -267,16 +251,4 @@ trait SeriesFeed {
Arsse::$db->feedUpdate(4); Arsse::$db->feedUpdate(4);
$this->assertSame([1], Arsse::$db->feedListStale()); $this->assertSame([1], Arsse::$db->feedListStale());
} }
function testHandleOrphanedFeeds() {
Arsse::$db->feedCleanup();
$now = gmdate("Y-m-d H:i:s");
$state = $this->primeExpectations($this->data, [
'arsse_feeds' => ["id","orphaned"]
]);
$state['arsse_feeds']['rows'][5][1] = null;
unset($state['arsse_feeds']['rows'][6]);
$state['arsse_feeds']['rows'][7][1] = $now;
$this->compareExpectations($state);
}
} }

View file

@ -2,7 +2,6 @@
declare(strict_types=1); declare(strict_types=1);
namespace JKingWeb\Arsse\Test\Database; namespace JKingWeb\Arsse\Test\Database;
use JKingWeb\Arsse\Arsse; use JKingWeb\Arsse\Arsse;
use JKingWeb\Arsse\User\Driver as UserDriver;
use Phake; use Phake;
trait SeriesFolder { trait SeriesFolder {
@ -12,11 +11,10 @@ trait SeriesFolder {
'id' => 'str', 'id' => 'str',
'password' => 'str', 'password' => 'str',
'name' => 'str', 'name' => 'str',
'rights' => 'int',
], ],
'rows' => [ 'rows' => [
["jane.doe@example.com", "", "Jane Doe", UserDriver::RIGHTS_NONE], ["jane.doe@example.com", "", "Jane Doe"],
["john.doe@example.com", "", "John Doe", UserDriver::RIGHTS_NONE], ["john.doe@example.com", "", "John Doe"],
], ],
], ],
'arsse_folders' => [ 'arsse_folders' => [

View file

@ -3,7 +3,6 @@ declare(strict_types=1);
namespace JKingWeb\Arsse\Test\Database; namespace JKingWeb\Arsse\Test\Database;
use JKingWeb\Arsse\Arsse; use JKingWeb\Arsse\Arsse;
use JKingWeb\Arsse\Test\Database; use JKingWeb\Arsse\Test\Database;
use JKingWeb\Arsse\User\Driver as UserDriver;
use JKingWeb\Arsse\Feed\Exception as FeedException; use JKingWeb\Arsse\Feed\Exception as FeedException;
use Phake; use Phake;
@ -14,11 +13,10 @@ trait SeriesSubscription {
'id' => 'str', 'id' => 'str',
'password' => 'str', 'password' => 'str',
'name' => 'str', 'name' => 'str',
'rights' => 'int',
], ],
'rows' => [ 'rows' => [
["jane.doe@example.com", "", "Jane Doe", UserDriver::RIGHTS_NONE], ["jane.doe@example.com", "", "Jane Doe"],
["john.doe@example.com", "", "John Doe", UserDriver::RIGHTS_NONE], ["john.doe@example.com", "", "John Doe"],
], ],
], ],
'arsse_folders' => [ 'arsse_folders' => [

View file

@ -60,6 +60,7 @@
<file>Db/SQLite3/Database/TestDatabaseFeedSQLite3.php</file> <file>Db/SQLite3/Database/TestDatabaseFeedSQLite3.php</file>
<file>Db/SQLite3/Database/TestDatabaseSubscriptionSQLite3.php</file> <file>Db/SQLite3/Database/TestDatabaseSubscriptionSQLite3.php</file>
<file>Db/SQLite3/Database/TestDatabaseArticleSQLite3.php</file> <file>Db/SQLite3/Database/TestDatabaseArticleSQLite3.php</file>
<file>Db/SQLite3/Database/TestDatabaseCleanupSQLite3.php</file>
</testsuite> </testsuite>
<testsuite name="NextCloud News API"> <testsuite name="NextCloud News API">
<file>REST/NextCloudNews/TestNCNVersionDiscovery.php</file> <file>REST/NextCloudNews/TestNCNVersionDiscovery.php</file>