1
1
Fork 0
mirror of https://code.mensbeam.com/MensBeam/Arsse.git synced 2024-12-22 13:12:41 +00:00

Implement article cleanup; fixes #28

This commit is contained in:
J. King 2017-08-17 22:36:15 -04:00
parent eeb834fe4c
commit 52104fb647
13 changed files with 273 additions and 58 deletions

View file

@ -77,9 +77,15 @@ class Conf {
/** @var string|null User-Agent string to use when fetching feeds from foreign servers */
public $fetchUserAgentString;
/** @var string Amount of time to keep a feed's articles in the database after all its subscriptions have been deleted, as an ISO 8601 duration (default: 24 hours)
/** @var string Amount of time to keep a feed's articles in the database after all its subscriptions have been deleted, as an ISO 8601 duration (default: 24 hours; empty string for forever)
* @see https://en.wikipedia.org/wiki/ISO_8601#Durations */
public $retainFeeds = "PT24H";
/** @var string Amount of time to keep an unstarred article in the database after it has been marked read by all users, as an ISO 8601 duration (default: 7 days; empty string for forever)
* @see https://en.wikipedia.org/wiki/ISO_8601#Durations */
public $retainArticlesRead = "P7D";
/** @var string Amount of time to keep an unstarred article in the database regardless of its read state, as an ISO 8601 duration (default: 21 days; empty string for forever)
* @see https://en.wikipedia.org/wiki/ISO_8601#Durations */
public $retainArticlesUnread = "P21D";
/** Creates a new configuration object
* @param string $import_file Optional file to read configuration data from

View file

@ -888,6 +888,44 @@ class Database {
return $this->db->prepare("SELECT count(*) from arsse_marks where starred is 1 and subscription in (select id from arsse_subscriptions where owner is ?)", "str")->run($user)->getValue();
}
public function articleCleanup(): bool {
$query = $this->db->prepare(
"WITH target_feed(id,subs) as (".
"SELECT
id, (select count(*) from arsse_subscriptions where feed is arsse_feeds.id) as subs
from arsse_feeds where id is ?".
"), excepted_articles(id,edition) as (".
"SELECT
arsse_articles.id, (select max(id) from arsse_editions where article is arsse_articles.id) as edition
from arsse_articles
join target_feed on arsse_articles.feed is target_feed.id
order by edition desc limit ?".
") ".
"DELETE from arsse_articles where
feed is (select max(id) from target_feed)
and id not in (select id from excepted_articles)
and (select count(*) from arsse_marks where article is arsse_articles.id and starred is 1) is 0
and (
coalesce((select max(modified) from arsse_marks where article is arsse_articles.id),modified) <= ?
or ((select max(subs) from target_feed) is (select count(*) from arsse_marks where article is arsse_articles.id and read is 1) and coalesce((select max(modified) from arsse_marks where article is arsse_articles.id),modified) <= ?)
)
", "int", "int", "datetime", "datetime"
);
$limitRead = null;
$limitUnread = null;
if(Arsse::$conf->retainArticlesRead) {
$limitRead = Date::sub(Arsse::$conf->retainArticlesRead);
}
if(Arsse::$conf->retainArticlesUnread) {
$limitUnread = Date::sub(Arsse::$conf->retainArticlesUnread);
}
$feeds = $this->db->query("SELECT id, size from arsse_feeds")->getAll();
foreach($feeds as $feed) {
$query->run($feed['id'], $feed['size'], $limitUnread, $limitRead);
}
return true;
}
protected function articleValidateId(string $user, int $id): array {
$out = $this->db->prepare(
"SELECT

View file

@ -60,4 +60,22 @@ class Date {
$d->setTimestamp($time);
return $d;
}
static function add(string $interval, $date = null): \DateTimeInterface {
return self::modify("add", $interval, $date);
}
static function sub(string $interval, $date = null): \DateTimeInterface {
return self::modify("sub", $interval, $date);
}
static protected function modify(string $func, string $interval, $date = null): \DateTimeInterface {
$date = self::normalize($date ?? time());
if($date instanceof \DateTimeImmutable) {
return $date->$func(new \DateInterval($interval));
} else {
$date->$func(new \DateInterval($interval));
return $date;
}
}
}

View file

@ -668,7 +668,7 @@ class V1_2 extends \JKingWeb\Arsse\REST\AbstractHandler {
if(Arsse::$user->rightsGet(Arsse::$user->id)==User::RIGHTS_NONE) {
return new Response(403);
}
// FIXME: stub
Service::cleanupPost();
return new Response(204);
}

View file

@ -46,9 +46,9 @@ class Service {
$this->drv->queue(...$list);
$this->drv->exec();
$this->drv->clean();
static::cleanupPost();
unset($list);
}
static::cleanupPost();
$t->add($this->interval);
if($loop) {
do {
@ -86,8 +86,8 @@ class Service {
return Arsse::$db->feedCleanup();
}
static function cleanupPost():bool {
// TODO: stub
return true;
static function cleanupPost(): bool {
// delete old articles, according to configured threasholds
return Arsse::$db->articleCleanup();
}
}

View file

@ -0,0 +1,10 @@
<?php
declare(strict_types=1);
namespace JKingWeb\Arsse;
/** @covers \JKingWeb\Arsse\Database<extended> */
class TestDatabaseCleanupSQLite3 extends Test\AbstractTest {
use Test\Database\Setup;
use Test\Database\DriverSQLite3;
use Test\Database\SeriesCleanup;
}

View file

@ -810,4 +810,15 @@ class TestNCNV1_2 extends Test\AbstractTest {
$exp = new Response(403);
$this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/cleanup/before-update")));
}
function testCleanUpAfterUpdate() {
Phake::when(Arsse::$db)->articleCleanup()->thenReturn(true);
$exp = new Response(204);
$this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/cleanup/after-update")));
Phake::verify(Arsse::$db)->articleCleanup();
// performing a cleanup when not an admin fails
Phake::when(Arsse::$user)->rightsGet->thenReturn(0);
$exp = new Response(403);
$this->assertEquals($exp, $this->h->dispatch(new Request("GET", "/cleanup/after-update")));
}
}

View file

@ -2,10 +2,6 @@
declare(strict_types=1);
namespace JKingWeb\Arsse\Test\Database;
use JKingWeb\Arsse\Arsse;
use JKingWeb\Arsse\Feed;
use JKingWeb\Arsse\Test\Database;
use JKingWeb\Arsse\User\Driver as UserDriver;
use JKingWeb\Arsse\Feed\Exception as FeedException;
use JKingWeb\Arsse\Misc\Context;
use JKingWeb\Arsse\Misc\Date;
use Phake;
@ -17,13 +13,12 @@ trait SeriesArticle {
'id' => 'str',
'password' => 'str',
'name' => 'str',
'rights' => 'int',
],
'rows' => [
["jane.doe@example.com", "", "Jane Doe", UserDriver::RIGHTS_NONE],
["john.doe@example.com", "", "John Doe", UserDriver::RIGHTS_NONE],
["john.doe@example.org", "", "John Doe", UserDriver::RIGHTS_NONE],
["john.doe@example.net", "", "John Doe", UserDriver::RIGHTS_NONE],
["jane.doe@example.com", "", "Jane Doe"],
["john.doe@example.com", "", "John Doe"],
["john.doe@example.org", "", "John Doe"],
["john.doe@example.net", "", "John Doe"],
],
],
'arsse_folders' => [

View file

@ -0,0 +1,168 @@
<?php
declare(strict_types=1);
namespace JKingWeb\Arsse\Test\Database;
use JKingWeb\Arsse\Arsse;
use Phake;
trait SeriesCleanup {
function setUpSeries() {
// set up the test data
$nowish = gmdate("Y-m-d H:i:s",strtotime("now - 1 minute"));
$yesterday = gmdate("Y-m-d H:i:s",strtotime("now - 1 day"));
$daybefore = gmdate("Y-m-d H:i:s",strtotime("now - 2 days"));
$daysago = gmdate("Y-m-d H:i:s",strtotime("now - 7 days"));
$weeksago = gmdate("Y-m-d H:i:s",strtotime("now - 21 days"));
$this->data = [
'arsse_users' => [
'columns' => [
'id' => 'str',
'password' => 'str',
'name' => 'str',
],
'rows' => [
["jane.doe@example.com", "", "Jane Doe"],
["john.doe@example.com", "", "John Doe"],
],
],
'arsse_feeds' => [
'columns' => [
'id' => "int",
'url' => "str",
'title' => "str",
'orphaned' => "datetime",
'size' => "int",
],
'rows' => [
[1,"http://example.com/1","",$daybefore,2], //latest two articles should be kept
[2,"http://example.com/2","",$yesterday,0],
[3,"http://example.com/3","",null,0],
[4,"http://example.com/4","",$nowish,0],
]
],
'arsse_subscriptions' => [
'columns' => [
'id' => "int",
'owner' => "str",
'feed' => "int",
],
'rows' => [
// one feed previously marked for deletion has a subscription again, and so should not be deleted
[1,'jane.doe@example.com',1],
// other subscriptions exist for article cleanup tests
[2,'john.doe@example.com',1],
]
],
'arsse_articles' => [
'columns' => [
'id' => "int",
'feed' => "int",
'url_title_hash' => "str",
'url_content_hash' => "str",
'title_content_hash' => "str",
'modified' => "datetime",
],
'rows' => [
[1,1,"","","",$weeksago], // is the latest article, thus is kept
[2,1,"","","",$weeksago], // is the second latest article, thus is kept
[3,1,"","","",$weeksago], // is starred by one user, thus is kept
[4,1,"","","",$weeksago], // does not meet the unread threshold due to a recent mark, thus is kept
[5,1,"","","",$daysago], // does not meet the unread threshold due to age, thus is kept
[6,1,"","","",$weeksago], // does not meet the read threshold due to a recent mark, thus is kept
[7,1,"","","",$weeksago], // meets the unread threshold without marks, thus is deleted
[8,1,"","","",$weeksago], // meets the unread threshold even with marks, thus is deleted
[9,1,"","","",$weeksago], // meets the read threshold, thus is deleted
]
],
'arsse_editions' => [
'columns' => [
'id' => "int",
'article' => "int",
],
'rows' => [
[1,1],
[2,2],
[3,3],
[4,4],
[201,1],
[102,2],
]
],
'arsse_marks' => [
'columns' => [
'article' => "int",
'subscription' => "int",
'read' => "bool",
'starred' => "bool",
'modified' => "datetime",
],
'rows' => [
[3,1,0,1,$weeksago],
[4,1,1,0,$daysago],
[6,1,1,0,$nowish],
[6,2,1,0,$weeksago],
[8,1,1,0,$weeksago],
[9,1,1,0,$daysago],
[9,2,1,0,$daysago],
]
],
];
}
function testCleanUpOrphanedFeeds() {
Arsse::$db->feedCleanup();
$now = gmdate("Y-m-d H:i:s");
$state = $this->primeExpectations($this->data, [
'arsse_feeds' => ["id","orphaned"]
]);
$state['arsse_feeds']['rows'][0][1] = null;
unset($state['arsse_feeds']['rows'][1]);
$state['arsse_feeds']['rows'][2][1] = $now;
$this->compareExpectations($state);
}
function testCleanUpOldArticlesWithStandardRetention() {
Arsse::$db->articleCleanup();
$state = $this->primeExpectations($this->data, [
'arsse_articles' => ["id"]
]);
foreach([7,8,9] as $id) {
unset($state['arsse_articles']['rows'][$id - 1]);
}
$this->compareExpectations($state);
}
function testCleanUpOldArticlesWithUnlimitedReadRetention() {
Arsse::$conf->retainArticlesRead = "";
Arsse::$db->articleCleanup();
$state = $this->primeExpectations($this->data, [
'arsse_articles' => ["id"]
]);
foreach([7,8] as $id) {
unset($state['arsse_articles']['rows'][$id - 1]);
}
$this->compareExpectations($state);
}
function testCleanUpOldArticlesWithUnlimitedUnreadRetention() {
Arsse::$conf->retainArticlesUnread = "";
Arsse::$db->articleCleanup();
$state = $this->primeExpectations($this->data, [
'arsse_articles' => ["id"]
]);
foreach([9] as $id) {
unset($state['arsse_articles']['rows'][$id - 1]);
}
$this->compareExpectations($state);
}
function testCleanUpOldArticlesWithUnlimitedRetention() {
Arsse::$conf->retainArticlesRead = "";
Arsse::$conf->retainArticlesUnread = "";
Arsse::$db->articleCleanup();
$state = $this->primeExpectations($this->data, [
'arsse_articles' => ["id"]
]);
$this->compareExpectations($state);
}
}

View file

@ -3,8 +3,6 @@ declare(strict_types=1);
namespace JKingWeb\Arsse\Test\Database;
use JKingWeb\Arsse\Arsse;
use JKingWeb\Arsse\Feed;
use JKingWeb\Arsse\Test\Database;
use JKingWeb\Arsse\User\Driver as UserDriver;
use JKingWeb\Arsse\Feed\Exception as FeedException;
use Phake;
@ -33,19 +31,16 @@ trait SeriesFeed {
$past = gmdate("Y-m-d H:i:s",strtotime("now - 1 minute"));
$future = gmdate("Y-m-d H:i:s",strtotime("now + 1 minute"));
$now = gmdate("Y-m-d H:i:s",strtotime("now"));
$yesterday = gmdate("Y-m-d H:i:s",strtotime("now - 1 day"));
$longago = gmdate("Y-m-d H:i:s",strtotime("now - 2 days"));
$this->data = [
'arsse_users' => [
'columns' => [
'id' => 'str',
'password' => 'str',
'name' => 'str',
'rights' => 'int',
],
'rows' => [
["jane.doe@example.com", "", "Jane Doe", UserDriver::RIGHTS_NONE],
["john.doe@example.com", "", "John Doe", UserDriver::RIGHTS_NONE],
["jane.doe@example.com", "", "Jane Doe"],
["john.doe@example.com", "", "John Doe"],
],
],
'arsse_feeds' => [
@ -57,21 +52,14 @@ trait SeriesFeed {
'err_msg' => "str",
'modified' => "datetime",
'next_fetch' => "datetime",
'orphaned' => "datetime",
'size' => "int",
],
'rows' => [
// feeds for update testing
[1,"http://localhost:8000/Feed/Matching/3","Ook",0,"",$past,$past,null,0],
[2,"http://localhost:8000/Feed/Matching/1","Eek",5,"There was an error last time",$past,$future,null,0],
[3,"http://localhost:8000/Feed/Fetching/Error?code=404","Ack",0,"",$past,$now,null,0],
[4,"http://localhost:8000/Feed/NextFetch/NotModified?t=".time(),"Ooook",0,"",$past,$past,null,0],
[5,"http://localhost:8000/Feed/Parsing/Valid","Ooook",0,"",$past,$future,null,0],
// feeds for cleanup testing
[6,"http://example.com/1","",0,"",$now,$future,$longago,0],
[7,"http://example.com/2","",0,"",$now,$future,$yesterday,0],
[8,"http://example.com/3","",0,"",$now,$future,null,0],
[9,"http://example.com/4","",0,"",$now,$future,$past,0],
[1,"http://localhost:8000/Feed/Matching/3","Ook",0,"",$past,$past,0],
[2,"http://localhost:8000/Feed/Matching/1","Eek",5,"There was an error last time",$past,$future,0],
[3,"http://localhost:8000/Feed/Fetching/Error?code=404","Ack",0,"",$past,$now,0],
[4,"http://localhost:8000/Feed/NextFetch/NotModified?t=".time(),"Ooook",0,"",$past,$past,0],
[5,"http://localhost:8000/Feed/Parsing/Valid","Ooook",0,"",$past,$future,0],
]
],
'arsse_subscriptions' => [
@ -81,16 +69,12 @@ trait SeriesFeed {
'feed' => "int",
],
'rows' => [
// the first five feeds need at least one subscription so they are not involved in the cleanup test
[1,'john.doe@example.com',1],
[2,'john.doe@example.com',2],
[3,'john.doe@example.com',3],
[4,'john.doe@example.com',4],
[5,'john.doe@example.com',5],
// Jane also needs a subscription to the first feed, for marks
[6,'jane.doe@example.com',1],
// one feed previously marked for deletion has a subscription again, and so should not be deleted
[7,'jane.doe@example.com',6],
]
],
'arsse_articles' => [
@ -267,16 +251,4 @@ trait SeriesFeed {
Arsse::$db->feedUpdate(4);
$this->assertSame([1], Arsse::$db->feedListStale());
}
function testHandleOrphanedFeeds() {
Arsse::$db->feedCleanup();
$now = gmdate("Y-m-d H:i:s");
$state = $this->primeExpectations($this->data, [
'arsse_feeds' => ["id","orphaned"]
]);
$state['arsse_feeds']['rows'][5][1] = null;
unset($state['arsse_feeds']['rows'][6]);
$state['arsse_feeds']['rows'][7][1] = $now;
$this->compareExpectations($state);
}
}

View file

@ -2,7 +2,6 @@
declare(strict_types=1);
namespace JKingWeb\Arsse\Test\Database;
use JKingWeb\Arsse\Arsse;
use JKingWeb\Arsse\User\Driver as UserDriver;
use Phake;
trait SeriesFolder {
@ -12,11 +11,10 @@ trait SeriesFolder {
'id' => 'str',
'password' => 'str',
'name' => 'str',
'rights' => 'int',
],
'rows' => [
["jane.doe@example.com", "", "Jane Doe", UserDriver::RIGHTS_NONE],
["john.doe@example.com", "", "John Doe", UserDriver::RIGHTS_NONE],
["jane.doe@example.com", "", "Jane Doe"],
["john.doe@example.com", "", "John Doe"],
],
],
'arsse_folders' => [

View file

@ -3,7 +3,6 @@ declare(strict_types=1);
namespace JKingWeb\Arsse\Test\Database;
use JKingWeb\Arsse\Arsse;
use JKingWeb\Arsse\Test\Database;
use JKingWeb\Arsse\User\Driver as UserDriver;
use JKingWeb\Arsse\Feed\Exception as FeedException;
use Phake;
@ -14,11 +13,10 @@ trait SeriesSubscription {
'id' => 'str',
'password' => 'str',
'name' => 'str',
'rights' => 'int',
],
'rows' => [
["jane.doe@example.com", "", "Jane Doe", UserDriver::RIGHTS_NONE],
["john.doe@example.com", "", "John Doe", UserDriver::RIGHTS_NONE],
["jane.doe@example.com", "", "Jane Doe"],
["john.doe@example.com", "", "John Doe"],
],
],
'arsse_folders' => [

View file

@ -60,6 +60,7 @@
<file>Db/SQLite3/Database/TestDatabaseFeedSQLite3.php</file>
<file>Db/SQLite3/Database/TestDatabaseSubscriptionSQLite3.php</file>
<file>Db/SQLite3/Database/TestDatabaseArticleSQLite3.php</file>
<file>Db/SQLite3/Database/TestDatabaseCleanupSQLite3.php</file>
</testsuite>
<testsuite name="NextCloud News API">
<file>REST/NextCloudNews/TestNCNVersionDiscovery.php</file>