1
1
Fork 0
mirror of https://code.mensbeam.com/MensBeam/Arsse.git synced 2024-12-31 21:12:41 +00:00

Integrated adaptive update intervals Fixes #51

This commit is contained in:
J. King 2017-04-30 17:54:29 -04:00
parent 3b4d79abc8
commit 3bc9082cfa
2 changed files with 82 additions and 38 deletions

View file

@ -433,26 +433,25 @@ class Database {
$this->db->begin();
try {
// check to make sure the feed exists
$f = $this->db->prepare('SELECT url, username, password, DATEFORMAT("http", modified) AS lastmodified, etag FROM arsse_feeds where id is ?', "int")->run($feedID)->getRow();
$f = $this->db->prepare('SELECT url, username, password, DATEFORMAT("http", modified) AS lastmodified, etag, err_count FROM arsse_feeds where id is ?', "int")->run($feedID)->getRow();
if(!$f) throw new Db\ExceptionInput("idMissing", ["action" => __FUNCTION__, "field" => "feed", 'id' => $feedID]);
// the Feed object throws an exception when there are problems, but that isn't ideal
// here. When an exception is thrown it should update the database with the
// error instead of failing; if other exceptions are thrown, we should simply roll back
try {
$feed = new Feed($f['url'], (string)$f['lastmodified'], $f['etag'], $f['username'], $f['password']);
if($feed->resource->isModified()) {
$feed->parse($feedID);
} else {
$feed = new Feed($feedID, $f['url'], (string)$f['lastmodified'], $f['etag'], $f['username'], $f['password']);
if(!$feed->modified) {
// if the feed hasn't changed, just compute the next fetch time and record it
$next = $this->feedNextFetch($feedID);
$this->db->prepare('UPDATE arsse_feeds SET updated = CURRENT_TIMESTAMP, next_fetch = ? WHERE id is ?', 'datetime', 'int')->run($next, $feedID);
$this->db->prepare('UPDATE arsse_feeds SET updated = CURRENT_TIMESTAMP, next_fetch = ? WHERE id is ?', 'datetime', 'int')->run($feed->nextFetch, $feedID);
$this->db->commit();
return false;
}
} catch (Feed\Exception $e) {
// update the database with the resultant error and the next fetch time, incrementing the error count
$next = $this->feedNextFetch($feedID);
$this->db->prepare('UPDATE arsse_feeds SET updated = CURRENT_TIMESTAMP, next_fetch = ?, err_count = err_count + 1, err_msg = ? WHERE id is ?', 'datetime', 'str', 'int')->run($next, $e->getMessage(),$feedID);
$this->db->prepare(
'UPDATE arsse_feeds SET updated = CURRENT_TIMESTAMP, next_fetch = ?, err_count = err_count + 1, err_msg = ? WHERE id is ?',
'datetime', 'str', 'int'
)->run(Feed::nextFetchOnError($f['err_count']), $e->getMessage(),$feedID);
$this->db->commit();
return false;
} catch(\Throwable $e) {
@ -468,14 +467,17 @@ class Database {
}
// lastly update the feed database itself with updated information.
$next = $this->feedNextFetch($feedID, $feed);
$this->db->prepare('UPDATE arsse_feeds SET url = ?, title = ?, favicon = ?, source = ?, updated = CURRENT_TIMESTAMP, modified = ?, etag = ?, err_count = 0, err_msg = "", next_fetch = ? WHERE id is ?', 'str', 'str', 'str', 'str', 'datetime', 'str', 'datetime', 'int')->run(
$this->db->prepare(
'UPDATE arsse_feeds SET url = ?, title = ?, favicon = ?, source = ?, updated = CURRENT_TIMESTAMP, modified = ?, etag = ?, err_count = 0, err_msg = "", next_fetch = ? WHERE id is ?',
'str', 'str', 'str', 'str', 'datetime', 'str', 'datetime', 'int'
)->run(
$feed->data->feedUrl,
$feed->data->title,
$feed->favicon,
$feed->data->siteUrl,
\DateTime::createFromFormat("!D, d M Y H:i:s e", $feed->resource->getLastModified()),
$feed->lastModified,
$feed->resource->getEtag(),
$next,
$feed->nextFetch,
$feedID
);
} catch(\Throwable $e) {
@ -486,11 +488,6 @@ class Database {
return true;
}
/**
 * Placeholder scheduler: reports when a feed should next be fetched.
 *
 * Neither argument is consulted; the result is always three hours from the
 * current time, expressed in UTC.
 *
 * @param int $feedID the feed being scheduled (currently unused)
 * @param Feed|null $feed the freshly fetched feed, if any (currently unused)
 * @return \DateTime the time of the next fetch
 */
protected function feedNextFetch(int $feedID, Feed $feed = null): \DateTime {
    // FIXME: stub
    $utc = new \DateTimeZone("UTC");
    $when = new \DateTime("now + 3 hours", $utc);
    return $when;
}
public function articleMatchLatest(int $feedID, int $count): Db\Result {
return $this->db->prepare(
'SELECT id, DATEFORMAT("unix", edited) AS edited_date, guid, url_title_hash, url_content_hash, title_content_hash FROM arsse_articles WHERE feed is ? ORDER BY edited desc limit ?',

View file

@ -13,11 +13,35 @@ class Feed {
public $reader;
public $resource;
public $modified = false;
public $lastModified = null;
public $lastModified;
public $nextFetch;
public $newItems = [];
public $changedItems = [];
public function __construct(string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = '') {
/**
 * Downloads a feed and, if the server reports it as modified, parses it and
 * compares its items against the database.
 *
 * On return the following properties are set:
 *  - `modified`: whether the resource reported a change; reset to false when a
 *    feed ID was supplied and matching found neither new nor changed items
 *  - `lastModified`: the parsed HTTP Last-Modified date if one was sent,
 *    otherwise whatever computeLastModified() yields (possibly null)
 *  - `nextFetch`: the result of computeNextFetch()
 *
 * @param int|null $feedID database ID of the feed; when null the feed is parsed but not matched against the database
 * @param string $url the URL to fetch
 * @param string $lastModified the Last-Modified value from the previous fetch, passed through to download()
 * @param string $etag the ETag from the previous fetch, passed through to download()
 * @param string $username optional HTTP user name, passed through to download()
 * @param string $password optional HTTP password, passed through to download()
 * @throws Feed\Exception when download() fails to retrieve the feed
 */
public function __construct(int $feedID = null, string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = '') {
    // fetch the feed
    $this->download($url, $lastModified, $etag, $username, $password);
    // format the HTTP Last-Modified date returned; the property written must be
    // $this->lastModified, not a property named after the $lastModified argument
    $lastMod = (string) $this->resource->getLastModified();
    if(strlen($lastMod)) {
        $parsed = \DateTime::createFromFormat("!D, d M Y H:i:s e", $lastMod);
        // createFromFormat() returns false for an unparseable header; leave the property unset in that case
        if($parsed !== false) $this->lastModified = $parsed;
    }
    $this->modified = $this->resource->isModified();
    // parse the feed, if it has been modified
    if($this->modified) {
        $this->parse();
        // matchToDatabase() requires an integer ID, so it can only run when one was supplied
        $canMatch = !is_null($feedID);
        // ascertain whether there are any articles not in the database
        if($canMatch) $this->matchToDatabase($feedID);
        // if caching header fields are not sent by the server, try to ascertain a last-modified date from the feed contents
        if(!$this->lastModified) $this->lastModified = $this->computeLastModified();
        // we only really care if articles have been modified; if there are no new articles, act as if the feed is unchanged
        // (without a feed ID nothing was matched, so empty item lists say nothing about whether the feed changed)
        if($canMatch && !sizeof($this->newItems) && !sizeof($this->changedItems)) $this->modified = false;
    }
    // compute the time at which the feed should next be fetched
    $this->nextFetch = $this->computeNextFetch();
}
public function download(string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = ''): bool {
try {
$config = new Config;
$config->setClientUserAgent(Data::$conf->userAgentString);
@ -25,17 +49,13 @@ class Feed {
$this->reader = new Reader($config);
$this->resource = $this->reader->download($url, $lastModified, $etag, $username, $password);
$lastMod = $this->resource->getLastModified();
if(strlen($lastMod)) {
$this->$lastModified = \DateTime::createFromFormat("!D, d M Y H:i:s e", $lastMod);
}
$this->modified = $this->resource->isModified();
} catch (PicoFeedException $e) {
throw new Feed\Exception($url, $e);
}
return true;
}
public function parse(int $feedID = null): bool {
public function parse(): bool {
try {
$this->parser = $this->reader->getParser(
$this->resource->getUrl(),
@ -90,10 +110,6 @@ class Feed {
$f->id = '';
}
$this->data = $feed;
// if a feedID is supplied, determine which items are already in the database, which are not, and which might have been edited
if(!is_null($feedID)) {
$this->matchToDatabase($feedID);
}
return true;
}
@ -137,7 +153,7 @@ class Feed {
return $out;
}
protected function matchToDatabase(int $feedID): bool {
public function matchToDatabase(int $feedID): bool {
// first perform deduplication on items
$items = $this->deduplicateItems($this->data->items);
// get as many of the latest articles in the database as there are in the feed
@ -226,21 +242,15 @@ class Feed {
return true;
}
public function nextFetch(): \DateTime {
public function computeNextFetch(): \DateTime {
$now = new \DateTime();
if(!$this->modified) {
$diff = $now->getTimestamp() - $this->lastModified->getTimestamp();
$offset = $this->normalizeDateDiff($diff);
$now->modify("+".$offset);
} else {
$dates = [];
$offsets = [];
foreach($this->data->items as $item) {
if($item->updatedDate) $dates[] = $item->updatedDate->getTimestamp();
if($item->publishedDate) $dates[] = $item->publishedDate->getTimestamp();
}
$dates = array_unique($dates, \SORT_NUMERIC);
rsort($dates);
$dates = $this->gatherDates();
if(sizeof($dates) > 3) {
for($a = 0; $a < 3; $a++) {
$diff = $dates[$a+1] - $dates[$a];
@ -260,6 +270,17 @@ class Feed {
return $now;
}
/**
 * Chooses the next fetch time for a feed whose fetch has just failed.
 *
 * The delay grows with the number of errors given:
 *  - fewer than 3: five minutes
 *  - fewer than 15: three hours
 *  - otherwise: one day
 *
 * @param int $errCount the feed's error count as supplied by the caller
 * @return \DateTime the current time plus the selected delay
 */
public static function nextFetchOnError($errCount): \DateTime {
    // start from the longest delay and shorten it for feeds with few errors
    $delay = "1 day";
    if($errCount < 3) {
        $delay = "5 minutes";
    } elseif($errCount < 15) {
        $delay = "3 hours";
    }
    return new \DateTime("now + ".$delay);
}
protected function normalizeDateDiff(int $diff): string {
if($diff < (30 * 60)) { // less than 30 minutes
$offset = "15 minutes";
@ -274,4 +295,30 @@ class Feed {
}
return $offset;
}
/**
 * Derives a last-modified date for the feed.
 *
 * When the feed is not flagged as modified, the existing `lastModified`
 * value is returned untouched. Otherwise the most recent timestamp reported
 * by gatherDates() is used.
 *
 * @return \DateTime|null the derived date, the existing `lastModified` value, or null when the items carry no dates
 */
public function computeLastModified() {
    // an unmodified feed keeps whatever date is already recorded
    if(!$this->modified) {
        return $this->lastModified;
    }
    $stamps = $this->gatherDates();
    if(!sizeof($stamps)) {
        return null;
    }
    // gatherDates() sorts in descending order, so the first entry is the latest
    $latest = new \DateTime();
    $latest->setTimestamp($stamps[0]);
    return $latest;
}
/**
 * Collects the distinct update and publication timestamps of the feed's items.
 *
 * Items lacking a given date contribute nothing for it.
 *
 * @return array a list of Unix timestamps, de-duplicated and sorted from most to least recent
 */
protected function gatherDates(): array {
    $stamps = [];
    foreach($this->data->items as $entry) {
        // updated date first, then published date, for each item
        foreach([$entry->updatedDate, $entry->publishedDate] as $date) {
            if($date) $stamps[] = $date->getTimestamp();
        }
    }
    $stamps = array_unique($stamps, \SORT_NUMERIC);
    // rsort() also reindexes the list from zero after de-duplication
    rsort($stamps);
    return $stamps;
}
}