mirror of
https://code.mensbeam.com/MensBeam/Arsse.git
synced 2025-01-18 17:10:33 +00:00
Integrated adaptive update intervals Fixes #51
This commit is contained in:
parent
3b4d79abc8
commit
3bc9082cfa
2 changed files with 82 additions and 38 deletions
|
@ -433,26 +433,25 @@ class Database {
|
||||||
$this->db->begin();
|
$this->db->begin();
|
||||||
try {
|
try {
|
||||||
// check to make sure the feed exists
|
// check to make sure the feed exists
|
||||||
$f = $this->db->prepare('SELECT url, username, password, DATEFORMAT("http", modified) AS lastmodified, etag FROM arsse_feeds where id is ?', "int")->run($feedID)->getRow();
|
$f = $this->db->prepare('SELECT url, username, password, DATEFORMAT("http", modified) AS lastmodified, etag, err_count FROM arsse_feeds where id is ?', "int")->run($feedID)->getRow();
|
||||||
if(!$f) throw new Db\ExceptionInput("idMissing", ["action" => __FUNCTION__, "field" => "feed", 'id' => $feedID]);
|
if(!$f) throw new Db\ExceptionInput("idMissing", ["action" => __FUNCTION__, "field" => "feed", 'id' => $feedID]);
|
||||||
// the Feed object throws an exception when there are problems, but that isn't ideal
|
// the Feed object throws an exception when there are problems, but that isn't ideal
|
||||||
// here. When an exception is thrown it should update the database with the
|
// here. When an exception is thrown it should update the database with the
|
||||||
// error instead of failing; if other exceptions are thrown, we should simply roll back
|
// error instead of failing; if other exceptions are thrown, we should simply roll back
|
||||||
try {
|
try {
|
||||||
$feed = new Feed($f['url'], (string)$f['lastmodified'], $f['etag'], $f['username'], $f['password']);
|
$feed = new Feed($feedID, $f['url'], (string)$f['lastmodified'], $f['etag'], $f['username'], $f['password']);
|
||||||
if($feed->resource->isModified()) {
|
if(!$feed->modified) {
|
||||||
$feed->parse($feedID);
|
|
||||||
} else {
|
|
||||||
// if the feed hasn't changed, just compute the next fetch time and record it
|
// if the feed hasn't changed, just compute the next fetch time and record it
|
||||||
$next = $this->feedNextFetch($feedID);
|
$this->db->prepare('UPDATE arsse_feeds SET updated = CURRENT_TIMESTAMP, next_fetch = ? WHERE id is ?', 'datetime', 'int')->run($feed->nextFetch, $feedID);
|
||||||
$this->db->prepare('UPDATE arsse_feeds SET updated = CURRENT_TIMESTAMP, next_fetch = ? WHERE id is ?', 'datetime', 'int')->run($next, $feedID);
|
|
||||||
$this->db->commit();
|
$this->db->commit();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} catch (Feed\Exception $e) {
|
} catch (Feed\Exception $e) {
|
||||||
// update the database with the resultant error and the next fetch time, incrementing the error count
|
// update the database with the resultant error and the next fetch time, incrementing the error count
|
||||||
$next = $this->feedNextFetch($feedID);
|
$this->db->prepare(
|
||||||
$this->db->prepare('UPDATE arsse_feeds SET updated = CURRENT_TIMESTAMP, next_fetch = ?, err_count = err_count + 1, err_msg = ? WHERE id is ?', 'datetime', 'str', 'int')->run($next, $e->getMessage(),$feedID);
|
'UPDATE arsse_feeds SET updated = CURRENT_TIMESTAMP, next_fetch = ?, err_count = err_count + 1, err_msg = ? WHERE id is ?',
|
||||||
|
'datetime', 'str', 'int'
|
||||||
|
)->run(Feed::nextFetchOnError($f['err_count']), $e->getMessage(),$feedID);
|
||||||
$this->db->commit();
|
$this->db->commit();
|
||||||
return false;
|
return false;
|
||||||
} catch(\Throwable $e) {
|
} catch(\Throwable $e) {
|
||||||
|
@ -468,14 +467,17 @@ class Database {
|
||||||
}
|
}
|
||||||
// lastly update the feed database itself with updated information.
|
// lastly update the feed database itself with updated information.
|
||||||
$next = $this->feedNextFetch($feedID, $feed);
|
$next = $this->feedNextFetch($feedID, $feed);
|
||||||
$this->db->prepare('UPDATE arsse_feeds SET url = ?, title = ?, favicon = ?, source = ?, updated = CURRENT_TIMESTAMP, modified = ?, etag = ?, err_count = 0, err_msg = "", next_fetch = ? WHERE id is ?', 'str', 'str', 'str', 'str', 'datetime', 'str', 'datetime', 'int')->run(
|
$this->db->prepare(
|
||||||
|
'UPDATE arsse_feeds SET url = ?, title = ?, favicon = ?, source = ?, updated = CURRENT_TIMESTAMP, modified = ?, etag = ?, err_count = 0, err_msg = "", next_fetch = ? WHERE id is ?',
|
||||||
|
'str', 'str', 'str', 'str', 'datetime', 'str', 'datetime', 'int'
|
||||||
|
)->run(
|
||||||
$feed->data->feedUrl,
|
$feed->data->feedUrl,
|
||||||
$feed->data->title,
|
$feed->data->title,
|
||||||
$feed->favicon,
|
$feed->favicon,
|
||||||
$feed->data->siteUrl,
|
$feed->data->siteUrl,
|
||||||
\DateTime::createFromFormat("!D, d M Y H:i:s e", $feed->resource->getLastModified()),
|
$feed->lastModified,
|
||||||
$feed->resource->getEtag(),
|
$feed->resource->getEtag(),
|
||||||
$next,
|
$feed->nextFetch,
|
||||||
$feedID
|
$feedID
|
||||||
);
|
);
|
||||||
} catch(\Throwable $e) {
|
} catch(\Throwable $e) {
|
||||||
|
@ -486,11 +488,6 @@ class Database {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function feedNextFetch(int $feedID, Feed $feed = null): \DateTime {
|
|
||||||
// FIXME: stub
|
|
||||||
return new \DateTime("now + 3 hours", new \DateTimeZone("UTC"));
|
|
||||||
}
|
|
||||||
|
|
||||||
public function articleMatchLatest(int $feedID, int $count): Db\Result {
|
public function articleMatchLatest(int $feedID, int $count): Db\Result {
|
||||||
return $this->db->prepare(
|
return $this->db->prepare(
|
||||||
'SELECT id, DATEFORMAT("unix", edited) AS edited_date, guid, url_title_hash, url_content_hash, title_content_hash FROM arsse_articles WHERE feed is ? ORDER BY edited desc limit ?',
|
'SELECT id, DATEFORMAT("unix", edited) AS edited_date, guid, url_title_hash, url_content_hash, title_content_hash FROM arsse_articles WHERE feed is ? ORDER BY edited desc limit ?',
|
||||||
|
|
89
lib/Feed.php
89
lib/Feed.php
|
@ -13,11 +13,35 @@ class Feed {
|
||||||
public $reader;
|
public $reader;
|
||||||
public $resource;
|
public $resource;
|
||||||
public $modified = false;
|
public $modified = false;
|
||||||
public $lastModified = null;
|
public $lastModified;
|
||||||
|
public $nextFetch;
|
||||||
public $newItems = [];
|
public $newItems = [];
|
||||||
public $changedItems = [];
|
public $changedItems = [];
|
||||||
|
|
||||||
public function __construct(string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = '') {
|
// Fetches a feed and, if it has changed, parses it and determines which of its articles are new or edited.
//
// After construction the public properties $modified, $lastModified and $nextFetch describe the result of the fetch.
//
// $feedID       database ID of the feed, handed to matchToDatabase(); may be null, in which case no database matching is done
// $url          address of the feed, forwarded to download()
// $lastModified Last-Modified value recorded at the previous fetch, forwarded to download()
// $etag         ETag value recorded at the previous fetch, forwarded to download()
// $username     credentials forwarded to download()
// $password     credentials forwarded to download()
//
// download() rethrows PicoFeed failures as Feed\Exception; these propagate to the caller.
public function __construct(int $feedID = null, string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = '') {
    // fetch the feed
    $this->download($url, $lastModified, $etag, $username, $password);
    // format the HTTP Last-Modified date returned
    $lastMod = (string) $this->resource->getLastModified();
    if(strlen($lastMod)) {
        // this must be assigned to the "lastModified" property; writing $this->$lastModified would
        // instead use the *value* of the $lastModified parameter as the property name
        $parsed = \DateTime::createFromFormat("!D, d M Y H:i:s e", $lastMod);
        // createFromFormat() yields false for an unparseable header; leave the property unset in that case
        if($parsed !== false) $this->lastModified = $parsed;
    }
    $this->modified = $this->resource->isModified();
    //parse the feed, if it has been modified
    if($this->modified) {
        $this->parse();
        // matchToDatabase() requires an integer ID, so articles can only be compared when a feed ID was supplied
        if(!is_null($feedID)) {
            // ascertain whether there are any articles not in the database
            $this->matchToDatabase($feedID);
        }
        // if caching header fields are not sent by the server, try to ascertain a last-modified date from the feed contents
        if(!$this->lastModified) $this->lastModified = $this->computeLastModified();
        // we only really care if articles have been modified; if there are no new articles, act as if the feed is unchanged
        // (without a feed ID nothing was compared, so empty item lists carry no information and the flag is left alone)
        if(!is_null($feedID) && !sizeof($this->newItems) && !sizeof($this->changedItems)) $this->modified = false;
    }
    // compute the time at which the feed should next be fetched
    // NOTE(review): computeNextFetch() reads $this->lastModified when the feed is unmodified; confirm it copes with a null date
    $this->nextFetch = $this->computeNextFetch();
}
|
||||||
|
|
||||||
|
public function download(string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = ''): bool {
|
||||||
try {
|
try {
|
||||||
$config = new Config;
|
$config = new Config;
|
||||||
$config->setClientUserAgent(Data::$conf->userAgentString);
|
$config->setClientUserAgent(Data::$conf->userAgentString);
|
||||||
|
@ -25,17 +49,13 @@ class Feed {
|
||||||
|
|
||||||
$this->reader = new Reader($config);
|
$this->reader = new Reader($config);
|
||||||
$this->resource = $this->reader->download($url, $lastModified, $etag, $username, $password);
|
$this->resource = $this->reader->download($url, $lastModified, $etag, $username, $password);
|
||||||
$lastMod = $this->resource->getLastModified();
|
|
||||||
if(strlen($lastMod)) {
|
|
||||||
$this->$lastModified = \DateTime::createFromFormat("!D, d M Y H:i:s e", $lastMod);
|
|
||||||
}
|
|
||||||
$this->modified = $this->resource->isModified();
|
|
||||||
} catch (PicoFeedException $e) {
|
} catch (PicoFeedException $e) {
|
||||||
throw new Feed\Exception($url, $e);
|
throw new Feed\Exception($url, $e);
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function parse(int $feedID = null): bool {
|
public function parse(): bool {
|
||||||
try {
|
try {
|
||||||
$this->parser = $this->reader->getParser(
|
$this->parser = $this->reader->getParser(
|
||||||
$this->resource->getUrl(),
|
$this->resource->getUrl(),
|
||||||
|
@ -90,10 +110,6 @@ class Feed {
|
||||||
$f->id = '';
|
$f->id = '';
|
||||||
}
|
}
|
||||||
$this->data = $feed;
|
$this->data = $feed;
|
||||||
// if a feedID is supplied, determine which items are already in the database, which are not, and which might have been edited
|
|
||||||
if(!is_null($feedID)) {
|
|
||||||
$this->matchToDatabase($feedID);
|
|
||||||
}
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -137,7 +153,7 @@ class Feed {
|
||||||
return $out;
|
return $out;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function matchToDatabase(int $feedID): bool {
|
public function matchToDatabase(int $feedID): bool {
|
||||||
// first perform deduplication on items
|
// first perform deduplication on items
|
||||||
$items = $this->deduplicateItems($this->data->items);
|
$items = $this->deduplicateItems($this->data->items);
|
||||||
// get as many of the latest articles in the database as there are in the feed
|
// get as many of the latest articles in the database as there are in the feed
|
||||||
|
@ -226,21 +242,15 @@ class Feed {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function nextFetch(): \DateTime {
|
public function computeNextFetch(): \DateTime {
|
||||||
$now = new \DateTime();
|
$now = new \DateTime();
|
||||||
if(!$this->modified) {
|
if(!$this->modified) {
|
||||||
$diff = $now->getTimestamp() - $this->lastModified->getTimestamp();
|
$diff = $now->getTimestamp() - $this->lastModified->getTimestamp();
|
||||||
$offset = $this->normalizeDateDiff($diff);
|
$offset = $this->normalizeDateDiff($diff);
|
||||||
$now->modify("+".$offset);
|
$now->modify("+".$offset);
|
||||||
} else {
|
} else {
|
||||||
$dates = [];
|
|
||||||
$offsets = [];
|
$offsets = [];
|
||||||
foreach($this->data->items as $item) {
|
$dates = $this->gatherDates();
|
||||||
if($item->updatedDate) $dates[] = $item->updatedDate->getTimestamp();
|
|
||||||
if($item->publishedDate) $dates[] = $item->publishedDate->getTimestamp();
|
|
||||||
}
|
|
||||||
$dates = array_unique($dates, \SORT_NUMERIC);
|
|
||||||
rsort($dates);
|
|
||||||
if(sizeof($dates) > 3) {
|
if(sizeof($dates) > 3) {
|
||||||
for($a = 0; $a < 3; $a++) {
|
for($a = 0; $a < 3; $a++) {
|
||||||
$diff = $dates[$a+1] - $dates[$a];
|
$diff = $dates[$a+1] - $dates[$a];
|
||||||
|
@ -260,6 +270,17 @@ class Feed {
|
||||||
return $now;
|
return $now;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the time at which a feed that failed to fetch should be retried.
//
// The delay grows with the number of errors already recorded for the feed:
// fewer than 3 errors  -> 5 minutes
// fewer than 15 errors -> 3 hours
// anything else        -> 1 day
//
// $errCount is the error count recorded before the current failure.
public static function nextFetchOnError($errCount): \DateTime {
    $delay = ($errCount < 3)
        ? "5 minutes"
        : (($errCount < 15) ? "3 hours" : "1 day");
    $retryAt = new \DateTime("now + ".$delay);
    return $retryAt;
}
|
||||||
|
|
||||||
protected function normalizeDateDiff(int $diff): string {
|
protected function normalizeDateDiff(int $diff): string {
|
||||||
if($diff < (30 * 60)) { // less than 30 minutes
|
if($diff < (30 * 60)) { // less than 30 minutes
|
||||||
$offset = "15 minutes";
|
$offset = "15 minutes";
|
||||||
|
@ -274,4 +295,30 @@ class Feed {
|
||||||
}
|
}
|
||||||
return $offset;
|
return $offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Determines a last-modified date for the feed.
//
// For an unmodified feed the currently stored $lastModified is returned as-is.
// Otherwise the first entry of gatherDates() (sorted newest first) is used;
// null is returned when the feed's items carry no dates at all.
public function computeLastModified() {
    if(!$this->modified) return $this->lastModified;
    $stamps = $this->gatherDates();
    if(!sizeof($stamps)) return null;
    // the first entry is the most recent timestamp
    $latest = new \DateTime();
    $latest->setTimestamp($stamps[0]);
    return $latest;
}
|
||||||
|
|
||||||
|
// Collects the update and publication timestamps of every item in the parsed feed.
//
// Returns a list of distinct Unix timestamps, sorted from newest to oldest;
// items without either date contribute nothing.
protected function gatherDates(): array {
    $stamps = [];
    foreach($this->data->items as $entry) {
        // the updated date is considered before the published date for each item
        foreach([$entry->updatedDate, $entry->publishedDate] as $date) {
            if($date) $stamps[] = $date->getTimestamp();
        }
    }
    // drop duplicates, then order newest first (rsort also re-indexes the list)
    $stamps = array_unique($stamps, \SORT_NUMERIC);
    rsort($stamps);
    return $stamps;
}
|
||||||
}
|
}
|
Loading…
Add table
Reference in a new issue