From 3bc9082cfab748f07ae7e5bd0dc2bb842f0be94f Mon Sep 17 00:00:00 2001 From: "J. King" Date: Sun, 30 Apr 2017 17:54:29 -0400 Subject: [PATCH] Integrated adaptive update intervals Fixes #51 --- lib/Database.php | 31 ++++++++--------- lib/Feed.php | 89 ++++++++++++++++++++++++++++++++++++------------ 2 files changed, 82 insertions(+), 38 deletions(-) diff --git a/lib/Database.php b/lib/Database.php index b35ddd1c..63982d19 100644 --- a/lib/Database.php +++ b/lib/Database.php @@ -433,26 +433,25 @@ class Database { $this->db->begin(); try { // check to make sure the feed exists - $f = $this->db->prepare('SELECT url, username, password, DATEFORMAT("http", modified) AS lastmodified, etag FROM arsse_feeds where id is ?', "int")->run($feedID)->getRow(); + $f = $this->db->prepare('SELECT url, username, password, DATEFORMAT("http", modified) AS lastmodified, etag, err_count FROM arsse_feeds where id is ?', "int")->run($feedID)->getRow(); if(!$f) throw new Db\ExceptionInput("idMissing", ["action" => __FUNCTION__, "field" => "feed", 'id' => $feedID]); // the Feed object throws an exception when there are problems, but that isn't ideal // here. When an exception is thrown it should update the database with the // error instead of failing; if other exceptions are thrown, we should simply roll back try { - $feed = new Feed($f['url'], (string)$f['lastmodified'], $f['etag'], $f['username'], $f['password']); - if($feed->resource->isModified()) { - $feed->parse($feedID); - } else { + $feed = new Feed($feedID, $f['url'], (string)$f['lastmodified'], $f['etag'], $f['username'], $f['password']); + if(!$feed->modified) { // if the feed hasn't changed, just compute the next fetch time and record it - $next = $this->feedNextFetch($feedID); - $this->db->prepare('UPDATE arsse_feeds SET updated = CURRENT_TIMESTAMP, next_fetch = ? WHERE id is ?', 'datetime', 'int')->run($next, $feedID); + $this->db->prepare('UPDATE arsse_feeds SET updated = CURRENT_TIMESTAMP, next_fetch = ? WHERE id is ?', 'datetime', 'int')->run($feed->nextFetch, $feedID); $this->db->commit(); return false; } } catch (Feed\Exception $e) { // update the database with the resultant error and the next fetch time, incrementing the error count - $next = $this->feedNextFetch($feedID); - $this->db->prepare('UPDATE arsse_feeds SET updated = CURRENT_TIMESTAMP, next_fetch = ?, err_count = err_count + 1, err_msg = ? WHERE id is ?', 'datetime', 'str', 'int')->run($next, $e->getMessage(),$feedID); + $this->db->prepare( + 'UPDATE arsse_feeds SET updated = CURRENT_TIMESTAMP, next_fetch = ?, err_count = err_count + 1, err_msg = ? WHERE id is ?', + 'datetime', 'str', 'int' + )->run(Feed::nextFetchOnError($f['err_count']), $e->getMessage(),$feedID); $this->db->commit(); return false; } catch(\Throwable $e) { @@ -468,14 +467,17 @@ class Database { } // lastly update the feed database itself with updated information. $next = $this->feedNextFetch($feedID, $feed); - $this->db->prepare('UPDATE arsse_feeds SET url = ?, title = ?, favicon = ?, source = ?, updated = CURRENT_TIMESTAMP, modified = ?, etag = ?, err_count = 0, err_msg = "", next_fetch = ? WHERE id is ?', 'str', 'str', 'str', 'str', 'datetime', 'str', 'datetime', 'int')->run( + $this->db->prepare( + 'UPDATE arsse_feeds SET url = ?, title = ?, favicon = ?, source = ?, updated = CURRENT_TIMESTAMP, modified = ?, etag = ?, err_count = 0, err_msg = "", next_fetch = ? WHERE id is ?', + 'str', 'str', 'str', 'str', 'datetime', 'str', 'datetime', 'int' + )->run( $feed->data->feedUrl, $feed->data->title, $feed->favicon, $feed->data->siteUrl, - \DateTime::createFromFormat("!D, d M Y H:i:s e", $feed->resource->getLastModified()), + $feed->lastModified, $feed->resource->getEtag(), - $next, + $feed->nextFetch, $feedID ); } catch(\Throwable $e) { @@ -485,11 +487,6 @@ class Database { $this->db->commit(); return true; } - - protected function feedNextFetch(int $feedID, Feed $feed = null): \DateTime { - // FIXME: stub - return new \DateTime("now + 3 hours", new \DateTimeZone("UTC")); - } public function articleMatchLatest(int $feedID, int $count): Db\Result { return $this->db->prepare( diff --git a/lib/Feed.php b/lib/Feed.php index 6a600ada..7dbbe185 100644 --- a/lib/Feed.php +++ b/lib/Feed.php @@ -13,11 +13,35 @@ class Feed { public $reader; public $resource; public $modified = false; - public $lastModified = null; + public $lastModified; + public $nextFetch; public $newItems = []; public $changedItems = []; - public function __construct(string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = '') { + public function __construct(int $feedID = null, string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = '') { + // fetch the feed + $this->download($url, $lastModified, $etag, $username, $password); + // format the HTTP Last-Modified date returned + $lastMod = $this->resource->getLastModified(); + if(strlen($lastMod)) { + $this->$lastModified = \DateTime::createFromFormat("!D, d M Y H:i:s e", $lastMod); + } + $this->modified = $this->resource->isModified(); + //parse the feed, if it has been modified + if($this->modified) { + $this->parse(); + // ascertain whether there are any articles not in the database + $this->matchToDatabase($feedID); + // if caching header fields are not sent by the server, try to ascertain a last-modified date from the feed contents + if(!$this->lastModified) $this->lastModified = $this->computeLastModified(); + // we only really care if articles have been modified; if there are no new articles, act as if the feed is unchanged + if(!sizeof($this->newItems) && !sizeof($this->changedItems)) $this->modified = false; + } + // compute the time at which the feed should next be fetched + $this->nextFetch = $this->computeNextFetch(); + } + + public function download(string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = ''): bool { try { $config = new Config; $config->setClientUserAgent(Data::$conf->userAgentString); @@ -25,17 +49,13 @@ class Feed { $this->reader = new Reader($config); $this->resource = $this->reader->download($url, $lastModified, $etag, $username, $password); - $lastMod = $this->resource->getLastModified(); - if(strlen($lastMod)) { - $this->$lastModified = \DateTime::createFromFormat("!D, d M Y H:i:s e", $lastMod); - } - $this->modified = $this->resource->isModified(); } catch (PicoFeedException $e) { throw new Feed\Exception($url, $e); } + return true; } - public function parse(int $feedID = null): bool { + public function parse(): bool { try { $this->parser = $this->reader->getParser( $this->resource->getUrl(), @@ -90,10 +110,6 @@ class Feed { $f->id = ''; } $this->data = $feed; - // if a feedID is supplied, determine which items are already in the database, which are not, and which might have been edited - if(!is_null($feedID)) { - $this->matchToDatabase($feedID); - } return true; } @@ -137,7 +153,7 @@ class Feed { return $out; } - protected function matchToDatabase(int $feedID): bool { + public function matchToDatabase(int $feedID): bool { // first perform deduplication on items $items = $this->deduplicateItems($this->data->items); // get as many of the latest articles in the database as there are in the feed @@ -226,21 +242,15 @@ class Feed { return true; } - public function nextFetch(): \DateTime { + public function computeNextFetch(): \DateTime { $now = new \DateTime(); if(!$this->modified) { $diff = $now->getTimestamp() - $this->lastModified->getTimestamp(); $offset = $this->normalizeDateDiff($diff); $now->modify("+".$offset); } else { - $dates = []; $offsets = []; - foreach($this->data->items as $item) { - if($item->updatedDate) $dates[] = $item->updatedDate->getTimestamp(); - if($item->publishedDate) $dates[] = $item->publishedDate->getTimestamp(); - } - $dates = array_unique($dates, \SORT_NUMERIC); - rsort($dates); + $dates = $this->gatherDates(); if(sizeof($dates) > 3) { for($a = 0; $a < 3; $a++) { $diff = $dates[$a+1] - $dates[$a]; @@ -260,6 +270,17 @@ class Feed { return $now; } + public static function nextFetchOnError($errCount): \DateTime { + if($errCount < 3) { + $offset = "5 minutes"; + } else if($errCount < 15) { + $offset = "3 hours"; + } else { + $offset = "1 day"; + } + return new \DateTime("now + ".$offset); + } + protected function normalizeDateDiff(int $diff): string { if($diff < (30 * 60)) { // less than 30 minutes $offset = "15 minutes"; @@ -274,4 +295,30 @@ class Feed { } return $offset; } + + public function computeLastModified() { + if(!$this->modified) { + return $this->lastModified; + } else { + $dates = $this->gatherDates(); + } + if(sizeof($dates)) { + $now = new \DateTime(); + $now->setTimestamp($dates[0]); + return $now; + } else { + return null; + } + } + + protected function gatherDates(): array { + $dates = []; + foreach($this->data->items as $item) { + if($item->updatedDate) $dates[] = $item->updatedDate->getTimestamp(); + if($item->publishedDate) $dates[] = $item->publishedDate->getTimestamp(); + } + $dates = array_unique($dates, \SORT_NUMERIC); + rsort($dates); + return $dates; + } } \ No newline at end of file