<?php
declare(strict_types=1);
namespace JKingWeb\Arsse;
use PicoFeed\Reader\Reader;
use PicoFeed\PicoFeedException;
use PicoFeed\Reader\Favicon;
use PicoFeed\Config\Config;
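/**
 * Wrapper around a PicoFeed reader/parser pair that downloads a newsfeed,
 * normalizes item identifiers, and works out which items are new or changed
 * relative to what is already stored in the database.
 *
 * A rough usage sketch (the calling code elsewhere in Arsse may differ;
 * $feedID here stands for a feed's database ID):
 *
 *     $feed = new Feed("http://example.com/feed.xml");
 *     if ($feed->modified) {
 *         $feed->parse($feedID);
 *         // $feed->newItems and $feed->changedItems now hold the delta
 *     }
 */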
class Feed {
    public $data = null;
    public $favicon;
    public $parser;
    public $reader;
    public $resource;
    public $modified = false;
    public $lastModified = null;
    public $newItems = [];
    public $changedItems = [];

    public function __construct(string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = '') {
        try {
            $config = new Config;
            $config->setClientUserAgent(Data::$conf->userAgentString);
            $config->setGrabberUserAgent(Data::$conf->userAgentString);
            $this->reader = new Reader($config);
            $this->resource = $this->reader->download($url, $lastModified, $etag, $username, $password);
            $lastMod = $this->resource->getLastModified();
            if(strlen($lastMod)) {
                $this->lastModified = \DateTime::createFromFormat("!D, d M Y H:i:s e", $lastMod);
            }
            $this->modified = $this->resource->isModified();
        } catch (PicoFeedException $e) {
            throw new Feed\Exception($url, $e);
        }
    }
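    /**
     * Parses the downloaded resource, computes the comparison hashes and a
     * stable id for each item, and stores the result in $this->data; when a
     * feed ID is supplied the items are also matched against the database.
     * Throws Feed\Exception on any PicoFeed error.
     */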
    public function parse(int $feedID = null): bool {
        try {
            $this->parser = $this->reader->getParser(
                $this->resource->getUrl(),
                $this->resource->getContent(),
                $this->resource->getEncoding()
            );
            $feed = $this->parser->execute();
            // Grab the favicon for the feed; returns an empty string if it cannot find one.
            // Some feeds might use a different domain (eg: feedburner), so the site url is
            // used instead of the feed's url.
            $this->favicon = (new Favicon)->find($feed->siteUrl);
        } catch (PicoFeedException $e) {
            throw new Feed\Exception($this->resource->getUrl(), $e);
        }
        // PicoFeed does not provide valid ids when there is no id element. Its solution
        // of hashing the url, title, and content together for the id if there is no id
        // element is stupid. Many feeds are frankenstein mixtures of Atom and RSS, but
        // some are pure RSS with guid elements while others use the Dublin Core spec for
        // identification. These feeds shouldn't be duplicated when updated. That should
        // only be reserved for severely broken feeds.
        foreach ($feed->items as $f) {
            // Hashes used for comparison to check for updates and also to identify when an
            // id doesn't exist.
            $f->urlTitleHash = hash('sha256', $f->url.$f->title);
            $f->urlContentHash = hash('sha256', $f->url.$f->content.$f->enclosureUrl.$f->enclosureType);
            $f->titleContentHash = hash('sha256', $f->title.$f->content.$f->enclosureUrl.$f->enclosureType);
            // If there is an id element then continue. The id is used already.
            $id = (string)$f->xml->id;
            if ($id !== '') {
                continue;
            }
            // If there is a guid element use it as the id.
            $id = (string)$f->xml->guid;
            if ($id !== '') {
                $f->id = hash('sha256', $id);
                continue;
            }
            // If there is a Dublin Core identifier use it.
            $id = (string)$f->xml->children('http://purl.org/dc/elements/1.1/')->identifier;
            if ($id !== '') {
                $f->id = hash('sha256', $id);
                continue;
            }
            // If there aren't any of those there is no id.
            $f->id = '';
        }
        $this->data = $feed;
        // if a feedID is supplied, determine which items are already in the database, which are not, and which might have been edited
        if(!is_null($feedID)) {
            $this->matchToDatabase($feedID);
        }
        return true;
    }
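    /**
     * Removes older duplicates of the same item from the parsed feed, keeping
     * whichever version appears to be the most recently updated or published.
     */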
    protected function deduplicateItems(array $items): array {
        /* Rationale:
           Some newsfeeds (notably Planet) include multiple versions of an
           item if it is updated. As we only care about the latest, we
           try to remove any "old" versions of an item that might also be
           present within the feed.
        */
        $out = [];
        foreach($items as $item) {
            foreach($out as $index => $check) {
                // if the two items have the same ID or any one hash matches, they are two versions of the same item
                if(
                    ($item->id && $check->id && $item->id == $check->id) ||
                    $item->urlTitleHash == $check->urlTitleHash ||
                    $item->urlContentHash == $check->urlContentHash ||
                    $item->titleContentHash == $check->titleContentHash
                ) {
                    // because newsfeeds are usually ordered newest-first, the later item should only be used if...
                    if(
                        // the later item has an update date and the existing item does not
                        ($item->updatedDate && !$check->updatedDate) ||
                        // the later item has an update date newer than the existing item's
                        ($item->updatedDate && $check->updatedDate && $item->updatedDate->getTimestamp() > $check->updatedDate->getTimestamp()) ||
                        // neither item has an update date, both have publish dates, and the later item has a newer publish date
                        (!$item->updatedDate && !$check->updatedDate && $item->publishedDate && $check->publishedDate && $item->publishedDate->getTimestamp() > $check->publishedDate->getTimestamp())
                    ) {
                        // if the later item should be used, replace the existing one
                        $out[$index] = $item;
                        continue 2;
                    } else {
                        // otherwise skip the item
                        continue 2;
                    }
                }
            }
            // if there was no match, add the item
            $out[] = $item;
        }
        return $out;
    }
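    /**
     * Compares the deduplicated feed items against the articles already stored
     * for the given feed, populating $this->newItems with items not found in
     * the database and $this->changedItems (keyed by the article's database ID)
     * with items whose edit timestamp or hashes differ from the stored copy.
     */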
    protected function matchToDatabase(int $feedID): bool {
        // first perform deduplication on items
        $items = $this->deduplicateItems($this->data->items);
        // get as many of the latest articles in the database as there are in the feed
        $articles = Data::$db->articleMatchLatest($feedID, sizeof($items));
        // arrays holding new, edited, and tentatively new items; items may be tentatively new because we perform two passes
        $new = $tentative = $edited = [];
        // iterate through the items and for each determine whether it is existing, edited, or entirely new
        foreach($items as $index => $i) {
            foreach($articles as $a) {
                if(
                    // the item matches if the GUID matches...
                    ($i->id && $i->id === $a['guid']) ||
                    // ... or if any one of the hashes match
                    $i->urlTitleHash === $a['url_title_hash'] ||
                    $i->urlContentHash === $a['url_content_hash'] ||
                    $i->titleContentHash === $a['title_content_hash']
                ) {
                    if($i->updatedDate && $i->updatedDate->getTimestamp() !== $a['edited_date']) {
                        // if the item has an edit timestamp and it doesn't match that of the article in the database, then the article has been edited
                        // we store the item index and database record ID as a key/value pair
                        $edited[$index] = $a['id'];
                        continue 2;
                    } else if($i->urlTitleHash !== $a['url_title_hash'] || $i->urlContentHash !== $a['url_content_hash'] || $i->titleContentHash !== $a['title_content_hash']) {
                        // if any of the hashes do not match, then the article has been edited
                        $edited[$index] = $a['id'];
                        continue 2;
                    } else {
                        // otherwise the item is unchanged and we can ignore it
                        continue 2;
                    }
                }
            }
            // if we don't have a match, add the item to the tentatively new list
            $tentative[] = $index;
        }
        if(sizeof($tentative)) {
            // if we need to, perform a second pass on the database looking specifically for IDs and hashes of the new items
            $ids = $hashesUT = $hashesUC = $hashesTC = [];
            foreach($tentative as $index) {
                $i = $items[$index];
                if($i->id) $ids[] = $i->id;
                $hashesUT[] = $i->urlTitleHash;
                $hashesUC[] = $i->urlContentHash;
                $hashesTC[] = $i->titleContentHash;
            }
            $articles = Data::$db->articleMatchIds($feedID, $ids, $hashesUT, $hashesUC, $hashesTC);
            foreach($tentative as $index) {
                $i = $items[$index];
                foreach($articles as $a) {
                    if(
                        // the item matches if the GUID matches...
                        ($i->id && $i->id === $a['guid']) ||
                        // ... or if any one of the hashes match
                        $i->urlTitleHash === $a['url_title_hash'] ||
                        $i->urlContentHash === $a['url_content_hash'] ||
                        $i->titleContentHash === $a['title_content_hash']
                    ) {
                        if($i->updatedDate && $i->updatedDate->getTimestamp() !== $a['edited_date']) {
                            // if the item has an edit timestamp and it doesn't match that of the article in the database, then the article has been edited
                            // we store the item index and database record ID as a key/value pair
                            $edited[$index] = $a['id'];
                            continue 2;
                        } else if($i->urlTitleHash !== $a['url_title_hash'] || $i->urlContentHash !== $a['url_content_hash'] || $i->titleContentHash !== $a['title_content_hash']) {
                            // if any of the hashes do not match, then the article has been edited
                            $edited[$index] = $a['id'];
                            continue 2;
                        } else {
                            // otherwise the item is unchanged and we can ignore it
                            continue 2;
                        }
                    }
                }
                // if we don't have a match, add the item to the definite new list
                $new[] = $index;
            }
        }
        // FIXME: fetch full content when appropriate
        foreach($new as $index) {
            $this->newItems[] = $items[$index];
        }
        foreach($edited as $index => $id) {
            $this->changedItems[$id] = $items[$index];
        }
        return true;
    }
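    /**
     * Computes when the feed should next be fetched. For an unmodified feed
     * the interval grows with the time elapsed since the last modification;
     * a freshly updated feed is simply scheduled three hours out for now
     * (see the FIXME below).
     */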
    public function nextFetch(): \DateTime {
        if(!$this->modified) {
            $now = time();
            $diff = $now - $this->lastModified->getTimestamp();
            if($diff < (30 * 60)) { // less than 30 minutes
                $offset = "15 minutes";
            } else if($diff < (60 * 60)) { // less than an hour
                $offset = "30 minutes";
            } else if($diff < (3 * 60 * 60)) { // less than three hours
                $offset = "1 hour";
            } else if($diff > (36 * 60 * 60)) { // more than 36 hours
                $offset = "1 day";
            } else {
                $offset = "3 hours";
            }
            $t = new \DateTime();
            $t->setTimestamp($now);
            $t->modify("+".$offset);
            return $t;
        } else {
            // FIXME: implement algorithm to use when a feed has been updated
            return new \DateTime("now + 3 hours");
        }
    }
}