mirror of
https://code.mensbeam.com/MensBeam/Arsse.git
synced 2024-12-22 13:12:41 +00:00
Start on refactoring of feed processing
This commit is contained in:
parent
557d17ef5d
commit
4e57e56ca7
3 changed files with 149 additions and 161 deletions
286
lib/Database.php
286
lib/Database.php
|
@ -387,52 +387,16 @@ class Database {
|
|||
}
|
||||
|
||||
public function subscriptionAdd(string $user, string $url, string $fetchUser = "", string $fetchPassword = ""): int {
|
||||
// If the user isn't authorized to perform this action then throw an exception.
|
||||
if(!Data::$user->authorize($user, __FUNCTION__)) {
|
||||
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
|
||||
if(!Data::$user->authorize($user, __FUNCTION__)) throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
|
||||
if(!$this->userExists($user)) throw new User\Exception("doesNotExist", ["user" => $user, "action" => __FUNCTION__]);
|
||||
// If the feed doesn't already exist in the database then add it to the database
|
||||
// after determining its validity with PicoFeed.
|
||||
$feedID = $this->db->prepare("SELECT id from arsse_feeds where url is ? and username is ? and password is ?", "str", "str", "str")->run($url, $fetchUser, $fetchPassword)->getValue();
|
||||
if($feedID === null) {
|
||||
$feedID = $this->feedAdd($url, $fetchUser, $fetchPassword);
|
||||
}
|
||||
// If the user doesn't exist throw an exception.
|
||||
if(!$this->userExists($user)) {
|
||||
throw new User\Exception("doesNotExist", ["user" => $user, "action" => __FUNCTION__]);
|
||||
}
|
||||
$this->db->begin();
|
||||
try {
|
||||
// If the feed doesn't already exist in the database then add it to the database
|
||||
// after determining its validity with PicoFeed.
|
||||
$feedID = $this->db->prepare("SELECT id from arsse_feeds where url is ? and username is ? and password is ?", "str", "str", "str")->run($url, $fetchUser, $fetchPassword)->getValue();
|
||||
if($feedID === null) {
|
||||
$feed = new Feed($url);
|
||||
$feed->parse();
|
||||
// Add the feed to the database and return its Id which will be used when adding
|
||||
// its articles to the database.
|
||||
$feedID = $this->db->prepare(
|
||||
'INSERT INTO arsse_feeds(url,title,favicon,source,updated,modified,etag,username,password) values(?,?,?,?,?,?,?,?,?)',
|
||||
'str', 'str', 'str', 'str', 'datetime', 'datetime', 'str', 'str', 'str'
|
||||
)->run(
|
||||
$url,
|
||||
$feed->data->title,
|
||||
// Grab the favicon for the feed; returns an empty string if it cannot find one.
|
||||
$feed->favicon,
|
||||
$feed->data->siteUrl,
|
||||
$feed->data->date,
|
||||
\DateTime::createFromFormat("!D, d M Y H:i:s e", $feed->resource->getLastModified()),
|
||||
$feed->resource->getEtag(),
|
||||
$fetchUser,
|
||||
$fetchPassword
|
||||
)->lastId();
|
||||
// Add each of the articles to the database.
|
||||
foreach($feed->data->items as $i) {
|
||||
$this->articleAdd($feedID, $i);
|
||||
}
|
||||
}
|
||||
// Add the feed to the user's subscriptions.
|
||||
$sub = $this->db->prepare('INSERT INTO arsse_subscriptions(owner,feed) values(?,?)', 'str', 'int')->run($user, $feedID)->lastId();
|
||||
} catch(\Throwable $e) {
|
||||
$this->db->rollback();
|
||||
throw $e;
|
||||
}
|
||||
$this->db->commit();
|
||||
return $sub;
|
||||
// Add the feed to the user's subscriptions.
|
||||
return $this->db->prepare('INSERT INTO arsse_subscriptions(owner,feed) values(?,?)', 'str', 'int')->run($user, $feedID)->lastId();
|
||||
}
|
||||
|
||||
public function subscriptionRemove(string $user, int $id): bool {
|
||||
|
@ -440,6 +404,131 @@ class Database {
|
|||
return (bool) $this->db->prepare("DELETE from arsse_subscriptions where owner is ? and id is ?", "str", "int")->run($user, $id)->changes();
|
||||
}
|
||||
|
||||
public function feedAdd(string $url, string $fetchUser = "", string $fetchPassword = ""): int {
|
||||
$feed = new Feed($url, "", "", $fetchUser, $fetchPassword);
|
||||
$feed->parse();
|
||||
$feedID = $this->db->prepare('INSERT INTO arsse_feeds(url,username,password) values(?,?,?)', 'str', 'str', 'str')->run($url, $fetchUser, $fetchPassword)->lastId();
|
||||
// Add the feed to the database and return its Id which will be used when adding
|
||||
// its articles to the database.
|
||||
try {
|
||||
$this->feedUpdate($feedID, $feed);
|
||||
} catch(\Throwable $e) {
|
||||
$this->db->prepare('DELETE from arsse_feeds where id is ?', 'int')->run($feedID);
|
||||
throw $e;
|
||||
}
|
||||
return $feedID;
|
||||
}
|
||||
|
||||
public function feedUpdate(int $feedID, Feed $feed = null): bool {
|
||||
$this->db->begin();
|
||||
try {
|
||||
// upon the very first update of a feed the $feed object is already supplied and already parsed; for all other updates we must parse it ourselves here
|
||||
if(!$feed) {
|
||||
$f = $this->db->prepare('SELECT url, username, password, DATEFORMAT("http", modified) AS lastmodified, etag FROM arsse_feeds where id is ?', "int")->run($feedID)->getRow();
|
||||
if(!$f) throw new Db\ExceptionInput("idMissing", ["action" => __FUNCTION__, "field" => "feed", 'id' => $feedID]);
|
||||
// Feed object throws an exception when there are problems, but that isn't ideal
|
||||
// here. When an exception occurs it should update the database with the
|
||||
// error instead of failing.
|
||||
try {
|
||||
$feed = new Feed($f['url'], $f['lastmodified'], $f['etag'], $f['username'], $f['password']);
|
||||
if($feed->resource->isModified()) {
|
||||
$feed->parse();
|
||||
} else {
|
||||
$this->db->rollback();
|
||||
return false;
|
||||
}
|
||||
} catch (Feed\Exception $e) {
|
||||
$this->db->prepare('UPDATE arsse_feeds SET err_count = err_count + 1, err_msg = ? WHERE id is ?', 'str', 'int')->run($e->getMessage(),$feedID);
|
||||
$this->db->commit();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
$articles = $this->db->prepare('SELECT id, url, title, author, DATEFORMAT("http", edited) AS edited_date, guid, content, url_title_hash, url_content_hash, title_content_hash FROM arsse_articles WHERE feed is ? ORDER BY id', 'int')->run($feedID)->getAll();
|
||||
|
||||
foreach($feed->data->items as $i) {
|
||||
// Iterate through the articles in the database to determine a match for the one
|
||||
// in the just-parsed feed.
|
||||
$match = null;
|
||||
foreach($articles as $a) {
|
||||
// If the id exists and is equal to one in the database then this is the post.
|
||||
if($i->id) {
|
||||
if($i->id === $a['guid']) {
|
||||
$match = $a;
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise if the id doesn't exist and any of the hashes match then this is
|
||||
// the post.
|
||||
elseif($i->urlTitleHash === $a['url_title_hash'] || $i->urlContentHash === $a['url_content_hash'] || $i->titleContentHash === $a['title_content_hash']) {
|
||||
$match = $a;
|
||||
}
|
||||
}
|
||||
|
||||
// If there is no match then this is a new post and must be added to the
|
||||
// database.
|
||||
if(!$match) {
|
||||
$this->articleAdd($feedID, $i);
|
||||
continue;
|
||||
}
|
||||
|
||||
// With that out of the way determine if the post has been updated.
|
||||
// If there is an updated date, and it doesn't match the database's then update
|
||||
// the post.
|
||||
$update = false;
|
||||
if($i->updatedDate) {
|
||||
if($i->updatedDate !== $match['edited_date']) {
|
||||
$update = true;
|
||||
}
|
||||
}
|
||||
// Otherwise if there isn't an updated date and any of the hashes don't match
|
||||
// then update the post.
|
||||
elseif($i->urlTitleHash !== $match['url_title_hash'] || $i->urlContentHash !== $match['url_content_hash'] || $i->titleContentHash !== $match['title_content_hash']) {
|
||||
$update = true;
|
||||
}
|
||||
|
||||
if($update) {
|
||||
$this->db->prepare(
|
||||
'UPDATE arsse_articles SET url = ?, title = ?, author = ?, published = ?, edited = ?, modified = CURRENT_TIMESTAMP, guid = ?, content = ?, url_title_hash = ?, url_content_hash = ?, title_content_hash = ? WHERE id is ?',
|
||||
'str', 'str', 'str', 'datetime', 'datetime', 'str', 'str', 'str', 'str', 'str', 'int'
|
||||
)->run(
|
||||
$i->url,
|
||||
$i->title,
|
||||
$i->author,
|
||||
$i->publishedDate,
|
||||
$i->updatedDate,
|
||||
$i->id,
|
||||
$i->content,
|
||||
$i->urlTitleHash,
|
||||
$i->urlContentHash,
|
||||
$i->titleContentHash,
|
||||
$match['id']
|
||||
);
|
||||
|
||||
// If the article has categories update them.
|
||||
$this->db->prepare('DELETE FROM arsse_categories WHERE article is ?', 'int')->run($match['id']);
|
||||
$this->categoriesAdd($i, $match['id']);
|
||||
}
|
||||
}
|
||||
|
||||
// Lastly update the feed database itself with updated information.
|
||||
$this->db->prepare('UPDATE arsse_feeds SET url = ?, title = ?, favicon = ?, source = ?, updated = ?, modified = ?, etag = ?, err_count = 0, err_msg = "" WHERE id is ?', 'str', 'str', 'str', 'str', 'datetime', 'datetime', 'str', 'int')->run(
|
||||
$feed->data->feedUrl,
|
||||
$feed->data->title,
|
||||
$feed->favicon,
|
||||
$feed->data->siteUrl,
|
||||
$feed->data->date,
|
||||
\DateTime::createFromFormat("!D, d M Y H:i:s e", $feed->resource->getLastModified()),
|
||||
$feed->resource->getEtag(),
|
||||
$feedID
|
||||
);
|
||||
} catch(\Throwable $e) {
|
||||
$this->db->rollback();
|
||||
throw $e;
|
||||
}
|
||||
$this->db->commit();
|
||||
return true;
|
||||
}
|
||||
|
||||
public function articleAdd(int $feedID, \PicoFeed\Parser\Item $article): int {
|
||||
$this->db->begin();
|
||||
try {
|
||||
|
@ -485,111 +574,4 @@ class Database {
|
|||
$this->db->commit();
|
||||
return count($categories);
|
||||
}
|
||||
|
||||
public function updateFeeds(): int {
|
||||
$feeds = $this->db->query('SELECT id, url, username, password, DATEFORMAT("http", modified) AS lastmodified, etag FROM arsse_feeds')->getAll();
|
||||
foreach($feeds as $f) {
|
||||
// Feed object throws an exception when there are problems, but that isn't ideal
|
||||
// here. When an exception occurs it should update the database with the
|
||||
// error instead of failing.
|
||||
try {
|
||||
$feed = new Feed($f['url'], $f['lastmodified'], $f['etag'], $f['username'], $f['password']);
|
||||
} catch (Feed\Exception $e) {
|
||||
$this->db->prepare('UPDATE arsse_feeds SET err_count = err_count + 1, err_msg = ? WHERE id is ?', 'str', 'int')->run(
|
||||
$e->getMessage(),
|
||||
$f['id']
|
||||
);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// If the feed has been updated then update the database.
|
||||
if($feed->resource->isModified()) {
|
||||
$feed->parse();
|
||||
|
||||
$this->db->begin();
|
||||
$articles = $this->db->prepare('SELECT id, url, title, author, DATEFORMAT("http", edited) AS edited_date, guid, content, url_title_hash, url_content_hash, title_content_hash FROM arsse_articles WHERE feed is ? ORDER BY id', 'int')->run($f['id'])->getAll();
|
||||
|
||||
foreach($feed->data->items as $i) {
|
||||
// Iterate through the articles in the database to determine a match for the one
|
||||
// in the just-parsed feed.
|
||||
$match = null;
|
||||
foreach($articles as $a) {
|
||||
// If the id exists and is equal to one in the database then this is the post.
|
||||
if($i->id) {
|
||||
if($i->id === $a['guid']) {
|
||||
$match = $a;
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise if the id doesn't exist and any of the hashes match then this is
|
||||
// the post.
|
||||
elseif($i->urlTitleHash === $a['url_title_hash'] || $i->urlContentHash === $a['url_content_hash'] || $i->titleContentHash === $a['title_content_hash']) {
|
||||
$match = $a;
|
||||
}
|
||||
}
|
||||
|
||||
// If there is no match then this is a new post and must be added to the
|
||||
// database.
|
||||
if(!$match) {
|
||||
$this->articleAdd($i);
|
||||
continue;
|
||||
}
|
||||
|
||||
// With that out of the way determine if the post has been updated.
|
||||
// If there is an updated date, and it doesn't match the database's then update
|
||||
// the post.
|
||||
$update = false;
|
||||
if($i->updatedDate) {
|
||||
if($i->updatedDate !== $match['edited_date']) {
|
||||
$update = true;
|
||||
}
|
||||
}
|
||||
// Otherwise if there isn't an updated date and any of the hashes don't match
|
||||
// then update the post.
|
||||
elseif($i->urlTitleHash !== $match['url_title_hash'] || $i->urlContentHash !== $match['url_content_hash'] || $i->titleContentHash !== $match['title_content_hash']) {
|
||||
$update = true;
|
||||
}
|
||||
|
||||
if($update) {
|
||||
$this->db->prepare(
|
||||
'UPDATE arsse_articles SET url = ?, title = ?, author = ?, published = ?, edited = ?, modified = CURRENT_TIMESTAMP, guid = ?, content = ?, url_title_hash = ?, url_content_hash = ?, title_content_hash = ? WHERE id is ?',
|
||||
'str', 'str', 'str', 'datetime', 'datetime', 'str', 'str', 'str', 'str', 'str', 'int'
|
||||
)->run(
|
||||
$i->url,
|
||||
$i->title,
|
||||
$i->author,
|
||||
$i->publishedDate,
|
||||
$i->updatedDate,
|
||||
$i->id,
|
||||
$i->content,
|
||||
$i->urlTitleHash,
|
||||
$i->urlContentHash,
|
||||
$i->titleContentHash,
|
||||
$match['id']
|
||||
);
|
||||
|
||||
// If the article has categories update them.
|
||||
$this->db->prepare('DELETE FROM arsse_categories WHERE article is ?', 'int')->run($match['id']);
|
||||
$this->categoriesAdd($i, $match['id']);
|
||||
}
|
||||
}
|
||||
|
||||
// Lastly update the feed database itself with updated information.
|
||||
$this->db->prepare('UPDATE arsse_feeds SET url = ?, title = ?, favicon = ?, source = ?, updated = ?, modified = ?, etag = ?, err_count = 0, err_msg = "" WHERE id is ?', 'str', 'str', 'str', 'str', 'datetime', 'datetime', 'str', 'int')->run(
|
||||
$feed->feedUrl,
|
||||
$feed->title,
|
||||
$feed->favicon,
|
||||
$feed->siteUrl,
|
||||
$feed->date,
|
||||
$feed->resource->getLastModified(),
|
||||
$feed->resource->getEtag(),
|
||||
$f['id']
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
$this->db->commit();
|
||||
return 1;
|
||||
}
|
||||
}
|
|
@ -79,7 +79,7 @@ return [
|
|||
other {Authenticated user is not authorized to perform the action "{action}" on behalf of {user}}
|
||||
}',
|
||||
'Exception.JKingWeb/Arsse/Feed/Exception.invalidCertificate' => 'Could not download feed "{url}" because its server is serving an invalid SSL certificate',
|
||||
'Exception.JKingWeb/Arsse/Feed/Exception.invalidURL' => 'Feed URL "{url}" is invalid',
|
||||
'Exception.JKingWeb/Arsse/Feed/Exception.invalidUrl' => 'Feed URL "{url}" is invalid',
|
||||
'Exception.JKingWeb/Arsse/Feed/Exception.maxRedirect' => 'Could not download feed "{url}" because its server reached its maximum number of HTTP redirections',
|
||||
'Exception.JKingWeb/Arsse/Feed/Exception.maxSize' => 'Could not download feed "{url}" because its size exceeds the maximum allowed on its server',
|
||||
'Exception.JKingWeb/Arsse/Feed/Exception.timeout' => 'Could not download feed "{url}" because its server timed out',
|
||||
|
|
|
@ -20,14 +20,14 @@ create table arsse_users(
|
|||
|
||||
-- newsfeeds, deduplicated
|
||||
create table arsse_feeds(
|
||||
id integer primary key not null, -- sequence number
|
||||
id integer primary key, -- sequence number
|
||||
url TEXT not null, -- URL of feed
|
||||
title TEXT, -- default title of feed
|
||||
favicon TEXT, -- URL of favicon
|
||||
source TEXT, -- URL of site to which the feed belongs
|
||||
updated datetime, -- time at which the feed was last fetched
|
||||
modified datetime, -- time at which the feed last actually changed
|
||||
etag TEXT, -- HTTP ETag hash used for cache validation, changes each time the content changes
|
||||
etag TEXT not null default '', -- HTTP ETag hash used for cache validation, changes each time the content changes
|
||||
err_count integer not null default 0, -- count of successive times update resulted in error since last successful update
|
||||
err_msg TEXT, -- last error message
|
||||
username TEXT not null default '', -- HTTP authentication username
|
||||
|
@ -37,7 +37,7 @@ create table arsse_feeds(
|
|||
|
||||
-- users' subscriptions to newsfeeds, with settings
|
||||
create table arsse_subscriptions(
|
||||
id integer primary key not null, -- sequence number
|
||||
id integer primary key, -- sequence number
|
||||
owner TEXT not null references arsse_users(id) on delete cascade on update cascade, -- owner of subscription
|
||||
feed integer not null references arsse_feeds(id) on delete cascade, -- feed for the subscription
|
||||
added datetime not null default CURRENT_TIMESTAMP, -- time at which feed was added
|
||||
|
@ -51,7 +51,7 @@ create table arsse_subscriptions(
|
|||
|
||||
-- TT-RSS categories and NextCloud folders
|
||||
create table arsse_folders(
|
||||
id integer primary key not null, -- sequence number
|
||||
id integer primary key, -- sequence number
|
||||
owner TEXT not null references arsse_users(id) on delete cascade on update cascade, -- owner of folder
|
||||
parent integer references arsse_folders(id) on delete cascade, -- parent folder id
|
||||
name TEXT not null, -- folder name
|
||||
|
@ -61,7 +61,7 @@ create table arsse_folders(
|
|||
|
||||
-- entries in newsfeeds
|
||||
create table arsse_articles(
|
||||
id integer primary key not null, -- sequence number
|
||||
id integer primary key, -- sequence number
|
||||
feed integer not null references arsse_feeds(id) on delete cascade, -- feed for the subscription
|
||||
url TEXT not null, -- URL of article
|
||||
title TEXT, -- article title
|
||||
|
@ -85,17 +85,23 @@ create table arsse_enclosures(
|
|||
|
||||
-- users' actions on newsfeed entries
|
||||
create table arsse_subscription_articles(
|
||||
id integer primary key not null,
|
||||
id integer primary key,
|
||||
article integer not null references arsse_articles(id) on delete cascade,
|
||||
owner TEXT not null references arsse_users(id) on delete cascade on update cascade,
|
||||
read boolean not null default 0,
|
||||
starred boolean not null default 0,
|
||||
modified datetime not null default CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
-- IDs for specific editions of articles (required for at least NextCloud News)
|
||||
create table arsse_editions(
|
||||
id integer primary key,
|
||||
article integer not null references arsse_articles(id) on delete cascade
|
||||
);
|
||||
|
||||
-- user labels associated with newsfeed entries
|
||||
create table arsse_labels(
|
||||
sub_article integer not null references arsse_subscription_articles(id) on delete cascade, --
|
||||
owner TEXT not null references arsse_users(id) on delete cascade on update cascade,
|
||||
sub_article integer not null references arsse_subscription_articles(id) on delete cascade,
|
||||
name TEXT
|
||||
);
|
||||
create index arsse_label_names on arsse_labels(name);
|
||||
|
|
Loading…
Reference in a new issue