diff --git a/lib/Database.php b/lib/Database.php index ea70d953..a69d2465 100644 --- a/lib/Database.php +++ b/lib/Database.php @@ -898,6 +898,7 @@ class Database { * - "title": The title of the subscription * - "folder": The numeric identifier (or null) of the subscription's folder * - "pinned": Whether the subscription is pinned + * - "scrape": Whether to scrape full article contents from the HTML article * - "order_type": Whether articles should be sorted in reverse cronological order (2), chronological order (1), or the default (0) * - "keep_rule": The subscription's "keep" filter rule; articles which do not match this are hidden * - "block_rule": The subscription's "block" filter rule; articles which match this are hidden @@ -948,6 +949,7 @@ class Database { 'pinned' => "strict bool", 'keep_rule' => "str", 'block_rule' => "str", + 'scrape' => "bool", ]; [$setClause, $setTypes, $setValues] = $this->generateSet($data, $valid); if (!$setClause) { diff --git a/sql/MySQL/6.sql b/sql/MySQL/6.sql index 7d9eb128..789900ef 100644 --- a/sql/MySQL/6.sql +++ b/sql/MySQL/6.sql @@ -33,6 +33,7 @@ create table arsse_user_meta( ) character set utf8mb4 collate utf8mb4_unicode_ci; alter table arsse_subscriptions add column scrape boolean not null default 0; +update arsse_subscriptions set scrape = 1 where feed in (select id from arsse_feeds where scrape = 1); alter table arsse_feeds drop column scrape; alter table arsse_articles add column content_scraped longtext; diff --git a/sql/PostgreSQL/6.sql b/sql/PostgreSQL/6.sql index 825f67de..0f559a87 100644 --- a/sql/PostgreSQL/6.sql +++ b/sql/PostgreSQL/6.sql @@ -33,6 +33,7 @@ create table arsse_user_meta( ); alter table arsse_subscriptions add column scrape smallint not null default 0; +update arsse_subscriptions set scrape = 1 where feed in (select id from arsse_feeds where scrape = 1); alter table arsse_feeds drop column scrape; alter table arsse_articles add column content_scraped text; diff --git a/sql/SQLite3/6.sql b/sql/SQLite3/6.sql index e43c4ea3..2be4fed5 100644 --- a/sql/SQLite3/6.sql +++ b/sql/SQLite3/6.sql @@ -44,8 +44,31 @@ create table arsse_user_meta( primary key(owner,key) ) without rowid; --- Add a "scrape" column for subscriptions +-- Add a "scrape" column for subscriptions and copy any existing scraping alter table arsse_subscriptions add column scrape boolean not null default 0; +update arsse_subscriptions set scrape = 1 where feed in (select id from arsse_feeds where scrape = 1); + +-- Add a column for scraped article content, and re-order some columns +create table arsse_articles_new( +-- entries in newsfeeds + id integer primary key, -- sequence number + feed integer not null references arsse_feeds(id) on delete cascade, -- feed for the subscription + url text, -- URL of article + title text collate nocase, -- article title + author text collate nocase, -- author's name + published text, -- time of original publication + edited text, -- time of last edit by author + modified text not null default CURRENT_TIMESTAMP, -- time when article was last modified in database + guid text, -- GUID + url_title_hash text not null, -- hash of URL + title; used when checking for updates and for identification if there is no guid. + url_content_hash text not null, -- hash of URL + content, enclosure URL, & content type; used when checking for updates and for identification if there is no guid. + title_content_hash text not null, -- hash of title + content, enclosure URL, & content type; used when checking for updates and for identification if there is no guid. + content_scraped text, -- scraped content, as HTML + content text -- content, as HTML +); +insert into arsse_articles_new select id, feed, url, title, author, published, edited, modified, guid, url_title_hash, url_content_hash, title_content_hash, null, content from arsse_articles; +drop table arsse_articles; +alter table arsse_articles_new rename to arsse_articles; -- Add a separate table for feed icons and replace their URLs in the feeds table with their IDs -- Also remove the "scrape" column of the feeds table, which was never an advertised feature @@ -88,28 +111,6 @@ insert into arsse_feeds_new drop table arsse_feeds; alter table arsse_feeds_new rename to arsse_feeds; --- Add a column for scraped article content, and re-order some column -create table arsse_articles_new( --- entries in newsfeeds - id integer primary key, -- sequence number - feed integer not null references arsse_feeds(id) on delete cascade, -- feed for the subscription - url text, -- URL of article - title text collate nocase, -- article title - author text collate nocase, -- author's name - published text, -- time of original publication - edited text, -- time of last edit by author - modified text not null default CURRENT_TIMESTAMP, -- time when article was last modified in database - guid text, -- GUID - url_title_hash text not null, -- hash of URL + title; used when checking for updates and for identification if there is no guid. - url_content_hash text not null, -- hash of URL + content, enclosure URL, & content type; used when checking for updates and for identification if there is no guid. - title_content_hash text not null, -- hash of title + content, enclosure URL, & content type; used when checking for updates and for identification if there is no guid. - content_scraped text, -- scraped content, as HTML - content text -- content, as HTML -); -insert into arsse_articles_new select id, feed, url, title, author, published, edited, modified, guid, url_title_hash, url_content_hash, title_content_hash, null, content from arsse_articles; -drop table arsse_articles; -alter table arsse_articles_new rename to arsse_articles; - -- set version marker pragma user_version = 7; update arsse_meta set value = '7' where "key" = 'schema_version'; diff --git a/tests/cases/Database/SeriesSubscription.php b/tests/cases/Database/SeriesSubscription.php index abbdab39..389495d3 100644 --- a/tests/cases/Database/SeriesSubscription.php +++ b/tests/cases/Database/SeriesSubscription.php @@ -80,13 +80,14 @@ trait SeriesSubscription { 'order_type' => "int", 'keep_rule' => "str", 'block_rule' => "str", + 'scrape' => "bool", ], 'rows' => [ - [1,"john.doe@example.com",2,null,null,1,2,null,null], - [2,"jane.doe@example.com",2,null,null,0,0,null,null], - [3,"john.doe@example.com",3,"Ook",2,0,1,null,null], - [4,"jill.doe@example.com",2,null,null,0,0,null,null], - [5,"jack.doe@example.com",2,null,null,1,2,"","3|E"], + [1,"john.doe@example.com",2,null,null,1,2,null,null,0], + [2,"jane.doe@example.com",2,null,null,0,0,null,null,0], + [3,"john.doe@example.com",3,"Ook",2,0,1,null,null,0], + [4,"jill.doe@example.com",2,null,null,0,0,null,null,0], + [5,"jack.doe@example.com",2,null,null,1,2,"","3|E",0], ], ], 'arsse_tags' => [ @@ -409,22 +410,23 @@ trait SeriesSubscription { 'title' => "Ook Ook", 'folder' => 3, 'pinned' => false, + 'scrape' => true, 'order_type' => 0, 'keep_rule' => "ook", 'block_rule' => "eek", ]); $state = $this->primeExpectations($this->data, [ 'arsse_feeds' => ['id','url','username','password','title'], - 'arsse_subscriptions' => ['id','owner','feed','title','folder','pinned','order_type','keep_rule','block_rule'], + 'arsse_subscriptions' => ['id','owner','feed','title','folder','pinned','order_type','keep_rule','block_rule','scrape'], ]); - $state['arsse_subscriptions']['rows'][0] = [1,"john.doe@example.com",2,"Ook Ook",3,0,0,"ook","eek"]; + $state['arsse_subscriptions']['rows'][0] = [1,"john.doe@example.com",2,"Ook Ook",3,0,0,"ook","eek",1]; $this->compareExpectations(static::$drv, $state); Arsse::$db->subscriptionPropertiesSet($this->user, 1, [ 'title' => null, 'keep_rule' => null, 'block_rule' => null, ]); - $state['arsse_subscriptions']['rows'][0] = [1,"john.doe@example.com",2,null,3,0,0,null,null]; + $state['arsse_subscriptions']['rows'][0] = [1,"john.doe@example.com",2,null,3,0,0,null,null,1]; $this->compareExpectations(static::$drv, $state); // making no changes is a valid result Arsse::$db->subscriptionPropertiesSet($this->user, 1, ['unhinged' => true]); diff --git a/tests/cases/Db/BaseUpdate.php b/tests/cases/Db/BaseUpdate.php index bce4dbcf..4e1ed79b 100644 --- a/tests/cases/Db/BaseUpdate.php +++ b/tests/cases/Db/BaseUpdate.php @@ -139,14 +139,22 @@ class BaseUpdate extends \JKingWeb\Arsse\Test\AbstractTest { $this->drv->schemaUpdate(6); $this->drv->exec( <<drv->schemaUpdate(7); @@ -168,9 +176,16 @@ QUERY_TEXT ['url' => 'https://example.com/', 'icon' => 1], ['url' => 'http://example.net/', 'icon' => null], ]; + $subs = [ + ['id' => 1, 'scrape' => 1], + ['id' => 2, 'scrape' => 1], + ['id' => 3, 'scrape' => 0], + ['id' => 4, 'scrape' => 0], + ]; $this->assertEquals($users, $this->drv->query("SELECT id, password, num from arsse_users order by id")->getAll()); $this->assertEquals($folders, $this->drv->query("SELECT owner, name from arsse_folders order by owner")->getAll()); $this->assertEquals($icons, $this->drv->query("SELECT id, url from arsse_icons order by id")->getAll()); $this->assertEquals($feeds, $this->drv->query("SELECT url, icon from arsse_feeds order by id")->getAll()); + $this->assertEquals($subs, $this->drv->query("SELECT id, scrape from arsse_subscriptions order by id")->getAll()); } }