mirror of
https://code.mensbeam.com/MensBeam/Arsse.git
synced 2024-12-22 05:02:40 +00:00
Add ability to enable scraper
Also transfer any existing scraper booleans on database upgrade. It was previously possible to enable scraping manually by editing the database, and these settings will be honoured.
This commit is contained in:
parent
7897585d98
commit
86897af0b3
6 changed files with 61 additions and 39 deletions
|
@ -898,6 +898,7 @@ class Database {
|
|||
* - "title": The title of the subscription
|
||||
* - "folder": The numeric identifier (or null) of the subscription's folder
|
||||
* - "pinned": Whether the subscription is pinned
|
||||
* - "scrape": Whether to scrape full article contents from the HTML article
|
||||
* - "order_type": Whether articles should be sorted in reverse chronological order (2), chronological order (1), or the default (0)
|
||||
* - "keep_rule": The subscription's "keep" filter rule; articles which do not match this are hidden
|
||||
* - "block_rule": The subscription's "block" filter rule; articles which match this are hidden
|
||||
|
@ -948,6 +949,7 @@ class Database {
|
|||
'pinned' => "strict bool",
|
||||
'keep_rule' => "str",
|
||||
'block_rule' => "str",
|
||||
'scrape' => "bool",
|
||||
];
|
||||
[$setClause, $setTypes, $setValues] = $this->generateSet($data, $valid);
|
||||
if (!$setClause) {
|
||||
|
|
|
@ -33,6 +33,7 @@ create table arsse_user_meta(
|
|||
) character set utf8mb4 collate utf8mb4_unicode_ci;
|
||||
|
||||
alter table arsse_subscriptions add column scrape boolean not null default 0;
|
||||
update arsse_subscriptions set scrape = 1 where feed in (select id from arsse_feeds where scrape = 1);
|
||||
alter table arsse_feeds drop column scrape;
|
||||
alter table arsse_articles add column content_scraped longtext;
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ create table arsse_user_meta(
|
|||
);
|
||||
|
||||
alter table arsse_subscriptions add column scrape smallint not null default 0;
|
||||
update arsse_subscriptions set scrape = 1 where feed in (select id from arsse_feeds where scrape = 1);
|
||||
alter table arsse_feeds drop column scrape;
|
||||
alter table arsse_articles add column content_scraped text;
|
||||
|
||||
|
|
|
@ -44,8 +44,31 @@ create table arsse_user_meta(
|
|||
primary key(owner,key)
|
||||
) without rowid;
|
||||
|
||||
-- Add a "scrape" column for subscriptions
|
||||
-- Add a "scrape" column for subscriptions and copy any existing scraping settings from the feeds table
|
||||
alter table arsse_subscriptions add column scrape boolean not null default 0;
|
||||
update arsse_subscriptions set scrape = 1 where feed in (select id from arsse_feeds where scrape = 1);
|
||||
|
||||
-- Add a column for scraped article content, and re-order some columns
|
||||
create table arsse_articles_new(
|
||||
-- entries in newsfeeds
|
||||
id integer primary key, -- sequence number
|
||||
feed integer not null references arsse_feeds(id) on delete cascade, -- feed for the subscription
|
||||
url text, -- URL of article
|
||||
title text collate nocase, -- article title
|
||||
author text collate nocase, -- author's name
|
||||
published text, -- time of original publication
|
||||
edited text, -- time of last edit by author
|
||||
modified text not null default CURRENT_TIMESTAMP, -- time when article was last modified in database
|
||||
guid text, -- GUID
|
||||
url_title_hash text not null, -- hash of URL + title; used when checking for updates and for identification if there is no guid.
|
||||
url_content_hash text not null, -- hash of URL + content, enclosure URL, & content type; used when checking for updates and for identification if there is no guid.
|
||||
title_content_hash text not null, -- hash of title + content, enclosure URL, & content type; used when checking for updates and for identification if there is no guid.
|
||||
content_scraped text, -- scraped content, as HTML
|
||||
content text -- content, as HTML
|
||||
);
|
||||
insert into arsse_articles_new select id, feed, url, title, author, published, edited, modified, guid, url_title_hash, url_content_hash, title_content_hash, null, content from arsse_articles;
|
||||
drop table arsse_articles;
|
||||
alter table arsse_articles_new rename to arsse_articles;
|
||||
|
||||
-- Add a separate table for feed icons and replace their URLs in the feeds table with their IDs
|
||||
-- Also remove the "scrape" column of the feeds table, which was never an advertised feature
|
||||
|
@ -88,28 +111,6 @@ insert into arsse_feeds_new
|
|||
drop table arsse_feeds;
|
||||
alter table arsse_feeds_new rename to arsse_feeds;
|
||||
|
||||
-- Add a column for scraped article content, and re-order some column
|
||||
create table arsse_articles_new(
|
||||
-- entries in newsfeeds
|
||||
id integer primary key, -- sequence number
|
||||
feed integer not null references arsse_feeds(id) on delete cascade, -- feed for the subscription
|
||||
url text, -- URL of article
|
||||
title text collate nocase, -- article title
|
||||
author text collate nocase, -- author's name
|
||||
published text, -- time of original publication
|
||||
edited text, -- time of last edit by author
|
||||
modified text not null default CURRENT_TIMESTAMP, -- time when article was last modified in database
|
||||
guid text, -- GUID
|
||||
url_title_hash text not null, -- hash of URL + title; used when checking for updates and for identification if there is no guid.
|
||||
url_content_hash text not null, -- hash of URL + content, enclosure URL, & content type; used when checking for updates and for identification if there is no guid.
|
||||
title_content_hash text not null, -- hash of title + content, enclosure URL, & content type; used when checking for updates and for identification if there is no guid.
|
||||
content_scraped text, -- scraped content, as HTML
|
||||
content text -- content, as HTML
|
||||
);
|
||||
insert into arsse_articles_new select id, feed, url, title, author, published, edited, modified, guid, url_title_hash, url_content_hash, title_content_hash, null, content from arsse_articles;
|
||||
drop table arsse_articles;
|
||||
alter table arsse_articles_new rename to arsse_articles;
|
||||
|
||||
-- set version marker
|
||||
pragma user_version = 7;
|
||||
update arsse_meta set value = '7' where "key" = 'schema_version';
|
||||
|
|
|
@ -80,13 +80,14 @@ trait SeriesSubscription {
|
|||
'order_type' => "int",
|
||||
'keep_rule' => "str",
|
||||
'block_rule' => "str",
|
||||
'scrape' => "bool",
|
||||
],
|
||||
'rows' => [
|
||||
[1,"john.doe@example.com",2,null,null,1,2,null,null],
|
||||
[2,"jane.doe@example.com",2,null,null,0,0,null,null],
|
||||
[3,"john.doe@example.com",3,"Ook",2,0,1,null,null],
|
||||
[4,"jill.doe@example.com",2,null,null,0,0,null,null],
|
||||
[5,"jack.doe@example.com",2,null,null,1,2,"","3|E"],
|
||||
[1,"john.doe@example.com",2,null,null,1,2,null,null,0],
|
||||
[2,"jane.doe@example.com",2,null,null,0,0,null,null,0],
|
||||
[3,"john.doe@example.com",3,"Ook",2,0,1,null,null,0],
|
||||
[4,"jill.doe@example.com",2,null,null,0,0,null,null,0],
|
||||
[5,"jack.doe@example.com",2,null,null,1,2,"","3|E",0],
|
||||
],
|
||||
],
|
||||
'arsse_tags' => [
|
||||
|
@ -409,22 +410,23 @@ trait SeriesSubscription {
|
|||
'title' => "Ook Ook",
|
||||
'folder' => 3,
|
||||
'pinned' => false,
|
||||
'scrape' => true,
|
||||
'order_type' => 0,
|
||||
'keep_rule' => "ook",
|
||||
'block_rule' => "eek",
|
||||
]);
|
||||
$state = $this->primeExpectations($this->data, [
|
||||
'arsse_feeds' => ['id','url','username','password','title'],
|
||||
'arsse_subscriptions' => ['id','owner','feed','title','folder','pinned','order_type','keep_rule','block_rule'],
|
||||
'arsse_subscriptions' => ['id','owner','feed','title','folder','pinned','order_type','keep_rule','block_rule','scrape'],
|
||||
]);
|
||||
$state['arsse_subscriptions']['rows'][0] = [1,"john.doe@example.com",2,"Ook Ook",3,0,0,"ook","eek"];
|
||||
$state['arsse_subscriptions']['rows'][0] = [1,"john.doe@example.com",2,"Ook Ook",3,0,0,"ook","eek",1];
|
||||
$this->compareExpectations(static::$drv, $state);
|
||||
Arsse::$db->subscriptionPropertiesSet($this->user, 1, [
|
||||
'title' => null,
|
||||
'keep_rule' => null,
|
||||
'block_rule' => null,
|
||||
]);
|
||||
$state['arsse_subscriptions']['rows'][0] = [1,"john.doe@example.com",2,null,3,0,0,null,null];
|
||||
$state['arsse_subscriptions']['rows'][0] = [1,"john.doe@example.com",2,null,3,0,0,null,null,1];
|
||||
$this->compareExpectations(static::$drv, $state);
|
||||
// making no changes is a valid result
|
||||
Arsse::$db->subscriptionPropertiesSet($this->user, 1, ['unhinged' => true]);
|
||||
|
|
|
@ -139,14 +139,22 @@ class BaseUpdate extends \JKingWeb\Arsse\Test\AbstractTest {
|
|||
$this->drv->schemaUpdate(6);
|
||||
$this->drv->exec(
|
||||
<<<QUERY_TEXT
|
||||
INSERT INTO arsse_users values('a', 'xyz');
|
||||
INSERT INTO arsse_users values('b', 'abc');
|
||||
INSERT INTO arsse_folders(owner,name) values('a', '1');
|
||||
INSERT INTO arsse_folders(owner,name) values('b', '2');
|
||||
INSERT INTO arsse_feeds(url,favicon) values('http://example.com/', 'http://example.com/icon');
|
||||
INSERT INTO arsse_feeds(url,favicon) values('http://example.org/', 'http://example.org/icon');
|
||||
INSERT INTO arsse_feeds(url,favicon) values('https://example.com/', 'http://example.com/icon');
|
||||
INSERT INTO arsse_feeds(url,favicon) values('http://example.net/', null);
|
||||
INSERT INTO arsse_users values
|
||||
('a', 'xyz'),
|
||||
('b', 'abc');
|
||||
INSERT INTO arsse_folders(owner,name) values
|
||||
('a', '1'),
|
||||
('b', '2');
|
||||
INSERT INTO arsse_feeds(id,scrape,url,favicon) values
|
||||
(1, 1, 'http://example.com/', 'http://example.com/icon'),
|
||||
(2, 0, 'http://example.org/', 'http://example.org/icon'),
|
||||
(3, 0, 'https://example.com/', 'http://example.com/icon'),
|
||||
(4, 0, 'http://example.net/', null);
|
||||
INSERT INTO arsse_subscriptions(id,owner,feed) values
|
||||
(1, 'a', 1),
|
||||
(2, 'b', 1),
|
||||
(3, 'a', 2),
|
||||
(4, 'b', 2);
|
||||
QUERY_TEXT
|
||||
);
|
||||
$this->drv->schemaUpdate(7);
|
||||
|
@ -168,9 +176,16 @@ QUERY_TEXT
|
|||
['url' => 'https://example.com/', 'icon' => 1],
|
||||
['url' => 'http://example.net/', 'icon' => null],
|
||||
];
|
||||
$subs = [
|
||||
['id' => 1, 'scrape' => 1],
|
||||
['id' => 2, 'scrape' => 1],
|
||||
['id' => 3, 'scrape' => 0],
|
||||
['id' => 4, 'scrape' => 0],
|
||||
];
|
||||
$this->assertEquals($users, $this->drv->query("SELECT id, password, num from arsse_users order by id")->getAll());
|
||||
$this->assertEquals($folders, $this->drv->query("SELECT owner, name from arsse_folders order by owner")->getAll());
|
||||
$this->assertEquals($icons, $this->drv->query("SELECT id, url from arsse_icons order by id")->getAll());
|
||||
$this->assertEquals($feeds, $this->drv->query("SELECT url, icon from arsse_feeds order by id")->getAll());
|
||||
$this->assertEquals($subs, $this->drv->query("SELECT id, scrape from arsse_subscriptions order by id")->getAll());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue