1
1
Fork 0
mirror of https://code.mensbeam.com/MensBeam/Arsse.git synced 2024-12-22 13:12:41 +00:00

Prototype feed creation

This commit is contained in:
J. King 2021-01-19 23:17:03 -05:00
parent 14d2d19ae1
commit e7b2f54183
6 changed files with 90 additions and 28 deletions

View file

@ -25,16 +25,17 @@ Miniflux version 2.0.27 is emulated, though not all features are implemented
- Custom User-Agent strings - Custom User-Agent strings
- The `disabled`, `ignore_http_cache`, and `fetch_via_proxy` flags - The `disabled`, `ignore_http_cache`, and `fetch_via_proxy` flags
- Changing the URL, username, or password of a feed - Changing the URL, username, or password of a feed
- Titles and types are not available during feed discovery and are filled with generic data
# Differences # Differences
- Various error messages differ due to significant implementation differences - Various error codes and messages differ due to significant implementation differences
- `PUT` requests which return a body respond with `200 OK` rather than `201 Created` - `PUT` requests which return a body respond with `200 OK` rather than `201 Created`
- Only the URL should be considered reliable in feed discovery results
- The "All" category is treated specially (see below for details) - The "All" category is treated specially (see below for details)
- Category names consisting only of whitespace are rejected along with the empty string - Category names consisting only of whitespace are rejected along with the empty string
- Filtering rules may not function identically (see below for details) - Filtering rules may not function identically (see below for details)
- The `checked_at` field of feeds indicates when the feed was last updated rather than when it was last checked - The `checked_at` field of feeds indicates when the feed was last updated rather than when it was last checked
- Creating a feed with the `scrape` property set to `true` might not return scraped content for the initial synchronization
# Behaviour of filtering (block and keep) rules # Behaviour of filtering (block and keep) rules

View file

@ -745,10 +745,11 @@ class Database {
* @param string $fetchUser The user name required to access the newsfeed, if applicable * @param string $fetchUser The user name required to access the newsfeed, if applicable
* @param string $fetchPassword The password required to fetch the newsfeed, if applicable; this will be stored in cleartext * @param string $fetchPassword The password required to fetch the newsfeed, if applicable; this will be stored in cleartext
* @param boolean $discover Whether to perform newsfeed discovery if $url points to an HTML document * @param boolean $discover Whether to perform newsfeed discovery if $url points to an HTML document
* @param boolean $scrape Whether the initial synchronization should scrape full-article content
*/ */
public function subscriptionAdd(string $user, string $url, string $fetchUser = "", string $fetchPassword = "", bool $discover = true): int { public function subscriptionAdd(string $user, string $url, string $fetchUser = "", string $fetchPassword = "", bool $discover = true, bool $scrape = false): int {
// get the ID of the underlying feed, or add it if it's not yet in the database // get the ID of the underlying feed, or add it if it's not yet in the database
$feedID = $this->feedAdd($url, $fetchUser, $fetchPassword, $discover); $feedID = $this->feedAdd($url, $fetchUser, $fetchPassword, $discover, $scrape);
// Add the feed to the user's subscriptions and return the new subscription's ID. // Add the feed to the user's subscriptions and return the new subscription's ID.
return $this->db->prepare('INSERT INTO arsse_subscriptions(owner,feed) values(?,?)', 'str', 'int')->run($user, $feedID)->lastId(); return $this->db->prepare('INSERT INTO arsse_subscriptions(owner,feed) values(?,?)', 'str', 'int')->run($user, $feedID)->lastId();
} }
@ -1089,8 +1090,9 @@ class Database {
* @param string $fetchUser The user name required to access the newsfeed, if applicable * @param string $fetchUser The user name required to access the newsfeed, if applicable
* @param string $fetchPassword The password required to fetch the newsfeed, if applicable; this will be stored in cleartext * @param string $fetchPassword The password required to fetch the newsfeed, if applicable; this will be stored in cleartext
* @param boolean $discover Whether to perform newsfeed discovery if $url points to an HTML document * @param boolean $discover Whether to perform newsfeed discovery if $url points to an HTML document
* @param boolean $scrape Whether the initial synchronization should scrape full-article content
*/ */
public function feedAdd(string $url, string $fetchUser = "", string $fetchPassword = "", bool $discover = true): int { public function feedAdd(string $url, string $fetchUser = "", string $fetchPassword = "", bool $discover = true, bool $scrape = false): int {
// normalize the input URL // normalize the input URL
$url = URL::normalize($url); $url = URL::normalize($url);
// check to see if the feed already exists // check to see if the feed already exists
@ -1106,7 +1108,7 @@ class Database {
$feedID = $this->db->prepare('INSERT INTO arsse_feeds(url,username,password) values(?,?,?)', 'str', 'str', 'str')->run($url, $fetchUser, $fetchPassword)->lastId(); $feedID = $this->db->prepare('INSERT INTO arsse_feeds(url,username,password) values(?,?,?)', 'str', 'str', 'str')->run($url, $fetchUser, $fetchPassword)->lastId();
try { try {
// perform an initial update on the newly added feed // perform an initial update on the newly added feed
$this->feedUpdate($feedID, true); $this->feedUpdate($feedID, true, $scrape);
} catch (\Throwable $e) { } catch (\Throwable $e) {
// if the update fails, delete the feed we just added // if the update fails, delete the feed we just added
$this->db->prepare('DELETE from arsse_feeds where id = ?', 'int')->run($feedID); $this->db->prepare('DELETE from arsse_feeds where id = ?', 'int')->run($feedID);
@ -1126,8 +1128,9 @@ class Database {
* *
* @param integer $feedID The numerical identifier of the newsfeed to refresh * @param integer $feedID The numerical identifier of the newsfeed to refresh
* @param boolean $throwError Whether to throw an exception on failure in addition to storing error information in the database * @param boolean $throwError Whether to throw an exception on failure in addition to storing error information in the database
* @param boolean|null $scrapeOverride If not null, overrides information in the database signaling whether or not to scrape full-article content. This is intended for when there are no subscriptions for the feed in the database yet
*/ */
public function feedUpdate($feedID, bool $throwError = false): bool { public function feedUpdate($feedID, bool $throwError = false, ?bool $scrapeOverride = null): bool {
// check to make sure the feed exists // check to make sure the feed exists
if (!V::id($feedID)) { if (!V::id($feedID)) {
throw new Db\ExceptionInput("typeViolation", ["action" => __FUNCTION__, "field" => "feed", 'id' => $feedID, 'type' => "int > 0"]); throw new Db\ExceptionInput("typeViolation", ["action" => __FUNCTION__, "field" => "feed", 'id' => $feedID, 'type' => "int > 0"]);
@ -1144,7 +1147,7 @@ class Database {
throw new Db\ExceptionInput("subjectMissing", ["action" => __FUNCTION__, "field" => "feed", 'id' => $feedID]); throw new Db\ExceptionInput("subjectMissing", ["action" => __FUNCTION__, "field" => "feed", 'id' => $feedID]);
} }
// determine whether the feed's items should be scraped for full content from the source Web site // determine whether the feed's items should be scraped for full content from the source Web site
$scrape = (Arsse::$conf->fetchEnableScraping && $f['scrapers']); $scrape = (Arsse::$conf->fetchEnableScraping && ($scrapeOverride ?? $f['scrapers']));
// the Feed object throws an exception when there are problems, but that isn't ideal // the Feed object throws an exception when there are problems, but that isn't ideal
// here. When an exception is thrown it should update the database with the // here. When an exception is thrown it should update the database with the
// error instead of failing; if other exceptions are thrown, we should simply roll back // error instead of failing; if other exceptions are thrown, we should simply roll back

View file

@ -14,8 +14,10 @@ use JKingWeb\Arsse\Context\Context;
use JKingWeb\Arsse\Db\ExceptionInput; use JKingWeb\Arsse\Db\ExceptionInput;
use JKingWeb\Arsse\Misc\HTTP; use JKingWeb\Arsse\Misc\HTTP;
use JKingWeb\Arsse\Misc\Date; use JKingWeb\Arsse\Misc\Date;
use JKingWeb\Arsse\Misc\URL;
use JKingWeb\Arsse\Misc\ValueInfo as V; use JKingWeb\Arsse\Misc\ValueInfo as V;
use JKingWeb\Arsse\REST\Exception; use JKingWeb\Arsse\REST\Exception;
use JKingWeb\Arsse\Rule\Rule;
use JKingWeb\Arsse\User\ExceptionConflict; use JKingWeb\Arsse\User\ExceptionConflict;
use JKingWeb\Arsse\User\Exception as UserException; use JKingWeb\Arsse\User\Exception as UserException;
use Psr\Http\Message\ServerRequestInterface; use Psr\Http\Message\ServerRequestInterface;
@ -31,12 +33,25 @@ class V1 extends \JKingWeb\Arsse\REST\AbstractHandler {
protected const ACCEPTED_TYPES_JSON = ["application/json"]; protected const ACCEPTED_TYPES_JSON = ["application/json"];
protected const TOKEN_LENGTH = 32; protected const TOKEN_LENGTH = 32;
protected const VALID_JSON = [ protected const VALID_JSON = [
// user properties which map directly to Arsse user metadata are listed separately // user properties which map directly to Arsse user metadata are listed separately;
'url' => "string", // not all these properties are used by our implementation, but they are treated
'username' => "string", // with the same strictness as in Miniflux to ease cross-compatibility
'password' => "string", 'url' => "string",
'user_agent' => "string", 'username' => "string",
'title' => "string", 'password' => "string",
'user_agent' => "string",
'title' => "string",
'feed_url' => "string",
'category_id' => "integer",
'crawler' => "boolean",
'user_agent' => "string",
'scraper_rules' => "string",
'rewrite_rules' => "string",
'keeplist_rules' => "string",
'blocklist_rules' => "string",
'disabled' => "boolean",
'ignore_http_cache' => "boolean",
'fetch_via_proxy' => "boolean",
]; ];
protected const USER_META_MAP = [ protected const USER_META_MAP = [
// Miniflux ID // Arsse ID Default value Extra // Miniflux ID // Arsse ID Default value Extra
@ -81,7 +96,7 @@ class V1 extends \JKingWeb\Arsse\REST\AbstractHandler {
], ],
'/feeds' => [ '/feeds' => [
'GET' => ["getFeeds", false, false, false, false, []], 'GET' => ["getFeeds", false, false, false, false, []],
'POST' => ["createFeed", false, false, true, false, []], 'POST' => ["createFeed", false, false, true, false, ["feed_url", "category_id"]],
], ],
'/feeds/1' => [ '/feeds/1' => [
'GET' => ["getFeed", false, true, false, false, []], 'GET' => ["getFeed", false, true, false, false, []],
@ -263,6 +278,10 @@ class V1 extends \JKingWeb\Arsse\REST\AbstractHandler {
$body[$k] = null; $body[$k] = null;
} elseif (gettype($body[$k]) !== $t) { } elseif (gettype($body[$k]) !== $t) {
return new ErrorResponse(["InvalidInputType", 'field' => $k, 'expected' => $t, 'actual' => gettype($body[$k])], 422); return new ErrorResponse(["InvalidInputType", 'field' => $k, 'expected' => $t, 'actual' => gettype($body[$k])], 422);
} elseif (in_array($k, ["keeplist_rules", "blocklist_rules"]) && !Rule::validate($body[$k])) {
return new ErrorResponse(["InvalidInputValue", 'field' => $k], 422);
} elseif (in_array($k, ["url", "feed_url"]) && !URL::absolute($body[$k])) {
return new ErrorResponse(["InvalidInputValue", 'field' => $k], 422);
} }
} }
//normalize user-specific input //normalize user-specific input
@ -377,7 +396,7 @@ class V1 extends \JKingWeb\Arsse\REST\AbstractHandler {
10506 => "Fetch403", 10506 => "Fetch403",
10507 => "Fetch401", 10507 => "Fetch401",
][$e->getCode()] ?? "FetchOther"; ][$e->getCode()] ?? "FetchOther";
return new ErrorResponse($msg, 500); return new ErrorResponse($msg, 502);
} }
$out = []; $out = [];
foreach ($list as $url) { foreach ($list as $url) {
@ -646,6 +665,37 @@ class V1 extends \JKingWeb\Arsse\REST\AbstractHandler {
return new Response($out); return new Response($out);
} }
protected function createFeed(array $data): ResponseInterface {
$props = [
'keep_rule' => $data['keeplist_rules'],
'block_rule' => $data['blocklist_rules'],
'folder' => $data['category_id'] - 1,
'scrape' => (bool) $data['crawler'],
];
try {
Arsse::$db->feedAdd($data['feed_url'], (string) $data['username'], (string) $data['password'], false, (bool) $data['crawler']);
$tr = Arsse::$db->begin();
$id = Arsse::$db->subscriptionAdd(Arsse::$user->id, $data['feed_url'], (string) $data['username'], (string) $data['password'], false, (bool) $data['crawler']);
Arsse::$db->subscriptionPropertiesSet(Arsse::$user->id, $id, $props);
$tr->commit();
} catch (FeedException $e) {
$msg = [
10502 => "Fetch404",
10506 => "Fetch403",
10507 => "Fetch401",
][$e->getCode()] ?? "FetchOther";
return new ErrorResponse($msg, 502);
} catch (ExceptionInput $e) {
switch ($e->getCode()) {
case 10235:
return new ErrorResponse("MissingCategory", 422);
case 10236:
return new ErrorResponse("DuplicateFeed", 409);
}
}
return new Response(['feed_id' => $id], 201);
}
public static function tokenGenerate(string $user, string $label): string { public static function tokenGenerate(string $user, string $label): string {
// Miniflux produces tokens in base64url alphabet // Miniflux produces tokens in base64url alphabet
$t = str_replace(["+", "/"], ["-", "_"], base64_encode(random_bytes(self::TOKEN_LENGTH))); $t = str_replace(["+", "/"], ["-", "_"], base64_encode(random_bytes(self::TOKEN_LENGTH)));

View file

@ -19,10 +19,12 @@ return [
'API.Miniflux.Error.Fetch401' => 'You are not authorized to access this resource (invalid username/password)', 'API.Miniflux.Error.Fetch401' => 'You are not authorized to access this resource (invalid username/password)',
'API.Miniflux.Error.Fetch403' => 'Unable to fetch this resource (Status Code = 403)', 'API.Miniflux.Error.Fetch403' => 'Unable to fetch this resource (Status Code = 403)',
'API.Miniflux.Error.FetchOther' => 'Unable to fetch this resource', 'API.Miniflux.Error.FetchOther' => 'Unable to fetch this resource',
'API.Miniflux.Error.DuplicateCategory' => 'Category "{title}" already exists', 'API.Miniflux.Error.DuplicateCategory' => 'This category already exists.',
'API.Miniflux.Error.InvalidCategory' => 'Invalid category title "{title}"', 'API.Miniflux.Error.InvalidCategory' => 'Invalid category title "{title}"',
'API.Miniflux.Error.MissingCategory' => 'This category does not exist or does not belong to this user.',
'API.Miniflux.Error.InvalidElevation' => 'Only administrators can change permissions of standard users', 'API.Miniflux.Error.InvalidElevation' => 'Only administrators can change permissions of standard users',
'API.Miniflux.Error.DuplicateUser' => 'The user name "{user}" already exists', 'API.Miniflux.Error.DuplicateUser' => 'The user name "{user}" already exists',
'API.Miniflux.Error.DuplicateFeed' => 'This feed already exists.',
'API.TTRSS.Category.Uncategorized' => 'Uncategorized', 'API.TTRSS.Category.Uncategorized' => 'Uncategorized',
'API.TTRSS.Category.Special' => 'Special', 'API.TTRSS.Category.Special' => 'Special',

View file

@ -221,7 +221,7 @@ trait SeriesSubscription {
$subID = $this->nextID("arsse_subscriptions"); $subID = $this->nextID("arsse_subscriptions");
\Phake::when(Arsse::$db)->feedUpdate->thenReturn(true); \Phake::when(Arsse::$db)->feedUpdate->thenReturn(true);
$this->assertSame($subID, Arsse::$db->subscriptionAdd($this->user, $url, "", "", false)); $this->assertSame($subID, Arsse::$db->subscriptionAdd($this->user, $url, "", "", false));
\Phake::verify(Arsse::$db)->feedUpdate($feedID, true); \Phake::verify(Arsse::$db)->feedUpdate($feedID, true, false);
$state = $this->primeExpectations($this->data, [ $state = $this->primeExpectations($this->data, [
'arsse_feeds' => ['id','url','username','password'], 'arsse_feeds' => ['id','url','username','password'],
'arsse_subscriptions' => ['id','owner','feed'], 'arsse_subscriptions' => ['id','owner','feed'],
@ -238,7 +238,7 @@ trait SeriesSubscription {
$subID = $this->nextID("arsse_subscriptions"); $subID = $this->nextID("arsse_subscriptions");
\Phake::when(Arsse::$db)->feedUpdate->thenReturn(true); \Phake::when(Arsse::$db)->feedUpdate->thenReturn(true);
$this->assertSame($subID, Arsse::$db->subscriptionAdd($this->user, $url, "", "", true)); $this->assertSame($subID, Arsse::$db->subscriptionAdd($this->user, $url, "", "", true));
\Phake::verify(Arsse::$db)->feedUpdate($feedID, true); \Phake::verify(Arsse::$db)->feedUpdate($feedID, true, false);
$state = $this->primeExpectations($this->data, [ $state = $this->primeExpectations($this->data, [
'arsse_feeds' => ['id','url','username','password'], 'arsse_feeds' => ['id','url','username','password'],
'arsse_subscriptions' => ['id','owner','feed'], 'arsse_subscriptions' => ['id','owner','feed'],
@ -256,7 +256,7 @@ trait SeriesSubscription {
try { try {
Arsse::$db->subscriptionAdd($this->user, $url, "", "", false); Arsse::$db->subscriptionAdd($this->user, $url, "", "", false);
} finally { } finally {
\Phake::verify(Arsse::$db)->feedUpdate($feedID, true); \Phake::verify(Arsse::$db)->feedUpdate($feedID, true, false);
$state = $this->primeExpectations($this->data, [ $state = $this->primeExpectations($this->data, [
'arsse_feeds' => ['id','url','username','password'], 'arsse_feeds' => ['id','url','username','password'],
'arsse_subscriptions' => ['id','owner','feed'], 'arsse_subscriptions' => ['id','owner','feed'],

View file

@ -172,16 +172,22 @@ class TestV1 extends \JKingWeb\Arsse\Test\AbstractTest {
$this->assertMessage($exp, $this->req("POST", "/discover", ['url' => 2112])); $this->assertMessage($exp, $this->req("POST", "/discover", ['url' => 2112]));
} }
public function testDiscoverFeeds(): void { /** @dataProvider provideDiscoveries */
$exp = new Response([ public function testDiscoverFeeds($in, ResponseInterface $exp): void {
$this->assertMessage($exp, $this->req("POST", "/discover", ['url' => $in]));
}
public function provideDiscoveries(): iterable {
self::clearData();
$discovered = [
['title' => "Feed", 'type' => "rss", 'url' => "http://localhost:8000/Feed/Discovery/Feed"], ['title' => "Feed", 'type' => "rss", 'url' => "http://localhost:8000/Feed/Discovery/Feed"],
['title' => "Feed", 'type' => "rss", 'url' => "http://localhost:8000/Feed/Discovery/Missing"], ['title' => "Feed", 'type' => "rss", 'url' => "http://localhost:8000/Feed/Discovery/Missing"],
]); ];
$this->assertMessage($exp, $this->req("POST", "/discover", ['url' => "http://localhost:8000/Feed/Discovery/Valid"])); return [
$exp = new Response([]); ["http://localhost:8000/Feed/Discovery/Valid", new Response($discovered)],
$this->assertMessage($exp, $this->req("POST", "/discover", ['url' => "http://localhost:8000/Feed/Discovery/Invalid"])); ["http://localhost:8000/Feed/Discovery/Invalid", new Response([])],
$exp = new ErrorResponse("Fetch404", 500); ["http://localhost:8000/Feed/Discovery/Missing", new ErrorResponse("Fetch404", 502)],
$this->assertMessage($exp, $this->req("POST", "/discover", ['url' => "http://localhost:8000/Feed/Discovery/Missing"])); ];
} }
/** @dataProvider provideUserQueries */ /** @dataProvider provideUserQueries */