1
1
Fork 0
mirror of https://code.mensbeam.com/MensBeam/Arsse.git synced 2024-12-22 13:12:41 +00:00

Prototype feed creation

This commit is contained in:
J. King 2021-01-19 23:17:03 -05:00
parent 14d2d19ae1
commit e7b2f54183
6 changed files with 90 additions and 28 deletions

View file

@ -25,16 +25,17 @@ Miniflux version 2.0.27 is emulated, though not all features are implemented
- Custom User-Agent strings
- The `disabled`, `ignore_http_cache`, and `fetch_via_proxy` flags
- Changing the URL, username, or password of a feed
- Titles and types are not available during feed discovery and are filled with generic data
# Differences
- Various error messages differ due to significant implementation differences
- Various error codes and messages differ due to significant implementation differences
- `PUT` requests which return a body respond with `200 OK` rather than `201 Created`
- Only the URL should be considered reliable in feed discovery results
- The "All" category is treated specially (see below for details)
- Category names consisting only of whitespace are rejected along with the empty string
- Filtering rules may not function identically (see below for details)
- The `checked_at` field of feeds indicates when the feed was last updated rather than when it was last checked
- Creating a feed with the `scrape` property set to `true` might not return scraped content for the initial synchronization
# Behaviour of filtering (block and keep) rules

View file

@ -745,10 +745,11 @@ class Database {
* @param string $fetchUser The user name required to access the newsfeed, if applicable
* @param string $fetchPassword The password required to fetch the newsfeed, if applicable; this will be stored in cleartext
* @param boolean $discover Whether to perform newsfeed discovery if $url points to an HTML document
* @param boolean $scrape Whether the initial synchronization should scrape full-article content
*/
public function subscriptionAdd(string $user, string $url, string $fetchUser = "", string $fetchPassword = "", bool $discover = true): int {
public function subscriptionAdd(string $user, string $url, string $fetchUser = "", string $fetchPassword = "", bool $discover = true, bool $scrape = false): int {
// get the ID of the underlying feed, or add it if it's not yet in the database
$feedID = $this->feedAdd($url, $fetchUser, $fetchPassword, $discover);
$feedID = $this->feedAdd($url, $fetchUser, $fetchPassword, $discover, $scrape);
// Add the feed to the user's subscriptions and return the new subscription's ID.
return $this->db->prepare('INSERT INTO arsse_subscriptions(owner,feed) values(?,?)', 'str', 'int')->run($user, $feedID)->lastId();
}
@ -1089,8 +1090,9 @@ class Database {
* @param string $fetchUser The user name required to access the newsfeed, if applicable
* @param string $fetchPassword The password required to fetch the newsfeed, if applicable; this will be stored in cleartext
* @param boolean $discover Whether to perform newsfeed discovery if $url points to an HTML document
* @param boolean $scrape Whether the initial synchronization should scrape full-article content
*/
public function feedAdd(string $url, string $fetchUser = "", string $fetchPassword = "", bool $discover = true): int {
public function feedAdd(string $url, string $fetchUser = "", string $fetchPassword = "", bool $discover = true, bool $scrape = false): int {
// normalize the input URL
$url = URL::normalize($url);
// check to see if the feed already exists
@ -1106,7 +1108,7 @@ class Database {
$feedID = $this->db->prepare('INSERT INTO arsse_feeds(url,username,password) values(?,?,?)', 'str', 'str', 'str')->run($url, $fetchUser, $fetchPassword)->lastId();
try {
// perform an initial update on the newly added feed
$this->feedUpdate($feedID, true);
$this->feedUpdate($feedID, true, $scrape);
} catch (\Throwable $e) {
// if the update fails, delete the feed we just added
$this->db->prepare('DELETE from arsse_feeds where id = ?', 'int')->run($feedID);
@ -1126,8 +1128,9 @@ class Database {
*
* @param integer $feedID The numerical identifier of the newsfeed to refresh
* @param boolean $throwError Whether to throw an exception on failure in addition to storing error information in the database
* @param boolean|null $scrapeOverride If not null, overrides information in the database signaling whether or not to scrape full-article content. This is intended for when there are no subscriptions for the feed in the database yet
*/
public function feedUpdate($feedID, bool $throwError = false): bool {
public function feedUpdate($feedID, bool $throwError = false, ?bool $scrapeOverride = null): bool {
// check to make sure the feed exists
if (!V::id($feedID)) {
throw new Db\ExceptionInput("typeViolation", ["action" => __FUNCTION__, "field" => "feed", 'id' => $feedID, 'type' => "int > 0"]);
@ -1144,7 +1147,7 @@ class Database {
throw new Db\ExceptionInput("subjectMissing", ["action" => __FUNCTION__, "field" => "feed", 'id' => $feedID]);
}
// determine whether the feed's items should be scraped for full content from the source Web site
$scrape = (Arsse::$conf->fetchEnableScraping && $f['scrapers']);
$scrape = (Arsse::$conf->fetchEnableScraping && ($scrapeOverride ?? $f['scrapers']));
// the Feed object throws an exception when there are problems, but that isn't ideal
// here. When an exception is thrown it should update the database with the
// error instead of failing; if other exceptions are thrown, we should simply roll back

View file

@ -14,8 +14,10 @@ use JKingWeb\Arsse\Context\Context;
use JKingWeb\Arsse\Db\ExceptionInput;
use JKingWeb\Arsse\Misc\HTTP;
use JKingWeb\Arsse\Misc\Date;
use JKingWeb\Arsse\Misc\URL;
use JKingWeb\Arsse\Misc\ValueInfo as V;
use JKingWeb\Arsse\REST\Exception;
use JKingWeb\Arsse\Rule\Rule;
use JKingWeb\Arsse\User\ExceptionConflict;
use JKingWeb\Arsse\User\Exception as UserException;
use Psr\Http\Message\ServerRequestInterface;
@ -31,12 +33,25 @@ class V1 extends \JKingWeb\Arsse\REST\AbstractHandler {
protected const ACCEPTED_TYPES_JSON = ["application/json"];
protected const TOKEN_LENGTH = 32;
protected const VALID_JSON = [
// user properties which map directly to Arsse user metadata are listed separately
// user properties which map directly to Arsse user metadata are listed separately;
// not all these properties are used by our implementation, but they are treated
// with the same strictness as in Miniflux to ease cross-compatibility
'url' => "string",
'username' => "string",
'password' => "string",
'user_agent' => "string",
'title' => "string",
'feed_url' => "string",
'category_id' => "integer",
'crawler' => "boolean",
'user_agent' => "string",
'scraper_rules' => "string",
'rewrite_rules' => "string",
'keeplist_rules' => "string",
'blocklist_rules' => "string",
'disabled' => "boolean",
'ignore_http_cache' => "boolean",
'fetch_via_proxy' => "boolean",
];
protected const USER_META_MAP = [
// Miniflux ID // Arsse ID Default value Extra
@ -81,7 +96,7 @@ class V1 extends \JKingWeb\Arsse\REST\AbstractHandler {
],
'/feeds' => [
'GET' => ["getFeeds", false, false, false, false, []],
'POST' => ["createFeed", false, false, true, false, []],
'POST' => ["createFeed", false, false, true, false, ["feed_url", "category_id"]],
],
'/feeds/1' => [
'GET' => ["getFeed", false, true, false, false, []],
@ -263,6 +278,10 @@ class V1 extends \JKingWeb\Arsse\REST\AbstractHandler {
$body[$k] = null;
} elseif (gettype($body[$k]) !== $t) {
return new ErrorResponse(["InvalidInputType", 'field' => $k, 'expected' => $t, 'actual' => gettype($body[$k])], 422);
} elseif (in_array($k, ["keeplist_rules", "blocklist_rules"]) && !Rule::validate($body[$k])) {
return new ErrorResponse(["InvalidInputValue", 'field' => $k], 422);
} elseif (in_array($k, ["url", "feed_url"]) && !URL::absolute($body[$k])) {
return new ErrorResponse(["InvalidInputValue", 'field' => $k], 422);
}
}
//normalize user-specific input
@ -377,7 +396,7 @@ class V1 extends \JKingWeb\Arsse\REST\AbstractHandler {
10506 => "Fetch403",
10507 => "Fetch401",
][$e->getCode()] ?? "FetchOther";
return new ErrorResponse($msg, 500);
return new ErrorResponse($msg, 502);
}
$out = [];
foreach ($list as $url) {
@ -646,6 +665,37 @@ class V1 extends \JKingWeb\Arsse\REST\AbstractHandler {
return new Response($out);
}
protected function createFeed(array $data): ResponseInterface {
$props = [
'keep_rule' => $data['keeplist_rules'],
'block_rule' => $data['blocklist_rules'],
'folder' => $data['category_id'] - 1,
'scrape' => (bool) $data['crawler'],
];
try {
Arsse::$db->feedAdd($data['feed_url'], (string) $data['username'], (string) $data['password'], false, (bool) $data['crawler']);
$tr = Arsse::$db->begin();
$id = Arsse::$db->subscriptionAdd(Arsse::$user->id, $data['feed_url'], (string) $data['username'], (string) $data['password'], false, (bool) $data['crawler']);
Arsse::$db->subscriptionPropertiesSet(Arsse::$user->id, $id, $props);
$tr->commit();
} catch (FeedException $e) {
$msg = [
10502 => "Fetch404",
10506 => "Fetch403",
10507 => "Fetch401",
][$e->getCode()] ?? "FetchOther";
return new ErrorResponse($msg, 502);
} catch (ExceptionInput $e) {
switch ($e->getCode()) {
case 10235:
return new ErrorResponse("MissingCategory", 422);
case 10236:
return new ErrorResponse("DuplicateFeed", 409);
}
}
return new Response(['feed_id' => $id], 201);
}
public static function tokenGenerate(string $user, string $label): string {
// Miniflux produces tokens in base64url alphabet
$t = str_replace(["+", "/"], ["-", "_"], base64_encode(random_bytes(self::TOKEN_LENGTH)));

View file

@ -19,10 +19,12 @@ return [
'API.Miniflux.Error.Fetch401' => 'You are not authorized to access this resource (invalid username/password)',
'API.Miniflux.Error.Fetch403' => 'Unable to fetch this resource (Status Code = 403)',
'API.Miniflux.Error.FetchOther' => 'Unable to fetch this resource',
'API.Miniflux.Error.DuplicateCategory' => 'Category "{title}" already exists',
'API.Miniflux.Error.DuplicateCategory' => 'This category already exists.',
'API.Miniflux.Error.InvalidCategory' => 'Invalid category title "{title}"',
'API.Miniflux.Error.MissingCategory' => 'This category does not exist or does not belong to this user.',
'API.Miniflux.Error.InvalidElevation' => 'Only administrators can change permissions of standard users',
'API.Miniflux.Error.DuplicateUser' => 'The user name "{user}" already exists',
'API.Miniflux.Error.DuplicateFeed' => 'This feed already exists.',
'API.TTRSS.Category.Uncategorized' => 'Uncategorized',
'API.TTRSS.Category.Special' => 'Special',

View file

@ -221,7 +221,7 @@ trait SeriesSubscription {
$subID = $this->nextID("arsse_subscriptions");
\Phake::when(Arsse::$db)->feedUpdate->thenReturn(true);
$this->assertSame($subID, Arsse::$db->subscriptionAdd($this->user, $url, "", "", false));
\Phake::verify(Arsse::$db)->feedUpdate($feedID, true);
\Phake::verify(Arsse::$db)->feedUpdate($feedID, true, false);
$state = $this->primeExpectations($this->data, [
'arsse_feeds' => ['id','url','username','password'],
'arsse_subscriptions' => ['id','owner','feed'],
@ -238,7 +238,7 @@ trait SeriesSubscription {
$subID = $this->nextID("arsse_subscriptions");
\Phake::when(Arsse::$db)->feedUpdate->thenReturn(true);
$this->assertSame($subID, Arsse::$db->subscriptionAdd($this->user, $url, "", "", true));
\Phake::verify(Arsse::$db)->feedUpdate($feedID, true);
\Phake::verify(Arsse::$db)->feedUpdate($feedID, true, false);
$state = $this->primeExpectations($this->data, [
'arsse_feeds' => ['id','url','username','password'],
'arsse_subscriptions' => ['id','owner','feed'],
@ -256,7 +256,7 @@ trait SeriesSubscription {
try {
Arsse::$db->subscriptionAdd($this->user, $url, "", "", false);
} finally {
\Phake::verify(Arsse::$db)->feedUpdate($feedID, true);
\Phake::verify(Arsse::$db)->feedUpdate($feedID, true, false);
$state = $this->primeExpectations($this->data, [
'arsse_feeds' => ['id','url','username','password'],
'arsse_subscriptions' => ['id','owner','feed'],

View file

@ -172,16 +172,22 @@ class TestV1 extends \JKingWeb\Arsse\Test\AbstractTest {
$this->assertMessage($exp, $this->req("POST", "/discover", ['url' => 2112]));
}
public function testDiscoverFeeds(): void {
$exp = new Response([
/** @dataProvider provideDiscoveries */
public function testDiscoverFeeds($in, ResponseInterface $exp): void {
$this->assertMessage($exp, $this->req("POST", "/discover", ['url' => $in]));
}
public function provideDiscoveries(): iterable {
self::clearData();
$discovered = [
['title' => "Feed", 'type' => "rss", 'url' => "http://localhost:8000/Feed/Discovery/Feed"],
['title' => "Feed", 'type' => "rss", 'url' => "http://localhost:8000/Feed/Discovery/Missing"],
]);
$this->assertMessage($exp, $this->req("POST", "/discover", ['url' => "http://localhost:8000/Feed/Discovery/Valid"]));
$exp = new Response([]);
$this->assertMessage($exp, $this->req("POST", "/discover", ['url' => "http://localhost:8000/Feed/Discovery/Invalid"]));
$exp = new ErrorResponse("Fetch404", 500);
$this->assertMessage($exp, $this->req("POST", "/discover", ['url' => "http://localhost:8000/Feed/Discovery/Missing"]));
];
return [
["http://localhost:8000/Feed/Discovery/Valid", new Response($discovered)],
["http://localhost:8000/Feed/Discovery/Invalid", new Response([])],
["http://localhost:8000/Feed/Discovery/Missing", new ErrorResponse("Fetch404", 502)],
];
}
/** @dataProvider provideUserQueries */