1
1
Fork 0
mirror of https://code.mensbeam.com/MensBeam/Arsse.git synced 2024-12-22 21:22:40 +00:00

Perform feed discovery correctly; fixes #118

This commit is contained in:
J. King 2017-10-02 11:53:52 -04:00
parent 1b72d45adf
commit a80e283abc
4 changed files with 84 additions and 38 deletions

View file

@ -411,13 +411,19 @@ class Database {
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]); throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
} }
// check to see if the feed exists // check to see if the feed exists
$feedID = $this->db->prepare("SELECT id from arsse_feeds where url is ? and username is ? and password is ?", "str", "str", "str")->run($url, $fetchUser, $fetchPassword)->getValue(); $check = $this->db->prepare("SELECT id from arsse_feeds where url is ? and username is ? and password is ?", "str", "str", "str");
$feedID = $check->run($url, $fetchUser, $fetchPassword)->getValue();
if ($discover && is_null($feedID)) {
// if the feed doesn't exist, first perform discovery if requested and check for the existence of that URL
$url = Feed::discover($url, $fetchUser, $fetchPassword);
$feedID = $check->run($url, $fetchUser, $fetchPassword)->getValue();
}
if (is_null($feedID)) { if (is_null($feedID)) {
// if the feed doesn't exist add it to the database; we do this unconditionally so as to lock SQLite databases for as little time as possible // if the feed still doesn't exist in the database, add it to the database; we do this unconditionally so as to lock SQLite databases for as little time as possible
$feedID = $this->db->prepare('INSERT INTO arsse_feeds(url,username,password) values(?,?,?)', 'str', 'str', 'str')->run($url, $fetchUser, $fetchPassword)->lastId(); $feedID = $this->db->prepare('INSERT INTO arsse_feeds(url,username,password) values(?,?,?)', 'str', 'str', 'str')->run($url, $fetchUser, $fetchPassword)->lastId();
try { try {
// perform an initial update on the newly added feed // perform an initial update on the newly added feed
$this->feedUpdate($feedID, true, $discover); $this->feedUpdate($feedID, true);
} catch (\Throwable $e) { } catch (\Throwable $e) {
// if the update fails, delete the feed we just added // if the update fails, delete the feed we just added
$this->db->prepare('DELETE from arsse_feeds where id is ?', 'int')->run($feedID); $this->db->prepare('DELETE from arsse_feeds where id is ?', 'int')->run($feedID);
@ -548,7 +554,7 @@ class Database {
return array_column($feeds, 'id'); return array_column($feeds, 'id');
} }
public function feedUpdate($feedID, bool $throwError = false, bool $discover = false): bool { public function feedUpdate($feedID, bool $throwError = false): bool {
$tr = $this->db->begin(); $tr = $this->db->begin();
// check to make sure the feed exists // check to make sure the feed exists
if (!ValueInfo::id($feedID)) { if (!ValueInfo::id($feedID)) {
@ -564,7 +570,7 @@ class Database {
// here. When an exception is thrown it should update the database with the // here. When an exception is thrown it should update the database with the
// error instead of failing; if other exceptions are thrown, we should simply roll back // error instead of failing; if other exceptions are thrown, we should simply roll back
try { try {
$feed = new Feed((int) $feedID, $f['url'], (string) Date::transform($f['modified'], "http", "sql"), $f['etag'], $f['username'], $f['password'], $scrape, $discover); $feed = new Feed((int) $feedID, $f['url'], (string) Date::transform($f['modified'], "http", "sql"), $f['etag'], $f['username'], $f['password'], $scrape);
if (!$feed->modified) { if (!$feed->modified) {
// if the feed hasn't changed, just compute the next fetch time and record it // if the feed hasn't changed, just compute the next fetch time and record it
$this->db->prepare("UPDATE arsse_feeds SET updated = CURRENT_TIMESTAMP, next_fetch = ? WHERE id is ?", 'datetime', 'int')->run($feed->nextFetch, $feedID); $this->db->prepare("UPDATE arsse_feeds SET updated = CURRENT_TIMESTAMP, next_fetch = ? WHERE id is ?", 'datetime', 'int')->run($feed->nextFetch, $feedID);

View file

@ -5,6 +5,7 @@ namespace JKingWeb\Arsse;
use JKingWeb\Arsse\Misc\Date; use JKingWeb\Arsse\Misc\Date;
use PicoFeed\PicoFeedException; use PicoFeed\PicoFeedException;
use PicoFeed\Config\Config; use PicoFeed\Config\Config;
use PicoFeed\Client\Client;
use PicoFeed\Reader\Reader; use PicoFeed\Reader\Reader;
use PicoFeed\Reader\Favicon; use PicoFeed\Reader\Favicon;
use PicoFeed\Scraper\Scraper; use PicoFeed\Scraper\Scraper;
@ -12,8 +13,6 @@ use PicoFeed\Scraper\Scraper;
class Feed { class Feed {
public $data = null; public $data = null;
public $favicon; public $favicon;
public $parser;
public $reader;
public $resource; public $resource;
public $modified = false; public $modified = false;
public $lastModified; public $lastModified;
@ -21,22 +20,30 @@ class Feed {
public $newItems = []; public $newItems = [];
public $changedItems = []; public $changedItems = [];
public function __construct(int $feedID = null, string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = '', bool $scrape = false, bool $discover = false) { public static function discover(string $url, string $username = '', string $password = ''): string {
// set the configuration // fetch the candidate feed
$userAgent = Arsse::$conf->fetchUserAgentString ?? sprintf('Arsse/%s (%s %s; %s; https://code.jkingweb.ca/jking/arsse) PicoFeed (https://github.com/fguillot/picoFeed)', $f = self::download($url, "", "", $username, $password);
Arsse::VERSION, // Arsse version if ($f->reader->detectFormat($f->getContent())) {
php_uname('s'), // OS // if the prospective URL is a feed, use it
php_uname('r'), // OS version $out = $url;
php_uname('m') // platform architecture } else {
); $links = $f->reader->find($f->getUrl(), $f->getContent());
$this->config = new Config; if (!$links) {
$this->config->setMaxBodySize(Arsse::$conf->fetchSizeLimit); // work around a PicoFeed memory leak FIXME: remove this hack (or not) once PicoFeed stops leaking memory
$this->config->setClientTimeout(Arsse::$conf->fetchTimeout); libxml_use_internal_errors(false);
$this->config->setGrabberTimeout(Arsse::$conf->fetchTimeout); throw new Feed\Exception($url, new \PicoFeed\Reader\SubscriptionNotFoundException('Unable to find a subscription'));
$this->config->setClientUserAgent($userAgent); } else {
$this->config->setGrabberUserAgent($userAgent); $out = $links[0];
}
}
// work around a PicoFeed memory leak FIXME: remove this hack (or not) once PicoFeed stops leaking memory
libxml_use_internal_errors(false);
return $out;
}
public function __construct(int $feedID = null, string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = '', bool $scrape = false) {
// fetch the feed // fetch the feed
$this->download($url, $lastModified, $etag, $username, $password, $discover); $this->resource = self::download($url, $lastModified, $etag, $username, $password);
// format the HTTP Last-Modified date returned // format the HTTP Last-Modified date returned
$lastMod = $this->resource->getLastModified(); $lastMod = $this->resource->getLastModified();
if (strlen($lastMod)) { if (strlen($lastMod)) {
@ -65,26 +72,40 @@ class Feed {
$this->nextFetch = $this->computeNextFetch(); $this->nextFetch = $this->computeNextFetch();
} }
protected function download(string $url, string $lastModified, string $etag, string $username, string $password, bool $discover): bool { protected static function configure(): Config {
$action = $discover ? "discover" : "download"; $userAgent = Arsse::$conf->fetchUserAgentString ?? sprintf('Arsse/%s (%s %s; %s; https://thearsse.com/) PicoFeed (https://github.com/miniflux/picoFeed)',
Arsse::VERSION, // Arsse version
php_uname('s'), // OS
php_uname('r'), // OS version
php_uname('m') // platform architecture
);
$config = new Config;
$config->setMaxBodySize(Arsse::$conf->fetchSizeLimit);
$config->setClientTimeout(Arsse::$conf->fetchTimeout);
$config->setGrabberTimeout(Arsse::$conf->fetchTimeout);
$config->setClientUserAgent($userAgent);
$config->setGrabberUserAgent($userAgent);
return $config;
}
protected static function download(string $url, string $lastModified, string $etag, string $username, string $password): Client {
try { try {
$this->reader = new Reader($this->config); $reader = new Reader(self::configure());
$this->resource = $this->reader->$action($url, $lastModified, $etag, $username, $password); $client = $reader->download($url, $lastModified, $etag, $username, $password);
$client->reader = $reader;
return $client;
} catch (PicoFeedException $e) { } catch (PicoFeedException $e) {
throw new Feed\Exception($url, $e); throw new Feed\Exception($url, $e);
} }
return true;
} }
protected function parse(): bool { protected function parse(): bool {
try { try {
$this->parser = $this->reader->getParser( $feed = $this->resource->reader->getParser(
$this->resource->getUrl(), $this->resource->getUrl(),
$this->resource->getContent(), $this->resource->getContent(),
$this->resource->getEncoding() $this->resource->getEncoding()
); )->execute();
$feed = $this->parser->execute();
// Grab the favicon for the feed; returns an empty string if it cannot find one. // Grab the favicon for the feed; returns an empty string if it cannot find one.
// Some feeds might use a different domain (eg: feedburner), so the site url is // Some feeds might use a different domain (eg: feedburner), so the site url is
// used instead of the feed's url. // used instead of the feed's url.
@ -388,7 +409,7 @@ class Feed {
} }
protected function scrape(): bool { protected function scrape(): bool {
$scraper = new Scraper($this->config); $scraper = new Scraper(self::configure());
foreach (array_merge($this->newItems, $this->changedItems) as $item) { foreach (array_merge($this->newItems, $this->changedItems) as $item) {
$scraper->setUrl($item->url); $scraper->setUrl($item->url);
$scraper->execute(); $scraper->execute();

View file

@ -134,12 +134,13 @@ class TestFeed extends Test\AbstractTest {
} }
public function testDiscoverAFeedSuccessfully() { public function testDiscoverAFeedSuccessfully() {
$this->assertInstanceOf(Feed::class, new Feed(null, $this->base."Discovery/Valid", "", "", "", "", false, true)); $this->assertSame($this->base."Discovery/Feed", Feed::discover($this->base."Discovery/Valid"));
$this->assertSame($this->base."Discovery/Feed", Feed::discover($this->base."Discovery/Feed"));
} }
public function testDiscoverAFeedUnsuccessfully() { public function testDiscoverAFeedUnsuccessfully() {
$this->assertException("subscriptionNotFound", "Feed"); $this->assertException("subscriptionNotFound", "Feed");
new Feed(null, $this->base."Discovery/Invalid", "", "", "", "", false, true); Feed::discover($this->base."Discovery/Invalid");
} }
public function testParseEntityExpansionAttack() { public function testParseEntityExpansionAttack() {

View file

@ -133,9 +133,9 @@ trait SeriesSubscription {
$feedID = $this->nextID("arsse_feeds"); $feedID = $this->nextID("arsse_feeds");
$subID = $this->nextID("arsse_subscriptions"); $subID = $this->nextID("arsse_subscriptions");
Phake::when(Arsse::$db)->feedUpdate->thenReturn(true); Phake::when(Arsse::$db)->feedUpdate->thenReturn(true);
$this->assertSame($subID, Arsse::$db->subscriptionAdd($this->user, $url)); $this->assertSame($subID, Arsse::$db->subscriptionAdd($this->user, $url, "", "", false));
Phake::verify(Arsse::$user)->authorize($this->user, "subscriptionAdd"); Phake::verify(Arsse::$user)->authorize($this->user, "subscriptionAdd");
Phake::verify(Arsse::$db)->feedUpdate($feedID, true, true); Phake::verify(Arsse::$db)->feedUpdate($feedID, true);
$state = $this->primeExpectations($this->data, [ $state = $this->primeExpectations($this->data, [
'arsse_feeds' => ['id','url','username','password'], 'arsse_feeds' => ['id','url','username','password'],
'arsse_subscriptions' => ['id','owner','feed'], 'arsse_subscriptions' => ['id','owner','feed'],
@ -145,15 +145,33 @@ trait SeriesSubscription {
$this->compareExpectations($state); $this->compareExpectations($state);
} }
public function testAddASubscriptionToANewFeedViaDiscovery() {
$url = "http://localhost:8000/Feed/Discovery/Valid";
$discovered = "http://localhost:8000/Feed/Discovery/Feed";
$feedID = $this->nextID("arsse_feeds");
$subID = $this->nextID("arsse_subscriptions");
Phake::when(Arsse::$db)->feedUpdate->thenReturn(true);
$this->assertSame($subID, Arsse::$db->subscriptionAdd($this->user, $url, "", "", true));
Phake::verify(Arsse::$user)->authorize($this->user, "subscriptionAdd");
Phake::verify(Arsse::$db)->feedUpdate($feedID, true);
$state = $this->primeExpectations($this->data, [
'arsse_feeds' => ['id','url','username','password'],
'arsse_subscriptions' => ['id','owner','feed'],
]);
$state['arsse_feeds']['rows'][] = [$feedID,$discovered,"",""];
$state['arsse_subscriptions']['rows'][] = [$subID,$this->user,$feedID];
$this->compareExpectations($state);
}
public function testAddASubscriptionToAnInvalidFeed() { public function testAddASubscriptionToAnInvalidFeed() {
$url = "http://example.org/feed1"; $url = "http://example.org/feed1";
$feedID = $this->nextID("arsse_feeds"); $feedID = $this->nextID("arsse_feeds");
Phake::when(Arsse::$db)->feedUpdate->thenThrow(new FeedException($url, new \PicoFeed\Client\InvalidUrlException())); Phake::when(Arsse::$db)->feedUpdate->thenThrow(new FeedException($url, new \PicoFeed\Client\InvalidUrlException()));
try { try {
Arsse::$db->subscriptionAdd($this->user, $url); Arsse::$db->subscriptionAdd($this->user, $url, "", "", false);
} catch (FeedException $e) { } catch (FeedException $e) {
Phake::verify(Arsse::$user)->authorize($this->user, "subscriptionAdd"); Phake::verify(Arsse::$user)->authorize($this->user, "subscriptionAdd");
Phake::verify(Arsse::$db)->feedUpdate($feedID, true, true); Phake::verify(Arsse::$db)->feedUpdate($feedID, true);
$state = $this->primeExpectations($this->data, [ $state = $this->primeExpectations($this->data, [
'arsse_feeds' => ['id','url','username','password'], 'arsse_feeds' => ['id','url','username','password'],
'arsse_subscriptions' => ['id','owner','feed'], 'arsse_subscriptions' => ['id','owner','feed'],