1
1
Fork 0
mirror of https://code.mensbeam.com/MensBeam/Arsse.git synced 2024-12-22 21:22:40 +00:00

Perform feed discovery correctly; fixes #118

This commit is contained in:
J. King 2017-10-02 11:53:52 -04:00
parent 1b72d45adf
commit a80e283abc
4 changed files with 84 additions and 38 deletions

View file

@ -411,13 +411,19 @@ class Database {
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
}
// check to see if the feed exists
$feedID = $this->db->prepare("SELECT id from arsse_feeds where url is ? and username is ? and password is ?", "str", "str", "str")->run($url, $fetchUser, $fetchPassword)->getValue();
$check = $this->db->prepare("SELECT id from arsse_feeds where url is ? and username is ? and password is ?", "str", "str", "str");
$feedID = $check->run($url, $fetchUser, $fetchPassword)->getValue();
if ($discover && is_null($feedID)) {
// if the feed doesn't exist, first perform discovery if requested and check for the existence of that URL
$url = Feed::discover($url, $fetchUser, $fetchPassword);
$feedID = $check->run($url, $fetchUser, $fetchPassword)->getValue();
}
if (is_null($feedID)) {
// if the feed doesn't exist add it to the database; we do this unconditionally so as to lock SQLite databases for as little time as possible
// if the feed still doesn't exist in the database, add it to the database; we do this unconditionally so as to lock SQLite databases for as little time as possible
$feedID = $this->db->prepare('INSERT INTO arsse_feeds(url,username,password) values(?,?,?)', 'str', 'str', 'str')->run($url, $fetchUser, $fetchPassword)->lastId();
try {
// perform an initial update on the newly added feed
$this->feedUpdate($feedID, true, $discover);
$this->feedUpdate($feedID, true);
} catch (\Throwable $e) {
// if the update fails, delete the feed we just added
$this->db->prepare('DELETE from arsse_feeds where id is ?', 'int')->run($feedID);
@ -548,7 +554,7 @@ class Database {
return array_column($feeds, 'id');
}
public function feedUpdate($feedID, bool $throwError = false, bool $discover = false): bool {
public function feedUpdate($feedID, bool $throwError = false): bool {
$tr = $this->db->begin();
// check to make sure the feed exists
if (!ValueInfo::id($feedID)) {
@ -564,7 +570,7 @@ class Database {
// here. When an exception is thrown it should update the database with the
// error instead of failing; if other exceptions are thrown, we should simply roll back
try {
$feed = new Feed((int) $feedID, $f['url'], (string) Date::transform($f['modified'], "http", "sql"), $f['etag'], $f['username'], $f['password'], $scrape, $discover);
$feed = new Feed((int) $feedID, $f['url'], (string) Date::transform($f['modified'], "http", "sql"), $f['etag'], $f['username'], $f['password'], $scrape);
if (!$feed->modified) {
// if the feed hasn't changed, just compute the next fetch time and record it
$this->db->prepare("UPDATE arsse_feeds SET updated = CURRENT_TIMESTAMP, next_fetch = ? WHERE id is ?", 'datetime', 'int')->run($feed->nextFetch, $feedID);

View file

@ -5,6 +5,7 @@ namespace JKingWeb\Arsse;
use JKingWeb\Arsse\Misc\Date;
use PicoFeed\PicoFeedException;
use PicoFeed\Config\Config;
use PicoFeed\Client\Client;
use PicoFeed\Reader\Reader;
use PicoFeed\Reader\Favicon;
use PicoFeed\Scraper\Scraper;
@ -12,8 +13,6 @@ use PicoFeed\Scraper\Scraper;
class Feed {
public $data = null;
public $favicon;
public $parser;
public $reader;
public $resource;
public $modified = false;
public $lastModified;
@ -21,22 +20,30 @@ class Feed {
public $newItems = [];
public $changedItems = [];
public function __construct(int $feedID = null, string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = '', bool $scrape = false, bool $discover = false) {
// set the configuration
$userAgent = Arsse::$conf->fetchUserAgentString ?? sprintf('Arsse/%s (%s %s; %s; https://code.jkingweb.ca/jking/arsse) PicoFeed (https://github.com/fguillot/picoFeed)',
Arsse::VERSION, // Arsse version
php_uname('s'), // OS
php_uname('r'), // OS version
php_uname('m') // platform architecture
);
$this->config = new Config;
$this->config->setMaxBodySize(Arsse::$conf->fetchSizeLimit);
$this->config->setClientTimeout(Arsse::$conf->fetchTimeout);
$this->config->setGrabberTimeout(Arsse::$conf->fetchTimeout);
$this->config->setClientUserAgent($userAgent);
$this->config->setGrabberUserAgent($userAgent);
public static function discover(string $url, string $username = '', string $password = ''): string {
// fetch the candidate feed
$f = self::download($url, "", "", $username, $password);
if ($f->reader->detectFormat($f->getContent())) {
// if the prospective URL is a feed, use it
$out = $url;
} else {
$links = $f->reader->find($f->getUrl(), $f->getContent());
if (!$links) {
// work around a PicoFeed memory leak FIXME: remove this hack (or not) once PicoFeed stops leaking memory
libxml_use_internal_errors(false);
throw new Feed\Exception($url, new \PicoFeed\Reader\SubscriptionNotFoundException('Unable to find a subscription'));
} else {
$out = $links[0];
}
}
// work around a PicoFeed memory leak FIXME: remove this hack (or not) once PicoFeed stops leaking memory
libxml_use_internal_errors(false);
return $out;
}
public function __construct(int $feedID = null, string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = '', bool $scrape = false) {
// fetch the feed
$this->download($url, $lastModified, $etag, $username, $password, $discover);
$this->resource = self::download($url, $lastModified, $etag, $username, $password);
// format the HTTP Last-Modified date returned
$lastMod = $this->resource->getLastModified();
if (strlen($lastMod)) {
@ -65,26 +72,40 @@ class Feed {
$this->nextFetch = $this->computeNextFetch();
}
protected function download(string $url, string $lastModified, string $etag, string $username, string $password, bool $discover): bool {
$action = $discover ? "discover" : "download";
protected static function configure(): Config {
$userAgent = Arsse::$conf->fetchUserAgentString ?? sprintf('Arsse/%s (%s %s; %s; https://thearsse.com/) PicoFeed (https://github.com/miniflux/picoFeed)',
Arsse::VERSION, // Arsse version
php_uname('s'), // OS
php_uname('r'), // OS version
php_uname('m') // platform architecture
);
$config = new Config;
$config->setMaxBodySize(Arsse::$conf->fetchSizeLimit);
$config->setClientTimeout(Arsse::$conf->fetchTimeout);
$config->setGrabberTimeout(Arsse::$conf->fetchTimeout);
$config->setClientUserAgent($userAgent);
$config->setGrabberUserAgent($userAgent);
return $config;
}
protected static function download(string $url, string $lastModified, string $etag, string $username, string $password): Client {
try {
$this->reader = new Reader($this->config);
$this->resource = $this->reader->$action($url, $lastModified, $etag, $username, $password);
$reader = new Reader(self::configure());
$client = $reader->download($url, $lastModified, $etag, $username, $password);
$client->reader = $reader;
return $client;
} catch (PicoFeedException $e) {
throw new Feed\Exception($url, $e);
}
return true;
}
protected function parse(): bool {
try {
$this->parser = $this->reader->getParser(
$feed = $this->resource->reader->getParser(
$this->resource->getUrl(),
$this->resource->getContent(),
$this->resource->getEncoding()
);
$feed = $this->parser->execute();
)->execute();
// Grab the favicon for the feed; returns an empty string if it cannot find one.
// Some feeds might use a different domain (eg: feedburner), so the site url is
// used instead of the feed's url.
@ -388,7 +409,7 @@ class Feed {
}
protected function scrape(): bool {
$scraper = new Scraper($this->config);
$scraper = new Scraper(self::configure());
foreach (array_merge($this->newItems, $this->changedItems) as $item) {
$scraper->setUrl($item->url);
$scraper->execute();

View file

@ -134,12 +134,13 @@ class TestFeed extends Test\AbstractTest {
}
public function testDiscoverAFeedSuccessfully() {
$this->assertInstanceOf(Feed::class, new Feed(null, $this->base."Discovery/Valid", "", "", "", "", false, true));
$this->assertSame($this->base."Discovery/Feed", Feed::discover($this->base."Discovery/Valid"));
$this->assertSame($this->base."Discovery/Feed", Feed::discover($this->base."Discovery/Feed"));
}
public function testDiscoverAFeedUnsuccessfully() {
$this->assertException("subscriptionNotFound", "Feed");
new Feed(null, $this->base."Discovery/Invalid", "", "", "", "", false, true);
Feed::discover($this->base."Discovery/Invalid");
}
public function testParseEntityExpansionAttack() {

View file

@ -133,9 +133,9 @@ trait SeriesSubscription {
$feedID = $this->nextID("arsse_feeds");
$subID = $this->nextID("arsse_subscriptions");
Phake::when(Arsse::$db)->feedUpdate->thenReturn(true);
$this->assertSame($subID, Arsse::$db->subscriptionAdd($this->user, $url));
$this->assertSame($subID, Arsse::$db->subscriptionAdd($this->user, $url, "", "", false));
Phake::verify(Arsse::$user)->authorize($this->user, "subscriptionAdd");
Phake::verify(Arsse::$db)->feedUpdate($feedID, true, true);
Phake::verify(Arsse::$db)->feedUpdate($feedID, true);
$state = $this->primeExpectations($this->data, [
'arsse_feeds' => ['id','url','username','password'],
'arsse_subscriptions' => ['id','owner','feed'],
@ -145,15 +145,33 @@ trait SeriesSubscription {
$this->compareExpectations($state);
}
public function testAddASubscriptionToANewFeedViaDiscovery() {
$url = "http://localhost:8000/Feed/Discovery/Valid";
$discovered = "http://localhost:8000/Feed/Discovery/Feed";
$feedID = $this->nextID("arsse_feeds");
$subID = $this->nextID("arsse_subscriptions");
Phake::when(Arsse::$db)->feedUpdate->thenReturn(true);
$this->assertSame($subID, Arsse::$db->subscriptionAdd($this->user, $url, "", "", true));
Phake::verify(Arsse::$user)->authorize($this->user, "subscriptionAdd");
Phake::verify(Arsse::$db)->feedUpdate($feedID, true);
$state = $this->primeExpectations($this->data, [
'arsse_feeds' => ['id','url','username','password'],
'arsse_subscriptions' => ['id','owner','feed'],
]);
$state['arsse_feeds']['rows'][] = [$feedID,$discovered,"",""];
$state['arsse_subscriptions']['rows'][] = [$subID,$this->user,$feedID];
$this->compareExpectations($state);
}
public function testAddASubscriptionToAnInvalidFeed() {
$url = "http://example.org/feed1";
$feedID = $this->nextID("arsse_feeds");
Phake::when(Arsse::$db)->feedUpdate->thenThrow(new FeedException($url, new \PicoFeed\Client\InvalidUrlException()));
try {
Arsse::$db->subscriptionAdd($this->user, $url);
Arsse::$db->subscriptionAdd($this->user, $url, "", "", false);
} catch (FeedException $e) {
Phake::verify(Arsse::$user)->authorize($this->user, "subscriptionAdd");
Phake::verify(Arsse::$db)->feedUpdate($feedID, true, true);
Phake::verify(Arsse::$db)->feedUpdate($feedID, true);
$state = $this->primeExpectations($this->data, [
'arsse_feeds' => ['id','url','username','password'],
'arsse_subscriptions' => ['id','owner','feed'],