mirror of
https://code.mensbeam.com/MensBeam/Arsse.git
synced 2024-12-22 21:22:40 +00:00
Perform feed discovery correctly; fixes #118
This commit is contained in:
parent
1b72d45adf
commit
a80e283abc
4 changed files with 84 additions and 38 deletions
|
@ -411,13 +411,19 @@ class Database {
|
||||||
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
|
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
|
||||||
}
|
}
|
||||||
// check to see if the feed exists
|
// check to see if the feed exists
|
||||||
$feedID = $this->db->prepare("SELECT id from arsse_feeds where url is ? and username is ? and password is ?", "str", "str", "str")->run($url, $fetchUser, $fetchPassword)->getValue();
|
$check = $this->db->prepare("SELECT id from arsse_feeds where url is ? and username is ? and password is ?", "str", "str", "str");
|
||||||
|
$feedID = $check->run($url, $fetchUser, $fetchPassword)->getValue();
|
||||||
|
if ($discover && is_null($feedID)) {
|
||||||
|
// if the feed doesn't exist, first perform discovery if requested and check for the existence of that URL
|
||||||
|
$url = Feed::discover($url, $fetchUser, $fetchPassword);
|
||||||
|
$feedID = $check->run($url, $fetchUser, $fetchPassword)->getValue();
|
||||||
|
}
|
||||||
if (is_null($feedID)) {
|
if (is_null($feedID)) {
|
||||||
// if the feed doesn't exist add it to the database; we do this unconditionally so as to lock SQLite databases for as little time as possible
|
// if the feed still doesn't exist in the database, add it to the database; we do this unconditionally so as to lock SQLite databases for as little time as possible
|
||||||
$feedID = $this->db->prepare('INSERT INTO arsse_feeds(url,username,password) values(?,?,?)', 'str', 'str', 'str')->run($url, $fetchUser, $fetchPassword)->lastId();
|
$feedID = $this->db->prepare('INSERT INTO arsse_feeds(url,username,password) values(?,?,?)', 'str', 'str', 'str')->run($url, $fetchUser, $fetchPassword)->lastId();
|
||||||
try {
|
try {
|
||||||
// perform an initial update on the newly added feed
|
// perform an initial update on the newly added feed
|
||||||
$this->feedUpdate($feedID, true, $discover);
|
$this->feedUpdate($feedID, true);
|
||||||
} catch (\Throwable $e) {
|
} catch (\Throwable $e) {
|
||||||
// if the update fails, delete the feed we just added
|
// if the update fails, delete the feed we just added
|
||||||
$this->db->prepare('DELETE from arsse_feeds where id is ?', 'int')->run($feedID);
|
$this->db->prepare('DELETE from arsse_feeds where id is ?', 'int')->run($feedID);
|
||||||
|
@ -548,7 +554,7 @@ class Database {
|
||||||
return array_column($feeds, 'id');
|
return array_column($feeds, 'id');
|
||||||
}
|
}
|
||||||
|
|
||||||
public function feedUpdate($feedID, bool $throwError = false, bool $discover = false): bool {
|
public function feedUpdate($feedID, bool $throwError = false): bool {
|
||||||
$tr = $this->db->begin();
|
$tr = $this->db->begin();
|
||||||
// check to make sure the feed exists
|
// check to make sure the feed exists
|
||||||
if (!ValueInfo::id($feedID)) {
|
if (!ValueInfo::id($feedID)) {
|
||||||
|
@ -564,7 +570,7 @@ class Database {
|
||||||
// here. When an exception is thrown it should update the database with the
|
// here. When an exception is thrown it should update the database with the
|
||||||
// error instead of failing; if other exceptions are thrown, we should simply roll back
|
// error instead of failing; if other exceptions are thrown, we should simply roll back
|
||||||
try {
|
try {
|
||||||
$feed = new Feed((int) $feedID, $f['url'], (string) Date::transform($f['modified'], "http", "sql"), $f['etag'], $f['username'], $f['password'], $scrape, $discover);
|
$feed = new Feed((int) $feedID, $f['url'], (string) Date::transform($f['modified'], "http", "sql"), $f['etag'], $f['username'], $f['password'], $scrape);
|
||||||
if (!$feed->modified) {
|
if (!$feed->modified) {
|
||||||
// if the feed hasn't changed, just compute the next fetch time and record it
|
// if the feed hasn't changed, just compute the next fetch time and record it
|
||||||
$this->db->prepare("UPDATE arsse_feeds SET updated = CURRENT_TIMESTAMP, next_fetch = ? WHERE id is ?", 'datetime', 'int')->run($feed->nextFetch, $feedID);
|
$this->db->prepare("UPDATE arsse_feeds SET updated = CURRENT_TIMESTAMP, next_fetch = ? WHERE id is ?", 'datetime', 'int')->run($feed->nextFetch, $feedID);
|
||||||
|
|
75
lib/Feed.php
75
lib/Feed.php
|
@ -5,6 +5,7 @@ namespace JKingWeb\Arsse;
|
||||||
use JKingWeb\Arsse\Misc\Date;
|
use JKingWeb\Arsse\Misc\Date;
|
||||||
use PicoFeed\PicoFeedException;
|
use PicoFeed\PicoFeedException;
|
||||||
use PicoFeed\Config\Config;
|
use PicoFeed\Config\Config;
|
||||||
|
use PicoFeed\Client\Client;
|
||||||
use PicoFeed\Reader\Reader;
|
use PicoFeed\Reader\Reader;
|
||||||
use PicoFeed\Reader\Favicon;
|
use PicoFeed\Reader\Favicon;
|
||||||
use PicoFeed\Scraper\Scraper;
|
use PicoFeed\Scraper\Scraper;
|
||||||
|
@ -12,31 +13,37 @@ use PicoFeed\Scraper\Scraper;
|
||||||
class Feed {
|
class Feed {
|
||||||
public $data = null;
|
public $data = null;
|
||||||
public $favicon;
|
public $favicon;
|
||||||
public $parser;
|
|
||||||
public $reader;
|
|
||||||
public $resource;
|
public $resource;
|
||||||
public $modified = false;
|
public $modified = false;
|
||||||
public $lastModified;
|
public $lastModified;
|
||||||
public $nextFetch;
|
public $nextFetch;
|
||||||
public $newItems = [];
|
public $newItems = [];
|
||||||
public $changedItems = [];
|
public $changedItems = [];
|
||||||
|
|
||||||
|
public static function discover(string $url, string $username = '', string $password = ''): string {
|
||||||
|
// fetch the candidate feed
|
||||||
|
$f = self::download($url, "", "", $username, $password);
|
||||||
|
if ($f->reader->detectFormat($f->getContent())) {
|
||||||
|
// if the prospective URL is a feed, use it
|
||||||
|
$out = $url;
|
||||||
|
} else {
|
||||||
|
$links = $f->reader->find($f->getUrl(), $f->getContent());
|
||||||
|
if (!$links) {
|
||||||
|
// work around a PicoFeed memory leak FIXME: remove this hack (or not) once PicoFeed stops leaking memory
|
||||||
|
libxml_use_internal_errors(false);
|
||||||
|
throw new Feed\Exception($url, new \PicoFeed\Reader\SubscriptionNotFoundException('Unable to find a subscription'));
|
||||||
|
} else {
|
||||||
|
$out = $links[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// work around a PicoFeed memory leak FIXME: remove this hack (or not) once PicoFeed stops leaking memory
|
||||||
|
libxml_use_internal_errors(false);
|
||||||
|
return $out;
|
||||||
|
}
|
||||||
|
|
||||||
public function __construct(int $feedID = null, string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = '', bool $scrape = false, bool $discover = false) {
|
public function __construct(int $feedID = null, string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = '', bool $scrape = false) {
|
||||||
// set the configuration
|
|
||||||
$userAgent = Arsse::$conf->fetchUserAgentString ?? sprintf('Arsse/%s (%s %s; %s; https://code.jkingweb.ca/jking/arsse) PicoFeed (https://github.com/fguillot/picoFeed)',
|
|
||||||
Arsse::VERSION, // Arsse version
|
|
||||||
php_uname('s'), // OS
|
|
||||||
php_uname('r'), // OS version
|
|
||||||
php_uname('m') // platform architecture
|
|
||||||
);
|
|
||||||
$this->config = new Config;
|
|
||||||
$this->config->setMaxBodySize(Arsse::$conf->fetchSizeLimit);
|
|
||||||
$this->config->setClientTimeout(Arsse::$conf->fetchTimeout);
|
|
||||||
$this->config->setGrabberTimeout(Arsse::$conf->fetchTimeout);
|
|
||||||
$this->config->setClientUserAgent($userAgent);
|
|
||||||
$this->config->setGrabberUserAgent($userAgent);
|
|
||||||
// fetch the feed
|
// fetch the feed
|
||||||
$this->download($url, $lastModified, $etag, $username, $password, $discover);
|
$this->resource = self::download($url, $lastModified, $etag, $username, $password);
|
||||||
// format the HTTP Last-Modified date returned
|
// format the HTTP Last-Modified date returned
|
||||||
$lastMod = $this->resource->getLastModified();
|
$lastMod = $this->resource->getLastModified();
|
||||||
if (strlen($lastMod)) {
|
if (strlen($lastMod)) {
|
||||||
|
@ -65,26 +72,40 @@ class Feed {
|
||||||
$this->nextFetch = $this->computeNextFetch();
|
$this->nextFetch = $this->computeNextFetch();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function download(string $url, string $lastModified, string $etag, string $username, string $password, bool $discover): bool {
|
protected static function configure(): Config {
|
||||||
$action = $discover ? "discover" : "download";
|
$userAgent = Arsse::$conf->fetchUserAgentString ?? sprintf('Arsse/%s (%s %s; %s; https://thearsse.com/) PicoFeed (https://github.com/miniflux/picoFeed)',
|
||||||
|
Arsse::VERSION, // Arsse version
|
||||||
|
php_uname('s'), // OS
|
||||||
|
php_uname('r'), // OS version
|
||||||
|
php_uname('m') // platform architecture
|
||||||
|
);
|
||||||
|
$config = new Config;
|
||||||
|
$config->setMaxBodySize(Arsse::$conf->fetchSizeLimit);
|
||||||
|
$config->setClientTimeout(Arsse::$conf->fetchTimeout);
|
||||||
|
$config->setGrabberTimeout(Arsse::$conf->fetchTimeout);
|
||||||
|
$config->setClientUserAgent($userAgent);
|
||||||
|
$config->setGrabberUserAgent($userAgent);
|
||||||
|
return $config;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static function download(string $url, string $lastModified, string $etag, string $username, string $password): Client {
|
||||||
try {
|
try {
|
||||||
$this->reader = new Reader($this->config);
|
$reader = new Reader(self::configure());
|
||||||
$this->resource = $this->reader->$action($url, $lastModified, $etag, $username, $password);
|
$client = $reader->download($url, $lastModified, $etag, $username, $password);
|
||||||
|
$client->reader = $reader;
|
||||||
|
return $client;
|
||||||
} catch (PicoFeedException $e) {
|
} catch (PicoFeedException $e) {
|
||||||
throw new Feed\Exception($url, $e);
|
throw new Feed\Exception($url, $e);
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parse(): bool {
|
protected function parse(): bool {
|
||||||
try {
|
try {
|
||||||
$this->parser = $this->reader->getParser(
|
$feed = $this->resource->reader->getParser(
|
||||||
$this->resource->getUrl(),
|
$this->resource->getUrl(),
|
||||||
$this->resource->getContent(),
|
$this->resource->getContent(),
|
||||||
$this->resource->getEncoding()
|
$this->resource->getEncoding()
|
||||||
);
|
)->execute();
|
||||||
$feed = $this->parser->execute();
|
|
||||||
|
|
||||||
// Grab the favicon for the feed; returns an empty string if it cannot find one.
|
// Grab the favicon for the feed; returns an empty string if it cannot find one.
|
||||||
// Some feeds might use a different domain (eg: feedburner), so the site url is
|
// Some feeds might use a different domain (eg: feedburner), so the site url is
|
||||||
// used instead of the feed's url.
|
// used instead of the feed's url.
|
||||||
|
@ -388,7 +409,7 @@ class Feed {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function scrape(): bool {
|
protected function scrape(): bool {
|
||||||
$scraper = new Scraper($this->config);
|
$scraper = new Scraper(self::configure());
|
||||||
foreach (array_merge($this->newItems, $this->changedItems) as $item) {
|
foreach (array_merge($this->newItems, $this->changedItems) as $item) {
|
||||||
$scraper->setUrl($item->url);
|
$scraper->setUrl($item->url);
|
||||||
$scraper->execute();
|
$scraper->execute();
|
||||||
|
|
|
@ -134,12 +134,13 @@ class TestFeed extends Test\AbstractTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
public function testDiscoverAFeedSuccessfully() {
|
public function testDiscoverAFeedSuccessfully() {
|
||||||
$this->assertInstanceOf(Feed::class, new Feed(null, $this->base."Discovery/Valid", "", "", "", "", false, true));
|
$this->assertSame($this->base."Discovery/Feed", Feed::discover($this->base."Discovery/Valid"));
|
||||||
|
$this->assertSame($this->base."Discovery/Feed", Feed::discover($this->base."Discovery/Feed"));
|
||||||
}
|
}
|
||||||
|
|
||||||
public function testDiscoverAFeedUnsuccessfully() {
|
public function testDiscoverAFeedUnsuccessfully() {
|
||||||
$this->assertException("subscriptionNotFound", "Feed");
|
$this->assertException("subscriptionNotFound", "Feed");
|
||||||
new Feed(null, $this->base."Discovery/Invalid", "", "", "", "", false, true);
|
Feed::discover($this->base."Discovery/Invalid");
|
||||||
}
|
}
|
||||||
|
|
||||||
public function testParseEntityExpansionAttack() {
|
public function testParseEntityExpansionAttack() {
|
||||||
|
|
|
@ -133,9 +133,9 @@ trait SeriesSubscription {
|
||||||
$feedID = $this->nextID("arsse_feeds");
|
$feedID = $this->nextID("arsse_feeds");
|
||||||
$subID = $this->nextID("arsse_subscriptions");
|
$subID = $this->nextID("arsse_subscriptions");
|
||||||
Phake::when(Arsse::$db)->feedUpdate->thenReturn(true);
|
Phake::when(Arsse::$db)->feedUpdate->thenReturn(true);
|
||||||
$this->assertSame($subID, Arsse::$db->subscriptionAdd($this->user, $url));
|
$this->assertSame($subID, Arsse::$db->subscriptionAdd($this->user, $url, "", "", false));
|
||||||
Phake::verify(Arsse::$user)->authorize($this->user, "subscriptionAdd");
|
Phake::verify(Arsse::$user)->authorize($this->user, "subscriptionAdd");
|
||||||
Phake::verify(Arsse::$db)->feedUpdate($feedID, true, true);
|
Phake::verify(Arsse::$db)->feedUpdate($feedID, true);
|
||||||
$state = $this->primeExpectations($this->data, [
|
$state = $this->primeExpectations($this->data, [
|
||||||
'arsse_feeds' => ['id','url','username','password'],
|
'arsse_feeds' => ['id','url','username','password'],
|
||||||
'arsse_subscriptions' => ['id','owner','feed'],
|
'arsse_subscriptions' => ['id','owner','feed'],
|
||||||
|
@ -145,15 +145,33 @@ trait SeriesSubscription {
|
||||||
$this->compareExpectations($state);
|
$this->compareExpectations($state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function testAddASubscriptionToANewFeedViaDiscovery() {
|
||||||
|
$url = "http://localhost:8000/Feed/Discovery/Valid";
|
||||||
|
$discovered = "http://localhost:8000/Feed/Discovery/Feed";
|
||||||
|
$feedID = $this->nextID("arsse_feeds");
|
||||||
|
$subID = $this->nextID("arsse_subscriptions");
|
||||||
|
Phake::when(Arsse::$db)->feedUpdate->thenReturn(true);
|
||||||
|
$this->assertSame($subID, Arsse::$db->subscriptionAdd($this->user, $url, "", "", true));
|
||||||
|
Phake::verify(Arsse::$user)->authorize($this->user, "subscriptionAdd");
|
||||||
|
Phake::verify(Arsse::$db)->feedUpdate($feedID, true);
|
||||||
|
$state = $this->primeExpectations($this->data, [
|
||||||
|
'arsse_feeds' => ['id','url','username','password'],
|
||||||
|
'arsse_subscriptions' => ['id','owner','feed'],
|
||||||
|
]);
|
||||||
|
$state['arsse_feeds']['rows'][] = [$feedID,$discovered,"",""];
|
||||||
|
$state['arsse_subscriptions']['rows'][] = [$subID,$this->user,$feedID];
|
||||||
|
$this->compareExpectations($state);
|
||||||
|
}
|
||||||
|
|
||||||
public function testAddASubscriptionToAnInvalidFeed() {
|
public function testAddASubscriptionToAnInvalidFeed() {
|
||||||
$url = "http://example.org/feed1";
|
$url = "http://example.org/feed1";
|
||||||
$feedID = $this->nextID("arsse_feeds");
|
$feedID = $this->nextID("arsse_feeds");
|
||||||
Phake::when(Arsse::$db)->feedUpdate->thenThrow(new FeedException($url, new \PicoFeed\Client\InvalidUrlException()));
|
Phake::when(Arsse::$db)->feedUpdate->thenThrow(new FeedException($url, new \PicoFeed\Client\InvalidUrlException()));
|
||||||
try {
|
try {
|
||||||
Arsse::$db->subscriptionAdd($this->user, $url);
|
Arsse::$db->subscriptionAdd($this->user, $url, "", "", false);
|
||||||
} catch (FeedException $e) {
|
} catch (FeedException $e) {
|
||||||
Phake::verify(Arsse::$user)->authorize($this->user, "subscriptionAdd");
|
Phake::verify(Arsse::$user)->authorize($this->user, "subscriptionAdd");
|
||||||
Phake::verify(Arsse::$db)->feedUpdate($feedID, true, true);
|
Phake::verify(Arsse::$db)->feedUpdate($feedID, true);
|
||||||
$state = $this->primeExpectations($this->data, [
|
$state = $this->primeExpectations($this->data, [
|
||||||
'arsse_feeds' => ['id','url','username','password'],
|
'arsse_feeds' => ['id','url','username','password'],
|
||||||
'arsse_subscriptions' => ['id','owner','feed'],
|
'arsse_subscriptions' => ['id','owner','feed'],
|
||||||
|
|
Loading…
Reference in a new issue