2017-03-18 11:01:23 -05:00
|
|
|
<?php
|
2017-03-28 18:19:12 -05:00
|
|
|
declare(strict_types=1);
|
2017-03-27 23:12:12 -05:00
|
|
|
namespace JKingWeb\Arsse;
|
2017-03-18 11:01:23 -05:00
|
|
|
use PicoFeed\Reader\Reader;
|
|
|
|
use PicoFeed\PicoFeedException;
|
|
|
|
use PicoFeed\Reader\Favicon;
|
2017-03-28 18:19:12 -05:00
|
|
|
use PicoFeed\Config\Config;
|
2017-03-18 11:01:23 -05:00
|
|
|
|
|
|
|
/**
 * Downloads a news feed with PicoFeed and post-processes its items.
 *
 * The constructor performs the download and the favicon lookup; parse() then
 * parses the downloaded content and fills in comparison hashes and an
 * identifier for each item.
 */
class Feed {
    /** @var mixed The parsed feed as returned by the parser; null until parse() has succeeded. */
    public $data = null;
    /** @var mixed Result of the favicon lookup performed in the constructor. */
    public $favicon;
    /** @var mixed The parser obtained from the reader; set by parse(). */
    public $parser;
    /** @var Reader The PicoFeed reader used to download and parse the feed. */
    public $reader;
    /** @var mixed The download result returned by the reader's download() call. */
    public $resource;

    /**
     * Downloads the feed at the given URL and looks up its favicon.
     *
     * @param string $url          Address of the feed
     * @param string $lastModified Passed through to the reader's download() call
     * @param string $etag         Passed through to the reader's download() call
     * @param string $username     Passed through to the reader's download() call
     * @param string $password     Passed through to the reader's download() call
     *
     * @throws Feed\Exception When PicoFeed raises a PicoFeedException
     */
    public function __construct(string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = '') {
        try {
            // Use the application's configured user agent for both the client and the grabber.
            $config = new Config;
            $config->setClientUserAgent(Data::$conf->userAgentString);
            $config->setGrabberUserAgent(Data::$conf->userAgentString);

            $this->reader = new Reader($config);
            $this->resource = $this->reader->download($url, $lastModified, $etag, $username, $password);
            // Grab the favicon for the feed; returns an empty string if it cannot find one.
            $this->favicon = (new Favicon)->find($url);
        } catch (PicoFeedException $e) {
            throw new Feed\Exception($url, $e);
        }
    }

    /**
     * Parses the downloaded feed and normalizes the identifiers of its items.
     *
     * Every item receives three SHA-256 hashes (urlTitleHash, urlContentHash and
     * titleContentHash). An item whose XML has no id element gets its id replaced
     * by the SHA-256 hash of its guid element or, failing that, of its Dublin Core
     * identifier; if neither exists the id becomes an empty string.
     *
     * @return bool Always true; failures are reported by exception
     *
     * @throws Feed\Exception When PicoFeed raises a PicoFeedException
     */
    public function parse(): bool {
        try {
            $this->parser = $this->reader->getParser(
                $this->resource->getUrl(),
                $this->resource->getContent(),
                $this->resource->getEncoding()
            );
            $feed = $this->parser->execute();
        } catch (PicoFeedException $e) {
            // The URL is not a local variable in this method, so take it from the
            // downloaded resource when reporting the failure.
            throw new Feed\Exception($this->resource->getUrl(), $e);
        }

        // PicoFeed does not provide valid ids when there is no id element. Its
        // fallback of hashing the url, title, and content together is not suitable
        // here. Many feeds are frankenstein mixtures of Atom and RSS, but some are
        // pure RSS with guid elements while others use the Dublin Core spec for
        // identification. These feeds shouldn't be duplicated when updated. That
        // should only be reserved for severely broken feeds.
        //
        // Items are objects, so they can be modified in place without iterating
        // by reference (which would leave a dangling reference after the loop).
        foreach ($feed->items as $f) {
            // Hashes used for comparison to check for updates and also to identify
            // when an id doesn't exist.
            $f->urlTitleHash = hash('sha256', $f->url.$f->title);
            $f->urlContentHash = hash('sha256', $f->url.$f->content.$f->enclosureUrl.$f->enclosureType);
            $f->titleContentHash = hash('sha256', $f->title.$f->content.$f->enclosureUrl.$f->enclosureType);

            // If there is an id element, the item's existing id is left untouched.
            if ((string) $f->xml->id !== '') {
                continue;
            }

            // Otherwise prefer a guid element, then a Dublin Core identifier.
            $id = (string) $f->xml->guid;
            if ($id === '') {
                $id = (string) $f->xml->children('http://purl.org/dc/elements/1.1/')->identifier;
            }

            // If there aren't any of those there is no id.
            $f->id = ($id !== '') ? hash('sha256', $id) : '';
        }

        $this->data = $feed;
        return true;
    }
}
|