1
1
Fork 0
mirror of https://code.mensbeam.com/MensBeam/Arsse.git synced 2025-01-08 17:02:41 +00:00

Feed tests: fetching and parsing

This commit is contained in:
J. King 2017-05-27 18:15:52 -04:00
parent ff59f00356
commit 7656de716b
19 changed files with 290 additions and 12 deletions

4
302EndlessLoop@i=0 Normal file
View file

@ -0,0 +1,4 @@
<br />
<b>Warning</b>: Unknown: failed to open stream: No such file or directory in <b>Unknown</b> on line <b>0</b><br />
<br />
<b>Fatal error</b>: Unknown: Failed opening required '%base%\server.php' (include_path='.;C:\php\pear') in <b>Unknown</b> on line <b>0</b><br />

View file

@ -64,7 +64,7 @@ abstract class AbstractException extends \Exception {
"Feed/Exception.timeout" => 10505, "Feed/Exception.timeout" => 10505,
"Feed/Exception.forbidden" => 10506, "Feed/Exception.forbidden" => 10506,
"Feed/Exception.unauthorized" => 10507, "Feed/Exception.unauthorized" => 10507,
"Feed/Exception.malformed" => 10511, "Feed/Exception.malformedXml" => 10511,
"Feed/Exception.xmlEntity" => 10512, "Feed/Exception.xmlEntity" => 10512,
"Feed/Exception.subscriptionNotFound" => 10521, "Feed/Exception.subscriptionNotFound" => 10521,
"Feed/Exception.unsupportedFeedFormat" => 10522, "Feed/Exception.unsupportedFeedFormat" => 10522,

View file

@ -28,10 +28,12 @@ class Conf {
public $userComposeNames = true; public $userComposeNames = true;
public $userTempPasswordLength = 20; public $userTempPasswordLength = 20;
public $userAgentString; public $fetchTimeout = 10;
public $fetchSizeLimit = 2 * 1024 * 1024;
public $fetchUserAgentString;
public function __construct(string $import_file = "") { public function __construct(string $import_file = "") {
$this->userAgentString = sprintf('Arsse/%s (%s %s; %s; https://code.jkingweb.ca/jking/arsse) PicoFeed (https://github.com/fguillot/picoFeed)', $this->fetchUserAgentString = sprintf('Arsse/%s (%s %s; %s; https://code.jkingweb.ca/jking/arsse) PicoFeed (https://github.com/fguillot/picoFeed)',
VERSION, // Arsse version VERSION, // Arsse version
php_uname('s'), // OS php_uname('s'), // OS
php_uname('r'), // OS version php_uname('r'), // OS version

View file

@ -196,7 +196,7 @@ class Database {
$value = $in; $value = $in;
break; break;
} }
return (bool) $this->db->prepare("REPLACE INTO arsse_settings(key,value,type) values(?,?,?)", "str", "str", "str")->run($key, $value, $type)->changes(); return (bool) $this->db->prepare("REPLACE INTO arsse_settings(key,value,type) values(?,?,?)", "str", "str", "str")->run($key, $value, $type)->changes(); // FIXME: this will not work on PostgreSQL
} }
public function settingRemove(string $key): bool { public function settingRemove(string $key): bool {

View file

@ -44,8 +44,11 @@ class Feed {
public function download(string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = ''): bool { public function download(string $url, string $lastModified = '', string $etag = '', string $username = '', string $password = ''): bool {
try { try {
$config = new Config; $config = new Config;
$config->setClientUserAgent(Data::$conf->userAgentString); $config->setMaxBodySize(Data::$conf->fetchSizeLimit);
$config->setGrabberUserAgent(Data::$conf->userAgentString); $config->setClientTimeout(Data::$conf->fetchTimeout);
$config->setGrabberTimeout(Data::$conf->fetchTimeout);
$config->setClientUserAgent(Data::$conf->fetchUserAgentString);
$config->setGrabberUserAgent(Data::$conf->fetchUserAgentString);
$this->reader = new Reader($config); $this->reader = new Reader($config);
$this->resource = $this->reader->download($url, $lastModified, $etag, $username, $password); $this->resource = $this->reader->download($url, $lastModified, $etag, $username, $password);
@ -102,9 +105,10 @@ class Feed {
$f->titleContentHash = hash('sha256', $f->title.$content); $f->titleContentHash = hash('sha256', $f->title.$content);
} }
// If there is an id element then continue. The id is used already. // If there is an Atom id element use it as the id.
$id = (string)$f->xml->id; $id = (string)$f->xml->children('http://www.w3.org/2005/Atom')->id;
if ($id !== '') { if ($id !== '') {
$f->id = hash('sha256', $id);
continue; continue;
} }

View file

@ -93,8 +93,8 @@ return [
'Exception.JKingWeb/Arsse/Feed/Exception.timeout' => 'Could not download feed "{url}" because its server timed out', 'Exception.JKingWeb/Arsse/Feed/Exception.timeout' => 'Could not download feed "{url}" because its server timed out',
'Exception.JKingWeb/Arsse/Feed/Exception.forbidden' => 'Could not download feed "{url}" because you do not have permission to access it', 'Exception.JKingWeb/Arsse/Feed/Exception.forbidden' => 'Could not download feed "{url}" because you do not have permission to access it',
'Exception.JKingWeb/Arsse/Feed/Exception.unauthorized' => 'Could not download feed "{url}" because you provided insufficient or invalid credentials', 'Exception.JKingWeb/Arsse/Feed/Exception.unauthorized' => 'Could not download feed "{url}" because you provided insufficient or invalid credentials',
'Exception.JKingWeb/Arsse/Feed/Exception.malformed' => 'Could not parse feed "{url}" because it is malformed', 'Exception.JKingWeb/Arsse/Feed/Exception.malformedXml' => 'Could not parse feed "{url}" because it is malformed',
'Exception.JKingWeb/Arsse/Feed/Exception.xmlEntity' => 'Refused to parse feed "{url}" because it contains an XXE attack', 'Exception.JKingWeb/Arsse/Feed/Exception.xmlEntity' => 'Refused to parse feed "{url}" because it contains an XXE attack',
'Exception.JKingWeb/Arsse/Feed/Exception.subscriptionNotFound' => 'Unable to find a feed at location "{url}"', 'Exception.JKingWeb/Arsse/Feed/Exception.subscriptionNotFound' => 'Unable to find a feed at location "{url}"',
'Exception.JKingWeb/Arsse/Feed/Exception.unsupportedFeedFormat' => 'Feed "{url}" is of an unsupported format' 'Exception.JKingWeb/Arsse/Feed/Exception.unsupportedFeedFormat' => 'Feed "{url}" is of an unsupported format',
]; ];

View file

@ -13,12 +13,112 @@ class TestFeed extends \PHPUnit\Framework\TestCase {
function setUp() { function setUp() {
if(!@file_get_contents(self::$host."IsUp")) { if(!@file_get_contents(self::$host."IsUp")) {
$this->markTestSkipped("Test Web server is not accepting requests"); $this->markTestSkipped("Test Web server is not accepting requests");
} else if(!extension_loaded('curl')) {
$this->markTestSkipped("Feed tests are only accurate with curl enabled.");
} }
$this->base = self::$host."Feed/"; $this->base = self::$host."Feed/";
$this->clearData(); $this->clearData();
Data::$conf = new Conf(); Data::$conf = new Conf();
} }
function testHandle400() {
    // A 400 response from the error fixture is expected to surface as an unsupported-format failure
    $url = $this->base."Fetching/Error?code=400";
    $this->assertException("unsupportedFeedFormat", "Feed");
    new Feed(null, $url);
}
function testHandle401() {
    // A 401 response from the error fixture is expected to surface as an "unauthorized" failure
    $url = $this->base."Fetching/Error?code=401";
    $this->assertException("unauthorized", "Feed");
    new Feed(null, $url);
}
function testHandle403() {
    // A 403 response from the error fixture is expected to surface as a "forbidden" failure
    $url = $this->base."Fetching/Error?code=403";
    $this->assertException("forbidden", "Feed");
    new Feed(null, $url);
}
function testHandle404() {
    // A 404 response from the error fixture is expected to surface as an "invalidUrl" failure
    $url = $this->base."Fetching/Error?code=404";
    $this->assertException("invalidUrl", "Feed");
    new Feed(null, $url);
}
function testHandle500() {
    // A 500 response from the error fixture is expected to surface as an unsupported-format failure
    $url = $this->base."Fetching/Error?code=500";
    $this->assertException("unsupportedFeedFormat", "Feed");
    new Feed(null, $url);
}
function testHandleARedirectLoop() {
    // Requesting the endless-loop fixture is expected to fail with "maxRedirect"
    $url = $this->base."Fetching/EndlessLoop?i=0";
    $this->assertException("maxRedirect", "Feed");
    new Feed(null, $url);
}
function testHandleATimeout() {
    // Lower the configured fetch timeout to 1 before requesting the timeout fixture
    Data::$conf->fetchTimeout = 1;
    $url = $this->base."Fetching/Timeout";
    $this->assertException("timeout", "Feed");
    new Feed(null, $url);
}
function testHandleAnOverlyLargeFeed() {
    // Lower the configured size limit to 512 before requesting the oversized fixture
    Data::$conf->fetchSizeLimit = 512;
    $url = $this->base."Fetching/TooLarge";
    $this->assertException("maxSize", "Feed");
    new Feed(null, $url);
}
function testHandleACertificateError() {
    // Requesting this fixed HTTPS address is expected to fail with "invalidCertificate"
    $url = "https://localhost:8000/";
    $this->assertException("invalidCertificate", "Feed");
    new Feed(null, $url);
}
function testParseAFeed() {
    // Fetch the valid fixture and check that feed-level properties are set
    $f = new Feed(null, $this->base."Parsing/Valid");
    $this->assertTrue(isset($f->lastModified));
    $this->assertTrue(isset($f->nextFetch));
    $items = $f->data->items;
    // check ID preference cascade: items 0-2 must carry these hashes, in this order
    $expectedIds = [
        "0a4f0e3768c8a5e9d8d9a16545ae4ff5b097f6dac3ad49555a94a7cace68ba73", // hash of Atom ID
        "a135beced0236b723d12f845ff20ec22d4fc3afe1130012618f027170d57cb4e", // hash of RSS2 GUID
        "205e986f4f8b3acfa281227beadb14f5e8c32c8dae4737f888c94c0df49c56f8", // hash of Dublin Core identifier
    ];
    foreach ($expectedIds as $index => $expected) {
        $this->assertSame($expected, $items[$index]->id);
    }
    // check null hashes: items 3-5 have no ID, so only some of their fallback hashes are populated
    $urlHash   = "6287ba30f534e404e68356237e809683e311285d8b9f47d046ac58784eece052"; // URL hash
    $titleHash = "6cbb5d2dcb11610a99eb3f633dc246690c0acf33327bf7534f95542caa8f27c4"; // title hash
    // NOTE(review): the original also declared a content/enclosure hash
    // ("2b7c57ffa9adde92ccd1884fa1153a5bcd3211e48d99e27be5414cb078e6891c") that was never asserted;
    // the unused local has been dropped. Confirm whether the hashes of item 5 should be pinned to it.
    $this->assertNotEquals("", $items[3]->urlTitleHash);
    $this->assertSame($urlHash, $items[3]->urlContentHash);
    $this->assertSame("", $items[3]->titleContentHash);
    $this->assertNotEquals("", $items[4]->urlTitleHash);
    $this->assertSame("", $items[4]->urlContentHash);
    $this->assertSame($titleHash, $items[4]->titleContentHash);
    $this->assertSame("", $items[5]->urlTitleHash);
    $this->assertNotEquals("", $items[5]->urlContentHash);
    $this->assertNotEquals("", $items[5]->titleContentHash);
    // check null IDs
    foreach ([3, 4, 5] as $index) {
        $this->assertSame("", $items[$index]->id);
    }
}
function testParseEntityExpansionAttack() {
    // Parsing the entity-expansion fixture is expected to fail with "xmlEntity"
    $url = $this->base."Parsing/XEEAttack";
    $this->assertException("xmlEntity", "Feed");
    new Feed(null, $url);
}
function testParseExternalEntityAttack() {
    // Parsing the external-entity fixture is expected to fail with "xmlEntity"
    $url = $this->base."Parsing/XXEAttack";
    $this->assertException("xmlEntity", "Feed");
    new Feed(null, $url);
}
function testParseAnUnsupportedFeed() {
    // Parsing the unsupported-vocabulary fixture is expected to fail with "unsupportedFeedFormat"
    $url = $this->base."Parsing/Unsupported";
    $this->assertException("unsupportedFeedFormat", "Feed");
    new Feed(null, $url);
}
function testParseAMalformedFeed() {
    // Parsing the malformed fixture is expected to fail with "malformedXml"
    $url = $this->base."Parsing/Malformed";
    $this->assertException("malformedXml", "Feed");
    new Feed(null, $url);
}
function testDeduplicateFeedItems() { function testDeduplicateFeedItems() {
// duplicates with dates lead to the newest match being kept // duplicates with dates lead to the newest match being kept
$t = strtotime("2002-05-19T15:21:36Z"); $t = strtotime("2002-05-19T15:21:36Z");

View file

@ -0,0 +1,7 @@
<?php
// Responds with an uncached 302 whose Location is the current request URI
// (on localhost, at the port the request arrived on) with "0" appended.
$location = 'Location: http://localhost:'.$_SERVER['SERVER_PORT'].$_SERVER['REQUEST_URI']."0";
return [
    'code'   => 302,
    'cache'  => false,
    'fields' => [$location],
];

View file

@ -0,0 +1,4 @@
<?php
// Responds, uncached, with the response code named by the "code" query parameter.
// A request without the parameter falls back to 400 instead of raising an
// undefined-index notice and producing a code of 0.
$code = (int) ($_GET['code'] ?? 400);
return [
    'code'  => $code,
    'cache' => false,
];

View file

@ -0,0 +1,6 @@
<?php
// Delay for five seconds, then answer with an uncached 404.
sleep(5);
$response = [
    'code'  => 404,
    'cache' => false,
];
return $response;

View file

@ -0,0 +1,18 @@
<?php
// Serves an RSS document whose single item carries a 1024-character description.
$filler = str_repeat("0", 1024);
$item = '
<item>
<description>'.$filler.'</description>
</item>';
$feed = <<<MESSAGE_BODY
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
<channel>
<title>Test feed</title>
<link>http://example.com/</link>
<description>Example newsfeed title</description>
$item
</channel>
</rss>
MESSAGE_BODY;
return [
    'mime'    => "application/rss+xml",
    'content' => $feed,
];

View file

@ -0,0 +1,6 @@
<?php
// Serves an RSS document consisting only of an unclosed root element.
$feed = <<<MESSAGE_BODY
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
MESSAGE_BODY;
return [
    'mime'    => "application/rss+xml",
    'content' => $feed,
];

View file

@ -0,0 +1,6 @@
<?php
// Serves an XML document whose root element is <random-vocabulary/>.
$document = <<<MESSAGE_BODY
<random-vocabulary/>
MESSAGE_BODY;
return [
    'mime'    => "application/xml",
    'content' => $document,
];

View file

@ -0,0 +1,35 @@
<?php
// Serves a well-formed RSS 2.0 document with six items:
//  - the first three carry progressively fewer identifier elements
//    (atom:id + guid + dc:identifier, then guid + dc:identifier, then dc:identifier only);
//  - the last three carry no identifier, only a link, a title, or a description with enclosure.
$feed = <<<MESSAGE_BODY
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
<channel>
<title>Test feed</title>
<link>http://example.com/</link>
<description>Example newsfeed title</description>
<item>
<dc:identifier>urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2</dc:identifier>
<guid>http://example.com/1</guid>
<atom:id>urn:uuid:4c8dbc84-42eb-11e7-9f61-6f83db96854f</atom:id> <!-- Correct ID -->
</item>
<item>
<dc:identifier>urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2</dc:identifier>
<guid>http://example.com/1</guid> <!-- Correct ID -->
</item>
<item>
<dc:identifier>urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2</dc:identifier> <!-- Correct ID -->
</item>
<item>
<link>http://example.com/2</link>
</item>
<item>
<title>Example title</title>
</item>
<item>
<description>Example content</description>
<enclosure url="http://example.com/text" type="text/plain"/>
</item>
</channel>
</rss>
MESSAGE_BODY;
return [
    'mime'    => "application/rss+xml",
    'content' => $feed,
];

View file

@ -0,0 +1,47 @@
<?php
// Serves an RSS document preceded by a DOCTYPE that declares ten nested entities
// (xee0 through xee9), each one referencing the previous entity ten times.
$feed = <<<MESSAGE_BODY
<!DOCTYPE test [
<!ENTITY xee0 "XEE">
<!ENTITY xee1 "&xee0;&xee0;&xee0;&xee0;&xee0;&xee0;&xee0;&xee0;&xee0;&xee0;">
<!ENTITY xee2 "&xee1;&xee1;&xee1;&xee1;&xee1;&xee1;&xee1;&xee1;&xee1;&xee1;">
<!ENTITY xee3 "&xee2;&xee2;&xee2;&xee2;&xee2;&xee2;&xee2;&xee2;&xee2;&xee2;">
<!ENTITY xee4 "&xee3;&xee3;&xee3;&xee3;&xee3;&xee3;&xee3;&xee3;&xee3;&xee3;">
<!ENTITY xee5 "&xee4;&xee4;&xee4;&xee4;&xee4;&xee4;&xee4;&xee4;&xee4;&xee4;">
<!ENTITY xee6 "&xee5;&xee5;&xee5;&xee5;&xee5;&xee5;&xee5;&xee5;&xee5;&xee5;">
<!ENTITY xee7 "&xee6;&xee6;&xee6;&xee6;&xee6;&xee6;&xee6;&xee6;&xee6;&xee6;">
<!ENTITY xee8 "&xee7;&xee7;&xee7;&xee7;&xee7;&xee7;&xee7;&xee7;&xee7;&xee7;">
<!ENTITY xee9 "&xee8;&xee8;&xee8;&xee8;&xee8;&xee8;&xee8;&xee8;&xee8;&xee8;">
]>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
<channel>
<title>Test feed</title>
<link>http://example.com/</link>
<description>Example newsfeed title</description>
<item>
<dc:identifier>urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2</dc:identifier>
<guid>http://example.com/1</guid>
<atom:id>urn:uuid:4c8dbc84-42eb-11e7-9f61-6f83db96854f</atom:id> <!-- Correct ID -->
</item>
<item>
<dc:identifier>urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2</dc:identifier>
<guid>http://example.com/1</guid> <!-- Correct ID -->
</item>
<item>
<dc:identifier>urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2</dc:identifier> <!-- Correct ID -->
</item>
<item>
<link>http://example.com/2</link>
</item>
<item>
<title>Example title</title>
</item>
<item>
<description>Example content</description>
<enclosure url="http://example.com/text" type="text/plain"/>
</item>
</channel>
</rss>
MESSAGE_BODY;
return [
    'mime'    => "application/rss+xml",
    'content' => $feed,
];

View file

@ -0,0 +1,38 @@
<?php
// Serves an RSS document whose DOCTYPE declares an external entity pointing at
// file:///etc/passwd and whose channel description references that entity.
$feed = <<<MESSAGE_BODY
<!DOCTYPE test [
<!ENTITY xxe SYSTEM "file:///etc/passwd">
]>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
<channel>
<title>Test feed</title>
<link>http://example.com/</link>
<description>&xxe;</description>
<item>
<dc:identifier>urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2</dc:identifier>
<guid>http://example.com/1</guid>
<atom:id>urn:uuid:4c8dbc84-42eb-11e7-9f61-6f83db96854f</atom:id> <!-- Correct ID -->
</item>
<item>
<dc:identifier>urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2</dc:identifier>
<guid>http://example.com/1</guid> <!-- Correct ID -->
</item>
<item>
<dc:identifier>urn:uuid:43fb1908-42ec-11e7-b61b-2b118faca2f2</dc:identifier> <!-- Correct ID -->
</item>
<item>
<link>http://example.com/2</link>
</item>
<item>
<title>Example title</title>
</item>
<item>
<description>Example content</description>
<enclosure url="http://example.com/text" type="text/plain"/>
</item>
</channel>
</rss>
MESSAGE_BODY;
return [
    'mime'    => "application/rss+xml",
    'content' => $feed,
];

View file

@ -25,6 +25,7 @@ which include the following data:
*/ */
ignore_user_abort(false);
$defaults = [ // default values for response $defaults = [ // default values for response
'code' => 200, 'code' => 200,
'content' => "", 'content' => "",

View file

@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
base=`dirname "$0"` base=`dirname "$0"`
php -n -S localhost:8000 "$base/server.php" >/dev/null & php -n -S localhost:8000 "$base/server.php" >/dev/null 2>/dev/null &
sleep 1s sleep 1s
php "$base/../vendor/phpunit/phpunit/phpunit" -c "$base/phpunit.xml" $* php "$base/../vendor/phpunit/phpunit/phpunit" -c "$base/phpunit.xml" $*
sleep 1s sleep 1s

View file

@ -1,7 +1,7 @@
@echo off @echo off
setlocal setlocal
set base=%~dp0 set base=%~dp0
start /b php -n -S localhost:8000 "%base%\server.php" >nul start /b php -n -S localhost:8000 "%base%\server.php" >nul 2>nul
timeout /nobreak /t 1 >nul timeout /nobreak /t 1 >nul
php "%base%\..\vendor\phpunit\phpunit\phpunit" -c "%base%\phpunit.xml" %* php "%base%\..\vendor\phpunit\phpunit\phpunit" -c "%base%\phpunit.xml" %*
timeout /nobreak /t 1 >nul timeout /nobreak /t 1 >nul