1
1
Fork 0
mirror of https://code.mensbeam.com/MensBeam/Arsse.git synced 2024-12-22 13:12:41 +00:00

Fixed more Feed bugs

- Two items with differing IDs but identical hashes were matching in the deduplicator; they shouldn't
- They would also match in the database matcher, and shouldn't
- The second-pass database matcher was too aggressive in classifying items as new
This commit is contained in:
J. King 2017-05-23 22:15:57 -04:00
parent fd7d1c3192
commit 6771e8916a
3 changed files with 46 additions and 3 deletions

View file

@ -139,6 +139,8 @@ class Feed {
$out = []; $out = [];
foreach($items as $item) { foreach($items as $item) {
foreach($out as $index => $check) { foreach($out as $index => $check) {
// if the two items both have IDs and they differ, they do not match, regardless of hashes
if($item->id && $check->id && $item->id != $check->id) continue;
// if the two items have the same ID or any one hash matches, they are two versions of the same item // if the two items have the same ID or any one hash matches, they are two versions of the same item
if( if(
($item->id && $check->id && $item->id == $check->id) || ($item->id && $check->id && $item->id == $check->id) ||
@ -185,6 +187,8 @@ class Feed {
foreach($items as $index => $i) { foreach($items as $index => $i) {
$found = false; $found = false;
foreach($articles as $a) { foreach($articles as $a) {
// if the item has an ID and it doesn't match the article ID, the two don't match, regardless of hashes
if($i->id && $i->id !== $a['guid']) continue;
if( if(
// the item matches if the GUID matches... // the item matches if the GUID matches...
($i->id && $i->id === $a['guid']) || ($i->id && $i->id === $a['guid']) ||
@ -228,6 +232,8 @@ class Feed {
$i = $items[$index]; $i = $items[$index];
$found = false; $found = false;
foreach($articles as $a) { foreach($articles as $a) {
// if the item has an ID and it doesn't match the article ID, the two don't match, regardless of hashes
if($i->id && $i->id !== $a['guid']) continue;
if( if(
// the item matches if the GUID matches... // the item matches if the GUID matches...
($i->id && $i->id === $a['guid']) || ($i->id && $i->id === $a['guid']) ||
@ -252,9 +258,6 @@ class Feed {
$found = true; $found = true;
break; break;
} }
} else {
// if we don't have a match, add the item to the definite new list
$new[] = $index;
} }
} }
if(!$found) $new[] = $index; if(!$found) $new[] = $index;

View file

@ -27,6 +27,9 @@ class TestFeed extends \PHPUnit\Framework\TestCase {
$f = new Feed(null, $this->base."Deduplication/ID-Dates"); $f = new Feed(null, $this->base."Deduplication/ID-Dates");
$this->assertCount(2, $f->newItems); $this->assertCount(2, $f->newItems);
$this->assertTime($t, $f->newItems[0]->updatedDate); $this->assertTime($t, $f->newItems[0]->updatedDate);
$f = new Feed(null, $this->base."Deduplication/IdenticalHashes");
$this->assertCount(2, $f->newItems);
$this->assertTime($t, $f->newItems[0]->updatedDate);
} }
function testHandleCacheHeadersOn304() { function testHandleCacheHeadersOn304() {

View file

@ -0,0 +1,37 @@
<?php return [
// Feed fixture: this script returns an array with two keys, 'mime' (the
// media type of the document) and 'content' (the raw feed document).
// NOTE(review): presumably this is the "Deduplication/IdenticalHashes"
// fixture loaded by the feed deduplication test — confirm against the
// test suite, since the file name is not visible here.
//
// The document is an RSS 2.0 feed with four items. The first three share
// GUID "1" and the fourth has GUID "2"; all four carry the same
// description, pubDate and atom:updated values, so the items differ only
// by their GUIDs.
'mime' => "application/rss+xml",
// The heredoc body below is data, not markup to tidy: do not reformat or
// re-indent it, as the text is consumed verbatim.
'content' => <<<MESSAGE_BODY
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>Test feed</title>
<link>http://example.com/</link>
<description>A basic feed for testing</description>
<item>
<guid>1</guid>
<description>Sample article 2</description>
<pubDate>Sun, 19 May 2002 15:21:36 GMT</pubDate> <!-- This is the correct item and date -->
<atom:updated>2002-04-19T15:21:36Z</atom:updated>
</item>
<item>
<guid>1</guid>
<description>Sample article 2</description>
<pubDate>Sun, 19 May 2002 15:21:36 GMT</pubDate>
<atom:updated>2002-04-19T15:21:36Z</atom:updated>
</item>
<item>
<guid>1</guid>
<description>Sample article 2</description>
<pubDate>Sun, 19 May 2002 15:21:36 GMT</pubDate>
<atom:updated>2002-04-19T15:21:36Z</atom:updated>
</item>
<item>
<guid>2</guid>
<description>Sample article 2</description>
<pubDate>Sun, 19 May 2002 15:21:36 GMT</pubDate>
<atom:updated>2002-04-19T15:21:36Z</atom:updated>
</item>
</channel>
</rss>
MESSAGE_BODY
];