mirror of
https://code.mensbeam.com/MensBeam/Arsse.git
synced 2024-12-22 13:12:41 +00:00
Fixed more Feed bugs
- Two items with differing IDs but identical hashes were matching in the deduplicator; they shouldn't
- They would also match in the database matcher, and shouldn't
- The second-pass database matcher was over-aggressively treating items as new
This commit is contained in:
parent
fd7d1c3192
commit
6771e8916a
3 changed files with 46 additions and 3 deletions
|
@ -139,6 +139,8 @@ class Feed {
|
|||
$out = [];
|
||||
foreach($items as $item) {
|
||||
foreach($out as $index => $check) {
|
||||
// if the two items both have IDs and they differ, they do not match, regardless of hashes
|
||||
if($item->id && $check->id && $item->id != $check->id) continue;
|
||||
// if the two items have the same ID or any one hash matches, they are two versions of the same item
|
||||
if(
|
||||
($item->id && $check->id && $item->id == $check->id) ||
|
||||
|
@ -185,6 +187,8 @@ class Feed {
|
|||
foreach($items as $index => $i) {
|
||||
$found = false;
|
||||
foreach($articles as $a) {
|
||||
// if the item has an ID and it doesn't match the article ID, the two don't match, regardless of hashes
|
||||
if($i->id && $i->id !== $a['guid']) continue;
|
||||
if(
|
||||
// the item matches if the GUID matches...
|
||||
($i->id && $i->id === $a['guid']) ||
|
||||
|
@ -228,6 +232,8 @@ class Feed {
|
|||
$i = $items[$index];
|
||||
$found = false;
|
||||
foreach($articles as $a) {
|
||||
// if the item has an ID and it doesn't match the article ID, the two don't match, regardless of hashes
|
||||
if($i->id && $i->id !== $a['guid']) continue;
|
||||
if(
|
||||
// the item matches if the GUID matches...
|
||||
($i->id && $i->id === $a['guid']) ||
|
||||
|
@ -252,9 +258,6 @@ class Feed {
|
|||
$found = true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// if we don't have a match, add the item to the definite new list
|
||||
$new[] = $index;
|
||||
}
|
||||
}
|
||||
if(!$found) $new[] = $index;
|
||||
|
|
|
@ -27,6 +27,9 @@ class TestFeed extends \PHPUnit\Framework\TestCase {
|
|||
$f = new Feed(null, $this->base."Deduplication/ID-Dates");
|
||||
$this->assertCount(2, $f->newItems);
|
||||
$this->assertTime($t, $f->newItems[0]->updatedDate);
|
||||
$f = new Feed(null, $this->base."Deduplication/IdenticalHashes");
|
||||
$this->assertCount(2, $f->newItems);
|
||||
$this->assertTime($t, $f->newItems[0]->updatedDate);
|
||||
}
|
||||
|
||||
function testHandleCacheHeadersOn304() {
|
||||
|
|
37
tests/docroot/Feed/Deduplication/IdenticalHashes.php
Normal file
37
tests/docroot/Feed/Deduplication/IdenticalHashes.php
Normal file
|
@ -0,0 +1,37 @@
|
|||
<?php return [
|
||||
'mime' => "application/rss+xml",
|
||||
'content' => <<<MESSAGE_BODY
|
||||
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
|
||||
<channel>
|
||||
<title>Test feed</title>
|
||||
<link>http://example.com/</link>
|
||||
<description>A basic feed for testing</description>
|
||||
|
||||
<item>
|
||||
<guid>1</guid>
|
||||
<description>Sample article 2</description>
|
||||
<pubDate>Sun, 19 May 2002 15:21:36 GMT</pubDate> <!-- This is the correct item and date -->
|
||||
<atom:updated>2002-04-19T15:21:36Z</atom:updated>
|
||||
</item>
|
||||
<item>
|
||||
<guid>1</guid>
|
||||
<description>Sample article 2</description>
|
||||
<pubDate>Sun, 19 May 2002 15:21:36 GMT</pubDate>
|
||||
<atom:updated>2002-04-19T15:21:36Z</atom:updated>
|
||||
</item>
|
||||
<item>
|
||||
<guid>1</guid>
|
||||
<description>Sample article 2</description>
|
||||
<pubDate>Sun, 19 May 2002 15:21:36 GMT</pubDate>
|
||||
<atom:updated>2002-04-19T15:21:36Z</atom:updated>
|
||||
</item>
|
||||
<item>
|
||||
<guid>2</guid>
|
||||
<description>Sample article 2</description>
|
||||
<pubDate>Sun, 19 May 2002 15:21:36 GMT</pubDate>
|
||||
<atom:updated>2002-04-19T15:21:36Z</atom:updated>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
MESSAGE_BODY
|
||||
];
|
Loading…
Reference in a new issue