mirror of
https://code.mensbeam.com/MensBeam/Arsse.git
synced 2024-12-22 13:12:41 +00:00
Fixed more Feed bugs
- Two items with differing IDs but identical hashes were matching in the deduplicator; they shouldn't
- They would also match in the database matcher, and shouldn't
- The second-pass database matcher was too aggressive in classifying items as new
This commit is contained in:
parent
fd7d1c3192
commit
6771e8916a
3 changed files with 46 additions and 3 deletions
|
@ -139,6 +139,8 @@ class Feed {
|
||||||
$out = [];
|
$out = [];
|
||||||
foreach($items as $item) {
|
foreach($items as $item) {
|
||||||
foreach($out as $index => $check) {
|
foreach($out as $index => $check) {
|
||||||
|
// if the two items both have IDs and they differ, they do not match, regardless of hashes
|
||||||
|
if($item->id && $check->id && $item->id != $check->id) continue;
|
||||||
// if the two items have the same ID or any one hash matches, they are two versions of the same item
|
// if the two items have the same ID or any one hash matches, they are two versions of the same item
|
||||||
if(
|
if(
|
||||||
($item->id && $check->id && $item->id == $check->id) ||
|
($item->id && $check->id && $item->id == $check->id) ||
|
||||||
|
@ -185,6 +187,8 @@ class Feed {
|
||||||
foreach($items as $index => $i) {
|
foreach($items as $index => $i) {
|
||||||
$found = false;
|
$found = false;
|
||||||
foreach($articles as $a) {
|
foreach($articles as $a) {
|
||||||
|
// if the item has an ID and it doesn't match the article ID, the two don't match, regardless of hashes
|
||||||
|
if($i->id && $i->id !== $a['guid']) continue;
|
||||||
if(
|
if(
|
||||||
// the item matches if the GUID matches...
|
// the item matches if the GUID matches...
|
||||||
($i->id && $i->id === $a['guid']) ||
|
($i->id && $i->id === $a['guid']) ||
|
||||||
|
@ -228,6 +232,8 @@ class Feed {
|
||||||
$i = $items[$index];
|
$i = $items[$index];
|
||||||
$found = false;
|
$found = false;
|
||||||
foreach($articles as $a) {
|
foreach($articles as $a) {
|
||||||
|
// if the item has an ID and it doesn't match the article ID, the two don't match, regardless of hashes
|
||||||
|
if($i->id && $i->id !== $a['guid']) continue;
|
||||||
if(
|
if(
|
||||||
// the item matches if the GUID matches...
|
// the item matches if the GUID matches...
|
||||||
($i->id && $i->id === $a['guid']) ||
|
($i->id && $i->id === $a['guid']) ||
|
||||||
|
@ -252,9 +258,6 @@ class Feed {
|
||||||
$found = true;
|
$found = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
// if we don't have a match, add the item to the definite new list
|
|
||||||
$new[] = $index;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(!$found) $new[] = $index;
|
if(!$found) $new[] = $index;
|
||||||
|
|
|
@ -27,6 +27,9 @@ class TestFeed extends \PHPUnit\Framework\TestCase {
|
||||||
$f = new Feed(null, $this->base."Deduplication/ID-Dates");
|
$f = new Feed(null, $this->base."Deduplication/ID-Dates");
|
||||||
$this->assertCount(2, $f->newItems);
|
$this->assertCount(2, $f->newItems);
|
||||||
$this->assertTime($t, $f->newItems[0]->updatedDate);
|
$this->assertTime($t, $f->newItems[0]->updatedDate);
|
||||||
|
$f = new Feed(null, $this->base."Deduplication/IdenticalHashes");
|
||||||
|
$this->assertCount(2, $f->newItems);
|
||||||
|
$this->assertTime($t, $f->newItems[0]->updatedDate);
|
||||||
}
|
}
|
||||||
|
|
||||||
function testHandleCacheHeadersOn304() {
|
function testHandleCacheHeadersOn304() {
|
||||||
|
|
37
tests/docroot/Feed/Deduplication/IdenticalHashes.php
Normal file
37
tests/docroot/Feed/Deduplication/IdenticalHashes.php
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
<?php return [
|
||||||
|
'mime' => "application/rss+xml",
|
||||||
|
'content' => <<<MESSAGE_BODY
|
||||||
|
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
|
||||||
|
<channel>
|
||||||
|
<title>Test feed</title>
|
||||||
|
<link>http://example.com/</link>
|
||||||
|
<description>A basic feed for testing</description>
|
||||||
|
|
||||||
|
<item>
|
||||||
|
<guid>1</guid>
|
||||||
|
<description>Sample article 2</description>
|
||||||
|
<pubDate>Sun, 19 May 2002 15:21:36 GMT</pubDate> <!-- This is the correct item and date -->
|
||||||
|
<atom:updated>2002-04-19T15:21:36Z</atom:updated>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<guid>1</guid>
|
||||||
|
<description>Sample article 2</description>
|
||||||
|
<pubDate>Sun, 19 May 2002 15:21:36 GMT</pubDate>
|
||||||
|
<atom:updated>2002-04-19T15:21:36Z</atom:updated>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<guid>1</guid>
|
||||||
|
<description>Sample article 2</description>
|
||||||
|
<pubDate>Sun, 19 May 2002 15:21:36 GMT</pubDate>
|
||||||
|
<atom:updated>2002-04-19T15:21:36Z</atom:updated>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<guid>2</guid>
|
||||||
|
<description>Sample article 2</description>
|
||||||
|
<pubDate>Sun, 19 May 2002 15:21:36 GMT</pubDate>
|
||||||
|
<atom:updated>2002-04-19T15:21:36Z</atom:updated>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
||||||
|
MESSAGE_BODY
|
||||||
|
];
|
Loading…
Reference in a new issue