2019-03-28 21:53:04 -04:00
|
|
|
<?php
|
|
|
|
/** @license MIT
|
|
|
|
* Copyright 2017 J. King, Dustin Wilson et al.
|
|
|
|
* See LICENSE and AUTHORS files for details */
|
|
|
|
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace JKingWeb\Arsse\ImportExport;
|
|
|
|
|
|
|
|
use JKingWeb\Arsse\Arsse;
|
2019-04-27 19:50:03 -04:00
|
|
|
use JKingWeb\Arsse\Database;
|
|
|
|
use JKingWeb\Arsse\Db\ExceptionInput as InputException;
|
2019-04-01 16:54:14 -04:00
|
|
|
use JKingWeb\Arsse\User\Exception as UserException;
|
2019-03-28 21:53:04 -04:00
|
|
|
|
|
|
|
class OPML {
|
2019-04-19 18:01:31 -04:00
|
|
|
/**
 * Imports subscriptions, folders, and tags from an OPML document into a user's account
 *
 * The document is parsed and validated first. Feeds are then registered (before the
 * transaction starts), after which all changes to the user's folders, subscriptions,
 * and tags are made within a single database transaction.
 *
 * @param string $user The user whose data is to be modified
 * @param string $opml The OPML document to import, as a string
 * @param bool $flat Whether to disregard any folder structure in the document; passed through to parse()
 * @param bool $replace Whether to overwrite the properties of existing subscriptions and remove any subscriptions, folders, and tags which are not present in the document
 * @return bool Always true; failures are signalled by exceptions
 * @throws UserException if the user does not exist
 * @throws Exception if the document cannot be parsed, or if a folder has a blank name or duplicates one of its siblings
 */
public function import(string $user, string $opml, bool $flat = false, bool $replace = false): bool {
    // fail early if the target user does not exist, as export() does
    if (!Arsse::$user->exists($user)) {
        throw new UserException("doesNotExist", ["action" => __FUNCTION__, "user" => $user]);
    }
    // first extract useful information from the input
    list($feeds, $folders) = $this->parse($opml, $flat);
    $folderMap = [];
    foreach ($folders as $f) {
        // check to make sure folder names are all valid
        if (!strlen(trim($f['name']))) {
            throw new Exception("invalidFolderName");
        }
        // check for duplicates among folders which share the same parent
        if (isset($folderMap[$f['parent']][$f['name']])) {
            throw new Exception("invalidFolderCopy");
        }
        $folderMap[$f['parent']][$f['name']] = true;
    }
    // get feed IDs for each URL, adding feeds where necessary
    foreach ($feeds as $k => $f) {
        $feeds[$k]['id'] = Arsse::$db->feedAdd($f['url']);
    }
    // start a transaction for atomic rollback
    $tr = Arsse::$db->begin();
    // get current state of database
    $foldersDb = iterator_to_array(Arsse::$db->folderList($user));
    $feedsDb = iterator_to_array(Arsse::$db->subscriptionList($user));
    $tagsDb = iterator_to_array(Arsse::$db->tagList($user));
    // reconcile folders; the map translates folder IDs assigned by the parser into database IDs, with 0 being the root folder in both
    $folderMap = [0 => 0];
    foreach ($folders as $id => $f) {
        $parent = $folderMap[$f['parent']];
        // find a match for the import folder in the existing folders
        foreach ($foldersDb as $db) {
            if ((int) $db['parent'] == $parent && $db['name'] === $f['name']) {
                $folderMap[$id] = (int) $db['id'];
                break;
            }
        }
        if (!isset($folderMap[$id])) {
            // if no existing folder exists, add one
            $folderMap[$id] = Arsse::$db->folderAdd($user, ['name' => $f['name'], 'parent' => $parent]);
        }
    }
    // process newsfeed subscriptions; the feed map translates feed IDs into subscription IDs
    $feedMap = [];
    $tagMap = [];
    foreach ($feeds as $f) {
        $folder = $folderMap[$f['folder']];
        $title = strlen(trim($f['title'])) ? $f['title'] : null;
        $found = false;
        // find a match for the import feed in existing subscriptions
        foreach ($feedsDb as $db) {
            if ((int) $db['feed'] == $f['id']) {
                $found = true;
                $feedMap[$f['id']] = (int) $db['id'];
                break;
            }
        }
        if (!$found) {
            // if no subscription exists, add one
            $feedMap[$f['id']] = Arsse::$db->subscriptionAdd($user, $f['url']);
        }
        if (!$found || $replace) {
            // set the subscription's properties, if this is a new feed or we're doing a full replacement
            Arsse::$db->subscriptionPropertiesSet($user, $feedMap[$f['id']], ['title' => $title, 'folder' => $folder]);
            // compile the set of used tags, if this is a new feed or we're doing a full replacement
            foreach ($f['tags'] as $t) {
                if (!strlen(trim($t))) {
                    // ignore any blank tags
                    continue;
                }
                // record the subscription ID rather than the feed ID, as the list is later handed to tagSubscriptionsSet()
                $tagMap[$t][] = $feedMap[$f['id']];
            }
        }
    }
    // set tags
    $mode = $replace ? Database::ASSOC_REPLACE : Database::ASSOC_ADD;
    foreach ($tagMap as $tag => $subs) {
        // make sure the tag exists
        $found = false;
        foreach ($tagsDb as $db) {
            if ($tag === $db['name']) {
                $found = true;
                break;
            }
        }
        if (!$found) {
            // add the tag if it wasn't found
            Arsse::$db->tagAdd($user, ['name' => $tag]);
        }
        Arsse::$db->tagSubscriptionsSet($user, $tag, $subs, $mode, true);
    }
    // finally, if we're performing a replacement, delete any subscriptions, folders, or tags which were not present in the import
    if ($replace) {
        foreach (array_diff(array_column($feedsDb, "id"), $feedMap) as $id) {
            try {
                Arsse::$db->subscriptionRemove($user, $id);
            } catch (InputException $e) {
                // ignore errors
            }
        }
        foreach (array_diff(array_column($foldersDb, "id"), $folderMap) as $id) {
            try {
                Arsse::$db->folderRemove($user, $id);
            } catch (InputException $e) {
                // ignore errors
            }
        }
        foreach (array_diff(array_column($tagsDb, "name"), array_keys($tagMap)) as $id) {
            try {
                Arsse::$db->tagRemove($user, $id, true);
            } catch (InputException $e) {
                // ignore errors
            }
        }
    }
    $tr->commit();
    return true;
}
|
|
|
|
|
2019-05-01 22:52:20 -04:00
|
|
|
/**
 * Parses an OPML document into lists of feeds and folders
 *
 * Outline elements with type="rss" are treated as feeds and their descendants are ignored;
 * all other outline elements are treated as folders. Nodes which are not outline elements
 * are skipped along with their descendants.
 *
 * @param string $opml The OPML document, as a string
 * @param bool $flat Whether to disregard folders; if true no folders are returned and every feed is assigned to the root folder (0)
 * @return array A two-member array: a list of feeds (each an array with 'url', 'title', 'folder', and 'tags' keys), followed by an array of folders indexed by ID (each an array with 'id', 'name', and 'parent' keys)
 * @throws Exception if the input is not well-formed XML, or does not have exactly one /opml/body element
 */
public function parse(string $opml, bool $flat): array {
    $d = new \DOMDocument;
    // an empty string is never a valid document; it is checked explicitly because the parser itself reports no error for it
    $empty = ($opml === "");
    if ($empty || !@$d->loadXML($opml)) {
        // not a valid XML document
        $err = $empty ? false : libxml_get_last_error();
        // the parser may not have recorded an error, in which case no position is available
        throw new Exception("invalidSyntax", ['line' => $err ? $err->line : 0, 'column' => $err ? $err->column : 0]);
    }
    $body = (new \DOMXPath($d))->query("/opml/body");
    if ($body->length != 1) {
        // not a valid OPML document
        throw new Exception("invalidSemantics", ['type' => "OPML"]);
    }
    $body = $body->item(0);
    // function to find the next node in the tree, in document order, without ever leaving the body element
    $next = function(\DOMNode $node, bool $visitChildren = true) use ($body) {
        if ($visitChildren && $node->hasChildNodes()) {
            return $node->firstChild;
        } elseif ($node->nextSibling) {
            return $node->nextSibling;
        } else {
            // climb until an ancestor with a following sibling is found, or the body is reached
            while (!$node->nextSibling && !$node->isSameNode($body)) {
                $node = $node->parentNode;
            }
            if (!$node->isSameNode($body)) {
                return $node->nextSibling;
            } else {
                // reaching the body means the whole tree has been visited
                return null;
            }
        }
    };
    $folders = [];
    $feeds = [];
    // add the root folder to a map from folder DOM nodes to folder ID numbers
    $folderMap = new \SplObjectStorage;
    $folderMap[$body] = sizeof($folderMap);
    // iterate through each node in the body
    $node = $body->firstChild;
    while ($node) {
        if ($node->nodeType == \XML_ELEMENT_NODE && $node->nodeName === "outline") {
            // process any nodes which are outlines
            if ($node->getAttribute("type") === "rss") {
                // feed nodes
                $url = $node->getAttribute("xmlUrl");
                $title = $node->getAttribute("text");
                // when folders are not being processed the parent is absent from the map, and the feed lands in the root folder
                $folder = $folderMap[$node->parentNode] ?? 0;
                $categories = $node->getAttribute("category");
                if (strlen($categories)) {
                    // collapse and trim whitespace from category names, if any, splitting along commas
                    $categories = array_map(function($v) {
                        return trim(preg_replace("/\s+/", " ", $v));
                    }, explode(",", $categories));
                } else {
                    $categories = [];
                }
                $feeds[] = ['url' => $url, 'title' => $title, 'folder' => $folder, 'tags' => $categories];
                // skip any child nodes of a feed outline-entry; stepping back to the parent node here would cause the parent to be processed a second time
                $node = $next($node, false);
            } else {
                // any outline entries which are not feeds are treated as folders
                if (!$flat) {
                    // only process folders if we're not treating the file as flat
                    $id = sizeof($folderMap);
                    $folderMap[$node] = $id;
                    $folders[$id] = ['id' => $id, 'name' => $node->getAttribute("text"), 'parent' => $folderMap[$node->parentNode]];
                }
                // proceed to child nodes, if any
                $node = $next($node);
            }
        } else {
            // skip any node which is not an outline element; if the node has descendants they are skipped as well
            $node = $next($node, false);
        }
    }
    return [$feeds, $folders];
}
|
|
|
|
|
2019-03-28 21:53:04 -04:00
|
|
|
/**
 * Serializes a user's subscriptions, folders, and tags as an OPML 2.0 document
 *
 * @param string $user The user whose data is to be exported
 * @param bool $flat Whether to omit folders and place every subscription directly in the document body
 * @return string The serialized XML document
 * @throws UserException if the user does not exist
 */
public function export(string $user, bool $flat = false): string {
    if (!Arsse::$user->exists($user)) {
        throw new UserException("doesNotExist", ["action" => __FUNCTION__, "user" => $user]);
    }
    // build the skeleton of the document: an opml root element with a version and an empty head
    $doc = new \DOMDocument("1.0", "utf-8");
    $doc->formatOutput = true;
    $root = $doc->appendChild($doc->createElement("opml"));
    $root->setAttribute("version", "2.0");
    $root->appendChild($doc->createElement("head"));
    // map of folder IDs to DOM nodes; the root folder (ID 0) is the body element
    $nodes = [0 => $doc->createElement("body")];
    // map of folder IDs to the IDs of their parents; the root folder has none
    $parentOf = [0 => null];
    // map of subscription IDs to lists of tag names
    $tagsBySub = [];
    // everything is read inside a transaction for read isolation
    $tr = Arsse::$db->begin();
    foreach (Arsse::$db->tagSummarize($user) as $row) {
        // commas separate tags in OPML and cannot be escaped, so they are dropped from tag names (which is lossy)
        $tagsBySub[$row['subscription']][] = str_replace(",", "", $row['name']);
    }
    if (!$flat) {
        // prepare an unattached outline element for each folder, keyed by its database ID
        foreach (Arsse::$db->folderList($user) as $row) {
            $folderEl = $doc->createElement("outline");
            $folderEl->setAttribute("text", $row['name']);
            $nodes[$row['id']] = $folderEl;
            // top-level folders are children of the root folder
            $parentOf[$row['id']] = $row['parent'] ?? 0;
        }
    }
    // attach every folder to its parent; the body, having no parent, goes into the document root
    foreach ($nodes as $id => $folderEl) {
        $container = $nodes[$parentOf[$id]] ?? $root;
        $container->appendChild($folderEl);
    }
    // add an outline element for each subscription to the node of its folder
    foreach (Arsse::$db->subscriptionList($user) as $row) {
        $feedEl = $doc->createElement("outline");
        $feedEl->setAttribute("type", "rss");
        $feedEl->setAttribute("text", $row['title']);
        $feedEl->setAttribute("xmlUrl", $row['url']);
        // the category attribute is omitted entirely when a subscription has no tags
        $subTags = $tagsBySub[$row['id']] ?? [];
        if (sizeof($subTags)) {
            $feedEl->setAttribute("category", implode(",", $subTags));
        }
        // folders have no nodes when output is flat, so subscriptions then fall back to the body
        $target = $nodes[$row['folder'] ?? 0] ?? $nodes[0];
        $target->appendChild($feedEl);
    }
    // nothing was written, so the transaction is simply released
    $tr->rollback();
    return $doc->saveXML();
}
|
2019-04-01 16:54:14 -04:00
|
|
|
|
|
|
|
/**
 * Exports a user's data as OPML and writes the result to a file
 *
 * @param string $file The path of the file to write
 * @param string $user The user whose data is to be exported
 * @param bool $flat Whether to omit folders from the output; passed through to export()
 * @return bool Always true; failures are signalled by exceptions
 * @throws Exception if the file could not be written ("fileUnwritable") or created ("fileUncreatable")
 */
public function exportFile(string $file, string $user, bool $flat = false): bool {
    $data = $this->export($user, $flat);
    // compare strictly against false: file_put_contents() returns the number of bytes written on success, and a count of zero is not a failure
    if (@file_put_contents($file, $data) === false) {
        // if it fails throw an exception; whether the file exists distinguishes the two failure modes
        $err = file_exists($file) ? "fileUnwritable" : "fileUncreatable";
        throw new Exception($err, ['file' => $file, 'format' => str_replace(__NAMESPACE__."\\", "", __CLASS__)]);
    }
    return true;
}
|
2019-05-01 10:46:44 -04:00
|
|
|
|
|
|
|
/**
 * Reads an OPML document from a file and imports it into a user's account
 *
 * @param string $file The path of the file to read
 * @param string $user The user whose data is to be modified
 * @param bool $flat Whether to disregard any folder structure in the document; passed through to import()
 * @param bool $replace Whether to perform a full replacement of the user's data; passed through to import()
 * @return bool The result of import()
 * @throws Exception if the file could not be read ("fileUnreadable") or does not exist ("fileMissing")
 */
public function importFile(string $file, string $user, bool $flat = false, bool $replace = false): bool {
    $data = @file_get_contents($file);
    if ($data === false) {
        // if it fails throw an exception; whether the file exists distinguishes the two failure modes
        $err = file_exists($file) ? "fileUnreadable" : "fileMissing";
        throw new Exception($err, ['file' => $file, 'format' => str_replace(__NAMESPACE__."\\", "", __CLASS__)]);
    }
    return $this->import($user, $data, $flat, $replace);
}

/**
 * Misspelled alias of importFile(), retained so that existing callers continue to work
 *
 * The $replace parameter now has a default value: a required parameter following an optional one
 * is deprecated as of PHP 8.0, and the default matches that of import().
 *
 * @deprecated Use importFile() instead
 * @param string $file The path of the file to read
 * @param string $user The user whose data is to be modified
 * @param bool $flat Whether to disregard any folder structure in the document
 * @param bool $replace Whether to perform a full replacement of the user's data
 * @return bool The result of importFile()
 * @throws Exception if the file could not be read or does not exist
 */
public function imortFile(string $file, string $user, bool $flat = false, bool $replace = false): bool {
    return $this->importFile($file, $user, $flat, $replace);
}
|
2019-03-28 21:53:04 -04:00
|
|
|
}
|