1
1
Fork 0
mirror of https://code.mensbeam.com/MensBeam/Arsse.git synced 2025-01-18 17:10:33 +00:00
Arsse/lib/ImportExport/OPML.php

157 lines
7.1 KiB
PHP
Raw Normal View History

2019-03-28 21:53:04 -04:00
<?php
/** @license MIT
* Copyright 2017 J. King, Dustin Wilson et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
2021-04-14 11:17:01 -04:00
2019-03-28 21:53:04 -04:00
namespace JKingWeb\Arsse\ImportExport;
use JKingWeb\Arsse\Arsse;
use JKingWeb\Arsse\User\ExceptionConflict as UserException;
2019-03-28 21:53:04 -04:00
class OPML extends AbstractImportExport {
2019-07-05 14:58:05 -04:00
/**
 * Parses an OPML document into a list of feeds and a list of folders
 *
 * The return value is a two-member array:
 *
 * - the first member is a list of feeds, each an array with the keys "url", "title",
 *   "folder" (the number of the containing folder, zero being the root) and "tags"
 *   (an array of category names)
 * - the second member is an array of folders indexed by folder number, each an array
 *   with the keys "id", "name" and "parent" (the number of the parent folder, zero
 *   being the root); the root folder itself is never listed
 *
 * @param string $opml The serialized OPML document
 * @param bool $flat Whether to ignore folder structure; if true no folders are returned and every feed is assigned to the root folder
 * @throws Exception "invalidSyntax" if the input cannot be loaded as XML, or "invalidSemantics" if it does not contain exactly one /opml/body element
 */
protected function parse(string $opml, bool $flat): array {
    $d = new \DOMDocument;
    if (!@$d->loadXML($opml)) {
        // not a valid XML document
        $err = libxml_get_last_error();
        // libxml_get_last_error() returns false when no error was recorded, so only read the position if we actually have an error object
        throw new Exception("invalidSyntax", [
            'line'   => $err ? $err->line : 0,
            'column' => $err ? $err->column : 0,
        ]);
    }
    $body = (new \DOMXPath($d))->query("/opml/body");
    if ($body->length !== 1) {
        // not a valid OPML document
        throw new Exception("invalidSemantics", ['type' => "OPML"]);
    }
    $body = $body->item(0);
    // function to find the next node in the tree, in document order, without ever leaving the body;
    // when $visitChildren is false the descendents of the node are skipped
    $next = function(\DOMNode $node, bool $visitChildren = true) use ($body) {
        if ($visitChildren && $node->hasChildNodes()) {
            return $node->firstChild;
        }
        // climb towards the body until a node with a following sibling is found
        while (!$node->nextSibling && !$node->isSameNode($body)) {
            $node = $node->parentNode;
        }
        // arriving at the body itself means the whole tree has been visited
        return $node->isSameNode($body) ? null : $node->nextSibling;
    };
    $folders = [];
    $feeds = [];
    // add the root folder to a map from folder DOM nodes to folder ID numbers
    $folderMap = new \SplObjectStorage;
    $folderMap[$body] = sizeof($folderMap);
    // iterate through each node in the body
    $node = $body->firstChild;
    while ($node) {
        if ($node->nodeType === \XML_ELEMENT_NODE && $node->nodeName === "outline") {
            // process any nodes which are outlines
            if ($node->getAttribute("type") === "rss") {
                // feed nodes
                $url = $node->getAttribute("xmlUrl");
                $title = $node->getAttribute("text");
                // in flat mode no folder is ever added to the map, so anything not directly in the body falls back to the root
                $folder = $folderMap[$node->parentNode] ?? 0;
                $categories = $node->getAttribute("category");
                if (strlen($categories)) {
                    // collapse and trim whitespace from category names, if any, splitting along commas
                    $categories = array_map(function($v) {
                        return trim(preg_replace("/\s+/", " ", $v));
                    }, explode(",", $categories));
                    // filter out any blank categories
                    $categories = array_filter($categories, function($v) {
                        return strlen($v);
                    });
                } else {
                    $categories = [];
                }
                $feeds[] = ['url' => $url, 'title' => $title, 'folder' => $folder, 'tags' => $categories];
                // skip any child nodes of a feed outline-entry; this must go through the traversal helper
                // rather than falling back to the parent node, as returning to the parent would process
                // the parent folder a second time and walk its children again without end
                $node = $next($node, false);
            } else {
                // any outline entries which are not feeds are treated as folders
                if (!$flat) {
                    // only process folders if we're not treating the file as flat
                    $id = sizeof($folderMap);
                    $folderMap[$node] = $id;
                    $folders[$id] = ['id' => $id, 'name' => $node->getAttribute("text"), 'parent' => $folderMap[$node->parentNode]];
                }
                // proceed to child nodes, if any
                $node = $next($node);
            }
        } else {
            // skip any node which is not an outline element; if the node has descendents they are skipped as well
            $node = $next($node, false);
        }
    }
    return [$feeds, $folders];
}
2019-03-28 21:53:04 -04:00
/**
 * Serializes a user's subscriptions as an OPML document
 *
 * Folders become nested "outline" elements and each subscription becomes an "outline"
 * element of type "rss" placed inside the element for its folder. Tags are written to
 * the "category" attribute as a comma-separated list.
 *
 * @param string $user The user whose subscriptions are to be exported
 * @param bool $flat Whether to omit folders; if true every subscription is placed directly in the body
 * @throws UserException "doesNotExist" if the database reports that the user does not exist
 */
public function export(string $user, bool $flat = false): string {
    if (!Arsse::$db->userExists($user)) {
        throw new UserException("doesNotExist", ["action" => __FUNCTION__, "user" => $user]);
    }
    // set up the skeleton of the document: an opml root element holding an empty head
    $doc = new \DOMDocument("1.0", "utf-8");
    $doc->formatOutput = true;
    $root = $doc->createElement("opml");
    $doc->appendChild($root);
    $root->setAttribute("version", "2.0");
    $root->appendChild($doc->createElement("head"));
    // folder number zero is the root folder, which is the body element in OPML terms
    $nodes = [0 => $doc->createElement("body")];
    $parentOf = [0 => null];
    $tagsBySub = [];
    // all reads happen inside a single transaction for the sake of isolation
    $tr = Arsse::$db->begin();
    // build a map from subscription to tag names
    foreach (Arsse::$db->tagSummarize($user) as $row) {
        // OPML cannot escape commas within a category, so they are simply dropped (this is lossy)
        $tagsBySub[$row['subscription']][] = str_replace(",", "", $row['name']);
    }
    // folders are only collected when a hierarchical export was asked for
    if (!$flat) {
        foreach (Arsse::$db->folderList($user) as $row) {
            // remember which folder contains this one; top-level folders belong to the root
            $parentOf[$row['id']] = $row['parent'] ?? 0;
            // prepare the element for the folder under its database ID; attachment happens afterwards
            $outline = $doc->createElement("outline");
            $outline->setAttribute("text", $row['name']);
            $nodes[$row['id']] = $outline;
        }
    }
    // attach every folder element to its parent's element; the body has no parent folder and so goes into the root element
    foreach ($nodes as $id => $outline) {
        $container = $nodes[$parentOf[$id]] ?? $root;
        $container->appendChild($outline);
    }
    // add an element for each subscription to the element of its folder
    foreach (Arsse::$db->subscriptionList($user) as $row) {
        $feed = $doc->createElement("outline");
        $feed->setAttribute("type", "rss");
        $feed->setAttribute("text", $row['title']);
        $feed->setAttribute("xmlUrl", $row['url']);
        // the category attribute is left out entirely when the subscription has no tags
        $names = $tagsBySub[$row['id']] ?? [];
        if (sizeof($names)) {
            $feed->setAttribute("category", implode(",", $names));
        }
        // a folder without an element (always the case for flat output) means the subscription goes into the body
        $target = $nodes[$row['folder'] ?? 0] ?? $nodes[0];
        $target->appendChild($feed);
    }
    // nothing was written, so the transaction is simply rolled back
    $tr->rollback();
    // hand back the serialized document
    return $doc->saveXML();
}
}