1
1
Fork 0
mirror of https://code.mensbeam.com/MensBeam/Arsse.git synced 2024-12-22 21:22:40 +00:00

Work around limit to SQL parameter placeholders for IN() clauses

Improves #150

LIKE-based matches also need to be similarly conservative
This commit is contained in:
J. King 2019-03-01 22:36:25 -05:00
parent 6857e8ec1b
commit 21fdd66d37
8 changed files with 155 additions and 178 deletions

View file

@ -38,8 +38,10 @@ use JKingWeb\Arsse\Misc\ValueInfo;
class Database {
/** The version number of the latest schema the interface is aware of */
const SCHEMA_VERSION = 4;
/** The maximum number of articles to mark in one query without chunking */
const LIMIT_ARTICLES = 50;
/** The size of a set of values beyond which the set will be embedded into the query text */
const LIMIT_SET_SIZE = 25;
/** The length of a string in an embedded set beyond which a parameter placeholder will be used for the string */
const LIMIT_SET_STRING_LENGTH = 200;
/** A map database driver short-names and their associated class names */
const DRIVER_NAMES = [
'sqlite3' => \JKingWeb\Arsse\Db\SQLite3\Driver::class,
@ -126,29 +128,50 @@ class Database {
return $out;
}
/** Conputes the contents of an SQL "IN()" clause, producing one parameter placeholder for each input value
/** Computes the contents of an SQL "IN()" clause, for each input value either embedding the value or producing a parameter placeholder
*
* Returns an indexed array containing the clause text, an array of types, and the array of values
* Returns an indexed array containing the clause text, an array of types, and an array of values. Note that the array of output values may not match the array of input values
*
* @param array $values Arbitrary values
* @param string $type A single data type applied to each value
*/
protected function generateIn(array $values, string $type): array {
$out = [
"", // query clause
[], // binding types
$values, // binding values
];
if (sizeof($values)) {
// the query clause is just a series of question marks separated by commas
$out[0] = implode(",", array_fill(0, sizeof($values), "?"));
// the binding types are just a repetition of the supplied type
$out[1] = array_fill(0, sizeof($values), $type);
} else {
if (!sizeof($values)) {
// if the set is empty, some databases require an explicit null
$out[0] = "null";
return ["null", [], []];
}
$t = (Statement::TYPES[$type] ?? 0) % Statement::T_NOT_NULL;
if (sizeof($values) > self::LIMIT_SET_SIZE && ($t == Statement::T_INTEGER || $t == Statement::T_STRING)) {
$clause = [];
$params = [];
$count = 0;
$convType = Db\AbstractStatement::TYPE_NORM_MAP[Statement::TYPES[$type]];
foreach($values as $v) {
$v = ValueInfo::normalize($v, $convType, null, "sql");
if (is_null($v)) {
// nulls are pointless to have
continue;
} elseif (is_string($v)) {
if (strlen($v) > self::LIMIT_SET_STRING_LENGTH) {
$clause[] = "?";
$params[] = $v;
} else {
$clause[] = $this->db->literalString($v);
}
} else {
$clause[] = ValueInfo::normalize($v, ValueInfo::T_STRING, null, "sql");
}
$count++;
}
if (!$count) {
// the set is actually empty
return ["null", [], []];
} else {
return [implode(",", $clause), array_fill(0, sizeof($params), $type), $params];
}
} else {
return [implode(",", array_fill(0, sizeof($values), "?")), array_fill(0, sizeof($values), $type), $values];
}
return $out;
}
/** Computes basic LIKE-based text search constraints for use in a WHERE clause
@ -1074,10 +1097,10 @@ class Database {
*/
public function feedMatchIds(int $feedID, array $ids = [], array $hashesUT = [], array $hashesUC = [], array $hashesTC = []): Db\Result {
// compile SQL IN() clauses and necessary type bindings for the four identifier lists
list($cId, $tId) = $this->generateIn($ids, "str");
list($cHashUT, $tHashUT) = $this->generateIn($hashesUT, "str");
list($cHashUC, $tHashUC) = $this->generateIn($hashesUC, "str");
list($cHashTC, $tHashTC) = $this->generateIn($hashesTC, "str");
list($cId, $tId, $vId) = $this->generateIn($ids, "str");
list($cHashUT, $tHashUT, $vHashUT) = $this->generateIn($hashesUT, "str");
list($cHashUC, $tHashUC, $vHashUC) = $this->generateIn($hashesUC, "str");
list($cHashTC, $tHashTC, $vHashTC) = $this->generateIn($hashesTC, "str");
// perform the query
return $articles = $this->db->prepare(
"SELECT id, edited, guid, url_title_hash, url_content_hash, title_content_hash FROM arsse_articles WHERE feed = ? and (guid in($cId) or url_title_hash in($cHashUT) or url_content_hash in($cHashUC) or title_content_hash in($cHashTC))",
@ -1086,7 +1109,7 @@ class Database {
$tHashUT,
$tHashUC,
$tHashTC
)->run($feedID, $ids, $hashesUT, $hashesUC, $hashesTC);
)->run($feedID, $vId, $vHashUT, $vHashUC, $vHashTC);
}
/** Computes an SQL query to find and retrieve data about articles in the database
@ -1180,25 +1203,25 @@ class Database {
$q->setLimit($context->limit, $context->offset);
// handle the simple context options
$options = [
// each context array consists of a column identifier (see $colDefs above), a comparison operator, a data type, an option to pair with for BETWEEN evaluation, and an upper bound if the value is an array
"edition" => ["edition", "=", "int", "", 1],
"editions" => ["edition", "in", "int", "", self::LIMIT_ARTICLES],
"article" => ["id", "=", "int", "", 1],
"articles" => ["id", "in", "int", "", self::LIMIT_ARTICLES],
"oldestArticle" => ["id", ">=", "int", "latestArticle", 1],
"latestArticle" => ["id", "<=", "int", "oldestArticle", 1],
"oldestEdition" => ["edition", ">=", "int", "latestEdition", 1],
"latestEdition" => ["edition", "<=", "int", "oldestEdition", 1],
"modifiedSince" => ["modified_date", ">=", "datetime", "notModifiedSince", 1],
"notModifiedSince" => ["modified_date", "<=", "datetime", "modifiedSince", 1],
"markedSince" => ["marked_date", ">=", "datetime", "notMarkedSince", 1],
"notMarkedSince" => ["marked_date", "<=", "datetime", "markedSince", 1],
"folderShallow" => ["folder", "=", "int", "", 1],
"subscription" => ["subscription", "=", "int", "", 1],
"unread" => ["unread", "=", "bool", "", 1],
"starred" => ["starred", "=", "bool", "", 1],
// each context array consists of a column identifier (see $colDefs above), a comparison operator, a data type, and an option to pair with for BETWEEN evaluation
"edition" => ["edition", "=", "int", ""],
"editions" => ["edition", "in", "int", ""],
"article" => ["id", "=", "int", ""],
"articles" => ["id", "in", "int", ""],
"oldestArticle" => ["id", ">=", "int", "latestArticle"],
"latestArticle" => ["id", "<=", "int", "oldestArticle"],
"oldestEdition" => ["edition", ">=", "int", "latestEdition"],
"latestEdition" => ["edition", "<=", "int", "oldestEdition"],
"modifiedSince" => ["modified_date", ">=", "datetime", "notModifiedSince"],
"notModifiedSince" => ["modified_date", "<=", "datetime", "modifiedSince"],
"markedSince" => ["marked_date", ">=", "datetime", "notMarkedSince"],
"notMarkedSince" => ["marked_date", "<=", "datetime", "markedSince"],
"folderShallow" => ["folder", "=", "int", ""],
"subscription" => ["subscription", "=", "int", ""],
"unread" => ["unread", "=", "bool", ""],
"starred" => ["starred", "=", "bool", ""],
];
foreach ($options as $m => list($col, $op, $type, $pair, $max)) {
foreach ($options as $m => list($col, $op, $type, $pair)) {
if (!$context->$m()) {
// context is not being used
continue;
@ -1206,8 +1229,6 @@ class Database {
// context option is an array of values
if (!$context->$m) {
throw new Db\ExceptionInput("tooShort", ['field' => $m, 'action' => $this->caller(), 'min' => 1]); // must have at least one array element
} elseif (sizeof($context->$m) > $max) {
throw new Db\ExceptionInput("tooLong", ['field' => $m, 'action' => $this->caller(), 'max' => $max]); // @codeCoverageIgnore
}
list($clause, $types, $values) = $this->generateIn($context->$m, $type);
$q->setWhere("{$colDefs[$col]} $op ($clause)", $types, $values);
@ -1224,7 +1245,7 @@ class Database {
}
}
// further handle exclusionary options if specified
foreach ($options as $m => list($col, $op, $type, $pair, $max)) {
foreach ($options as $m => list($col, $op, $type, $pair)) {
if (!method_exists($context->not, $m) || !$context->not->$m()) {
// context option is not being used
continue;
@ -1232,8 +1253,6 @@ class Database {
if (!$context->not->$m) {
// for exclusions we don't care if the array is empty
continue;
} elseif (sizeof($context->not->$m) > $max) {
throw new Db\ExceptionInput("tooLong", ['field' => "$m (not)", 'action' => $this->caller(), 'max' => $max]);
}
list($clause, $types, $values) = $this->generateIn($context->not->$m, $type);
$q->setWhereNot("{$colDefs[$col]} $op ($clause)", $types, $values);
@ -1315,35 +1334,10 @@ class Database {
$q->setWhereNot(...$this->generateSearch($context->not->$m, $cols, true));
}
// return the query
//var_export((string) $q);
return $q;
}
/** Chunk a context with more than the maximum number of articles or editions into an array of contexts */
protected function contextChunk(Context $context): array {
$exception = "";
if ($context->editions()) {
// editions take precedence over articles
if (sizeof($context->editions) > self::LIMIT_ARTICLES) {
$exception = "editions";
}
} elseif ($context->articles()) {
if (sizeof($context->articles) > self::LIMIT_ARTICLES) {
$exception = "articles";
}
}
if ($exception) {
$out = [];
$list = array_chunk($context->$exception, self::LIMIT_ARTICLES);
foreach ($list as $chunk) {
$out[] = (clone $context)->$exception($chunk);
}
return $out;
} else {
return [];
}
}
/** Lists articles in the database which match a given query context
*
* If an empty column list is supplied, a count of articles is returned instead
@ -1357,23 +1351,12 @@ class Database {
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
}
$context = $context ?? new Context;
// if the context has more articles or editions than we can process in one query, perform a series of queries and return an aggregate result
if ($contexts = $this->contextChunk($context)) {
$out = [];
$tr = $this->begin();
foreach ($contexts as $context) {
$out[] = $this->articleList($user, $context, $fields);
}
$tr->commit();
return new Db\ResultAggregate(...$out);
} else {
$q = $this->articleQuery($user, $context, $fields);
$q->setOrder("arsse_articles.edited".($context->reverse ? " desc" : ""));
$q->setOrder("latest_editions.edition".($context->reverse ? " desc" : ""));
// perform the query and return results
return $this->db->prepare($q->getQuery(), $q->getTypes())->run($q->getValues());
}
}
/** Returns a count of articles which match the given query context
*
@ -1385,20 +1368,9 @@ class Database {
throw new User\ExceptionAuthz("notAuthorized", ["action" => __FUNCTION__, "user" => $user]);
}
$context = $context ?? new Context;
// if the context has more articles or editions than we can process in one query, perform a series of queries and return an aggregate result
if ($contexts = $this->contextChunk($context)) {
$out = 0;
$tr = $this->begin();
foreach ($contexts as $context) {
$out += $this->articleCount($user, $context);
}
$tr->commit();
return $out;
} else {
$q = $this->articleQuery($user, $context, []);
return (int) $this->db->prepare($q->getQuery(), $q->getTypes())->run($q->getValues())->getValue();
}
}
/** Applies one or multiple modifications to all articles matching the given query context
*
@ -1425,16 +1397,6 @@ class Database {
return 0;
}
$context = $context ?? new Context;
// if the context has more articles or editions than we can process in one query, perform a series of queries and return an aggregate result
if ($contexts = $this->contextChunk($context)) {
$out = 0;
$tr = $this->begin();
foreach ($contexts as $context) {
$out += $this->articleMark($user, $data, $context);
}
$tr->commit();
return $out;
} else {
$tr = $this->begin();
$out = 0;
if ($data['read'] || $data['starred'] || strlen($data['note'] ?? "")) {
@ -1499,7 +1461,6 @@ class Database {
$tr->commit();
return $out;
}
}
/** Returns statistics about the articles starred by the given user
*
@ -1685,21 +1646,10 @@ class Database {
public function editionArticle(int ...$edition): array {
$out = [];
$context = (new Context)->editions($edition);
// if the context has more articles or editions than we can process in one query, perform a series of queries and return an aggregate result
if ($contexts = $this->contextChunk($context)) {
$articles = $editions = [];
foreach ($contexts as $context) {
$out = $this->editionArticle(...$context->editions);
$editions = array_merge($editions, array_map("intval", array_keys($out)));
$articles = array_merge($articles, array_map("intval", array_values($out)));
}
return array_combine($editions, $articles);
} else {
list($in, $inTypes) = $this->generateIn($context->editions, "int");
$out = $this->db->prepare("SELECT id as edition, article from arsse_editions where id in($in)", $inTypes)->run($context->editions)->getAll();
list($in, $inTypes, $inValues) = $this->generateIn($context->editions, "int");
$out = $this->db->prepare("SELECT id as edition, article from arsse_editions where id in($in)", $inTypes)->run($inValues)->getAll();
return $out ? array_combine(array_column($out, "edition"), array_column($out, "article")) : [];
}
}
/** Creates a label, and returns its numeric identifier
*

View file

@ -76,4 +76,10 @@ interface Driver {
* - "like": the case-insensitive LIKE operator
*/
public function sqlToken(string $token): string;
/** Returns a string literal which is properly escaped to guard against SQL injections. Delimiters are included in the output string
*
* This functionality should be avoided in favour of using statement parameters whenever possible
*/
public function literalString(string $str): string;
}

View file

@ -212,4 +212,8 @@ class Driver extends \JKingWeb\Arsse\Db\AbstractDriver {
public function prepareArray(string $query, array $paramTypes): \JKingWeb\Arsse\Db\Statement {
return new Statement($this->db, $query, $paramTypes, $this->packetSize);
}
public function literalString(string $str): string {
return "'".$this->db->real_escape_string($str)."'";
}
}

View file

@ -28,4 +28,8 @@ trait PDODriver {
}
return new PDOResult($this->db, $r);
}
public function literalString(string $str): string {
return $this->db->quote($str);
}
}

View file

@ -221,4 +221,8 @@ class Driver extends \JKingWeb\Arsse\Db\AbstractDriver {
public function prepareArray(string $query, array $paramTypes): \JKingWeb\Arsse\Db\Statement {
return new Statement($this->db, $query, $paramTypes);
}
public function literalString(string $str): string {
return pg_escape_literal($this->db, $str);
}
}

View file

@ -179,4 +179,8 @@ class Driver extends \JKingWeb\Arsse\Db\AbstractDriver {
$this->exec((!$rollback) ? "COMMIT" : "ROLLBACK");
return true;
}
public function literalString(string $str): string {
return "'".\SQLite3::escapeString($str)."'";
}
}

View file

@ -432,7 +432,7 @@ trait SeriesArticle {
"Multiple unstarred articles" => [(new Context)->articles([1,2,3])->starred(false), [2,3]],
"Multiple articles" => [(new Context)->articles([1,20,50]), [1,20]],
"Multiple editions" => [(new Context)->editions([1,1001,50]), [1,20]],
"150 articles" => [(new Context)->articles(range(1, Database::LIMIT_ARTICLES * 3)), [1,2,3,4,5,6,7,8,19,20]],
"150 articles" => [(new Context)->articles(range(1, Database::LIMIT_SET_SIZE * 3)), [1,2,3,4,5,6,7,8,19,20]],
"Search title or content 1" => [(new Context)->searchTerms(["Article"]), [1,2,3]],
"Search title or content 2" => [(new Context)->searchTerms(["one", "first"]), [1]],
"Search title or content 3" => [(new Context)->searchTerms(["one first"]), []],
@ -455,6 +455,7 @@ trait SeriesArticle {
"Search with exclusion" => [(new Context)->searchTerms(["Article"])->not->searchTerms(["one", "two"]), [3]],
"Excluded folder tree" => [(new Context)->not->folder(1), [1,2,3,4,19,20]],
"Excluding label ID 2" => [(new Context)->not->label(2), [2,3,4,6,7,8,19]],
"Excluding label 'Fascinating'" => [(new Context)->not->labelName("Fascinating"), [2,3,4,6,7,8,19]],
];
}
@ -744,7 +745,7 @@ trait SeriesArticle {
}
public function testMarkTooManyMultipleArticles() {
$this->assertSame(7, Arsse::$db->articleMark($this->user, ['read'=>false,'starred'=>true], (new Context)->articles(range(1, Database::LIMIT_ARTICLES * 3))));
$this->assertSame(7, Arsse::$db->articleMark($this->user, ['read'=>false,'starred'=>true], (new Context)->articles(range(1, Database::LIMIT_SET_SIZE * 3))));
}
public function testMarkAMissingArticle() {
@ -907,7 +908,7 @@ trait SeriesArticle {
$this->assertSame(2, Arsse::$db->articleCount("john.doe@example.com", (new Context)->starred(true)));
$this->assertSame(4, Arsse::$db->articleCount("john.doe@example.com", (new Context)->folder(1)));
$this->assertSame(0, Arsse::$db->articleCount("jane.doe@example.com", (new Context)->starred(true)));
$this->assertSame(10, Arsse::$db->articleCount("john.doe@example.com", (new Context)->articles(range(1, Database::LIMIT_ARTICLES *3))));
$this->assertSame(10, Arsse::$db->articleCount("john.doe@example.com", (new Context)->articles(range(1, Database::LIMIT_SET_SIZE * 3))));
}
public function testCountArticlesWithoutAuthority() {

View file

@ -378,4 +378,8 @@ abstract class BaseDriver extends \JKingWeb\Arsse\Test\AbstractTest {
$this->drv->savepointUndo();
$this->assertTrue($this->exec(str_replace("#", "3", $this->setVersion)));
}
public function testProduceAStringLiteral() {
$this->assertSame("'It''s a string!'", $this->drv->literalString("It's a string!"));
}
}