cql3/select_statement: reject PER PARTITION LIMIT with SELECT DISTINCT

Before this patch we silently allowed and ignored PER PARTITION LIMIT.
SELECT DISTINCT requires all the partition key columns, which means that
setting PER PARTITION LIMIT is redundant - only one result will be
returned from every partition anyway.

Cassandra behaves the same way, so this patch also ensures
compatibility.

Fixes scylladb/scylladb#15109

Closes scylladb/scylladb#22950
This commit is contained in:
Paweł Zakrzewski
2025-02-20 01:04:56 +01:00
committed by Botond Dénes
parent e6227f9a25
commit 854d2917a1
4 changed files with 16 additions and 6 deletions

View File

@@ -135,9 +135,9 @@ private:
selection::selection& selection,
const restrictions::statement_restrictions& restrictions);
static void validate_distinct_selection(const schema& schema,
void validate_distinct_selection(const schema& schema,
const selection::selection& selection,
const restrictions::statement_restrictions& restrictions);
const restrictions::statement_restrictions& restrictions) const;
/** If ALLOW FILTERING was not specified, this verifies that it is not needed */
void check_needs_filtering(

View File

@@ -2383,8 +2383,12 @@ select_statement::ordering_comparator_type select_statement::get_ordering_compar
void select_statement::validate_distinct_selection(const schema& schema,
const selection::selection& selection,
const restrictions::statement_restrictions& restrictions)
const restrictions::statement_restrictions& restrictions) const
{
if (_per_partition_limit) {
throw exceptions::invalid_request_exception("PER PARTITION LIMIT is not allowed with SELECT DISTINCT queries");
}
if (restrictions.has_non_primary_key_restriction() || restrictions.has_clustering_columns_restriction()) {
throw exceptions::invalid_request_exception(
"SELECT DISTINCT with WHERE clause only supports restriction by partition key.");

View File

@@ -16,7 +16,7 @@ def testSparseTable(cql, test_keyspace):
execute(cql, table, "INSERT INTO %s (userid, url, day, month, year) VALUES (?, ?, 1, 'jan', 2012)", i, f"http://foo.{tld}")
assertRowCount(execute(cql, table, "SELECT * FROM %s LIMIT 4"), 4)
@pytest.mark.xfail(reason="issues #9879, #15099, #15109")
@pytest.mark.xfail(reason="issues #9879, #15099")
def testPerPartitionLimit(cql, test_keyspace):
with create_table(cql, test_keyspace, "(a int, b int, c int, PRIMARY KEY (a, b))") as table:
for i in range(5):
@@ -110,7 +110,7 @@ def testPerPartitionLimit(cql, test_keyspace):
assertInvalidMessage(cql, table, "PER PARTITION LIMIT is not allowed with aggregate queries.",
"SELECT COUNT(*) FROM %s PER PARTITION LIMIT ?", 3)
@pytest.mark.xfail(reason="issues #9879, #15109")
@pytest.mark.xfail(reason="issues #9879")
def testPerPartitionLimitWithStaticDataAndPaging(cql, test_keyspace):
with create_table(cql, test_keyspace, "(a int, b int, s int static, c int, PRIMARY KEY (a, b))") as table:
for i in range(5):

View File

@@ -74,7 +74,13 @@ def test_distinct_limit(cql, test_keyspace):
assert n == len(results)
assert set(results).issubset(set(all))
# Test combination of SELECT DISTINCT, COUNT, GROUP BY and LIMIT.
# Test that SELECT DISTINCT and PER PARTITION LIMIT is not allowed in the same query
# reproduces #15109
def test_distinct_and_per_partition_limit(cql, table1):
with pytest.raises(InvalidRequest, match='PER PARTITION LIMIT is not allowed with SELECT DISTINCT queries'):
cql.execute(f"select distinct p from {table1} per partition limit 1")
# Test combination of SELECT DISTINCT, COUNT, GROUP BY and LIMIT.
# COUNT + GROUP BY means generate one count per partition, and the
# SELECT DISTINCT simply hands the counter just one row per partition
# to count, so all the counts come up 1. Adding a LIMIT to all of