Merge 'Fix index name conflicts with regular tables' from Piotr Sarna

When an index is created without an explicit name, a default name is chosen. However, there was no check whether a table with a conflicting name already exists. The check is now in place, and if any conflict is found, a new index name is chosen instead.

When an index is created *with* an explicit name and a conflicting regular table is found, index creation should simply fail.

This series comes with a test.

Fixes #8620
Tests: unit(release)
Closes #8632

* github.com:scylladb/scylla:
  cql-pytest: add regression tests for index creation
  cql3: fail to create an index if there is a name conflict
  database: check for conflicting table names for indexes

(cherry picked from commit cee4c075d2)
2021-05-11 18:40:15 +03:00
parent 69daa9fd00
commit cb3225f2de
3 changed files with 79 additions and 2 deletions
--- a/cql3/statements/create_index_statement.cc
+++ b/cql3/statements/create_index_statement.cc
@@ -288,6 +288,13 @@ create_index_statement::announce_migration(service::storage_proxy& proxy) const
        }
        accepted_name = db.get_available_index_name(keyspace(), column_family(), index_name_root);
    }
+    auto index_table_name = secondary_index::index_table_name(accepted_name);
+    if (db.has_schema(keyspace(), index_table_name)) {
+        return make_exception_future<::shared_ptr<cql_transport::event::schema_change>>(
+            exceptions::invalid_request_exception(format("Index {} cannot be created, because table {} already exists",
+                    accepted_name, index_table_name))
+        );
+    }
    index_metadata_kind kind;
    index_options_map index_options;
    if (_properties->is_custom) {
--- a/database.cc
+++ b/database.cc
@@ -1948,7 +1948,11 @@ sstring database::get_available_index_name(const sstring &ks_name, const sstring
    auto base_name = index_metadata::get_default_index_name(cf_name, index_name_root);
    sstring accepted_name = base_name;
    int i = 0;
-    while (existing_names.contains(accepted_name)) {
+    auto name_accepted = [&] {
+        auto index_table_name = secondary_index::index_table_name(accepted_name);
+        return !has_schema(ks_name, index_table_name) && !existing_names.contains(accepted_name);
+    };
+    while (!name_accepted()) {
        accepted_name = base_name + "_" + std::to_string(++i);
    }
    return accepted_name;
--- a/test/cql-pytest/test_secondary_index.py
+++ b/test/cql-pytest/test_secondary_index.py
@@ -22,7 +22,7 @@ import pytest
 from cassandra.protocol import SyntaxException, AlreadyExists, InvalidRequest, ConfigurationException, ReadFailure
 from cassandra.query import SimpleStatement

-from util import new_test_table
+from util import new_test_table, unique_name

 # A reproducer for issue #7443: Normally, when the entire table is SELECTed,
 # the partitions are returned sorted by the partitions' token. When there
@@ -81,3 +81,69 @@ def test_paging_with_desc_clustering_order(cql, test_keyspace):
            cql.execute(f"INSERT INTO {table}(p,c) VALUES ({i}, 42)")
        stmt = SimpleStatement(f"SELECT * FROM {table} WHERE c = 42", fetch_size=1)
        assert len([row for row in cql.execute(stmt)]) == 3
+
+# Test which ensures that indexes for a query are picked by the order in which
+# they appear in restrictions. That way, users can deterministically pick
+# which indexes are used for which queries.
+# Note that the order of picking indexes is not set in stone and may be
+# subject to change - in which case this test case should be amended as well.
+# The order tested in this case was decided as a good first step in issue
+# #7969, but it's possible that it will eventually be implemented another
+# way, e.g. dynamically based on estimated query selectivity statistics.
+# Ref: #7969
+@pytest.mark.xfail(reason="The order of picking indexes is currently arbitrary. Issue #7969")
+def test_order_of_indexes(scylla_only, cql, test_keyspace):
+    schema = 'p int primary key, v1 int, v2 int, v3 int'
+    with new_test_table(cql, test_keyspace, schema) as table:
+        cql.execute(f"CREATE INDEX my_v3_idx ON {table}(v3)")
+        cql.execute(f"CREATE INDEX my_v1_idx ON {table}(v1)")
+        cql.execute(f"CREATE INDEX my_v2_idx ON {table}((p),v2)")
+        # All queries below should use the first index they find in the list
+        # of restrictions. Tracing information will be consulted to ensure
+        # it's true. Currently some of the cases below succeed, because the
+        # order is not well defined (and may, for instance, change upon
+        # server restart), but some of them fail. Once a proper ordering
+        # is implemented, all cases below should succeed.
+        def index_used(query, index_name):
+            assert any([index_name in event.description for event in cql.execute(query, trace=True).get_query_trace().events])
+        index_used(f"SELECT * FROM {table} WHERE v3 = 1", "my_v3_idx")
+        index_used(f"SELECT * FROM {table} WHERE v3 = 1 and v1 = 2 allow filtering", "my_v3_idx")
+        index_used(f"SELECT * FROM {table} WHERE p = 1 and v1 = 1 and v3 = 2 allow filtering", "my_v1_idx")
+        index_used(f"SELECT * FROM {table} WHERE p = 1 and v3 = 1 and v1 = 2 allow filtering", "my_v3_idx")
+        # Local indexes are still skipped if they cannot be used
+        index_used(f"SELECT * FROM {table} WHERE v2 = 1 and v1 = 2 allow filtering", "my_v1_idx")
+        index_used(f"SELECT * FROM {table} WHERE v2 = 1 and v3 = 2 and v1 = 3 allow filtering", "my_v3_idx")
+        index_used(f"SELECT * FROM {table} WHERE v1 = 1 and v2 = 2 and v3 = 3 allow filtering", "my_v1_idx")
+        # Local indexes are still preferred over global ones, if they can be used
+        index_used(f"SELECT * FROM {table} WHERE p = 1 and v1 = 1 and v3 = 2 and v2 = 2 allow filtering", "my_v2_idx")
+        index_used(f"SELECT * FROM {table} WHERE p = 1 and v2 = 1 and v1 = 2 allow filtering", "my_v2_idx")
+
+# Indexes can be created without an explicit name, in which case a default name is chosen.
+# However, due to #8620 it was possible to break the index creation mechanism by creating
+# a properly named regular table, which conflicts with the generated index name.
+def test_create_unnamed_index_when_its_name_is_taken(cql, test_keyspace):
+    schema = 'p int primary key, v int'
+    with new_test_table(cql, test_keyspace, schema) as table:
+        try:
+            cql.execute(f"CREATE TABLE {table}_v_idx_index (i_do_not_exist_in_the_base_table int primary key)")
+            # Creating an index should succeed, even though its default name is taken
+            # by the table above
+            cql.execute(f"CREATE INDEX ON {table}(v)")
+        finally:
+            cql.execute(f"DROP TABLE {table}_v_idx_index")
+
+# Indexes created with an explicit name cause a materialized view to be created,
+# and this view has a specific name - <index-name>_index. If there happens to be
+# a regular table (or another view) named just like that, index creation should fail.
+def test_create_named_index_when_its_name_is_taken(scylla_only, cql, test_keyspace):
+    schema = 'p int primary key, v int'
+    with new_test_table(cql, test_keyspace, schema) as table:
+        index_name = unique_name()
+        try:
+            cql.execute(f"CREATE TABLE {test_keyspace}.{index_name}_index (i_do_not_exist_in_the_base_table int primary key)")
+            # Creating an index should fail, because it's impossible to create
+            # its underlying materialized view, because its name is taken by a regular table
+            with pytest.raises(InvalidRequest, match="already exists"):
+                cql.execute(f"CREATE INDEX {index_name} ON {table}(v)")
+        finally:
+            cql.execute(f"DROP TABLE {test_keyspace}.{index_name}_index")