From ff113dd43a405c058a37b311c77a2b16b16b07f2 Mon Sep 17 00:00:00 2001 From: Michael Litvak Date: Wed, 19 Nov 2025 13:37:00 +0100 Subject: [PATCH] tombstone_gc: don't use 'repair' mode for colocated tables For tables of special types that can be located: MV, CDC, and paxos table, we should not use tombstone_gc=repair mode because colocated tablets are never repaired, hence they will not have repair_time set and will never be GC'd using 'repair' mode. (cherry picked from commit 868ac42a8b6d3618b1a96f6c688869c2ba7e510b) --- cdc/log.cc | 2 +- cql3/statements/alter_table_statement.cc | 9 +++- cql3/statements/alter_view_statement.cc | 23 +++++++++- cql3/statements/cf_prop_defs.cc | 4 +- cql3/statements/cf_prop_defs.hh | 2 +- cql3/statements/create_table_statement.cc | 2 +- cql3/statements/create_view_statement.cc | 25 ++++++++++- index/secondary_index_manager.cc | 9 +++- test/cluster/test_mv.py | 1 + test/cluster/test_tablets_colocation.py | 54 ++++++++++++++++++++++- tombstone_gc.cc | 12 +++-- tombstone_gc.hh | 4 +- 12 files changed, 131 insertions(+), 16 deletions(-) diff --git a/cdc/log.cc b/cdc/log.cc index 00631115d4..2cf2a029a4 100644 --- a/cdc/log.cc +++ b/cdc/log.cc @@ -696,7 +696,7 @@ static schema_ptr create_log_schema(const schema& s, const replica::database& db } auto rs = generate_replication_strategy(ksm); - auto tombstone_gc_ext = seastar::make_shared(get_default_tombstone_gc_mode(*rs, db.get_token_metadata())); + auto tombstone_gc_ext = seastar::make_shared(get_default_tombstone_gc_mode(*rs, db.get_token_metadata(), false)); b.add_extension(tombstone_gc_extension::NAME, std::move(tombstone_gc_ext)); /** diff --git a/cql3/statements/alter_table_statement.cc b/cql3/statements/alter_table_statement.cc index ba9abe83aa..6d589e3cfb 100644 --- a/cql3/statements/alter_table_statement.cc +++ b/cql3/statements/alter_table_statement.cc @@ -422,7 +422,14 @@ std::pair> alter_table_statement::prepare_sche throw exceptions::invalid_request_exception(format("The synchronous_updates option is only applicable to materialized views, not to base tables")); } - _properties->apply_to_builder(cfm, std::move(schema_extensions), db, keyspace()); + if (is_cdc_log_table) { + auto gc_opts = _properties->get_tombstone_gc_options(schema_extensions); + if (gc_opts && gc_opts->mode() == tombstone_gc_mode::repair) { + throw exceptions::invalid_request_exception("The 'repair' mode for tombstone_gc is not allowed on CDC log tables."); + } + } + + _properties->apply_to_builder(cfm, std::move(schema_extensions), db, keyspace(), !is_cdc_log_table); } break; diff --git a/cql3/statements/alter_view_statement.cc b/cql3/statements/alter_view_statement.cc index 8b19f98476..78ea99e1ec 100644 --- a/cql3/statements/alter_view_statement.cc +++ b/cql3/statements/alter_view_statement.cc @@ -55,8 +55,29 @@ view_ptr alter_view_statement::prepare_view(data_dictionary::database db) const auto schema_extensions = _properties->make_schema_extensions(db.extensions()); _properties->validate(db, keyspace(), schema_extensions); + bool is_colocated = [&] { + if (!db.find_keyspace(keyspace()).get_replication_strategy().uses_tablets()) { + return false; + } + auto base_schema = db.find_schema(schema->view_info()->base_id()); + if (!base_schema) { + return false; + } + return std::ranges::equal( + schema->partition_key_columns(), + base_schema->partition_key_columns(), + [](const column_definition& a, const column_definition& b) { return a.name() == b.name(); }); + }(); + + if (is_colocated) { + auto gc_opts = _properties->get_tombstone_gc_options(schema_extensions); + if (gc_opts && gc_opts->mode() == tombstone_gc_mode::repair) { + throw exceptions::invalid_request_exception("The 'repair' mode for tombstone_gc is not allowed on co-located materialized view tables."); + } + } + auto builder = schema_builder(schema); - _properties->apply_to_builder(builder, std::move(schema_extensions), db, keyspace()); + _properties->apply_to_builder(builder, std::move(schema_extensions), db, keyspace(), !is_colocated); if (builder.get_gc_grace_seconds() == 0) { throw exceptions::invalid_request_exception( diff --git a/cql3/statements/cf_prop_defs.cc b/cql3/statements/cf_prop_defs.cc index e3e32273d7..438f074eb6 100644 --- a/cql3/statements/cf_prop_defs.cc +++ b/cql3/statements/cf_prop_defs.cc @@ -284,7 +284,7 @@ std::optional cf_prop_defs::get_tablet_options() c return std::nullopt; } -void cf_prop_defs::apply_to_builder(schema_builder& builder, schema::extensions_map schema_extensions, const data_dictionary::database& db, sstring ks_name) const { +void cf_prop_defs::apply_to_builder(schema_builder& builder, schema::extensions_map schema_extensions, const data_dictionary::database& db, sstring ks_name, bool supports_repair) const { if (has_property(KW_COMMENT)) { builder.set_comment(get_string(KW_COMMENT, "")); } @@ -370,7 +370,7 @@ void cf_prop_defs::apply_to_builder(schema_builder& builder, schema::extensions_ } // Set default tombstone_gc mode. if (!schema_extensions.contains(tombstone_gc_extension::NAME)) { - auto ext = seastar::make_shared(get_default_tombstone_gc_mode(db, ks_name)); + auto ext = seastar::make_shared(get_default_tombstone_gc_mode(db, ks_name, supports_repair)); schema_extensions.emplace(tombstone_gc_extension::NAME, std::move(ext)); } builder.set_extensions(std::move(schema_extensions)); diff --git a/cql3/statements/cf_prop_defs.hh b/cql3/statements/cf_prop_defs.hh index 8b2ec47c93..0aeb64c1eb 100644 --- a/cql3/statements/cf_prop_defs.hh +++ b/cql3/statements/cf_prop_defs.hh @@ -110,7 +110,7 @@ public: bool get_synchronous_updates_flag() const; std::optional get_tablet_options() const; - void apply_to_builder(schema_builder& builder, schema::extensions_map schema_extensions, const data_dictionary::database& db, sstring ks_name) const; + void apply_to_builder(schema_builder& builder, schema::extensions_map schema_extensions, const data_dictionary::database& db, sstring ks_name, bool supports_repair) const; void validate_minimum_int(const sstring& field, int32_t minimum_value, int32_t default_value) const; }; diff --git a/cql3/statements/create_table_statement.cc b/cql3/statements/create_table_statement.cc index b8d5d64c6a..f30e1afa7d 100644 --- a/cql3/statements/create_table_statement.cc +++ b/cql3/statements/create_table_statement.cc @@ -128,7 +128,7 @@ void create_table_statement::apply_properties_to(schema_builder& builder, const builder.set_compressor_params(db.get_config().sstable_compression_user_table_options()); } - _properties->apply_to_builder(builder, _properties->make_schema_extensions(db.extensions()), db, keyspace()); + _properties->apply_to_builder(builder, _properties->make_schema_extensions(db.extensions()), db, keyspace(), true); } void create_table_statement::add_column_metadata_from_aliases(schema_builder& builder, std::vector aliases, const std::vector& types, column_kind kind) const diff --git a/cql3/statements/create_view_statement.cc b/cql3/statements/create_view_statement.cc index e799ef5118..eda7cbb5a0 100644 --- a/cql3/statements/create_view_statement.cc +++ b/cql3/statements/create_view_statement.cc @@ -373,7 +373,30 @@ std::pair create_view_statement::prepare_view( db::view::create_virtual_column(builder, def->name(), def->type); } } - _properties.properties()->apply_to_builder(builder, std::move(schema_extensions), db, keyspace()); + + bool is_colocated = [&] { + if (!db.find_keyspace(keyspace()).get_replication_strategy().uses_tablets()) { + return false; + } + if (target_partition_keys.size() != schema->partition_key_columns().size()) { + return false; + } + for (size_t i = 0; i < target_partition_keys.size(); ++i) { + if (target_partition_keys[i] != &schema->partition_key_columns()[i]) { + return false; + } + } + return true; + }(); + + if (is_colocated) { + auto gc_opts = _properties.properties()->get_tombstone_gc_options(schema_extensions); + if (gc_opts && gc_opts->mode() == tombstone_gc_mode::repair) { + throw exceptions::invalid_request_exception("The 'repair' mode for tombstone_gc is not allowed on co-located materialized view tables."); + } + } + + _properties.properties()->apply_to_builder(builder, std::move(schema_extensions), db, keyspace(), !is_colocated); if (builder.default_time_to_live().count() > 0) { throw exceptions::invalid_request_exception( diff --git a/index/secondary_index_manager.cc b/index/secondary_index_manager.cc index a49f2fc66e..58ab274a1c 100644 --- a/index/secondary_index_manager.cc +++ b/index/secondary_index_manager.cc @@ -320,7 +320,14 @@ view_ptr secondary_index_manager::create_view_for_index(const index_metadata& im ""; builder.with_view_info(schema, false, where_clause); - auto tombstone_gc_ext = seastar::make_shared(get_default_tombstone_gc_mode(db, schema->ks_name())); + bool is_colocated = [&] { + if (!db.find_keyspace(schema->ks_name()).get_replication_strategy().uses_tablets()) { + return false; + } + return im.local(); + }(); + + auto tombstone_gc_ext = seastar::make_shared(get_default_tombstone_gc_mode(db, schema->ks_name(), !is_colocated)); builder.add_extension(tombstone_gc_extension::NAME, std::move(tombstone_gc_ext)); // A local secondary index should be backed by a *synchronous* view, diff --git a/test/cluster/test_mv.py b/test/cluster/test_mv.py index 33c8d52bd8..874381f15c 100644 --- a/test/cluster/test_mv.py +++ b/test/cluster/test_mv.py @@ -22,6 +22,7 @@ pytestmark = pytest.mark.prepare_3_racks_cluster @pytest.mark.asyncio +@pytest.mark.skip(reason="tombstone_gc repair mode is not supported yet for MVs due to #24816") async def test_mv_tombstone_gc_setting(manager): """ Test that the tombstone_gc parameter can be set on a materialized view, diff --git a/test/cluster/test_tablets_colocation.py b/test/cluster/test_tablets_colocation.py index 87e9d5a78f..bd9a509958 100644 --- a/test/cluster/test_tablets_colocation.py +++ b/test/cluster/test_tablets_colocation.py @@ -9,13 +9,14 @@ from test.pylib.rest_client import inject_error_one_shot from test.pylib.tablets import get_tablet_replica, get_base_table, get_tablet_count, get_tablet_info from test.pylib.util import wait_for, wait_for_cql_and_get_hosts, wait_for_view from test.cluster.conftest import skip_mode -from test.cluster.util import new_test_keyspace +from test.cluster.util import new_test_keyspace, new_test_table, new_materialized_view import time import pytest import logging import asyncio import random from cassandra.query import SimpleStatement, ConsistencyLevel +from cassandra.protocol import InvalidRequest, Unauthorized logger = logging.getLogger(__name__) @@ -440,3 +441,54 @@ async def test_repair_colocated_base_and_view(manager: ManagerClient): rows = await cql_server2.run_async(SimpleStatement(f"SELECT * FROM {ks}.tv", consistency_level=ConsistencyLevel.ONE)) pks = set(row.pk for row in rows) assert 1 in pks and 2 in pks + +# Verify the default tombstone GC mode for colocated tables is 'timeout', +# and that altering it to 'repair' is not allowed. +@pytest.mark.asyncio +async def test_colocated_tables_gc_mode(manager: ManagerClient): + servers = await manager.servers_add(3, auto_rack_dc="dc1") + cql = manager.get_cql() + + def check_tombstone_gc_mode_timeout(cql, table): + s = list(cql.execute(f"DESC {table}"))[0].create_statement + assert "'mode': 'timeout'" in s or "mode" not in s # default is timeout + + async with new_test_keyspace(manager, "WITH replication = {'dc1': 3 }") as ks: + async with new_test_table(manager, ks, "pk int PRIMARY KEY, v int") as table: + async with new_materialized_view(manager, table, "*", "pk, v", "pk IS NOT NULL AND v IS NOT NULL") as mv: + check_tombstone_gc_mode_timeout(cql, mv) + + with pytest.raises(InvalidRequest, match="The 'repair' mode for tombstone_gc is not allowed on co-located materialized view tables."): + await cql.run_async(f"ALTER MATERIALIZED VIEW {mv} WITH tombstone_gc = {{'mode': 'repair'}}") + + with pytest.raises(InvalidRequest, match="The 'repair' mode for tombstone_gc is not allowed on co-located materialized view tables."): + await cql.run_async(f"CREATE MATERIALIZED VIEW {ks}.mv2 AS SELECT * FROM {table} WHERE pk IS NOT NULL AND v IS NOT NULL PRIMARY KEY (pk, v) WITH tombstone_gc = {{'mode': 'repair'}}") + + # a not colocated view with 'repair' mode - should succeed + async with new_materialized_view(manager, table, "*", "v, pk", "pk IS NOT NULL AND v IS NOT NULL", "WITH tombstone_gc = {'mode': 'repair'}") as mv: + await cql.run_async(f"ALTER MATERIALIZED VIEW {mv} WITH tombstone_gc = {{'mode': 'timeout'}}") + await cql.run_async(f"ALTER MATERIALIZED VIEW {mv} WITH tombstone_gc = {{'mode': 'repair'}}") + + await cql.run_async(f"CREATE INDEX ON {table}((pk),v)") + view_name = cql.execute("SELECT view_name FROM system_schema.views").one().view_name + check_tombstone_gc_mode_timeout(cql, f"{ks}.{view_name}") + + with pytest.raises(InvalidRequest, match="The 'repair' mode for tombstone_gc is not allowed on co-located materialized view tables."): + await cql.run_async(f"ALTER MATERIALIZED VIEW {ks}.{view_name} WITH tombstone_gc = {{'mode': 'repair'}}") + + async with new_test_table(manager, ks, "pk int PRIMARY KEY, v int", " WITH cdc={'enabled': true}") as table: + check_tombstone_gc_mode_timeout(cql, f"{table}_scylla_cdc_log") + + with pytest.raises(InvalidRequest, match="The 'repair' mode for tombstone_gc is not allowed on CDC log tables."): + await cql.run_async(f"ALTER TABLE {table}_scylla_cdc_log WITH tombstone_gc = {{'mode': 'repair'}}") + + async with new_test_table(manager, ks, "pk int PRIMARY KEY, v int") as table: + await cql.run_async(f"INSERT INTO {table}(pk, v) VALUES(0, 0)") + await cql.run_async(f"UPDATE {table} SET v = 1 WHERE pk = 0 IF v = 0") + ks_name, cf_name = table.split('.') + table_name = f"{ks_name}.\"{cf_name}$paxos\"" + check_tombstone_gc_mode_timeout(cql, table_name) + + # paxos table is not allowed to be altered, even not by a superuser. + with pytest.raises(Unauthorized): + await cql.run_async(f"ALTER TABLE {table_name} WITH tombstone_gc = {{'mode': 'repair'}}") diff --git a/tombstone_gc.cc b/tombstone_gc.cc index e28f85f065..2b5607bbf4 100644 --- a/tombstone_gc.cc +++ b/tombstone_gc.cc @@ -269,20 +269,24 @@ static bool requires_repair_before_gc(const locator::abstract_replication_strate return rs.uses_tablets() && needs_repair_before_gc(rs, tm); } -std::map get_default_tombstone_gc_mode(const locator::abstract_replication_strategy& rs, const locator::token_metadata& tm) { - return {{"mode", requires_repair_before_gc(rs, tm) ? "repair" : "timeout"}}; +std::map get_default_tombstone_gc_mode(const locator::abstract_replication_strategy& rs, const locator::token_metadata& tm, bool supports_repair) { + return {{"mode", (supports_repair && requires_repair_before_gc(rs, tm)) ? "repair" : "timeout"}}; } -std::map get_default_tombstone_gc_mode(data_dictionary::database db, sstring ks_name) { +std::map get_default_tombstone_gc_mode(data_dictionary::database db, sstring ks_name, bool supports_repair) { auto real_db_ptr = db.real_database_ptr(); if (!real_db_ptr) { return {{"mode", "timeout"}}; } + if (!supports_repair) { + return {{"mode", "timeout"}}; + } + const replica::keyspace& ks = real_db_ptr->find_keyspace(ks_name); const locator::token_metadata& tm = real_db_ptr->get_token_metadata(); - return get_default_tombstone_gc_mode(ks.get_replication_strategy(), tm); + return get_default_tombstone_gc_mode(ks.get_replication_strategy(), tm, supports_repair); } void validate_tombstone_gc_options(const tombstone_gc_options* options, data_dictionary::database db, sstring ks_name) { diff --git a/tombstone_gc.hh b/tombstone_gc.hh index bf6078fab2..469033e257 100644 --- a/tombstone_gc.hh +++ b/tombstone_gc.hh @@ -156,7 +156,7 @@ public: [[nodiscard]] tombstone_gc_state with_commitlog_check_disabled() const { return tombstone_gc_state(_shared_state, false); } }; -std::map get_default_tombstone_gc_mode(const locator::abstract_replication_strategy&, const locator::token_metadata&); -std::map get_default_tombstone_gc_mode(data_dictionary::database db, sstring ks_name); +std::map get_default_tombstone_gc_mode(const locator::abstract_replication_strategy&, const locator::token_metadata&, bool supports_repair); +std::map get_default_tombstone_gc_mode(data_dictionary::database db, sstring ks_name, bool supports_repair); void validate_tombstone_gc_options(const tombstone_gc_options* options, data_dictionary::database db, sstring ks_name);