From b88f422a53971871a7fad6f85d458cb0966fc195 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 23 Jan 2024 22:13:10 +0200 Subject: [PATCH 1/2] schema: provide method to get sharder, iff it is static The current get_sharder() method only allows getting a static sharder (since a dynamic sharder needs additional protection). However, it chooses to abort if someone attempt to get a dynamic sharder. In one case, it's useful to get a sharder only if it's static, so provide a method to do that. This is for providing sstable sharding metadata, which isn't useful with tablets. --- schema/schema.cc | 13 +++++++++++-- schema/schema.hh | 4 ++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/schema/schema.cc b/schema/schema.cc index 40b60f36d4..4ef004b5d3 100644 --- a/schema/schema.cc +++ b/schema/schema.cc @@ -151,13 +151,22 @@ const dht::i_partitioner& schema::get_partitioner() const { return _raw._partitioner.get(); } -const dht::sharder& schema::get_sharder() const { +const dht::sharder* schema::try_get_static_sharder() const { auto t = maybe_table(); if (t && !t->uses_static_sharding()) { + // Use table()->get_effective_replication_map()->get_sharder() instead. + return nullptr; + } + return &_raw._sharder.get(); +} + +const dht::sharder& schema::get_sharder() const { + auto* s = try_get_static_sharder(); + if (!s) { // Use table()->get_effective_replication_map()->get_sharder() instead. on_internal_error(dblog, format("Attempted to obtain static sharder for table {}.{}", ks_name(), cf_name())); } - return _raw._sharder.get(); + return *s; } replica::table* schema::maybe_table() const { diff --git a/schema/schema.hh b/schema/schema.hh index 61d08692d1..bc14d4a5d2 100644 --- a/schema/schema.hh +++ b/schema/schema.hh @@ -821,6 +821,10 @@ public: // To obtain a sharder which is valid for all kinds of tables, use table::get_effective_replication_map()->get_sharder() const dht::sharder& get_sharder() const; + // Returns a sharder for this table, but only if it is a static sharder (token->shard mappings + // don't change while the node is up) + const dht::sharder* try_get_static_sharder() const; + // Returns a pointer to the table if the local database has a table which this object references by id(). // The table pointer is not guaranteed to be stable, schema_ptr doesn't keep the table alive. replica::table* maybe_table() const; From 8ee75ae8f445898aa976dd13b97581d407f2c0a4 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 23 Jan 2024 20:21:04 +0200 Subject: [PATCH 2/2] sstables: writer: don't require effective_replication_map for sharding metadata Currently, we pass an effective_replication_map_ptr to sstable_writer, so that we can get a stable dht::sharder for writing the sharding metadata. This is needed because with tablets, the sharder can change dynamically. However, this is both bad and unnecessary: - bad: holding on to an effective_replication_map_ptr is a barrier for topology operations, preventing tablet migrations (etc) while an sstable is being written - unnecessary: tablets don't require sharding metadata at all, since two tablets cannot overlap (unlike two sstables from different shards in the same node). So the first/last key is sufficient to determine the shard/tablet ownership. Given that, just pass the sharder for vnode sstables, and don't generate sharding metadata for tablet sstables. --- replica/table.cc | 2 -- sstables/mx/writer.cc | 4 +--- sstables/sstables.cc | 11 +++++++---- sstables/sstables.hh | 4 +--- streaming/consumer.cc | 1 - 5 files changed, 9 insertions(+), 13 deletions(-) diff --git a/replica/table.cc b/replica/table.cc index 81bddfc127..83fb2d9cae 100644 --- a/replica/table.cc +++ b/replica/table.cc @@ -1121,7 +1121,6 @@ table::try_flush_memtable_to_sstable(compaction_group& cg, lw_shared_ptrget_sharder(_schema) - : _schema.get_sharder(); // Used in tests - _sst.write_scylla_metadata(_shard, sharder, std::move(features), std::move(identifier), std::move(ld_stats), _cfg.origin); + _sst.write_scylla_metadata(_shard, std::move(features), std::move(identifier), std::move(ld_stats), _cfg.origin); _sst.seal_sstable(_cfg.backup).get(); } diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 9ea2c4922c..4a6939831d 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -1629,16 +1629,17 @@ sstable::read_scylla_metadata() noexcept { } void -sstable::write_scylla_metadata(shard_id shard, const dht::sharder& sharder, sstable_enabled_features features, struct run_identifier identifier, +sstable::write_scylla_metadata(shard_id shard, sstable_enabled_features features, struct run_identifier identifier, std::optional ld_stats, sstring origin) { auto&& first_key = get_first_decorated_key(); auto&& last_key = get_last_decorated_key(); - auto sm = create_sharding_metadata(_schema, sharder, first_key, last_key, shard); + auto* sharder_opt = _schema->try_get_static_sharder(); + auto sm = sharder_opt ? std::optional(create_sharding_metadata(_schema, *sharder_opt, first_key, last_key, shard)) : std::nullopt; // sstable write may fail to generate empty metadata if mutation source has only data from other shard. // see https://github.com/scylladb/scylla/issues/2932 for details on how it can happen. - if (sm.token_ranges.elements.empty()) { + if (sm && sm->token_ranges.elements.empty()) { throw std::runtime_error(format("Failed to generate sharding metadata for {}", get_filename())); } @@ -1646,7 +1647,9 @@ sstable::write_scylla_metadata(shard_id shard, const dht::sharder& sharder, ssta _components->scylla_metadata.emplace(); } - _components->scylla_metadata->data.set(std::move(sm)); + if (sm) { + _components->scylla_metadata->data.set(std::move(*sm)); + } _components->scylla_metadata->data.set(std::move(features)); _components->scylla_metadata->data.set(std::move(identifier)); if (ld_stats) { diff --git a/sstables/sstables.hh b/sstables/sstables.hh index 02d148b349..9ed6c3ab5b 100644 --- a/sstables/sstables.hh +++ b/sstables/sstables.hh @@ -39,7 +39,7 @@ #include "readers/flat_mutation_reader_fwd.hh" #include "tracing/trace_state.hh" #include "utils/updateable_value.hh" -#include "locator/abstract_replication_strategy.hh" +#include "dht/decorated_key.hh" #include @@ -111,7 +111,6 @@ struct sstable_writer_config { run_id run_identifier = run_id::create_random_id(); size_t summary_byte_cost; sstring origin; - locator::effective_replication_map_ptr erm; private: explicit sstable_writer_config() {} @@ -642,7 +641,6 @@ private: future<> read_scylla_metadata() noexcept; void write_scylla_metadata(shard_id shard, - const dht::sharder& sharder, sstable_enabled_features features, run_identifier identifier, std::optional ld_stats, diff --git a/streaming/consumer.cc b/streaming/consumer.cc index 85d792235b..f513cde7b9 100644 --- a/streaming/consumer.cc +++ b/streaming/consumer.cc @@ -58,7 +58,6 @@ std::function (flat_mutation_reader_v2)> make_streaming_consumer(sstrin schema_ptr s = reader.schema(); auto cfg = cf->get_sstables_manager().configure_writer(origin); - cfg.erm = cf->get_effective_replication_map(); return sst->write_components(std::move(reader), adjusted_estimated_partitions, s, cfg, encoding_stats{}).then([sst] { return sst->open_data();