Update iterator methods and utility methods (range, bounds, tombstone_for_row, row_count)

Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
Update critical row access methods (clustered_row, insert_row, find_row, empty)
2025-12-24 21:38:26 +00:00 · 2025-12-24 21:36:36 +00:00 · 2025-12-24 21:34:07 +00:00 · 2025-12-24 21:31:35 +00:00 · 2025-12-24 21:29:08 +00:00 · 2025-12-24 21:22:11 +00:00
98 changed files with 3445 additions and 494 deletions
--- a/.github/workflows/call_validate_pr_author_email.yml
+++ b/.github/workflows/call_validate_pr_author_email.yml
@@ -0,0 +1,13 @@
+name: validate_pr_author_email
+
+on:
+  pull_request_target:
+    types:
+      - opened
+      - synchronize
+      - reopened
+
+jobs:
+  validate_pr_author_email:
+    uses: scylladb/github-automation/.github/workflows/validate_pr_author_email.yml@main
+
--- a/alternator/controller.cc
+++ b/alternator/controller.cc
@@ -169,7 +169,7 @@ future<> controller::request_stop_server() {
    });
 }

-future<utils::chunked_vector<client_data>> controller::get_client_data() {
+future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> controller::get_client_data() {
    return _server.local().get_client_data();
 }

--- a/alternator/controller.hh
+++ b/alternator/controller.hh
@@ -93,7 +93,7 @@ public:
    // This virtual function is called (on each shard separately) when the
    // virtual table "system.clients" is read. It is expected to generate a
    // list of clients connected to this server (on this shard).
-    virtual future<utils::chunked_vector<client_data>> get_client_data() override;
+    virtual future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> get_client_data() override;
 };

 }
--- a/alternator/server.cc
+++ b/alternator/server.cc
@@ -708,8 +708,12 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
    // As long as the system_clients_entry object is alive, this request will
    // be visible in the "system.clients" virtual table. When requested, this
    // entry will be formatted by server::ongoing_request::make_client_data().
+    auto user_agent_header = co_await _connection_options_keys_and_values.get_or_load(req->get_header("User-Agent"), [] (const client_options_cache_key_type&) {
+        return make_ready_future<options_cache_value_type>(options_cache_value_type{});
+    });
+
    auto system_clients_entry = _ongoing_requests.emplace(
-        req->get_client_address(), req->get_header("User-Agent"),
+        req->get_client_address(), std::move(user_agent_header),
        username, current_scheduling_group(),
        req->get_protocol_name() == "https");

@@ -985,10 +989,10 @@ client_data server::ongoing_request::make_client_data() const {
    return cd;
 }

-future<utils::chunked_vector<client_data>> server::get_client_data() {
-    utils::chunked_vector<client_data> ret;
+future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> server::get_client_data() {
+    utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>> ret;
    co_await _ongoing_requests.for_each_gently([&ret] (const ongoing_request& r) {
-        ret.emplace_back(r.make_client_data());
+        ret.emplace_back(make_foreign(std::make_unique<client_data>(r.make_client_data())));
    });
    co_return ret;
 }
--- a/alternator/server.hh
+++ b/alternator/server.hh
@@ -55,6 +55,7 @@ class server : public peering_sharded_service<server> {
    // though it isn't really relevant for Alternator which defines its own
    // timeouts separately. We can create this object only once.
    updateable_timeout_config _timeout_config;
+    client_options_cache_type _connection_options_keys_and_values;

    alternator_callbacks_map _callbacks;

@@ -88,7 +89,7 @@ class server : public peering_sharded_service<server> {
    // is called when reading the "system.clients" virtual table.
    struct ongoing_request {
        socket_address _client_address;
-        sstring _user_agent;
+        client_options_cache_entry_type _user_agent;
        sstring _username;
        scheduling_group _scheduling_group;
        bool _is_https;
@@ -107,7 +108,7 @@ public:
    // table "system.clients" is read. It is expected to generate a list of
    // clients connected to this server (on this shard). This function is
    // called by alternator::controller::get_client_data().
-    future<utils::chunked_vector<client_data>> get_client_data();
+    future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> get_client_data();
 private:
    void set_routes(seastar::httpd::routes& r);
    // If verification succeeds, returns the authenticated user's username
--- a/api/client_routes.cc
+++ b/api/client_routes.cc
@@ -100,9 +100,8 @@ rest_set_client_routes(http_context& ctx, sharded<service::client_routes_service
    rapidjson::Document root;
    auto content = co_await util::read_entire_stream_contiguous(*req->content_stream);
    root.Parse(content.c_str());
-    const auto route_entries = parse_set_client_array(root);

-    co_await cr.local().set_client_routes(route_entries);
+    co_await cr.local().set_client_routes(parse_set_client_array(root));
    co_return seastar::json::json_void();
 }

@@ -132,8 +131,7 @@ rest_delete_client_routes(http_context& ctx, sharded<service::client_routes_serv
    auto content = co_await util::read_entire_stream_contiguous(*req->content_stream);
    root.Parse(content.c_str());

-    const auto route_keys = parse_delete_client_array(root);
-    co_await cr.local().delete_client_routes(route_keys);
+    co_await cr.local().delete_client_routes(parse_delete_client_array(root));
    co_return seastar::json::json_void();
 }

--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -547,17 +547,13 @@ void set_view_builder(http_context& ctx, routes& r, sharded<db::view::view_build
                vp.insert(b.second);
            }
        }
-        std::vector<sstring> res;
        replica::database& db = vb.local().get_db();
        auto uuid = validate_table(db, ks, cf_name);
        replica::column_family& cf = db.find_column_family(uuid);
-        res.reserve(cf.get_index_manager().list_indexes().size());
-        for (auto&& i : cf.get_index_manager().list_indexes()) {
-            if (vp.contains(secondary_index::index_table_name(i.metadata().name()))) {
-                res.emplace_back(i.metadata().name());
-            }
-        }
-        co_return res;
+        co_return cf.get_index_manager().list_indexes()
+                | std::views::transform([] (const auto& i) { return i.metadata().name(); })
+                | std::views::filter([&vp] (const auto& n) { return vp.contains(secondary_index::index_table_name(n)); })
+                | std::ranges::to<std::vector>();
    });

 }
--- a/client_data.hh
+++ b/client_data.hh
@@ -10,7 +10,9 @@
 #include <seastar/net/inet_address.hh>
 #include <seastar/core/sstring.hh>
 #include "seastarx.hh"
+#include "utils/loading_shared_values.hh"

+#include <list>
 #include <optional>

 enum class client_type {
@@ -27,6 +29,20 @@ enum class client_connection_stage {
    ready,
 };

+// We implement a keys cache using a map-like utils::loading_shared_values container by storing empty values.
+struct options_cache_value_type {};
+using client_options_cache_type = utils::loading_shared_values<sstring, options_cache_value_type>;
+using client_options_cache_entry_type = client_options_cache_type::entry_ptr;
+using client_options_cache_key_type = client_options_cache_type::key_type;
+
+// This struct represents a single OPTION key-value pair from the client's connection options.
+// Both key and value are represented by corresponding "references" to their cached values.
+// Each "reference" is effectively a lw_shared_ptr value.
+struct client_option_key_value_cached_entry {
+    client_options_cache_entry_type key;
+    client_options_cache_entry_type value;
+};
+
 sstring to_string(client_connection_stage ct);

 // Representation of a row in `system.clients'. std::optionals are for nullable cells.
@@ -37,8 +53,8 @@ struct client_data {
    client_connection_stage connection_stage = client_connection_stage::established;
    int32_t shard_id;  /// ID of server-side shard which is processing the connection.

-    std::optional<sstring> driver_name;
-    std::optional<sstring> driver_version;
+    std::optional<client_options_cache_entry_type> driver_name;
+    std::optional<client_options_cache_entry_type> driver_version;
    std::optional<sstring> hostname;
    std::optional<int32_t> protocol_version;
    std::optional<sstring> ssl_cipher_suite;
@@ -46,6 +62,7 @@ struct client_data {
    std::optional<sstring> ssl_protocol;
    std::optional<sstring> username;
    std::optional<sstring> scheduling_group_name;
+    std::list<client_option_key_value_cached_entry> client_options;

    sstring stage_str() const { return to_string(connection_stage); }
    sstring client_type_str() const { return to_string(ct); }
--- a/cmake/mode.common.cmake
+++ b/cmake/mode.common.cmake
@@ -125,10 +125,6 @@ if(target_arch)
  add_compile_options("-march=${target_arch}")
 endif()

-if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-  add_compile_options("SHELL:-Xclang -fexperimental-assignment-tracking=disabled")
-endif()
-
 function(maybe_limit_stack_usage_in_KB stack_usage_threshold_in_KB config)
  math(EXPR _stack_usage_threshold_in_bytes "${stack_usage_threshold_in_KB} * 1024")
  set(_stack_usage_threshold_flag "-Wstack-usage=${_stack_usage_threshold_in_bytes}")
--- a/compaction/compaction_group_view.hh
+++ b/compaction/compaction_group_view.hh
@@ -12,6 +12,7 @@
 #include <seastar/core/condition-variable.hh>

 #include "schema/schema_fwd.hh"
+#include "sstables/open_info.hh"
 #include "compaction_descriptor.hh"

 class reader_permit;
@@ -44,7 +45,7 @@ public:
    virtual compaction_strategy_state& get_compaction_strategy_state() noexcept = 0;
    virtual reader_permit make_compaction_reader_permit() const = 0;
    virtual sstables::sstables_manager& get_sstables_manager() noexcept = 0;
-    virtual sstables::shared_sstable make_sstable() const = 0;
+    virtual sstables::shared_sstable make_sstable(sstables::sstable_state) const = 0;
    virtual sstables::sstable_writer_config configure_writer(sstring origin) const = 0;
    virtual api::timestamp_type min_memtable_timestamp() const = 0;
    virtual api::timestamp_type min_memtable_live_timestamp() const = 0;
--- a/compaction/compaction_manager.cc
+++ b/compaction/compaction_manager.cc
@@ -416,7 +416,9 @@ future<compaction_result> compaction_task_executor::compact_sstables(compaction_
        descriptor.enable_garbage_collection(co_await sstable_set_for_tombstone_gc(t));
    }
    descriptor.creator = [&t] (shard_id) {
-        return t.make_sstable();
+        // All compaction types going through this path will work on normal input sstables only.
+        // Off-strategy, for example, waits until the sstables move out of staging state.
+        return t.make_sstable(sstables::sstable_state::normal);
    };
    descriptor.replacer = [this, &t, &on_replace, offstrategy] (compaction_completion_desc desc) {
        t.get_compaction_strategy().notify_completion(t, desc.old_sstables, desc.new_sstables);
@@ -1847,6 +1849,10 @@ protected:
                throw make_compaction_stopped_exception();
            }
        }, false);
+        if (utils::get_local_injector().is_enabled("split_sstable_force_stop_exception")) {
+            throw make_compaction_stopped_exception();
+        }
+
        co_return co_await do_rewrite_sstable(std::move(sst));
    }
 };
@@ -2284,12 +2290,16 @@ future<compaction_manager::compaction_stats_opt> compaction_manager::perform_spl
 }

 future<std::vector<sstables::shared_sstable>>
-compaction_manager::maybe_split_sstable(sstables::shared_sstable sst, compaction_group_view& t, compaction_type_options::split opt) {
+compaction_manager::maybe_split_new_sstable(sstables::shared_sstable sst, compaction_group_view& t, compaction_type_options::split opt) {
    if (!split_compaction_task_executor::sstable_needs_split(sst, opt)) {
        co_return std::vector<sstables::shared_sstable>{sst};
    }
-    if (!can_proceed(&t)) {
-        co_return std::vector<sstables::shared_sstable>{sst};
+    // Throw an error if split cannot be performed due to e.g. out of space prevention.
+    // We don't want to prevent split because compaction is temporarily disabled on a view only for synchronization,
+    // which is unneeded against new sstables that aren't part of any set yet, so never use can_proceed(&t) here.
+    if (is_disabled()) {
+        co_return coroutine::exception(std::make_exception_ptr(std::runtime_error(format("Cannot split {} because manager has compaction disabled, " \
+                                                                                         "reason might be out of space prevention", sst->get_filename()))));
    }
    std::vector<sstables::shared_sstable> ret;

@@ -2297,8 +2307,11 @@ compaction_manager::maybe_split_sstable(sstables::shared_sstable sst, compaction
    compaction_progress_monitor monitor;
    compaction_data info = create_compaction_data();
    compaction_descriptor desc = split_compaction_task_executor::make_descriptor(sst, opt);
-    desc.creator = [&t] (shard_id _) {
-        return t.make_sstable();
+    desc.creator = [&t, sst] (shard_id _) {
+        // NOTE: preserves the sstable state, since we want the output to be in the same state as the original.
+        // For example, if base table has views, it's important that sstable produced by repair will be
+        // in the staging state.
+        return t.make_sstable(sst->state());
    };
    desc.replacer = [&] (compaction_completion_desc d) {
        std::move(d.new_sstables.begin(), d.new_sstables.end(), std::back_inserter(ret));
--- a/compaction/compaction_manager.hh
+++ b/compaction/compaction_manager.hh
@@ -376,7 +376,8 @@ public:
    // Splits a single SSTable by segregating all its data according to the classifier.
    // If SSTable doesn't need split, the same input SSTable is returned as output.
    // If SSTable needs split, then output SSTables are returned and the input SSTable is deleted.
-    future<std::vector<sstables::shared_sstable>> maybe_split_sstable(sstables::shared_sstable sst, compaction_group_view& t, compaction_type_options::split opt);
+    // Exception is thrown if the input sstable cannot be split due to e.g. out of space prevention.
+    future<std::vector<sstables::shared_sstable>> maybe_split_new_sstable(sstables::shared_sstable sst, compaction_group_view& t, compaction_type_options::split opt);

    // Run a custom job for a given table, defined by a function
    // it completes when future returned by job is ready or returns immediately
--- a/configure.py
+++ b/configure.py
@@ -1698,6 +1698,18 @@ deps['test/vector_search/vector_store_client_test'] =  ['test/vector_search/vect
 deps['test/vector_search/load_balancer_test'] = ['test/vector_search/load_balancer_test.cc'] + scylla_tests_dependencies
 deps['test/vector_search/client_test'] = ['test/vector_search/client_test.cc'] + scylla_tests_dependencies

+boost_tests_prefixes = ["test/boost/", "test/vector_search/", "test/raft/", "test/manual/", "test/ldap/"]
+
+# We need to link these files to all Boost tests to make sure that
+# we can execute `--list_json_content` on them. That will produce
+# a similar result as calling `--list_content={HRF,DOT}`.
+# Unfortunately, to be able to do that, we're forced to link the
+# relevant code by hand.
+for key in deps.keys():
+    for prefix in boost_tests_prefixes:
+        if key.startswith(prefix):
+            deps[key] += ["test/lib/boost_tree_lister_injector.cc", "test/lib/boost_test_tree_lister.cc"]
+
 wasm_deps = {}

 wasm_deps['wasm/return_input.wat'] = 'test/resource/wasm/rust/return_input.rs'
@@ -2251,15 +2263,6 @@ def get_extra_cxxflags(mode, mode_config, cxx, debuginfo):
    if debuginfo and mode_config['can_have_debug_info']:
        cxxflags += ['-g', '-gz']

-    if 'clang' in cxx:
-        # Since AssignmentTracking was enabled by default in clang
-        # (llvm/llvm-project@de6da6ad55d3ca945195d1cb109cb8efdf40a52a)
-        # coroutine frame debugging info (`coro_frame_ty`) is broken.
-        #
-        # It seems that we aren't losing much by disabling AssigmentTracking,
-        # so for now we choose to disable it to get `coro_frame_ty` back.
-        cxxflags.append('-Xclang -fexperimental-assignment-tracking=disabled')
-
    return cxxflags


--- a/db/system_keyspace.hh
+++ b/db/system_keyspace.hh
@@ -200,6 +200,7 @@ public:
    static constexpr auto DICTS = "dicts";
    static constexpr auto VIEW_BUILDING_TASKS = "view_building_tasks";
    static constexpr auto CLIENT_ROUTES = "client_routes";
+    static constexpr auto VERSIONS = "versions";

    // auth
    static constexpr auto ROLES = "roles";
--- a/db/view/view_building_worker.cc
+++ b/db/view/view_building_worker.cc
@@ -198,6 +198,7 @@ future<> view_building_worker::register_staging_sstable_tasks(std::vector<sstabl

 future<> view_building_worker::run_staging_sstables_registrator() {
    while (!_as.abort_requested()) {
+        bool sleep = false;
        try {
            auto lock = co_await get_units(_staging_sstables_mutex, 1, _as);
            co_await create_staging_sstable_tasks();
@@ -214,6 +215,14 @@ future<> view_building_worker::run_staging_sstables_registrator() {
            vbw_logger.warn("Got group0_concurrent_modification while creating staging sstable tasks");
        } catch (raft::request_aborted&) {
            vbw_logger.warn("Got raft::request_aborted while creating staging sstable tasks");
+        } catch (...) {
+            vbw_logger.error("Exception while creating staging sstable tasks: {}", std::current_exception());
+            sleep = true;
+        }
+
+        if (sleep) {
+            vbw_logger.debug("Sleeping after exception.");
+            co_await seastar::sleep_abortable(1s, _as).handle_exception([] (auto x) { return make_ready_future<>(); });
        }
    }
 }
@@ -417,9 +426,12 @@ future<> view_building_worker::check_for_aborted_tasks() {

        auto my_host_id = vbw._db.get_token_metadata().get_topology().my_host_id();
        auto my_replica = locator::tablet_replica{my_host_id, this_shard_id()};
-        auto tasks_map = vbw._state._batch->tasks; // Potentially, we'll remove elements from the map, so we need a copy to iterate over it
-        for (auto& [id, t]: tasks_map) {
-            auto task_opt = building_state.get_task(t.base_id, my_replica, id);
+        auto it = vbw._state._batch->tasks.begin();
+        while (it != vbw._state._batch->tasks.end()) {
+            auto id = it->first;
+            auto task_opt = building_state.get_task(it->second.base_id, my_replica, id);
+
+            ++it; // Advance the iterator before potentially removing the entry from the map.
            if (!task_opt || task_opt->get().aborted) {
                co_await vbw._state._batch->abort_task(id);
            }
@@ -449,7 +461,7 @@ static std::unordered_set<table_id> get_ids_of_all_views(replica::database& db,
    }) | std::ranges::to<std::unordered_set>();;
 }

-// If `state::processing_base_table` is diffrent that the `view_building_state::currently_processed_base_table`,
+// If `state::processing_base_table` is different from the `view_building_state::currently_processed_base_table`,
 // clear the state, save and flush new base table
 future<> view_building_worker::state::update_processing_base_table(replica::database& db, const view_building_state& building_state, abort_source& as) {
    if (processing_base_table != building_state.currently_processed_base_table) {
@@ -571,8 +583,6 @@ future<> view_building_worker::batch::do_work() {
            break;
        }
    }
-
-    _vbw.local()._vb_state_machine.event.broadcast();
 }

 future<> view_building_worker::do_build_range(table_id base_id, std::vector<table_id> views_ids, dht::token last_token, abort_source& as) {
@@ -774,13 +784,15 @@ future<std::vector<utils::UUID>> view_building_worker::work_on_tasks(raft::term_
            tasks.insert({id, *task_opt});
        }
 #ifdef SEASTAR_DEBUG
-        auto& some_task = tasks.begin()->second;
-        for (auto& [_, t]: tasks) {
-            SCYLLA_ASSERT(t.base_id == some_task.base_id);
-            SCYLLA_ASSERT(t.last_token == some_task.last_token);
-            SCYLLA_ASSERT(t.replica == some_task.replica);
-            SCYLLA_ASSERT(t.type == some_task.type);
-            SCYLLA_ASSERT(t.replica.shard == this_shard_id());
+        {
+            auto& some_task = tasks.begin()->second;
+            for (auto& [_, t]: tasks) {
+                SCYLLA_ASSERT(t.base_id == some_task.base_id);
+                SCYLLA_ASSERT(t.last_token == some_task.last_token);
+                SCYLLA_ASSERT(t.replica == some_task.replica);
+                SCYLLA_ASSERT(t.type == some_task.type);
+                SCYLLA_ASSERT(t.replica.shard == this_shard_id());
+            }
        }
 #endif

@@ -811,25 +823,6 @@ future<std::vector<utils::UUID>> view_building_worker::work_on_tasks(raft::term_
    co_return collect_completed_tasks();
 }

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 }

 }
--- a/db/virtual_tables.cc
+++ b/db/virtual_tables.cc
@@ -605,8 +605,8 @@ public:
    }

    static schema_ptr build_schema() {
-        auto id = generate_legacy_id(system_keyspace::NAME, "versions");
-        return schema_builder(system_keyspace::NAME, "versions", std::make_optional(id))
+        auto id = generate_legacy_id(system_keyspace::NAME, system_keyspace::VERSIONS);
+        return schema_builder(system_keyspace::NAME, system_keyspace::VERSIONS, std::make_optional(id))
            .with_column("key", utf8_type, column_kind::partition_key)
            .with_column("version", utf8_type)
            .with_column("build_mode", utf8_type)
@@ -749,6 +749,7 @@ class clients_table : public streaming_virtual_table {
            .with_column("ssl_protocol", utf8_type)
            .with_column("username", utf8_type)
            .with_column("scheduling_group", utf8_type)
+            .with_column("client_options", map_type_impl::get_instance(utf8_type, utf8_type, false))
            .with_hash_version()
            .build();
    }
@@ -766,7 +767,7 @@ class clients_table : public streaming_virtual_table {

    future<> execute(reader_permit permit, result_collector& result, const query_restrictions& qr) override {
        // Collect
-        using client_data_vec = utils::chunked_vector<client_data>;
+        using client_data_vec = utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>;
        using shard_client_data = std::vector<client_data_vec>;
        std::vector<foreign_ptr<std::unique_ptr<shard_client_data>>> cd_vec;
        cd_vec.resize(smp::count);
@@ -806,13 +807,13 @@ class clients_table : public streaming_virtual_table {
        for (unsigned i = 0; i < smp::count; i++) {
            for (auto&& ps_cdc : *cd_vec[i]) {
                for (auto&& cd : ps_cdc) {
-                    if (cd_map.contains(cd.ip)) {
-                        cd_map[cd.ip].emplace_back(std::move(cd));
+                    if (cd_map.contains(cd->ip)) {
+                        cd_map[cd->ip].emplace_back(std::move(cd));
                    } else {
-                        dht::decorated_key key = make_partition_key(cd.ip);
+                        dht::decorated_key key = make_partition_key(cd->ip);
                        if (this_shard_owns(key) && contains_key(qr.partition_range(), key)) {
-                            ips.insert(decorated_ip{std::move(key), cd.ip});
-                            cd_map[cd.ip].emplace_back(std::move(cd));
+                            ips.insert(decorated_ip{std::move(key), cd->ip});
+                            cd_map[cd->ip].emplace_back(std::move(cd));
                        }
                    }
                    co_await coroutine::maybe_yield();
@@ -825,39 +826,58 @@ class clients_table : public streaming_virtual_table {
            co_await result.emit_partition_start(dip.key);
            auto& clients = cd_map[dip.ip];

-            std::ranges::sort(clients, [] (const client_data& a, const client_data& b) {
-                return a.port < b.port || a.client_type_str() < b.client_type_str();
+            std::ranges::sort(clients, [] (const foreign_ptr<std::unique_ptr<client_data>>& a, const foreign_ptr<std::unique_ptr<client_data>>& b) {
+                return a->port < b->port || a->client_type_str() < b->client_type_str();
            });

            for (const auto& cd : clients) {
-                clustering_row cr(make_clustering_key(cd.port, cd.client_type_str()));
-                set_cell(cr.cells(), "shard_id", cd.shard_id);
-                set_cell(cr.cells(), "connection_stage", cd.stage_str());
-                if (cd.driver_name) {
-                    set_cell(cr.cells(), "driver_name", *cd.driver_name);
+                clustering_row cr(make_clustering_key(cd->port, cd->client_type_str()));
+                set_cell(cr.cells(), "shard_id", cd->shard_id);
+                set_cell(cr.cells(), "connection_stage", cd->stage_str());
+                if (cd->driver_name) {
+                    set_cell(cr.cells(), "driver_name", cd->driver_name->key());
                }
-                if (cd.driver_version) {
-                    set_cell(cr.cells(), "driver_version", *cd.driver_version);
+                if (cd->driver_version) {
+                    set_cell(cr.cells(), "driver_version", cd->driver_version->key());
                }
-                if (cd.hostname) {
-                    set_cell(cr.cells(), "hostname", *cd.hostname);
+                if (cd->hostname) {
+                    set_cell(cr.cells(), "hostname", *cd->hostname);
                }
-                if (cd.protocol_version) {
-                    set_cell(cr.cells(), "protocol_version", *cd.protocol_version);
+                if (cd->protocol_version) {
+                    set_cell(cr.cells(), "protocol_version", *cd->protocol_version);
                }
-                if (cd.ssl_cipher_suite) {
-                    set_cell(cr.cells(), "ssl_cipher_suite", *cd.ssl_cipher_suite);
+                if (cd->ssl_cipher_suite) {
+                    set_cell(cr.cells(), "ssl_cipher_suite", *cd->ssl_cipher_suite);
                }
-                if (cd.ssl_enabled) {
-                    set_cell(cr.cells(), "ssl_enabled", *cd.ssl_enabled);
+                if (cd->ssl_enabled) {
+                    set_cell(cr.cells(), "ssl_enabled", *cd->ssl_enabled);
                }
-                if (cd.ssl_protocol) {
-                    set_cell(cr.cells(), "ssl_protocol", *cd.ssl_protocol);
+                if (cd->ssl_protocol) {
+                    set_cell(cr.cells(), "ssl_protocol", *cd->ssl_protocol);
                }
-                set_cell(cr.cells(), "username", cd.username ? *cd.username : sstring("anonymous"));
-                if (cd.scheduling_group_name) {
-                    set_cell(cr.cells(), "scheduling_group", *cd.scheduling_group_name);
+                set_cell(cr.cells(), "username", cd->username ? *cd->username : sstring("anonymous"));
+                if (cd->scheduling_group_name) {
+                    set_cell(cr.cells(), "scheduling_group", *cd->scheduling_group_name);
                }
+
+                auto map_type = map_type_impl::get_instance(
+                    utf8_type,
+                    utf8_type,
+                    false
+                );
+
+                auto prepare_client_options = [] (const auto& client_options) {
+                    map_type_impl::native_type tmp;
+                    for (auto& co: client_options) {
+                        auto map_element = std::make_pair(data_value(co.key.key()), data_value(co.value.key()));
+                        tmp.push_back(std::move(map_element));
+                    }
+                    return tmp;
+                };
+
+                set_cell(cr.cells(), "client_options",
+                    make_map_value(map_type, prepare_client_options(cd->client_options)));
+
                co_await result.emit_row(std::move(cr));
            }
            co_await result.emit_partition_end();
--- a/docs/cql/ddl.rst
+++ b/docs/cql/ddl.rst
@@ -365,7 +365,7 @@ Modifying a keyspace with tablets enabled is possible and doesn't require any sp

 - The replication factor (RF) can be increased or decreased by at most 1 at a time. To reach the desired RF value, modify the RF repeatedly.
 - The ``ALTER`` statement rejects the ``replication_factor`` tag. List the DCs explicitly when altering a keyspace. See :ref:`NetworkTopologyStrategy <replication-strategy>`.
- If there's any other ongoing global topology operation, executing the ``ALTER`` statement will fail (with an explicit and specific error) and needs to be repeated.
+- An RF change cannot be requested while another RF change is pending for the same keyspace. Attempting to execute an ``ALTER`` statement in this scenario will fail with an explicit error. Wait for the ongoing RF change to complete before issuing another ``ALTER`` statement.
 - The ``ALTER`` statement may take longer than the regular query timeout, and even if it times out, it will continue to execute in the background.
 - The replication strategy cannot be modified, as keyspaces with tablets only support ``NetworkTopologyStrategy``.
 - The ``ALTER`` statement will fail if it would make the keyspace :term:`RF-rack-invalid <RF-rack-valid keyspace>`.
--- a/docs/dev/docker-hub.md
+++ b/docs/dev/docker-hub.md
@@ -2,8 +2,11 @@

 ## What is ScyllaDB?

-ScyllaDB is a high-performance NoSQL database system, fully compatible with Apache Cassandra.
-ScyllaDB is released under the GNU Affero General Public License version 3 and the Apache License, ScyllaDB is free and open-source software.
+ScyllaDB is a high-performance NoSQL database optimized for speed and scalability.
+It is designed to efficiently handle large volumes of data with minimal latency,
+making it ideal for data-intensive applications.
+
+ScyllaDB is distributed under the [ScyllaDB Source Available License](https://github.com/scylladb/scylladb/blob/master/LICENSE-ScyllaDB-Source-Available.md).

 > [ScyllaDB](http://www.scylladb.com/)

--- a/docs/dev/protocol-extensions.md
+++ b/docs/dev/protocol-extensions.md
@@ -74,6 +74,8 @@ The keys and values are:
    as an indicator to which shard client wants to connect. The desired shard number
    is calculated as: `desired_shard_no = client_port % SCYLLA_NR_SHARDS`.
    Its value is a decimal representation of type `uint16_t`, by default `19142`.
+  - `CLIENT_OPTIONS` is a string containing a JSON object that describes
+    the CQL driver configuration, e.g. load balancing policy, retry policy, timeouts, etc.

 Currently, one `SCYLLA_SHARDING_ALGORITHM` is defined,
 `biased-token-round-robin`. To apply the algorithm,
--- a/docs/dev/view-building-coordinator.md
+++ b/docs/dev/view-building-coordinator.md
@@ -41,12 +41,12 @@ Unless the task was aborted, the worker will eventually reply that the task was
 it temporarily saves list of ids of finished tasks and removes those tasks from group0 state (pernamently marking them as finished) in 200ms intervals. (*)
 This batching of removing finished tasks is done in order to reduce number of generated group0 operations.

-On the other hand, view buildind tasks can can also be aborted due to 2 main reasons:
+On the other hand, view building tasks can also be aborted due to 2 main reasons:
 - a keyspace/view was dropped
 - tablet operations (see [tablet operations section](#tablet-operations))
 In the first case we simply delete relevant view building tasks as they are no longer needed.
-But if a task needs to be aborted due to tablet operation, we're firstly setting the `aborted` flag to true. We need to do this because we need the task informations
-to created a new adjusted tasks (if the operation succeeded) or rollback them (if the operation failed).
+But if a task needs to be aborted due to tablet operation, we're firstly setting the `aborted` flag to true. We need to do this because we need the task information
+to create new adjusted tasks (if the operation succeeded) or roll them back (if the operation failed).
 Once a task is aborted by setting the flag, this cannot be revoked, so rolling back a task means creating its duplicate and removing the original task.

 (*) - Because there is a time gap between when the coordinator learns that a task is finished (from the RPC response) and when the task is marked as completed,
--- a/docs/features/index.rst
+++ b/docs/features/index.rst
@@ -17,6 +17,7 @@ This document highlights ScyllaDB's key data modeling features.
   Workload Prioritization </features/workload-prioritization>
   Backup and Restore </features/backup-and-restore>
   Incremental Repair </features/incremental-repair/>
+   Vector Search </features/vector-search/>

 .. panel-box::
  :title: ScyllaDB Features
@@ -43,3 +44,5 @@ This document highlights ScyllaDB's key data modeling features.
  * :doc:`Incremental Repair </features/incremental-repair/>` provides a much more
    efficient and lightweight approach to maintaining data consistency by
    repairing only the data that has changed since the last repair.
+  * :doc:`Vector Search in ScyllaDB </features/vector-search/>` enables
+    similarity-based queries on vector embeddings.
--- a/docs/features/vector-search.rst
+++ b/docs/features/vector-search.rst
@@ -0,0 +1,55 @@
+=================================
+Vector Search in ScyllaDB
+=================================
+
+.. note::
+
+    This feature is currently available only in `ScyllaDB Cloud <https://cloud.docs.scylladb.com/>`_.
+
+What Is Vector Search
+-------------------------
+
+Vector Search enables similarity-based queries over high-dimensional data,
+such as text, images, audio, or user behavior. Instead of searching for exact
+matches, it allows applications to find items that are semantically similar to
+a given input.
+
+To do this, Vector Search works on vector embeddings, which are numerical
+representations of data that capture semantic meaning. This enables queries
+such as:
+
+* “Find documents similar to this paragraph”
+* “Find products similar to what the user just viewed”
+* “Find previous tickets related to this support request”
+
+Rather than relying on exact values or keywords, Vector Search returns results
+based on distance or similarity between vectors. This capability is
+increasingly used in modern workloads such as AI-powered search, recommendation
+systems, and retrieval-augmented generation (RAG).
+
+Why Vector Search Matters
+------------------------------------
+
+Many applications already rely on ScyllaDB for high throughput, low and
+predictable latency, and large-scale data storage.
+
+Vector Search complements these strengths by enabling new classes of workloads,
+including:
+
+* Semantic search over text or documents
+* Recommendations based on user or item similarity
+* AI and ML applications, including RAG pipelines
+* Anomaly and pattern detection
+
+With Vector Search, ScyllaDB can serve as the similarity search backend for
+AI-driven applications.
+
+Availability
+--------------
+
+Vector Search is currently available only in ScyllaDB Cloud, the fully managed
+ScyllaDB service.
+
+
+👉 For details on using Vector Search, refer to the
+`ScyllaDB Cloud documentation <https://cloud.docs.scylladb.com/stable/vector-search/index.html>`_.
--- a/docs/getting-started/cloud-instance-recommendations.rst
+++ b/docs/getting-started/cloud-instance-recommendations.rst
@@ -20,7 +20,10 @@ You can run your ScyllaDB workloads on AWS, GCE, and Azure using a ScyllaDB imag
 Amazon Web Services (AWS)
 -----------------------------

-The recommended instance types are :ref:`i3en <system-requirements-i3en-instances>`, :ref:`i4i <system-requirements-i4i-instances>`, :ref:`i7i <system-requirements-i7i-instances>`, and :ref:`i7ie <system-requirements-i7ie-instances>`.
+The recommended instance types are :ref:`i3en <system-requirements-i3en-instances>`,
+:ref:`i4i <system-requirements-i4i-instances>`, :ref:`i7i <system-requirements-i7i-instances>`,
+:ref:`i7ie <system-requirements-i7ie-instances>`, :ref:`i8g <system-requirements-i8g-instances>`,
+and :ref:`i8ge <system-requirements-i8ge-instances>`.

 .. note::

@@ -195,6 +198,118 @@ All i7i instances have the following specs:

 See `Amazon EC2 I7i Instances <https://aws.amazon.com/ec2/instance-types/i7i/>`_ for details.

+
+.. _system-requirements-i8g-instances:
+
+i8g instances
+^^^^^^^^^^^^^^
+
+The following i8g instances are supported.
+
+.. list-table::
+   :widths: 30 20 20 30
+   :header-rows: 1
+
+   * - Model
+     - vCPU
+     - Mem (GiB)
+     - Storage (GB)
+   * - i8g.large
+     - 2
+     - 16
+     - 1 x 468 GB
+   * - i8g.xlarge
+     - 4
+     - 32
+     - 1 x 937 GB
+   * - i8g.2xlarge
+     - 8
+     - 64
+     - 1 x 1,875 GB
+   * - i8g.4xlarge
+     - 16
+     - 128
+     - 1 x 3,750 GB
+   * - i8g.8xlarge
+     - 32
+     - 256
+     - 2 x 3,750 GB
+   * - i8g.12xlarge
+     - 48
+     - 384
+     - 3 x 3,750 GB
+   * - i8g.16xlarge
+     - 64
+     - 512
+     - 4 x 3,750 GB
+
+All i8g instances have the following specs:
+
+* Powered by AWS Graviton4 processors
+* 3rd generation AWS Nitro SSD storage
+* DDR5-5600 memory for improved throughput
+* Up to 100 Gbps of networking bandwidth and up to 60 Gbps of bandwidth to
+  Amazon Elastic Block Store (EBS)
+* Instance sizes offer up to 45 TB of total local NVMe instance storage
+
+See `Amazon EC2 I8g Instances <https://aws.amazon.com/ec2/instance-types/i8g/>`_ for details.
+
+.. _system-requirements-i8ge-instances:
+
+i8ge instances
+^^^^^^^^^^^^^^
+
+The following i8ge instances are supported.
+
+.. list-table::
+   :widths: 30 20 20 30
+   :header-rows: 1
+
+   * - Model
+     - vCPU
+     - Mem (GiB)
+     - Storage (GB)
+   * - i8ge.large
+     - 2
+     - 16
+     - 1 x 1,250 GB
+   * - i8ge.xlarge
+     - 4
+     - 32
+     - 1 x 2,500 GB
+   * - i8ge.2xlarge
+     - 8
+     - 64
+     - 2 x 2,500 GB
+   * - i8ge.3xlarge
+     - 12
+     - 96
+     - 1 x 7,500 GB
+   * - i8ge.6xlarge
+     - 24
+     - 192
+     - 2 x 7,500 GB
+   * - i8ge.12xlarge
+     - 48
+     - 384
+     - 4 x 7,500 GB
+   * - i8ge.18xlarge
+     - 72
+     - 576
+     - 6 x 7,500 GB
+
+All i8ge instances have the following specs:
+
+* Powered by AWS Graviton4 processors
+* 3rd generation AWS Nitro SSD storage
+* DDR5-5600 memory for improved throughput
+* Up to 300 Gbps of networking bandwidth and up to 60 Gbps of bandwidth to
+  Amazon Elastic Block Store (EBS)
+* Instance sizes offer up to 120 TB of total local NVMe instance storage
+
+See `Amazon EC2 I8ge Instances <https://aws.amazon.com/ec2/instance-types/i8g/>`_ for details.
+
+
 Im4gn and Is4gen instances
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 ScyllaDB supports Arm-based Im4gn and Is4gen instances. See  `Amazon EC2 Im4gn and Is4gen instances <https://aws.amazon.com/ec2/instance-types/i4g/>`_ for specification details. 
--- a/mutation/mutation_partition.cc
+++ b/mutation/mutation_partition.cc
@@ -45,7 +45,9 @@ mutation_partition::mutation_partition(const schema& s, const mutation_partition
        : _tombstone(x._tombstone)
        , _static_row(s, column_kind::static_column, x._static_row)
        , _static_row_continuous(x._static_row_continuous)
-        , _rows()
+        , _rows(use_single_row_storage(s) ? 
+            rows_storage_type(std::optional<deletable_row>{}) : 
+            rows_storage_type(rows_type{}))
        , _row_tombstones(x._row_tombstones)
 #ifdef SEASTAR_DEBUG
        , _schema_version(s.version())
@@ -54,10 +56,30 @@ mutation_partition::mutation_partition(const schema& s, const mutation_partition
 #ifdef SEASTAR_DEBUG
    SCYLLA_ASSERT(x._schema_version == _schema_version);
 #endif
-    auto cloner = [&s] (const rows_entry* x) -> rows_entry* {
-        return current_allocator().construct<rows_entry>(s, *x);
-    };
-    _rows.clone_from(x._rows, cloner, current_deleter<rows_entry>());
+    if (use_single_row_storage(s)) {
+        // Copy single row if it exists
+        if (x.uses_single_row_storage()) {
+            const auto& x_row = x.get_single_row_storage();
+            if (x_row) {
+                get_single_row_storage() = deletable_row(s, *x_row);
+            }
+        } else if (!x.get_rows_storage().empty()) {
+            // Converting from multi-row to single-row is not supported - report an internal error
+            // This shouldn't normally happen as schema doesn't change this way
+            on_internal_error(mplog, "mutation_partition: cannot convert multi-row partition to single-row");
+        }
+    } else {
+        // Multi-row storage
+        if (x.uses_single_row_storage()) {
+            // Converting from single-row to multi-row - this shouldn't normally happen
+            on_internal_error(mplog, "mutation_partition: cannot convert single-row partition to multi-row");
+        } else {
+            auto cloner = [&s] (const rows_entry* x) -> rows_entry* {
+                return current_allocator().construct<rows_entry>(s, *x);
+            };
+            get_rows_storage().clone_from(x.get_rows_storage(), cloner, current_deleter<rows_entry>());
+        }
+    }
 }

 mutation_partition::mutation_partition(const mutation_partition& x, const schema& schema,
@@ -65,7 +87,9 @@ mutation_partition::mutation_partition(const mutation_partition& x, const schema
        : _tombstone(x._tombstone)
        , _static_row(schema, column_kind::static_column, x._static_row)
        , _static_row_continuous(x._static_row_continuous)
-        , _rows()
+        , _rows(use_single_row_storage(schema) ? 
+            rows_storage_type(std::optional<deletable_row>{}) : 
+            rows_storage_type(rows_type{}))
        , _row_tombstones(x._row_tombstones, range_tombstone_list::copy_comparator_only())
 #ifdef SEASTAR_DEBUG
        , _schema_version(schema.version())
@@ -74,19 +98,37 @@ mutation_partition::mutation_partition(const mutation_partition& x, const schema
 #ifdef SEASTAR_DEBUG
    SCYLLA_ASSERT(x._schema_version == _schema_version);
 #endif
-    try {
-        for(auto&& r : ck_ranges) {
-            for (const rows_entry& e : x.range(schema, r)) {
-                auto ce = alloc_strategy_unique_ptr<rows_entry>(current_allocator().construct<rows_entry>(schema, e));
-                _rows.insert_before_hint(_rows.end(), std::move(ce), rows_entry::tri_compare(schema));
+    if (use_single_row_storage(schema)) {
+        // Single-row storage: just copy the row if it exists
+        if (x.uses_single_row_storage()) {
+            const auto& x_row = x.get_single_row_storage();
+            if (x_row) {
+                get_single_row_storage() = deletable_row(schema, *x_row);
            }
-            for (auto&& rt : x._row_tombstones.slice(schema, r)) {
-                _row_tombstones.apply(schema, rt.tombstone());
+        } else {
+            // Filtering from multi-row - shouldn't happen with consistent schema
+            on_internal_error(mplog, "mutation_partition: filtering from multi-row to single-row storage");
+        }
+    } else {
+        // Multi-row storage with filtering
+        if (x.uses_single_row_storage()) {
+            on_internal_error(mplog, "mutation_partition: filtering from single-row to multi-row storage");
+        } else {
+            try {
+                for(auto&& r : ck_ranges) {
+                    for (const rows_entry& e : x.range(schema, r)) {
+                        auto ce = alloc_strategy_unique_ptr<rows_entry>(current_allocator().construct<rows_entry>(schema, e));
+                        get_rows_storage().insert_before_hint(get_rows_storage().end(), std::move(ce), rows_entry::tri_compare(schema));
+                    }
+                    for (auto&& rt : x._row_tombstones.slice(schema, r)) {
+                        _row_tombstones.apply(schema, rt.tombstone());
+                    }
+                }
+            } catch (...) {
+                get_rows_storage().clear_and_dispose(current_deleter<rows_entry>());
+                throw;
            }
        }
-    } catch (...) {
-        _rows.clear_and_dispose(current_deleter<rows_entry>());
-        throw;
    }
 }

@@ -104,14 +146,20 @@ mutation_partition::mutation_partition(mutation_partition&& x, const schema& sch
 #ifdef SEASTAR_DEBUG
    SCYLLA_ASSERT(x._schema_version == _schema_version);
 #endif
-    {
-        auto deleter = current_deleter<rows_entry>();
-        auto it = _rows.begin();
-        for (auto&& range : ck_ranges.ranges()) {
-            _rows.erase_and_dispose(it, lower_bound(schema, range), deleter);
-            it = upper_bound(schema, range);
+    if (use_single_row_storage(schema)) {
+        // Single-row storage: no filtering needed, row either exists or doesn't
+        // The move constructor has already moved the row if it exists
+    } else {
+        // Multi-row storage: filter the rows
+        if (!uses_single_row_storage()) {
+            auto deleter = current_deleter<rows_entry>();
+            auto it = get_rows_storage().begin();
+            for (auto&& range : ck_ranges.ranges()) {
+                get_rows_storage().erase_and_dispose(it, lower_bound(schema, range), deleter);
+                it = upper_bound(schema, range);
+            }
+            get_rows_storage().erase_and_dispose(it, get_rows_storage().end(), deleter);
        }
-        _rows.erase_and_dispose(it, _rows.end(), deleter);
    }
    {
        for (auto&& range : ck_ranges.ranges()) {
@@ -127,7 +175,11 @@ mutation_partition::mutation_partition(mutation_partition&& x, const schema& sch
 }

 mutation_partition::~mutation_partition() {
-    _rows.clear_and_dispose(current_deleter<rows_entry>());
+    if (uses_single_row_storage()) {
+        // Single-row storage: optional destructor handles cleanup
+    } else {
+        get_rows_storage().clear_and_dispose(current_deleter<rows_entry>());
+    }
 }

 mutation_partition&
@@ -141,10 +193,14 @@ mutation_partition::operator=(mutation_partition&& x) noexcept {

 void mutation_partition::ensure_last_dummy(const schema& s) {
    check_schema(s);
-    if (_rows.empty() || !_rows.rbegin()->is_last_dummy()) {
+    if (uses_single_row_storage()) {
+        // Single-row storage doesn't use dummy entries
+        return;
+    }
+    if (get_rows_storage().empty() || !get_rows_storage().rbegin()->is_last_dummy()) {
        auto e = alloc_strategy_unique_ptr<rows_entry>(
                current_allocator().construct<rows_entry>(s, rows_entry::last_dummy_tag(), is_continuous::yes));
-        _rows.insert_before(_rows.end(), std::move(e));
+        get_rows_storage().insert_before(get_rows_storage().end(), std::move(e));
    }
 }

@@ -419,9 +475,18 @@ mutation_partition::tombstone_for_row(const schema& schema, const clustering_key
    check_schema(schema);
    row_tombstone t = row_tombstone(range_tombstone_for_row(schema, key));

-    auto j = _rows.find(key, rows_entry::tri_compare(schema));
-    if (j != _rows.end()) {
-        t.apply(j->row().deleted_at(), j->row().marker());
+    if (use_single_row_storage(schema)) {
+        // Single-row storage: check if the single row exists and has tombstone
+        const auto& row_opt = get_single_row_storage();
+        if (row_opt) {
+            t.apply(row_opt->deleted_at(), row_opt->marker());
+        }
+    } else {
+        // Multi-row storage: search in B-tree
+        auto j = get_rows_storage().find(key, rows_entry::tri_compare(schema));
+        if (j != get_rows_storage().end()) {
+            t.apply(j->row().deleted_at(), j->row().marker());
+        }
    }

    return t;
@@ -504,97 +569,178 @@ void mutation_partition::apply_insert(const schema& s, clustering_key_view key,
    clustered_row(s, key).apply(row_marker(created_at, ttl, expiry));
 }
 void mutation_partition::insert_row(const schema& s, const clustering_key& key, deletable_row&& row) {
-    auto e = alloc_strategy_unique_ptr<rows_entry>(
-        current_allocator().construct<rows_entry>(key, std::move(row)));
-    _rows.insert_before_hint(_rows.end(), std::move(e), rows_entry::tri_compare(s));
+    if (use_single_row_storage(s)) {
+        // Single-row storage: just set the row
+        get_single_row_storage() = std::move(row);
+    } else {
+        // Multi-row storage: insert into B-tree
+        auto e = alloc_strategy_unique_ptr<rows_entry>(
+            current_allocator().construct<rows_entry>(key, std::move(row)));
+        get_rows_storage().insert_before_hint(get_rows_storage().end(), std::move(e), rows_entry::tri_compare(s));
+    }
 }

 void mutation_partition::insert_row(const schema& s, const clustering_key& key, const deletable_row& row) {
    check_schema(s);
-    auto e = alloc_strategy_unique_ptr<rows_entry>(
-        current_allocator().construct<rows_entry>(s, key, row));
-    _rows.insert_before_hint(_rows.end(), std::move(e), rows_entry::tri_compare(s));
+    if (use_single_row_storage(s)) {
+        // Single-row storage: just copy the row
+        get_single_row_storage() = row;
+    } else {
+        // Multi-row storage: insert into B-tree
+        auto e = alloc_strategy_unique_ptr<rows_entry>(
+            current_allocator().construct<rows_entry>(s, key, row));
+        get_rows_storage().insert_before_hint(get_rows_storage().end(), std::move(e), rows_entry::tri_compare(s));
+    }
 }

 const row*
 mutation_partition::find_row(const schema& s, const clustering_key& key) const {
    check_schema(s);
-    auto i = _rows.find(key, rows_entry::tri_compare(s));
-    if (i == _rows.end()) {
+    if (use_single_row_storage(s)) {
+        // Single-row storage: return the single row's cells if it exists
+        const auto& row_opt = get_single_row_storage();
+        if (row_opt) {
+            return &row_opt->cells();
+        }
        return nullptr;
+    } else {
+        // Multi-row storage: search in B-tree
+        auto i = get_rows_storage().find(key, rows_entry::tri_compare(s));
+        if (i == get_rows_storage().end()) {
+            return nullptr;
+        }
+        return &i->row().cells();
    }
-    return &i->row().cells();
 }

 deletable_row&
 mutation_partition::clustered_row(const schema& s, clustering_key&& key) {
    check_schema(s);
    check_row_key(s, key, is_dummy::no);
-    auto i = _rows.find(key, rows_entry::tri_compare(s));
-    if (i == _rows.end()) {
-        auto e = alloc_strategy_unique_ptr<rows_entry>(
-            current_allocator().construct<rows_entry>(std::move(key)));
-        i = _rows.insert_before_hint(i, std::move(e), rows_entry::tri_compare(s)).first;
+    
+    if (use_single_row_storage(s)) {
+        // Single-row storage: create row if it doesn't exist
+        auto& row_opt = get_single_row_storage();
+        if (!row_opt) {
+            row_opt = deletable_row();
+        }
+        return *row_opt;
+    } else {
+        // Multi-row storage: find or insert in B-tree
+        auto i = get_rows_storage().find(key, rows_entry::tri_compare(s));
+        if (i == get_rows_storage().end()) {
+            auto e = alloc_strategy_unique_ptr<rows_entry>(
+                current_allocator().construct<rows_entry>(std::move(key)));
+            i = get_rows_storage().insert_before_hint(i, std::move(e), rows_entry::tri_compare(s)).first;
+        }
+        return i->row();
    }
-    return i->row();
 }

 deletable_row&
 mutation_partition::clustered_row(const schema& s, const clustering_key& key) {
    check_schema(s);
    check_row_key(s, key, is_dummy::no);
-    auto i = _rows.find(key, rows_entry::tri_compare(s));
-    if (i == _rows.end()) {
-        auto e = alloc_strategy_unique_ptr<rows_entry>(
-            current_allocator().construct<rows_entry>(key));
-        i = _rows.insert_before_hint(i, std::move(e), rows_entry::tri_compare(s)).first;
+    
+    if (use_single_row_storage(s)) {
+        // Single-row storage: create row if it doesn't exist
+        auto& row_opt = get_single_row_storage();
+        if (!row_opt) {
+            row_opt = deletable_row();
+        }
+        return *row_opt;
+    } else {
+        // Multi-row storage: find or insert in B-tree
+        auto i = get_rows_storage().find(key, rows_entry::tri_compare(s));
+        if (i == get_rows_storage().end()) {
+            auto e = alloc_strategy_unique_ptr<rows_entry>(
+                current_allocator().construct<rows_entry>(key));
+            i = get_rows_storage().insert_before_hint(i, std::move(e), rows_entry::tri_compare(s)).first;
+        }
+        return i->row();
    }
-    return i->row();
 }

 deletable_row&
 mutation_partition::clustered_row(const schema& s, clustering_key_view key) {
    check_schema(s);
    check_row_key(s, key, is_dummy::no);
-    auto i = _rows.find(key, rows_entry::tri_compare(s));
-    if (i == _rows.end()) {
-        auto e = alloc_strategy_unique_ptr<rows_entry>(
-            current_allocator().construct<rows_entry>(key));
-        i = _rows.insert_before_hint(i, std::move(e), rows_entry::tri_compare(s)).first;
+    
+    if (use_single_row_storage(s)) {
+        // Single-row storage: create row if it doesn't exist
+        auto& row_opt = get_single_row_storage();
+        if (!row_opt) {
+            row_opt = deletable_row();
+        }
+        return *row_opt;
+    } else {
+        // Multi-row storage: find or insert in B-tree
+        auto i = get_rows_storage().find(key, rows_entry::tri_compare(s));
+        if (i == get_rows_storage().end()) {
+            auto e = alloc_strategy_unique_ptr<rows_entry>(
+                current_allocator().construct<rows_entry>(key));
+            i = get_rows_storage().insert_before_hint(i, std::move(e), rows_entry::tri_compare(s)).first;
+        }
+        return i->row();
    }
-    return i->row();
 }

 rows_entry&
 mutation_partition::clustered_rows_entry(const schema& s, position_in_partition_view pos, is_dummy dummy, is_continuous continuous) {
    check_schema(s);
    check_row_key(s, pos, dummy);
-    auto i = _rows.find(pos, rows_entry::tri_compare(s));
-    if (i == _rows.end()) {
+    
+    if (use_single_row_storage(s)) {
+        // Single-row storage doesn't use rows_entry - this shouldn't be called
+        on_internal_error(mplog, "mutation_partition::clustered_rows_entry() called with single-row storage");
+    }
+    
+    auto i = get_rows_storage().find(pos, rows_entry::tri_compare(s));
+    if (i == get_rows_storage().end()) {
        auto e = alloc_strategy_unique_ptr<rows_entry>(
            current_allocator().construct<rows_entry>(s, pos, dummy, continuous));
-        i = _rows.insert_before_hint(i, std::move(e), rows_entry::tri_compare(s)).first;
+        i = get_rows_storage().insert_before_hint(i, std::move(e), rows_entry::tri_compare(s)).first;
    }
    return *i;
 }

 deletable_row&
 mutation_partition::clustered_row(const schema& s, position_in_partition_view pos, is_dummy dummy, is_continuous continuous) {
-    return clustered_rows_entry(s, pos, dummy, continuous).row();
+    if (use_single_row_storage(s)) {
+        // Single-row storage: ignore dummy/continuous flags, just get/create the row
+        check_row_key(s, pos, dummy);
+        auto& row_opt = get_single_row_storage();
+        if (!row_opt) {
+            row_opt = deletable_row();
+        }
+        return *row_opt;
+    } else {
+        return clustered_rows_entry(s, pos, dummy, continuous).row();
+    }
 }

 deletable_row&
 mutation_partition::append_clustered_row(const schema& s, position_in_partition_view pos, is_dummy dummy, is_continuous continuous) {
    check_schema(s);
    check_row_key(s, pos, dummy);
+    
+    if (use_single_row_storage(s)) {
+        // Single-row storage: just create/get the row
+        auto& row_opt = get_single_row_storage();
+        if (!row_opt) {
+            row_opt = deletable_row();
+        }
+        return *row_opt;
+    }
+    
    const auto cmp = rows_entry::tri_compare(s);
-    auto i = _rows.end();
-    if (!_rows.empty() && (cmp(*std::prev(i), pos) >= 0)) {
+    auto i = get_rows_storage().end();
+    if (!get_rows_storage().empty() && (cmp(*std::prev(i), pos) >= 0)) {
        on_internal_error(mplog, format("mutation_partition::append_clustered_row(): cannot append clustering row with key {} to the partition"
                ", last clustering row is equal or greater: {}", pos, std::prev(i)->position()));
    }
    auto e = alloc_strategy_unique_ptr<rows_entry>(current_allocator().construct<rows_entry>(s, pos, dummy, continuous));
-    i = _rows.insert_before_hint(i, std::move(e), cmp).first;
+    i = get_rows_storage().insert_before_hint(i, std::move(e), cmp).first;

    return i->row();
 }
@@ -602,19 +748,33 @@ mutation_partition::append_clustered_row(const schema& s, position_in_partition_
 mutation_partition::rows_type::const_iterator
 mutation_partition::lower_bound(const schema& schema, const query::clustering_range& r) const {
    check_schema(schema);
-    if (!r.start()) {
-        return std::cbegin(_rows);
+    
+    if (use_single_row_storage(schema)) {
+        // Single-row storage: always return end iterator (empty range)
+        static const rows_type empty_rows;
+        return empty_rows.end();
    }
-    return _rows.lower_bound(position_in_partition_view::for_range_start(r), rows_entry::tri_compare(schema));
+    
+    if (!r.start()) {
+        return std::cbegin(get_rows_storage());
+    }
+    return get_rows_storage().lower_bound(position_in_partition_view::for_range_start(r), rows_entry::tri_compare(schema));
 }

 mutation_partition::rows_type::const_iterator
 mutation_partition::upper_bound(const schema& schema, const query::clustering_range& r) const {
    check_schema(schema);
-    if (!r.end()) {
-        return std::cend(_rows);
+    
+    if (use_single_row_storage(schema)) {
+        // Single-row storage: always return end iterator (empty range)
+        static const rows_type empty_rows;
+        return empty_rows.end();
    }
-    return _rows.lower_bound(position_in_partition_view::for_range_end(r), rows_entry::tri_compare(schema));
+    
+    if (!r.end()) {
+        return std::cend(get_rows_storage());
+    }
+    return get_rows_storage().lower_bound(position_in_partition_view::for_range_end(r), rows_entry::tri_compare(schema));
 }

 std::ranges::subrange<mutation_partition::rows_type::const_iterator>
@@ -625,17 +785,32 @@ mutation_partition::range(const schema& schema, const query::clustering_range& r

 std::ranges::subrange<mutation_partition::rows_type::iterator>
 mutation_partition::range(const schema& schema, const query::clustering_range& r) {
-    return unconst(_rows, static_cast<const mutation_partition*>(this)->range(schema, r));
+    if (use_single_row_storage(schema)) {
+        // Single-row storage: return empty range (rows_entry iteration not applicable)
+        static rows_type empty_rows;
+        return std::ranges::subrange(empty_rows.begin(), empty_rows.end());
+    }
+    return unconst(get_rows_storage(), static_cast<const mutation_partition*>(this)->range(schema, r));
 }

 mutation_partition::rows_type::iterator
 mutation_partition::lower_bound(const schema& schema, const query::clustering_range& r) {
-    return unconst(_rows, static_cast<const mutation_partition*>(this)->lower_bound(schema, r));
+    if (use_single_row_storage(schema)) {
+        // Single-row storage: return end iterator (empty range)
+        static rows_type empty_rows;
+        return empty_rows.end();
+    }
+    return unconst(get_rows_storage(), static_cast<const mutation_partition*>(this)->lower_bound(schema, r));
 }

 mutation_partition::rows_type::iterator
 mutation_partition::upper_bound(const schema& schema, const query::clustering_range& r) {
-    return unconst(_rows, static_cast<const mutation_partition*>(this)->upper_bound(schema, r));
+    if (use_single_row_storage(schema)) {
+        // Single-row storage: return end iterator (empty range)
+        static rows_type empty_rows;
+        return empty_rows.end();
+    }
+    return unconst(get_rows_storage(), static_cast<const mutation_partition*>(this)->upper_bound(schema, r));
 }

 template<typename Func>
@@ -1377,7 +1552,15 @@ bool mutation_partition::empty() const
    if (_tombstone.timestamp != api::missing_timestamp) {
        return false;
    }
-    return !_static_row.size() && _rows.empty() && _row_tombstones.empty();
+    if (_static_row.size() || !_row_tombstones.empty()) {
+        return false;
+    }
+    
+    if (uses_single_row_storage()) {
+        return !get_single_row_storage().has_value();
+    } else {
+        return get_rows_storage().empty();
+    }
 }

 bool
@@ -1422,7 +1605,11 @@ mutation_partition::live_row_count(const schema& s, gc_clock::time_point query_t

 uint64_t
 mutation_partition::row_count() const {
-    return _rows.calculate_size();
+    if (uses_single_row_storage()) {
+        return get_single_row_storage().has_value() ? 1 : 0;
+    } else {
+        return get_rows_storage().calculate_size();
+    }
 }

 rows_entry::rows_entry(rows_entry&& o) noexcept
@@ -2219,15 +2406,22 @@ public:
 mutation_partition::mutation_partition(mutation_partition::incomplete_tag, const schema& s, tombstone t)
    : _tombstone(t)
    , _static_row_continuous(!s.has_static_columns())
-    , _rows()
+    , _rows(use_single_row_storage(s) ? 
+        rows_storage_type(std::optional<deletable_row>{}) : 
+        rows_storage_type(rows_type{}))
    , _row_tombstones(s)
 #ifdef SEASTAR_DEBUG
    , _schema_version(s.version())
 #endif
 {
-    auto e = alloc_strategy_unique_ptr<rows_entry>(
-            current_allocator().construct<rows_entry>(s, rows_entry::last_dummy_tag(), is_continuous::no));
-    _rows.insert_before(_rows.end(), std::move(e));
+    if (use_single_row_storage(s)) {
+        // Single-row storage: no dummy entries needed, leave row as empty optional
+    } else {
+        // Multi-row storage: add last dummy entry for discontinuous partition
+        auto e = alloc_strategy_unique_ptr<rows_entry>(
+                current_allocator().construct<rows_entry>(s, rows_entry::last_dummy_tag(), is_continuous::no));
+        get_rows_storage().insert_before(get_rows_storage().end(), std::move(e));
+    }
 }

 bool mutation_partition::is_fully_continuous() const {
--- a/mutation/mutation_partition.hh
+++ b/mutation/mutation_partition.hh
@@ -9,6 +9,7 @@
 #pragma once

 #include <iosfwd>
+#include <variant>
 #include <boost/intrusive/parent_from_member.hpp>

 #include <seastar/util/optimized_optional.hh>
@@ -1188,6 +1189,12 @@ inline void check_row_key(const schema& s, position_in_partition_view pos, is_du
    }
 }

+// Returns true if the schema has no clustering keys, meaning partitions can have at most one row.
+// When true, mutation_partition uses std::optional<deletable_row> instead of full rows_type container.
+inline bool use_single_row_storage(const schema& s) {
+    return s.clustering_key_size() == 0;
+}
+
 // Represents a set of writes made to a single partition.
 //
 // The object is schema-dependent. Each instance is governed by some
@@ -1228,20 +1235,45 @@ inline void check_row_key(const schema& s, position_in_partition_view pos, is_du
 class mutation_partition final {
 public:
    using rows_type = rows_entry::container_type;
+    using rows_storage_type = std::variant<rows_type, std::optional<deletable_row>>;
    friend class size_calculator;
 private:
    tombstone _tombstone;
    lazy_row _static_row;
    bool _static_row_continuous = true;
-    rows_type _rows;
+    rows_storage_type _rows;
    // Contains only strict prefixes so that we don't have to lookup full keys
    // in both _row_tombstones and _rows.
+    // Note: empty when using single-row storage (std::optional<deletable_row> variant)
    range_tombstone_list _row_tombstones;
 #ifdef SEASTAR_DEBUG
    table_schema_version _schema_version;
 #endif

    friend class converting_mutation_partition_applier;
+
+    // Returns true if this partition uses single-row storage
+    bool uses_single_row_storage() const {
+        return std::holds_alternative<std::optional<deletable_row>>(_rows);
+    }
+
+    // Get reference to rows container (multi-row storage)
+    rows_type& get_rows_storage() {
+        return std::get<rows_type>(_rows);
+    }
+
+    const rows_type& get_rows_storage() const {
+        return std::get<rows_type>(_rows);
+    }
+
+    // Get reference to single row storage
+    std::optional<deletable_row>& get_single_row_storage() {
+        return std::get<std::optional<deletable_row>>(_rows);
+    }
+
+    const std::optional<deletable_row>& get_single_row_storage() const {
+        return std::get<std::optional<deletable_row>>(_rows);
+    }
 public:
    struct copy_comparators_only {};
    struct incomplete_tag {};
@@ -1251,14 +1283,14 @@ public:
        return mutation_partition(incomplete_tag(), s, t);
    }
    mutation_partition(const schema& s)
-        : _rows()
+        : _rows(use_single_row_storage(s) ? rows_storage_type(std::optional<deletable_row>{}) : rows_storage_type(rows_type{}))
        , _row_tombstones(s)
 #ifdef SEASTAR_DEBUG
        , _schema_version(s.version())
 #endif
    { }
    mutation_partition(mutation_partition& other, copy_comparators_only)
-        : _rows()
+        : _rows(other._rows.index() == 0 ? rows_storage_type(rows_type{}) : rows_storage_type(std::optional<deletable_row>{}))
        , _row_tombstones(other._row_tombstones, range_tombstone_list::copy_comparator_only())
 #ifdef SEASTAR_DEBUG
        , _schema_version(other._schema_version)
@@ -1269,6 +1301,8 @@ public:
    mutation_partition(const mutation_partition&, const schema&, query::clustering_key_filter_ranges);
    mutation_partition(mutation_partition&&, const schema&, query::clustering_key_filter_ranges);
    ~mutation_partition();
+    // Returns the mutation_partition containing the given rows_type.
+    // Can only be used when the mutation_partition uses multi-row storage.
    static mutation_partition& container_of(rows_type&);
    mutation_partition& operator=(mutation_partition&& x) noexcept;
    bool equal(const schema&, const mutation_partition&) const;
@@ -1462,9 +1496,31 @@ public:
    const lazy_row& static_row() const { return _static_row; }

    // return a set of rows_entry where each entry represents a CQL row sharing the same clustering key.
-    const rows_type& clustered_rows() const noexcept { return _rows; }
-    utils::immutable_collection<rows_type> clustered_rows() noexcept { return _rows; }
-    rows_type& mutable_clustered_rows() noexcept { return _rows; }
+    // For single-row storage (clustering_key_size() == 0), returns an empty container.
+    // Callers should check uses_single_row_storage() and use get_single_row() for single-row case.
+    const rows_type& clustered_rows() const noexcept { 
+        if (uses_single_row_storage()) {
+            static const rows_type empty_rows;
+            return empty_rows;
+        }
+        return get_rows_storage(); 
+    }
+    utils::immutable_collection<rows_type> clustered_rows() noexcept { 
+        return const_cast<const mutation_partition*>(this)->clustered_rows();
+    }
+    rows_type& mutable_clustered_rows() noexcept { 
+        // Should only be called when NOT using single-row storage
+        return get_rows_storage(); 
+    }
+
+    // Access the single row when using single-row storage (clustering_key_size() == 0)
+    const std::optional<deletable_row>& get_single_row() const {
+        return get_single_row_storage();
+    }
+
+    std::optional<deletable_row>& get_single_row() {
+        return get_single_row_storage();
+    }

    const range_tombstone_list& row_tombstones() const noexcept { return _row_tombstones; }
    utils::immutable_collection<range_tombstone_list> row_tombstones() noexcept { return _row_tombstones; }
@@ -1482,8 +1538,14 @@ public:
    rows_type::iterator upper_bound(const schema& schema, const query::clustering_range& r);
    std::ranges::subrange<rows_type::iterator> range(const schema& schema, const query::clustering_range& r);
    // Returns an iterator range of rows_entry, with only non-dummy entries.
+    // For single-row storage, returns an empty range.
    auto non_dummy_rows() const {
-        return std::ranges::subrange(_rows.begin(), _rows.end())
+        if (uses_single_row_storage()) {
+            static const rows_type empty_rows;
+            return std::ranges::subrange(empty_rows.begin(), empty_rows.end())
+                | std::views::filter([] (const rows_entry& e) { return bool(!e.dummy()); });
+        }
+        return std::ranges::subrange(get_rows_storage().begin(), get_rows_storage().end())
            | std::views::filter([] (const rows_entry& e) { return bool(!e.dummy()); });
    }
    void accept(const schema&, mutation_partition_visitor&) const;
@@ -1517,7 +1579,21 @@ private:

 inline
 mutation_partition& mutation_partition::container_of(rows_type& rows) {
-    return *boost::intrusive::get_parent_from_member(&rows, &mutation_partition::_rows);
+    // This method can only be called when using multi-row storage (rows_type variant alternative).
+    // NOTE(review): the C++ standard does not specify where std::variant keeps its active alternative;
+    // the cast below assumes the standard library places alternative 0 at offset 0 of the variant object.
+    
+    // Calculate offset from rows_type to the containing variant
+    // The rows reference should be the active rows_type inside the variant
+    static_assert(std::is_same_v<std::variant_alternative_t<0, rows_storage_type>, rows_type>,
+                  "rows_type must be the first alternative in rows_storage_type");
+    
+    // Get address of the variant containing this rows_type
+    // Assumes rows_type (index 0), when active, sits at offset 0 of the variant's storage - TODO confirm per toolchain
+    rows_storage_type* variant_ptr = reinterpret_cast<rows_storage_type*>(&rows);
+    
+    // Now get the mutation_partition from the variant
+    return *boost::intrusive::get_parent_from_member(variant_ptr, &mutation_partition::_rows);
 }

 bool has_any_live_data(const schema& s, column_kind kind, const row& cells, tombstone tomb = tombstone(),
--- a/raft/fsm.cc
+++ b/raft/fsm.cc
@@ -176,7 +176,7 @@ void fsm::become_leader() {

    _last_election_time = _clock.now();
    _ping_leader = false;
-    // a new leader needs to commit at lease one entry to make sure that
+    // a new leader needs to commit at least one entry to make sure that
    // all existing entries in its log are committed as well. Also it should
    // send append entries RPC as soon as possible to establish its leadership
    // (3.4). Do both of those by committing a dummy entry.
--- a/repair/row_level.cc
+++ b/repair/row_level.cc
@@ -1195,6 +1195,8 @@ private:
            rlogger.info("{}", msg);
            throw std::runtime_error(msg);
        }
+
+        co_await utils::get_local_injector().inject("incremental_repair_prepare_wait", utils::wait_for_message(60s));
        auto reenablers_and_holders = co_await table.get_compaction_reenablers_and_lock_holders_for_repair(_db.local(), _frozen_topology_guard, _range);
        for (auto& lock_holder : reenablers_and_holders.lock_holders) {
            _rs._repair_compaction_locks[_frozen_topology_guard].push_back(std::move(lock_holder));
--- a/replica/compaction_group.hh
+++ b/replica/compaction_group.hh
@@ -84,6 +84,10 @@ class compaction_group {
    seastar::named_gate _async_gate;
    // Gates flushes.
    seastar::named_gate _flush_gate;
+    // Gates sstable being added to the group.
+    // This prevents the group from being considered empty when sstables are being added.
+    // Crucial for tablet split which ACKs split for a table when all pre-split groups are empty.
+    seastar::named_gate _sstable_add_gate;
    bool _tombstone_gc_enabled = true;
    std::optional<compaction::compaction_backlog_tracker> _backlog_tracker;
    repair_classifier_func _repair_sstable_classifier;
@@ -248,6 +252,10 @@ public:
        return _flush_gate;
    }

+    seastar::named_gate& sstable_add_gate() noexcept {
+        return _sstable_add_gate;
+    }
+
    compaction::compaction_manager& get_compaction_manager() noexcept;
    const compaction::compaction_manager& get_compaction_manager() const noexcept;

@@ -434,7 +442,7 @@ public:
    virtual bool all_storage_groups_split() = 0;
    virtual future<> split_all_storage_groups(tasks::task_info tablet_split_task_info) = 0;
    virtual future<> maybe_split_compaction_group_of(size_t idx) = 0;
-    virtual future<std::vector<sstables::shared_sstable>> maybe_split_sstable(const sstables::shared_sstable& sst) = 0;
+    virtual future<std::vector<sstables::shared_sstable>> maybe_split_new_sstable(const sstables::shared_sstable& sst) = 0;
    virtual dht::token_range get_token_range_after_split(const dht::token&) const noexcept = 0;

    virtual lw_shared_ptr<sstables::sstable_set> make_sstable_set() const = 0;
--- a/replica/database.hh
+++ b/replica/database.hh
@@ -604,9 +604,28 @@ public:

    data_dictionary::table as_data_dictionary() const;

+    // The usage of these functions is restricted to preexisting sstables that aren't being
+    // moved anywhere, so should never be used in the context of file streaming and intra
+    // node migration. The only user today is distributed loader, which populates the
+    // sstables for each column family on boot.
    future<> add_sstable_and_update_cache(sstables::shared_sstable sst,
                                          sstables::offstrategy offstrategy = sstables::offstrategy::no);
    future<> add_sstables_and_update_cache(const std::vector<sstables::shared_sstable>& ssts);
+
+    // Restricted to new sstables produced by external processes such as repair.
+    // The sstable might undergo split if table is in split mode.
+    // If no need for split, the input sstable will only be attached to the sstable set.
+    // If split happens, the output sstables will be attached and the input sstable unlinked.
+    // On failure, the input sstable is unlinked and exception propagated to the caller.
+    // The on_add callback will be called on all sstables to be added into the set.
+    [[nodiscard]] future<std::vector<sstables::shared_sstable>>
+    add_new_sstable_and_update_cache(sstables::shared_sstable new_sst,
+                                     std::function<future<>(sstables::shared_sstable)> on_add,
+                                     sstables::offstrategy offstrategy = sstables::offstrategy::no);
+    [[nodiscard]] future<std::vector<sstables::shared_sstable>>
+    add_new_sstables_and_update_cache(std::vector<sstables::shared_sstable> new_ssts,
+                                      std::function<future<>(sstables::shared_sstable)> on_add);
+
    future<> move_sstables_from_staging(std::vector<sstables::shared_sstable>);
    sstables::shared_sstable make_sstable();
    void set_truncation_time(db_clock::time_point truncated_at) noexcept {
@@ -724,7 +743,9 @@ private:
        return _config.enable_cache && _schema->caching_options().enabled();
    }
    void update_stats_for_new_sstable(const sstables::shared_sstable& sst) noexcept;
-    future<> do_add_sstable_and_update_cache(compaction_group& cg, sstables::shared_sstable sst, sstables::offstrategy, bool trigger_compaction);
+    // This function can throw even if the sstable was added into the set. When the sstable was successfully
+    // added, the sstable ptr @sst will be set to nullptr, allowing the caller to optionally discard the sstable.
+    future<> do_add_sstable_and_update_cache(compaction_group& cg, sstables::shared_sstable& sst, sstables::offstrategy, bool trigger_compaction);
    future<> do_add_sstable_and_update_cache(sstables::shared_sstable sst, sstables::offstrategy offstrategy, bool trigger_compaction);
    // Helpers which add sstable on behalf of a compaction group and refreshes compound set.
    void add_sstable(compaction_group& cg, sstables::shared_sstable sstable);
@@ -1358,7 +1379,8 @@ public:

    // Clones storage of a given tablet. Memtable is flushed first to guarantee that the
    // snapshot (list of sstables) will include all the data written up to the time it was taken.
-    future<utils::chunked_vector<sstables::entry_descriptor>> clone_tablet_storage(locator::tablet_id tid);
+    // If leave_unsealed is set, all the destination sstables will be left unsealed.
+    future<utils::chunked_vector<sstables::entry_descriptor>> clone_tablet_storage(locator::tablet_id tid, bool leave_unsealed);

    friend class compaction_group;
    friend class compaction::compaction_task_impl;
--- a/replica/table.cc
+++ b/replica/table.cc
@@ -721,7 +721,7 @@ public:
    bool all_storage_groups_split() override { return true; }
    future<> split_all_storage_groups(tasks::task_info tablet_split_task_info) override { return make_ready_future(); }
    future<> maybe_split_compaction_group_of(size_t idx) override { return make_ready_future(); }
-    future<std::vector<sstables::shared_sstable>> maybe_split_sstable(const sstables::shared_sstable& sst) override {
+    future<std::vector<sstables::shared_sstable>> maybe_split_new_sstable(const sstables::shared_sstable& sst) override {
        return make_ready_future<std::vector<sstables::shared_sstable>>(std::vector<sstables::shared_sstable>{sst});
    }
    dht::token_range get_token_range_after_split(const dht::token&) const noexcept override { return dht::token_range(); }
@@ -879,7 +879,7 @@ public:
    bool all_storage_groups_split() override;
    future<> split_all_storage_groups(tasks::task_info tablet_split_task_info) override;
    future<> maybe_split_compaction_group_of(size_t idx) override;
-    future<std::vector<sstables::shared_sstable>> maybe_split_sstable(const sstables::shared_sstable& sst) override;
+    future<std::vector<sstables::shared_sstable>> maybe_split_new_sstable(const sstables::shared_sstable& sst) override;
    dht::token_range get_token_range_after_split(const dht::token& token) const noexcept override {
        return tablet_map().get_token_range_after_split(token);
    }
@@ -1130,7 +1130,8 @@ future<> tablet_storage_group_manager::maybe_split_compaction_group_of(size_t id
 }

 future<std::vector<sstables::shared_sstable>>
-tablet_storage_group_manager::maybe_split_sstable(const sstables::shared_sstable& sst) {
+tablet_storage_group_manager::maybe_split_new_sstable(const sstables::shared_sstable& sst) {
+    co_await utils::get_local_injector().inject("maybe_split_new_sstable_wait", utils::wait_for_message(120s));
    if (!tablet_map().needs_split()) {
        co_return std::vector<sstables::shared_sstable>{sst};
    }
@@ -1138,8 +1139,7 @@ tablet_storage_group_manager::maybe_split_sstable(const sstables::shared_sstable
    auto& cg = compaction_group_for_sstable(sst);
    auto holder = cg.async_gate().hold();
    auto& view = cg.view_for_sstable(sst);
-    auto lock_holder = co_await _t.get_compaction_manager().get_incremental_repair_read_lock(view, "maybe_split_sstable");
-    co_return co_await _t.get_compaction_manager().maybe_split_sstable(sst, view, co_await split_compaction_options());
+    co_return co_await _t.get_compaction_manager().maybe_split_new_sstable(sst, view, co_await split_compaction_options());
 }

 future<> table::maybe_split_compaction_group_of(locator::tablet_id tablet_id) {
@@ -1149,7 +1149,7 @@ future<> table::maybe_split_compaction_group_of(locator::tablet_id tablet_id) {

 future<std::vector<sstables::shared_sstable>> table::maybe_split_new_sstable(const sstables::shared_sstable& sst) {
    auto holder = async_gate().hold();
-    co_return co_await _sg_manager->maybe_split_sstable(sst);
+    co_return co_await _sg_manager->maybe_split_new_sstable(sst);
 }

 dht::token_range table::get_token_range_after_split(const dht::token& token) const noexcept {
@@ -1330,7 +1330,7 @@ future<utils::chunked_vector<sstables::shared_sstable>> table::take_sstable_set_
 }

 future<utils::chunked_vector<sstables::entry_descriptor>>
-table::clone_tablet_storage(locator::tablet_id tid) {
+table::clone_tablet_storage(locator::tablet_id tid, bool leave_unsealed) {
    utils::chunked_vector<sstables::entry_descriptor> ret;
    auto holder = async_gate().hold();

@@ -1342,7 +1342,7 @@ table::clone_tablet_storage(locator::tablet_id tid) {
    // by compaction while we are waiting for the lock.
    auto deletion_guard = co_await get_sstable_list_permit();
    co_await sg.make_sstable_set()->for_each_sstable_gently([&] (const sstables::shared_sstable& sst) -> future<> {
-        ret.push_back(co_await sst->clone(calculate_generation_for_new_table()));
+        ret.push_back(co_await sst->clone(calculate_generation_for_new_table(), leave_unsealed));
    });
    co_return ret;
 }
@@ -1354,10 +1354,10 @@ void table::update_stats_for_new_sstable(const sstables::shared_sstable& sst) no
 }

 future<>
-table::do_add_sstable_and_update_cache(compaction_group& cg, sstables::shared_sstable sst, sstables::offstrategy offstrategy,
+table::do_add_sstable_and_update_cache(compaction_group& cg, sstables::shared_sstable& sst, sstables::offstrategy offstrategy,
                                       bool trigger_compaction) {
    auto permit = co_await seastar::get_units(_sstable_set_mutation_sem, 1);
-    co_return co_await get_row_cache().invalidate(row_cache::external_updater([&] () noexcept {
+    co_return co_await get_row_cache().invalidate(row_cache::external_updater([&] () mutable noexcept {
        // FIXME: this is not really noexcept, but we need to provide strong exception guarantees.
        // atomically load all opened sstables into column family.
        if (!offstrategy) {
@@ -1369,6 +1369,8 @@ table::do_add_sstable_and_update_cache(compaction_group& cg, sstables::shared_ss
        if (trigger_compaction) {
            try_trigger_compaction(cg);
        }
+        // Resetting sstable ptr to inform the caller the sstable has been loaded successfully.
+        sst = nullptr;
    }), dht::partition_range::make({sst->get_first_decorated_key(), true}, {sst->get_last_decorated_key(), true}), [sst, schema = _schema] (const dht::decorated_key& key) {
        return sst->filter_has_key(sstables::key::from_partition_key(*schema, key.key()));
    });
@@ -1376,12 +1378,10 @@ table::do_add_sstable_and_update_cache(compaction_group& cg, sstables::shared_ss

 future<>
 table::do_add_sstable_and_update_cache(sstables::shared_sstable new_sst, sstables::offstrategy offstrategy, bool trigger_compaction) {
-    for (auto sst : co_await maybe_split_new_sstable(new_sst)) {
-        auto& cg = compaction_group_for_sstable(sst);
-        // Hold gate to make share compaction group is alive.
-        auto holder = cg.async_gate().hold();
-        co_await do_add_sstable_and_update_cache(cg, std::move(sst), offstrategy, trigger_compaction);
-    }
+    auto& cg = compaction_group_for_sstable(new_sst);
+    // Hold gate to make sure compaction group is alive.
+    auto holder = cg.async_gate().hold();
+    co_await do_add_sstable_and_update_cache(cg, new_sst, offstrategy, trigger_compaction);
 }

 future<>
@@ -1399,6 +1399,85 @@ table::add_sstables_and_update_cache(const std::vector<sstables::shared_sstable>
    trigger_compaction();
 }

+future<std::vector<sstables::shared_sstable>>
+table::add_new_sstable_and_update_cache(sstables::shared_sstable new_sst,
+                                        std::function<future<>(sstables::shared_sstable)> on_add,
+                                        sstables::offstrategy offstrategy) {
+    std::vector<sstables::shared_sstable> ret, ssts;
+    std::exception_ptr ex;
+    try {
+        bool trigger_compaction = offstrategy == sstables::offstrategy::no;
+        auto& cg = compaction_group_for_sstable(new_sst);
+        // This prevents compaction group from being considered empty until the holder is released.
+        // Helpful for tablet split, where split is acked for a table when all pre-split groups are empty.
+        auto sstable_add_holder = cg.sstable_add_gate().hold();
+
+        ret = ssts = co_await maybe_split_new_sstable(new_sst);
+        // on successful split, input sstable is unlinked.
+        new_sst = nullptr;
+        for (auto& sst : ssts) {
+            auto& cg = compaction_group_for_sstable(sst);
+            // Hold gate to make sure compaction group is alive.
+            auto holder = cg.async_gate().hold();
+            co_await on_add(sst);
+            // If do_add_sstable_and_update_cache() throws after sstable has been loaded, the pointer
+            // sst passed by reference will be set to nullptr, so it won't be unlinked in the exception
+            // handler below.
+            co_await do_add_sstable_and_update_cache(cg, sst, offstrategy, trigger_compaction);
+            sst = nullptr;
+        }
+    } catch (...) {
+        ex = std::current_exception();
+    }
+
+    if (ex) {
+        // on failed split, input sstable is unlinked here.
+        if (new_sst) {
+            tlogger.error("Failed to load SSTable {} of origin {} due to {}, it will be unlinked...", new_sst->get_filename(), new_sst->get_origin(), ex);
+            co_await new_sst->unlink();
+        }
+        // on failure after successful split, sstables not attached yet will be unlinked
+        co_await coroutine::parallel_for_each(ssts, [&ex] (sstables::shared_sstable sst) -> future<> {
+            if (sst) {
+                tlogger.error("Failed to load SSTable {} of origin {} due to {}, it will be unlinked...", sst->get_filename(), sst->get_origin(), ex);
+                co_await sst->unlink();
+            }
+        });
+        co_await coroutine::return_exception_ptr(std::move(ex));
+    }
+    co_return std::move(ret);
+}
+
+future<std::vector<sstables::shared_sstable>>
+table::add_new_sstables_and_update_cache(std::vector<sstables::shared_sstable> new_ssts,
+                                         std::function<future<>(sstables::shared_sstable)> on_add) {
+    std::exception_ptr ex;
+    std::vector<sstables::shared_sstable> ret;
+
+    // We rely on add_new_sstable_and_update_cache() to unlink the sstable fed into it,
+    // so the exception handling below will only have to unlink sstables not processed yet.
+    try {
+        for (auto& sst: new_ssts) {
+            auto ssts = co_await add_new_sstable_and_update_cache(std::exchange(sst, nullptr), on_add);
+            std::ranges::move(ssts, std::back_inserter(ret));
+
+        }
+    } catch (...) {
+        ex = std::current_exception();
+    }
+
+    if (ex) {
+        co_await coroutine::parallel_for_each(new_ssts, [&ex] (sstables::shared_sstable sst) -> future<> {
+            if (sst) {
+                tlogger.error("Failed to load SSTable {} of origin {} due to {}, it will be unlinked...", sst->get_filename(), sst->get_origin(), ex);
+                co_await sst->unlink();
+            }
+        });
+        co_await coroutine::return_exception_ptr(std::move(ex));
+    }
+    co_return std::move(ret);
+}
+
 future<>
 table::update_cache(compaction_group& cg, lw_shared_ptr<memtable> m, std::vector<sstables::shared_sstable> ssts) {
    auto permit = co_await seastar::get_units(_sstable_set_mutation_sem, 1);
@@ -2612,8 +2691,8 @@ public:
    sstables::sstables_manager& get_sstables_manager() noexcept override {
        return _t.get_sstables_manager();
    }
-    sstables::shared_sstable make_sstable() const override {
-        return _t.make_sstable();
+    sstables::shared_sstable make_sstable(sstables::sstable_state state) const override {
+        return _t.make_sstable(state);
    }
    sstables::sstable_writer_config configure_writer(sstring origin) const override {
        auto cfg = _t.get_sstables_manager().configure_writer(std::move(origin));
@@ -2731,6 +2810,7 @@ future<> compaction_group::stop(sstring reason) noexcept {
    auto flush_future = co_await seastar::coroutine::as_future(flush());

    co_await _flush_gate.close();
+    co_await _sstable_add_gate.close();
  // FIXME: indentation
  _compaction_disabler_for_views.clear();
  co_await utils::get_local_injector().inject("compaction_group_stop_wait", utils::wait_for_message(60s));
@@ -2744,7 +2824,7 @@ future<> compaction_group::stop(sstring reason) noexcept {
 }

 bool compaction_group::empty() const noexcept {
-    return _memtables->empty() && live_sstable_count() == 0;
+    return _memtables->empty() && live_sstable_count() == 0 && _sstable_add_gate.get_count() == 0;
 }

 const schema_ptr& compaction_group::schema() const {
@@ -3200,7 +3280,7 @@ db::replay_position table::highest_flushed_replay_position() const {
 }

 struct manifest_json : public json::json_base {
-    json::json_chunked_list<sstring> files;
+    json::json_chunked_list<std::string_view> files;

    manifest_json() {
        register_params();
@@ -3224,7 +3304,7 @@ table::seal_snapshot(sstring jsondir, std::vector<snapshot_file_set> file_sets)
    manifest_json manifest;
    for (const auto& fsp : file_sets) {
        for (auto& rf : *fsp) {
-            manifest.files.push(std::move(rf));
+            manifest.files.push(std::string_view(rf));
        }
    }
    auto streamer = json::stream_object(std::move(manifest));
@@ -3385,16 +3465,15 @@ future<std::unordered_map<sstring, table::snapshot_details>> table::get_snapshot
                continue;
            }

-            lister::scan_dir(snapshots_dir,  lister::dir_entry_types::of<directory_entry_type::directory>(), [datadir, &all_snapshots] (fs::path snapshots_dir, directory_entry de) {
-                auto snapshot_name = de.name;
+            auto lister = directory_lister(snapshots_dir, lister::dir_entry_types::of<directory_entry_type::directory>());
+            while (auto de = lister.get().get()) {
+                auto snapshot_name = de->name;
                all_snapshots.emplace(snapshot_name, snapshot_details());
-                return get_snapshot_details(snapshots_dir / fs::path(snapshot_name), datadir).then([&all_snapshots, snapshot_name] (auto details) {
-                    auto& sd = all_snapshots.at(snapshot_name);
-                    sd.total += details.total;
-                    sd.live += details.live;
-                    return make_ready_future<>();
-                });
-            }).get();
+                auto details = get_snapshot_details(snapshots_dir / fs::path(snapshot_name), datadir).get();
+                auto& sd = all_snapshots.at(snapshot_name);
+                sd.total += details.total;
+                sd.live += details.live;
+            }
        }
        return all_snapshots;
    });
@@ -3402,38 +3481,61 @@ future<std::unordered_map<sstring, table::snapshot_details>> table::get_snapshot

 future<table::snapshot_details> table::get_snapshot_details(fs::path snapshot_dir, fs::path datadir) {
    table::snapshot_details details{};
+    std::optional<fs::path> staging_dir = snapshot_dir / sstables::staging_dir;
+    if (!co_await file_exists(staging_dir->native())) {
+        staging_dir.reset();
+    }

-    co_await lister::scan_dir(snapshot_dir, lister::dir_entry_types::of<directory_entry_type::regular>(), [datadir, &details] (fs::path snapshot_dir, directory_entry de) -> future<> {
-        auto sd = co_await io_check(file_stat, (snapshot_dir / de.name).native(), follow_symlink::no);
+    auto lister = directory_lister(snapshot_dir, lister::dir_entry_types::of<directory_entry_type::regular>());
+    while (auto de = co_await lister.get()) {
+        const auto& name = de->name;
+        // FIXME: optimize stat calls by keeping the base directory open and use statat instead, here and below.
+        // See https://github.com/scylladb/seastar/pull/3163
+        auto sd = co_await io_check(file_stat, (snapshot_dir / name).native(), follow_symlink::no);
        auto size = sd.allocated_size;

        // The manifest and schema.sql files are the only files expected to be in this directory not belonging to the SSTable.
        //
        // All the others should just generate an exception: there is something wrong, so don't blindly
        // add it to the size.
-        if (de.name != "manifest.json" && de.name != "schema.cql") {
+        if (name != "manifest.json" && name != "schema.cql") {
            details.total += size;
+            if (sd.number_of_links == 1) {
+                // File exists only in the snapshot directory.
+                details.live += size;
+                continue;
+            }
+            // If the number of links is greater than 1, it is still possible that the file is linked to another snapshot.
+            // So check the datadir for the file too.
        } else {
-            size = 0;
+            continue;
        }

-        try {
+        auto exists_in_dir = [&] (fs::path path) -> future<bool> {
+          try {
             // File exists in the main SSTable directory. Snapshots are not contributing to size
-            auto psd = co_await io_check(file_stat, (datadir / de.name).native(), follow_symlink::no);
+            auto psd = co_await io_check(file_stat, path.native(), follow_symlink::no);
             // File in main SSTable directory must be hardlinked to the file in the snapshot dir with the same name.
             if (psd.device_id != sd.device_id || psd.inode_number != sd.inode_number) {
                 dblog.warn("[{} device_id={} inode_number={} size={}] is not the same file as [{} device_id={} inode_number={} size={}]",
-                        (datadir / de.name).native(), psd.device_id, psd.inode_number, psd.size,
-                        (snapshot_dir / de.name).native(), sd.device_id, sd.inode_number, sd.size);
-                details.live += size;
+                        path.native(), psd.device_id, psd.inode_number, psd.size,
+                        (snapshot_dir / name).native(), sd.device_id, sd.inode_number, sd.size);
+                co_return false;
             }
-        } catch (std::system_error& e) {
+            co_return true;
+          } catch (std::system_error& e) {
             if (e.code() != std::error_code(ENOENT, std::system_category())) {
                 throw;
             }
+            co_return false;
+          }
+        };
+        // Check staging dir first, as files might be moved from there to the datadir concurrently to this check
+        if ((!staging_dir || !co_await exists_in_dir(*staging_dir / name)) &&
+                !co_await exists_in_dir(datadir / name)) {
            details.live += size;
        }
-    });
+    }

    co_return details;
 }
--- a/scripts/tablet-mon.py
+++ b/scripts/tablet-mon.py
@@ -390,9 +390,11 @@ dark_green = (195, 215, 195)
 light_red = (255, 200, 200)
 light_green = (200, 255, 200)
 light_gray = (240, 240, 240)
+scylla_blue = (87, 209, 229)

 tablet_colors = {
    (Tablet.STATE_NORMAL, None): GRAY,
+    (Tablet.STATE_NORMAL, 'repair'): scylla_blue,
    (Tablet.STATE_JOINING, 'allow_write_both_read_old'): dark_green,
    (Tablet.STATE_LEAVING, 'allow_write_both_read_old'): dark_red,
    (Tablet.STATE_JOINING, 'write_both_read_old'): dark_green,
@@ -532,6 +534,8 @@ def update_from_cql(initial=False):
                state = (Tablet.STATE_JOINING, tablet.stage)
            elif replica in leaving:
                state = (Tablet.STATE_LEAVING, tablet.stage)
+            elif tablet.stage == 'repair':
+                state = (Tablet.STATE_NORMAL, tablet.stage)
            else:
                state = (Tablet.STATE_NORMAL, None)

--- a/service/client_routes.cc
+++ b/service/client_routes.cc
@@ -82,7 +82,7 @@ seastar::future<> service::client_routes_service::set_client_routes_inner(const
    auto guard = co_await _group0_client.start_operation(_abort_source, service::raft_timeout{});
    utils::chunked_vector<canonical_mutation> cmuts;

-    for (auto& entry : route_entries) {
+    for (const auto& entry : route_entries) {
        auto mut = co_await make_update_client_route_mutation(guard.write_timestamp(), entry);
        cmuts.emplace_back(std::move(mut));
    }
@@ -103,24 +103,24 @@ seastar::future<> service::client_routes_service::delete_client_routes_inner(con
    co_await _group0_client.add_entry(std::move(cmd), std::move(guard), _abort_source);
 }

-seastar::future<> service::client_routes_service::set_client_routes(const std::vector<service::client_routes_service::client_route_entry>& route_entries) {
-    return container().invoke_on(0, [route_entries = std::move(route_entries)] (service::client_routes_service& cr) -> future<> {
-        return cr.with_retry([&] {
+seastar::future<> service::client_routes_service::set_client_routes(std::vector<service::client_routes_service::client_route_entry> route_entries) {
+    return container().invoke_on(0, [route_entries = std::move(route_entries)] (service::client_routes_service& cr) mutable -> future<> {
+        return cr.with_retry([&cr, route_entries = std::move(route_entries)]  {
            return cr.set_client_routes_inner(route_entries);
        });
    });
 }

-seastar::future<> service::client_routes_service::delete_client_routes(const std::vector<service::client_routes_service::client_route_key>& route_keys) {
-    return container().invoke_on(0, [route_keys = std::move(route_keys)] (service::client_routes_service& cr) -> future<> {
-        return cr.with_retry([&] {
+seastar::future<> service::client_routes_service::delete_client_routes(std::vector<service::client_routes_service::client_route_key> route_keys) {
+    return container().invoke_on(0, [route_keys = std::move(route_keys)] (service::client_routes_service& cr) mutable -> future<> {
+        return cr.with_retry([&cr, route_keys = std::move(route_keys)]  {
            return cr.delete_client_routes_inner(route_keys);
        });
    });
 }

 template <typename Func>
-seastar::future<> service::client_routes_service::with_retry(Func&& func) const {
+seastar::future<> service::client_routes_service::with_retry(Func func) const {
    int retries = 10;
    while (true) {
        try {
--- a/service/client_routes.hh
+++ b/service/client_routes.hh
@@ -66,8 +66,8 @@ public:
    future<mutation> make_remove_client_route_mutation(api::timestamp_type ts, const service::client_routes_service::client_route_key& key);
    future<mutation> make_update_client_route_mutation(api::timestamp_type ts, const client_route_entry& entry);
    future<std::vector<client_route_entry>> get_client_routes() const;
-    seastar::future<> set_client_routes(const std::vector<service::client_routes_service::client_route_entry>& route_entries);
-    seastar::future<> delete_client_routes(const std::vector<service::client_routes_service::client_route_key>& route_keys);
+    seastar::future<> set_client_routes(std::vector<service::client_routes_service::client_route_entry> route_entries);
+    seastar::future<> delete_client_routes(std::vector<service::client_routes_service::client_route_key> route_keys);


    // notifications
@@ -76,7 +76,7 @@ private:
    seastar::future<> set_client_routes_inner(const std::vector<service::client_routes_service::client_route_entry>& route_entries);
    seastar::future<> delete_client_routes_inner(const std::vector<service::client_routes_service::client_route_key>& route_keys);
    template <typename Func>
-    seastar::future<> with_retry(Func&& func) const;
+    seastar::future<> with_retry(Func func) const;

    abort_source& _abort_source;
    gms::feature_service& _feature_service;
--- a/service/client_state.cc
+++ b/service/client_state.cc
@@ -224,7 +224,13 @@ future<> service::client_state::has_access(const sstring& ks, auth::command_desc
                ks + " can be granted only SELECT or DESCRIBE permissions to a non-superuser.");
    }

-    if (cmd.resource.kind() == auth::resource_kind::data && cmd.permission == auth::permission::SELECT && is_vector_indexed.has_value() && is_vector_indexed.value()) {
+    static const std::unordered_set<auth::resource> vector_search_system_resources = {
+        auth::make_data_resource(db::system_keyspace::NAME, db::system_keyspace::GROUP0_HISTORY),
+        auth::make_data_resource(db::system_keyspace::NAME, db::system_keyspace::VERSIONS),
+    };
+
+    if ((cmd.resource.kind() == auth::resource_kind::data && cmd.permission == auth::permission::SELECT && is_vector_indexed.has_value() && is_vector_indexed.value()) ||
+        (cmd.permission == auth::permission::SELECT && vector_search_system_resources.contains(cmd.resource))) {

        co_return co_await ensure_has_permission<auth::command_desc_with_permission_set>({auth::permission_set::of<auth::permission::SELECT, auth::permission::VECTOR_SEARCH_INDEXING>(), cmd.resource});

@@ -344,3 +350,17 @@ void service::client_state::update_per_service_level_params(qos::service_level_o

    _workload_type = slo.workload;
 }
+
+future<> service::client_state::set_client_options(
+        client_options_cache_type& keys_and_values_cache,
+        const std::unordered_map<sstring, sstring>& client_options) {
+    for (const auto& [key, value] : client_options) {
+        auto cached_key = co_await keys_and_values_cache.get_or_load(key, [] (const client_options_cache_key_type&) {
+            return make_ready_future<options_cache_value_type>(options_cache_value_type{});
+        });
+        auto cached_value = co_await keys_and_values_cache.get_or_load(value, [] (const client_options_cache_key_type&) {
+            return make_ready_future<options_cache_value_type>(options_cache_value_type{});
+        });
+        _client_options.emplace_back(std::move(cached_key), std::move(cached_value));
+    }
+}
--- a/service/client_state.hh
+++ b/service/client_state.hh
@@ -18,6 +18,7 @@
 #include "auth/authenticated_user.hh"
 #include "auth/authenticator.hh"
 #include "auth/permission.hh"
+#include "client_data.hh"

 #include "transport/cql_protocol_extension.hh"
 #include "service/qos/service_level_controller.hh"
@@ -102,7 +103,8 @@ private:
    private volatile String keyspace;
 #endif
    std::optional<auth::authenticated_user> _user;
-    std::optional<sstring> _driver_name, _driver_version;
+    std::optional<client_options_cache_entry_type> _driver_name, _driver_version;
+	std::list<client_option_key_value_cached_entry> _client_options;

    auth_state _auth_state = auth_state::UNINITIALIZED;
    bool _control_connection = false;
@@ -151,18 +153,33 @@ public:
        return _control_connection = true;
    }

-    std::optional<sstring> get_driver_name() const {
+    std::optional<client_options_cache_entry_type> get_driver_name() const {
        return _driver_name;
    }
-    void set_driver_name(sstring driver_name) {
-        _driver_name = std::move(driver_name);
+    future<> set_driver_name(client_options_cache_type& keys_and_values_cache, const sstring& driver_name) {
+        _driver_name = co_await keys_and_values_cache.get_or_load(driver_name, [] (const client_options_cache_key_type&) {
+            return make_ready_future<options_cache_value_type>(options_cache_value_type{});
+        });
    }

-    std::optional<sstring> get_driver_version() const {
+    const auto& get_client_options() const {
+        return _client_options;
+    }
+
+    future<> set_client_options(
+        client_options_cache_type& keys_and_values_cache,
+        const std::unordered_map<sstring, sstring>& client_options);
+
+    std::optional<client_options_cache_entry_type> get_driver_version() const {
        return _driver_version;
    }
-    void set_driver_version(sstring driver_version) {
-        _driver_version = std::move(driver_version);
+    future<> set_driver_version(
+        client_options_cache_type& keys_and_values_cache,
+        const sstring& driver_version)
+    {
+        _driver_version = co_await keys_and_values_cache.get_or_load(driver_version, [] (const client_options_cache_key_type&) {
+            return make_ready_future<options_cache_value_type>(options_cache_value_type{});
+        });
    }

    client_state(external_tag,
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -6526,14 +6526,19 @@ future<> storage_service::clone_locally_tablet_storage(locator::global_tablet_id
                                             leaving.host, pending.host));
    }

-    auto d = co_await smp::submit_to(leaving.shard, [this, tablet] () -> future<utils::chunked_vector<sstables::entry_descriptor>> {
+    // All sstables cloned locally are left unsealed until they're loaded into the table.
+    // This guarantees that no unsplit sstable is left sealed on disk, which could cause
+    // problems if unsplit sstables were found after the split was ACKed to the coordinator.
+    bool leave_unsealed = true;
+
+    auto d = co_await smp::submit_to(leaving.shard, [this, tablet, leave_unsealed] () -> future<utils::chunked_vector<sstables::entry_descriptor>> {
        auto& table = _db.local().find_column_family(tablet.table);
        auto op = table.stream_in_progress();
-        co_return co_await table.clone_tablet_storage(tablet.tablet);
+        co_return co_await table.clone_tablet_storage(tablet.tablet, leave_unsealed);
    });
    rtlogger.debug("Cloned storage of tablet {} from leaving replica {}, {} sstables were found", tablet, leaving, d.size());

-    auto load_sstable = [] (const dht::sharder& sharder, replica::table& t, sstables::entry_descriptor d) -> future<sstables::shared_sstable> {
+    auto load_sstable = [leave_unsealed] (const dht::sharder& sharder, replica::table& t, sstables::entry_descriptor d) -> future<sstables::shared_sstable> {
        auto& mng = t.get_sstables_manager();
        auto sst = mng.make_sstable(t.schema(), t.get_storage_options(), d.generation, d.state.value_or(sstables::sstable_state::normal),
                                    d.version, d.format, db_clock::now(), default_io_error_handler_gen());
@@ -6541,7 +6546,8 @@ future<> storage_service::clone_locally_tablet_storage(locator::global_tablet_id
        // will still point to leaving replica at this stage in migration. If node goes down,
        // SSTables will be loaded at pending replica and migration is retried, so correctness
        // wise, we're good.
-        auto cfg = sstables::sstable_open_config{ .current_shard_as_sstable_owner = true };
+        auto cfg = sstables::sstable_open_config{ .current_shard_as_sstable_owner = true,
+                                                  .unsealed_sstable = leave_unsealed };
        co_await sst->load(sharder, cfg);
        co_return sst;
    };
@@ -6549,16 +6555,23 @@ future<> storage_service::clone_locally_tablet_storage(locator::global_tablet_id
    co_await smp::submit_to(pending.shard, [this, tablet, load_sstable, d = std::move(d)] () mutable -> future<> {
        // Loads cloned sstables from leaving replica into pending one.
        auto& table = _db.local().find_column_family(tablet.table);
+        auto& sstm = table.get_sstables_manager();
        auto op = table.stream_in_progress();
        dht::auto_refreshing_sharder sharder(table.shared_from_this());

-        std::vector<sstables::shared_sstable> ssts;
-        ssts.reserve(d.size());
+        std::unordered_set<sstables::shared_sstable> ssts;
        for (auto&& sst_desc : d) {
-            ssts.push_back(co_await load_sstable(sharder, table, std::move(sst_desc)));
+            ssts.insert(co_await load_sstable(sharder, table, std::move(sst_desc)));
        }
-        co_await table.add_sstables_and_update_cache(ssts);
-        _view_building_worker.local().load_sstables(tablet.table, ssts);
+        auto on_add = [&ssts, &sstm] (sstables::shared_sstable loading_sst) -> future<> {
+            if (ssts.contains(loading_sst)) {
+                auto cfg = sstm.configure_writer(loading_sst->get_origin());
+                co_await loading_sst->seal_sstable(cfg.backup);
+            }
+            co_return;
+        };
+        auto loaded_ssts = co_await table.add_new_sstables_and_update_cache(std::vector(ssts.begin(), ssts.end()), on_add);
+        _view_building_worker.local().load_sstables(tablet.table, loaded_ssts);
    });
    rtlogger.debug("Successfully loaded storage of tablet {} into pending replica {}", tablet, pending);
 }
--- a/service/tablet_allocator.cc
+++ b/service/tablet_allocator.cc
@@ -1931,6 +1931,10 @@ public:
            const auto& table_groups = _tm->tablets().all_table_groups();

            auto finalize_decision = [&] {
+                if (utils::get_local_injector().enter("tablet_resize_finalization_postpone")) {
+                    return;
+                }
+
                _stats.for_cluster().resizes_finalized++;
                resize_plan.finalize_resize.insert(table);
            };
--- a/service/topology_coordinator.cc
+++ b/service/topology_coordinator.cc
@@ -2623,7 +2623,10 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
                        co_await _voter_handler.on_node_removed(replaced_node_id, _as);
                    }
                }
-                utils::get_local_injector().inject("crash_coordinator_before_stream", [] { abort(); });
+                utils::get_local_injector().inject("crash_coordinator_before_stream", [] { 
+                    rtlogger.info("crash_coordinator_before_stream: aborting");
+                    abort(); 
+                });
                raft_topology_cmd cmd{raft_topology_cmd::command::stream_ranges};
                auto state = node.rs->state;
                try {
--- a/sstables/mx/writer.cc
+++ b/sstables/mx/writer.cc
@@ -1696,7 +1696,9 @@ void writer::consume_end_of_stream() {
        .map = _collector.get_ext_timestamp_stats()
    });
    _sst.write_scylla_metadata(_shard, std::move(identifier), std::move(ld_stats), std::move(ts_stats));
-    _sst.seal_sstable(_cfg.backup).get();
+    if (!_cfg.leave_unsealed) {
+        _sst.seal_sstable(_cfg.backup).get();
+    }
 }

 uint64_t writer::data_file_position_for_tests() const {
--- a/sstables/open_info.hh
+++ b/sstables/open_info.hh
@@ -83,6 +83,8 @@ struct sstable_open_config {
    bool current_shard_as_sstable_owner = false;
    // Do not move the sharding metadata to the sharder, keeping it in the scylla metadata..
    bool keep_sharding_metadata = false;
+    // Allows an unsealed sstable to be loaded: its component list is read from the TemporaryTOC instead of the TOC.
+    bool unsealed_sstable = false;
 };

 }
--- a/sstables/sstables.cc
+++ b/sstables/sstables.cc
@@ -836,13 +836,14 @@ future<std::vector<sstring>> sstable::read_and_parse_toc(file f) {

 // This is small enough, and well-defined. Easier to just read it all
 // at once
-future<> sstable::read_toc() noexcept {
+future<> sstable::read_toc(sstable_open_config cfg) noexcept {
    if (_recognized_components.size()) {
        co_return;
    }

    try {
-        co_await do_read_simple(component_type::TOC, [&] (version_types v, file f) -> future<> {
+        auto toc_type = cfg.unsealed_sstable ? component_type::TemporaryTOC : component_type::TOC;
+        co_await do_read_simple(toc_type, [&] (version_types v, file f) -> future<> {
            auto comps = co_await read_and_parse_toc(f);
            for (auto& c: comps) {
                // accept trailing newlines
@@ -900,8 +901,8 @@ future<std::unordered_map<component_type, file>> sstable::readable_file_for_all_
    co_return std::move(files);
 }

-future<entry_descriptor> sstable::clone(generation_type new_generation) const {
-    co_await _storage->snapshot(*this, _storage->prefix(), storage::absolute_path::yes, new_generation);
+future<entry_descriptor> sstable::clone(generation_type new_generation, bool leave_unsealed) const {
+    co_await _storage->snapshot(*this, _storage->prefix(), storage::absolute_path::yes, new_generation, storage::leave_unsealed(leave_unsealed));
    co_return entry_descriptor(new_generation, _version, _format, component_type::TOC, _state);
 }

@@ -1725,7 +1726,7 @@ void sstable::disable_component_memory_reload() {
 }

 future<> sstable::load_metadata(sstable_open_config cfg) noexcept {
-    co_await read_toc();
+    co_await read_toc(cfg);
    // read scylla-meta after toc. Might need it to parse
    // rest (hint extensions)
    co_await read_scylla_metadata();
@@ -3960,11 +3961,13 @@ class sstable_stream_sink_impl : public sstable_stream_sink {
    shared_sstable _sst;
    component_type _type;
    bool _last_component;
+    bool _leave_unsealed;
 public:
-    sstable_stream_sink_impl(shared_sstable sst, component_type type, bool last_component)
+    sstable_stream_sink_impl(shared_sstable sst, component_type type, sstable_stream_sink_cfg cfg)
        : _sst(std::move(sst))
        , _type(type)
-        , _last_component(last_component)
+        , _last_component(cfg.last_component)
+        , _leave_unsealed(cfg.leave_unsealed)
    {}
 private:
    future<> load_metadata() const {
@@ -4011,10 +4014,12 @@ public:

        co_return co_await make_file_output_stream(std::move(f), stream_options);
    }
-    future<shared_sstable> close_and_seal() override {
+    future<shared_sstable> close() override {
        if (_last_component) {
            // If we are the last component in a sequence, we can seal the table.
-            co_await _sst->_storage->seal(*_sst);
+            if (!_leave_unsealed) {
+                co_await _sst->_storage->seal(*_sst);
+            }
            co_return std::move(_sst);
        }
        _sst = {};
@@ -4031,7 +4036,7 @@ public:
    }
 };

-std::unique_ptr<sstable_stream_sink> create_stream_sink(schema_ptr schema, sstables_manager& sstm, const data_dictionary::storage_options& s_opts, sstable_state state, std::string_view component_filename, bool last_component) {
+std::unique_ptr<sstable_stream_sink> create_stream_sink(schema_ptr schema, sstables_manager& sstm, const data_dictionary::storage_options& s_opts, sstable_state state, std::string_view component_filename, sstable_stream_sink_cfg cfg) {
    auto desc = parse_path(component_filename, schema->ks_name(), schema->cf_name());
    auto sst = sstm.make_sstable(schema, s_opts, desc.generation, state, desc.version, desc.format);

@@ -4042,7 +4047,7 @@ std::unique_ptr<sstable_stream_sink> create_stream_sink(schema_ptr schema, sstab
        type = component_type::TemporaryTOC;
    }

-    return std::make_unique<sstable_stream_sink_impl>(std::move(sst), type, last_component);
+    return std::make_unique<sstable_stream_sink_impl>(std::move(sst), type, cfg);
 }

 generation_type
--- a/sstables/sstables.hh
+++ b/sstables/sstables.hh
@@ -109,6 +109,7 @@ struct sstable_writer_config {
    size_t promoted_index_auto_scale_threshold;
    uint64_t max_sstable_size = std::numeric_limits<uint64_t>::max();
    bool backup = false;
+    bool leave_unsealed = false;
    mutation_fragment_stream_validation_level validation_level;
    std::optional<db::replay_position> replay_position;
    std::optional<int> sstable_level;
@@ -417,8 +418,8 @@ public:
        return component_basename(_schema->ks_name(), _schema->cf_name(), _version, _generation, _format, f);
    }

-    component_name get_filename() const {
-        return component_name(*this, component_type::Data);
+    component_name get_filename(component_type f = component_type::Data) const {
+        return component_name(*this, f);
    }

    component_name toc_filename() const {
@@ -693,7 +694,7 @@ private:

    future<> update_info_for_opened_data(sstable_open_config cfg = {});

-    future<> read_toc() noexcept;
+    future<> read_toc(sstable_open_config cfg = {}) noexcept;
    future<> read_summary() noexcept;

    void write_summary() {
@@ -1069,8 +1070,9 @@ public:
    future<std::unordered_map<component_type, file>> readable_file_for_all_components() const;

    // Clones this sstable with a new generation, under the same location as the original one.
+    // If leave_unsealed is true, the destination sstable is left unsealed.
    // Implementation is underlying storage specific.
-    future<entry_descriptor> clone(generation_type new_generation) const;
+    future<entry_descriptor> clone(generation_type new_generation, bool leave_unsealed = false) const;

    struct lesser_reclaimed_memory {
        // comparator class to be used by the _reclaimed set in sstables manager
@@ -1244,13 +1246,18 @@ public:
    // closes this component. If this is the last component in a set (see "last_component" in creating method below)
    // the table on disk will be sealed.
    // Returns sealed sstable if last, or nullptr otherwise.
-    virtual future<shared_sstable> close_and_seal() = 0;
+    virtual future<shared_sstable> close() = 0;
    virtual future<> abort() = 0;
 };

+struct sstable_stream_sink_cfg {
+    bool last_component = false;
+    bool leave_unsealed = false;
+};
+
 // Creates a sink object which can receive a component file sourced from above source object data.

-std::unique_ptr<sstable_stream_sink> create_stream_sink(schema_ptr, sstables_manager&, const data_dictionary::storage_options&, sstable_state, std::string_view component_filename, bool last_component);
+std::unique_ptr<sstable_stream_sink> create_stream_sink(schema_ptr, sstables_manager&, const data_dictionary::storage_options&, sstable_state, std::string_view component_filename, sstable_stream_sink_cfg cfg);

 } // namespace sstables

--- a/sstables/storage.cc
+++ b/sstables/storage.cc
@@ -50,7 +50,14 @@ class filesystem_storage final : public sstables::storage {
    std::optional<std::filesystem::path> _temp_dir; // Valid while the sstable is being created, until sealed

 private:
-    using mark_for_removal = bool_class<class mark_for_removal_tag>;
+    struct mark_for_removal_tag {};
+    struct leave_unsealed_tag {};
+
+    enum class link_mode {
+        default_mode,
+        mark_for_removal,
+        leave_unsealed,
+    };

    template <typename Comp>
    requires std::is_same_v<Comp, component_type> || std::is_same_v<Comp, sstring>
@@ -61,7 +68,9 @@ private:
    future<> check_create_links_replay(const sstable& sst, const sstring& dst_dir, generation_type dst_gen, const std::vector<std::pair<sstables::component_type, sstring>>& comps) const;
    future<> remove_temp_dir();
    virtual future<> create_links(const sstable& sst, const std::filesystem::path& dir) const override;
-    future<> create_links_common(const sstable& sst, sstring dst_dir, generation_type dst_gen, mark_for_removal mark_for_removal) const;
+    future<> create_links_common(const sstable& sst, sstring dst_dir, generation_type dst_gen, link_mode mode) const;
+    future<> create_links_common(const sstable& sst, sstring dst_dir, generation_type dst_gen, mark_for_removal_tag) const;
+    future<> create_links_common(const sstable& sst, const std::filesystem::path& dir, std::optional<generation_type> gen, leave_unsealed_tag) const;
    future<> create_links_common(const sstable& sst, const std::filesystem::path& dir, std::optional<generation_type> dst_gen) const;
    future<> touch_temp_dir(const sstable& sst);
    future<> move(const sstable& sst, sstring new_dir, generation_type generation, delayed_commit_changes* delay) override;
@@ -83,7 +92,7 @@ public:
    {}

    virtual future<> seal(const sstable& sst) override;
-    virtual future<> snapshot(const sstable& sst, sstring dir, absolute_path abs, std::optional<generation_type> gen) const override;
+    virtual future<> snapshot(const sstable& sst, sstring dir, absolute_path abs, std::optional<generation_type> gen, storage::leave_unsealed) const override;
    virtual future<> change_state(const sstable& sst, sstable_state state, generation_type generation, delayed_commit_changes* delay) override;
    // runs in async context
    virtual void open(sstable& sst) override;
@@ -356,8 +365,13 @@ future<> filesystem_storage::check_create_links_replay(const sstable& sst, const
 /// \param sst - the sstable to work on
 /// \param dst_dir - the destination directory.
 /// \param generation - the generation of the destination sstable
-/// \param mark_for_removal - mark the sstable for removal after linking it to the destination dst_dir
-future<> filesystem_storage::create_links_common(const sstable& sst, sstring dst_dir, generation_type generation, mark_for_removal mark_for_removal) const {
+/// \param mode - selects what is done in addition to linking the components:
+///     mark_for_removal - mark the sstable for removal after linking it to the destination dst_dir
+///     leave_unsealed - leave the destination sstable unsealed (TOC is not linked, TemporaryTOC is not removed)
+future<> filesystem_storage::create_links_common(const sstable& sst, sstring dst_dir, generation_type generation, link_mode mode) const {
+    // The mode is a single enum value, so at most one of these flags is true (neither for default_mode).
+    bool mark_for_removal = mode == link_mode::mark_for_removal;
+    bool leave_unsealed = mode == link_mode::leave_unsealed;
    sstlog.trace("create_links: {} -> {} generation={} mark_for_removal={}", sst.get_filename(), dst_dir, generation, mark_for_removal);
    auto comps = sst.all_components();
    co_await check_create_links_replay(sst, dst_dir, generation, comps);
@@ -366,7 +380,11 @@ future<> filesystem_storage::create_links_common(const sstable& sst, sstring dst
    co_await sst.sstable_write_io_check(idempotent_link_file, fmt::to_string(sst.filename(component_type::TOC)), std::move(dst));
    auto dir = opened_directory(dst_dir);
    co_await dir.sync(sst._write_error_handler);
-    co_await parallel_for_each(comps, [this, &sst, &dst_dir, generation] (auto p) {
+    co_await parallel_for_each(comps, [this, &sst, &dst_dir, generation, leave_unsealed] (auto p) {
+        // Skips the linking of TOC file if the destination will be left unsealed.
+        if (leave_unsealed && p.first == component_type::TOC) {
+            return make_ready_future<>();
+        }
        auto src = filename(sst, _dir.native(), sst._generation, p.second);
        auto dst = filename(sst, dst_dir, generation, p.second);
        return sst.sstable_write_io_check(idempotent_link_file, std::move(src), std::move(dst));
@@ -379,9 +397,10 @@ future<> filesystem_storage::create_links_common(const sstable& sst, sstring dst
        auto src_temp_toc = filename(sst, _dir.native(), sst._generation, component_type::TemporaryTOC);
        co_await sst.sstable_write_io_check(rename_file, std::move(dst_temp_toc), std::move(src_temp_toc));
        co_await _dir.sync(sst._write_error_handler);
-    } else {
+    } else if (!leave_unsealed) {
        // Now that the source sstable is linked to dir, remove
        // the TemporaryTOC file at the destination.
+        // This is bypassed if destination will be left unsealed.
        co_await sst.sstable_write_io_check(remove_file, std::move(dst_temp_toc));
    }
    co_await dir.sync(sst._write_error_handler);
@@ -389,15 +408,23 @@ future<> filesystem_storage::create_links_common(const sstable& sst, sstring dst
    sstlog.trace("create_links: {} -> {} generation={}: done", sst.get_filename(), dst_dir, generation);
 }

+future<> filesystem_storage::create_links_common(const sstable& sst, sstring dst_dir, generation_type dst_gen, mark_for_removal_tag) const {
+    return create_links_common(sst, dst_dir, dst_gen, link_mode::mark_for_removal);
+}
+
+future<> filesystem_storage::create_links_common(const sstable& sst, const std::filesystem::path& dir, std::optional<generation_type> gen, leave_unsealed_tag) const {
+    return create_links_common(sst, dir.native(), gen.value_or(sst._generation), link_mode::leave_unsealed);
+}
+
 future<> filesystem_storage::create_links_common(const sstable& sst, const std::filesystem::path& dir, std::optional<generation_type> gen) const {
-    return create_links_common(sst, dir.native(), gen.value_or(sst._generation), mark_for_removal::no);
+    return create_links_common(sst, dir.native(), gen.value_or(sst._generation), link_mode::default_mode);
 }

 future<> filesystem_storage::create_links(const sstable& sst, const std::filesystem::path& dir) const {
-    return create_links_common(sst, dir.native(), sst._generation, mark_for_removal::no);
+    return create_links_common(sst, dir.native(), sst._generation, link_mode::default_mode);
 }

-future<> filesystem_storage::snapshot(const sstable& sst, sstring dir, absolute_path abs, std::optional<generation_type> gen) const {
+future<> filesystem_storage::snapshot(const sstable& sst, sstring dir, absolute_path abs, std::optional<generation_type> gen, storage::leave_unsealed leave_unsealed) const {
    std::filesystem::path snapshot_dir;
    if (abs) {
        snapshot_dir = dir;
@@ -405,7 +432,11 @@ future<> filesystem_storage::snapshot(const sstable& sst, sstring dir, absolute_
        snapshot_dir = _dir.path() / dir;
    }
    co_await sst.sstable_touch_directory_io_check(snapshot_dir);
-    co_await create_links_common(sst, snapshot_dir, std::move(gen));
+    if (leave_unsealed) {
+        co_await create_links_common(sst, snapshot_dir, std::move(gen), leave_unsealed_tag{});
+    } else {
+        co_await create_links_common(sst, snapshot_dir, std::move(gen));
+    }
 }

 future<> filesystem_storage::move(const sstable& sst, sstring new_dir, generation_type new_generation, delayed_commit_changes* delay_commit) {
@@ -413,7 +444,7 @@ future<> filesystem_storage::move(const sstable& sst, sstring new_dir, generatio
    sstring old_dir = _dir.native();
    sstlog.debug("Moving {} old_generation={} to {} new_generation={} do_sync_dirs={}",
            sst.get_filename(), sst._generation, new_dir, new_generation, delay_commit == nullptr);
-    co_await create_links_common(sst, new_dir, new_generation, mark_for_removal::yes);
+    co_await create_links_common(sst, new_dir, new_generation, mark_for_removal_tag{});
    co_await change_dir(new_dir);
    generation_type old_generation = sst._generation;
    co_await coroutine::parallel_for_each(sst.all_components(), [&sst, old_generation, old_dir] (auto p) {
@@ -598,7 +629,7 @@ public:
    {}

    future<> seal(const sstable& sst) override;
-    future<> snapshot(const sstable& sst, sstring dir, absolute_path abs, std::optional<generation_type>) const override;
+    future<> snapshot(const sstable& sst, sstring dir, absolute_path abs, std::optional<generation_type>, storage::leave_unsealed) const override;
    future<> change_state(const sstable& sst, sstable_state state, generation_type generation, delayed_commit_changes* delay) override;
    // runs in async context
    void open(sstable& sst) override;
@@ -815,7 +846,7 @@ future<> object_storage_base::unlink_component(const sstable& sst, component_typ
    }
 }

-future<> object_storage_base::snapshot(const sstable& sst, sstring dir, absolute_path abs, std::optional<generation_type> gen) const {
+future<> object_storage_base::snapshot(const sstable& sst, sstring dir, absolute_path abs, std::optional<generation_type> gen, storage::leave_unsealed) const {
    on_internal_error(sstlog, "Snapshotting S3 objects not implemented");
    co_return;
 }
--- a/sstables/storage.hh
+++ b/sstables/storage.hh
@@ -97,9 +97,10 @@ public:

    using absolute_path = bool_class<class absolute_path_tag>; // FIXME -- should go away eventually
    using sync_dir = bool_class<struct sync_dir_tag>; // meaningful only to filesystem storage
+    using leave_unsealed = bool_class<struct leave_unsealed_tag>;

    virtual future<> seal(const sstable& sst) = 0;
-    virtual future<> snapshot(const sstable& sst, sstring dir, absolute_path abs, std::optional<generation_type> gen = {}) const = 0;
+    virtual future<> snapshot(const sstable& sst, sstring dir, absolute_path abs, std::optional<generation_type> gen = {}, leave_unsealed lu = leave_unsealed::no) const = 0;
    virtual future<> change_state(const sstable& sst, sstable_state to, generation_type generation, delayed_commit_changes* delay) = 0;
    // runs in async context
    virtual void open(sstable& sst) = 0;
--- a/streaming/consumer.cc
+++ b/streaming/consumer.cc
@@ -63,30 +63,45 @@ mutation_reader_consumer make_streaming_consumer(sstring origin,
                }
                schema_ptr s = reader.schema();

+                // The SSTable is sealed only once it is added to the sstable set, which ensures that unsplit
+                // sstables are never left sealed in the table directory.
                auto cfg = cf->get_sstables_manager().configure_writer(origin);
+                cfg.leave_unsealed = true;
                return sst->write_components(std::move(reader), adjusted_estimated_partitions, s,
                                             cfg, encoding_stats{}).then([sst] {
                    return sst->open_data();
-                }).then([cf, sst, offstrategy, origin, repaired_at, sstable_list_to_mark_as_repaired, frozen_guard] -> future<> {
-                    if (repaired_at && sstables::repair_origin == origin) {
-                        sst->being_repaired = frozen_guard;
-                        if (sstable_list_to_mark_as_repaired) {
-                            sstable_list_to_mark_as_repaired->insert(sst);
+                }).then([cf, sst, offstrategy, origin, repaired_at, sstable_list_to_mark_as_repaired, frozen_guard, cfg] -> future<std::vector<sstables::shared_sstable>> {
+                    auto on_add = [sst, origin, repaired_at, sstable_list_to_mark_as_repaired, frozen_guard, cfg] (sstables::shared_sstable loading_sst) -> future<> {
+                        if (repaired_at && sstables::repair_origin == origin) {
+                            loading_sst->being_repaired = frozen_guard;
+                            if (sstable_list_to_mark_as_repaired) {
+                                sstable_list_to_mark_as_repaired->insert(loading_sst);
+                            }
                        }
-                    }
+                        if (loading_sst == sst) {
+                            co_await loading_sst->seal_sstable(cfg.backup);
+                        }
+                        co_return;
+                    };
                    if (offstrategy && sstables::repair_origin == origin) {
                        sstables::sstlog.debug("Enabled automatic off-strategy trigger for table {}.{}",
                                cf->schema()->ks_name(), cf->schema()->cf_name());
                        cf->enable_off_strategy_trigger();
                    }
-                    co_await cf->add_sstable_and_update_cache(sst, offstrategy);
-                }).then([cf, s, sst, use_view_update_path, &vb, &vbw]() mutable -> future<> {
+                    co_return co_await cf->add_new_sstable_and_update_cache(sst, on_add, offstrategy);
+                }).then([cf, s, sst, use_view_update_path, &vb, &vbw] (std::vector<sstables::shared_sstable> new_sstables) mutable -> future<> {
+                    auto& vb_ = vb;
+                    auto new_sstables_ = std::move(new_sstables);
+                    auto table = cf;
+
                    if (use_view_update_path == db::view::sstable_destination_decision::staging_managed_by_vbc) {
-                        return vbw.local().register_staging_sstable_tasks({sst}, cf->schema()->id());
+                        co_return co_await vbw.local().register_staging_sstable_tasks(new_sstables_, cf->schema()->id());
                    } else if (use_view_update_path == db::view::sstable_destination_decision::staging_directly_to_generator) {
-                        return vb.local().register_staging_sstable(sst, std::move(cf));
+                        co_await coroutine::parallel_for_each(new_sstables_, [&vb_, &table] (sstables::shared_sstable sst) -> future<> {
+                            return vb_.local().register_staging_sstable(sst, table);
+                        });
                    }
-                    return make_ready_future<>();
+                    co_return;
                });
            };
            if (!offstrategy) {
--- a/streaming/stream_blob.cc
+++ b/streaming/stream_blob.cc
@@ -52,8 +52,16 @@ static future<> load_sstable_for_tablet(const file_stream_id& ops_id, replica::d
        auto erm = t.get_effective_replication_map();
        auto& sstm = t.get_sstables_manager();
        auto sst = sstm.make_sstable(t.schema(), t.get_storage_options(), desc.generation, state, desc.version, desc.format);
-        co_await sst->load(erm->get_sharder(*t.schema()));
-        co_await t.add_sstable_and_update_cache(sst);
+        sstables::sstable_open_config cfg { .unsealed_sstable = true };
+        co_await sst->load(erm->get_sharder(*t.schema()), cfg);
+        auto on_add = [sst, &sstm] (sstables::shared_sstable loading_sst) -> future<> {
+            if (loading_sst == sst) {
+                auto cfg = sstm.configure_writer(sst->get_origin());
+                co_await loading_sst->seal_sstable(cfg.backup);
+            }
+            co_return;
+        };
+        auto new_sstables = co_await t.add_new_sstable_and_update_cache(sst, on_add);
        blogger.info("stream_sstables[{}] Loaded sstable {} successfully", ops_id, sst->toc_filename());

        if (state == sstables::sstable_state::staging) {
@@ -64,7 +72,7 @@ static future<> load_sstable_for_tablet(const file_stream_id& ops_id, replica::d
            // so then, the view building coordinator can decide to process it once the migration
            // is finished.
            // (Instead of registering the sstable to view update generator which may process it immediately.)
-            co_await sharded_vbw.local().register_staging_sstable_tasks({sst}, t.schema()->id());
+            co_await sharded_vbw.local().register_staging_sstable_tasks(new_sstables, t.schema()->id());
        }
    });
 }
@@ -343,7 +351,11 @@ future<> stream_blob_handler(replica::database& db, db::view::view_building_work

        auto& table = db.find_column_family(meta.table);
        auto& sstm = table.get_sstables_manager();
-        auto sstable_sink = sstables::create_stream_sink(table.schema(), sstm, table.get_storage_options(), sstable_state(meta), meta.filename, meta.fops == file_ops::load_sstables);
+        // The SSTable is sealed only once it is added to the sstable set, which ensures that unsplit
+        // sstables are never left sealed in the table directory.
+        sstables::sstable_stream_sink_cfg cfg { .last_component = meta.fops == file_ops::load_sstables,
+                                                .leave_unsealed = true };
+        auto sstable_sink = sstables::create_stream_sink(table.schema(), sstm, table.get_storage_options(), sstable_state(meta), meta.filename, cfg);
        auto out = co_await sstable_sink->output(foptions, stream_options);
        co_return output_result{
            [sstable_sink = std::move(sstable_sink), &meta, &db, &vbw](store_result res) -> future<> {
@@ -351,7 +363,7 @@ future<> stream_blob_handler(replica::database& db, db::view::view_building_work
                    co_await sstable_sink->abort();
                    co_return;
                }
-                auto sst = co_await sstable_sink->close_and_seal();
+                auto sst = co_await sstable_sink->close();
                if (sst) {
                    blogger.debug("stream_sstables[{}] Loading sstable {} on shard {}", meta.ops_id, sst->toc_filename(), meta.dst_shard_id);
                    auto desc = sst->get_descriptor(sstables::component_type::TOC);
--- a/test/boost/compaction_group_test.cc
+++ b/test/boost/compaction_group_test.cc
@@ -110,7 +110,7 @@ public:
    virtual compaction::compaction_strategy_state& get_compaction_strategy_state() noexcept override { return _compaction_strategy_state; }
    virtual reader_permit make_compaction_reader_permit() const override { return _semaphore.make_permit(); }
    virtual sstables::sstables_manager& get_sstables_manager() noexcept override { return _sst_man; }
-    virtual sstables::shared_sstable make_sstable() const override { return _sstable_factory(); }
+    virtual sstables::shared_sstable make_sstable(sstables::sstable_state) const override { return _sstable_factory(); }
    virtual sstables::sstable_writer_config configure_writer(sstring origin) const override { return _sst_man.configure_writer(std::move(origin)); }
    virtual api::timestamp_type min_memtable_timestamp() const override { return api::min_timestamp; }
    virtual api::timestamp_type min_memtable_live_timestamp() const override { return api::min_timestamp; }
--- a/test/boost/cql_auth_query_test.cc
+++ b/test/boost/cql_auth_query_test.cc
@@ -387,4 +387,27 @@ SEASTAR_TEST_CASE(select_from_vector_indexed_table) {
            enable_tablets(db_config_with_auth()));
 }

+SEASTAR_TEST_CASE(select_from_vector_search_system_table) {
+    return do_with_cql_env_thread(
+            [](auto&& env) {
+                create_user_if_not_exists(env, bob);
+                with_user(env, bob, [&env] {
+                    BOOST_REQUIRE_EXCEPTION(env.execute_cql("SELECT * FROM system.group0_history").get(), exceptions::unauthorized_exception,
+                            exception_predicate::message_contains("User bob has none of the permissions (VECTOR_SEARCH_INDEXING, SELECT) on"));
+                });
+                with_user(env, bob, [&env] {
+                    BOOST_REQUIRE_EXCEPTION(env.execute_cql("SELECT * FROM system.versions").get(), exceptions::unauthorized_exception,
+                            exception_predicate::message_contains("User bob has none of the permissions (VECTOR_SEARCH_INDEXING, SELECT) on"));
+                });
+                cquery_nofail(env, "GRANT VECTOR_SEARCH_INDEXING ON ALL KEYSPACES TO bob");
+                with_user(env, bob, [&env] {
+                    cquery_nofail(env, "SELECT * FROM system.group0_history");
+                });
+                with_user(env, bob, [&env] {
+                    cquery_nofail(env, "SELECT * FROM system.versions");
+                });
+            },
+            db_config_with_auth());
+}
+
 BOOST_AUTO_TEST_SUITE_END()
--- a/test/boost/sstable_compaction_test.cc
+++ b/test/boost/sstable_compaction_test.cc
@@ -6275,11 +6275,11 @@ SEASTAR_TEST_CASE(splitting_compaction_test) {

        auto& cm = t->get_compaction_manager();
        auto split_opt = compaction::compaction_type_options::split{classify_fn};
-        auto new_ssts = cm.maybe_split_sstable(input, t.as_compaction_group_view(), split_opt).get();
+        auto new_ssts = cm.maybe_split_new_sstable(input, t.as_compaction_group_view(), split_opt).get();
        BOOST_REQUIRE(new_ssts.size() == expected_output_size);
        for (auto& sst : new_ssts) {
            // split sstables don't require further split.
-            auto ssts = cm.maybe_split_sstable(sst, t.as_compaction_group_view(), split_opt).get();
+            auto ssts = cm.maybe_split_new_sstable(sst, t.as_compaction_group_view(), split_opt).get();
            BOOST_REQUIRE(ssts.size() == 1);
            BOOST_REQUIRE(ssts.front() == sst);
        }
@@ -6291,9 +6291,97 @@ SEASTAR_TEST_CASE(splitting_compaction_test) {
            }
            return classify_fn(t);
        };
-        BOOST_REQUIRE_THROW(cm.maybe_split_sstable(input, t.as_compaction_group_view(), compaction::compaction_type_options::split{throwing_classifier}).get(),
+        BOOST_REQUIRE_THROW(cm.maybe_split_new_sstable(input, t.as_compaction_group_view(), compaction::compaction_type_options::split{throwing_classifier}).get(),
                            std::runtime_error);
    });
 }

+SEASTAR_TEST_CASE(unsealed_sstable_compaction_test) {
+    BOOST_REQUIRE(smp::count == 1);
+    return test_env::do_with_async([] (test_env& env) {
+        auto s = schema_builder("tests", "unsealed_sstable_compaction_test")
+                .with_column("id", utf8_type, column_kind::partition_key)
+                .with_column("value", int32_type).build();
+
+        auto t = env.make_table_for_tests(s);
+        auto close_t = deferred_stop(t);
+        t->start();
+
+        mutation mut(s, partition_key::from_exploded(*s, {to_bytes("alpha")}));
+        mut.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), 0);
+
+        sstable_writer_config sst_cfg = env.manager().configure_writer();
+        sst_cfg.leave_unsealed = true;
+        auto unsealed_sstable = make_sstable_easy(env, make_mutation_reader_from_mutations(s, env.make_reader_permit(), std::move(mut)), sst_cfg);
+
+        BOOST_REQUIRE(file_exists(unsealed_sstable->get_filename(sstables::component_type::TemporaryTOC).format()).get());
+
+        auto sst_gen = env.make_sst_factory(s);
+        auto info = compact_sstables(env, compaction::compaction_descriptor({ unsealed_sstable }), t, sst_gen).get();
+        BOOST_REQUIRE(info.new_sstables.size() == 1);
+    });
+}
+
+SEASTAR_TEST_CASE(sstable_clone_leaving_unsealed_dest_sstable) {
+    return test_env::do_with_async([] (test_env& env) {
+        simple_schema ss;
+        auto s = ss.schema();
+        auto pk = ss.make_pkey();
+
+        auto mut1 = mutation(s, pk);
+        mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp());
+        auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)});
+
+        auto table = env.make_table_for_tests(s);
+        auto close_table = deferred_stop(table);
+
+        sstables::sstable_generation_generator gen_generator;
+
+        bool leave_unsealed = true;
+        auto d = sst->clone(gen_generator(), leave_unsealed).get();
+
+        auto sst2 = env.make_sstable(s, d.generation, d.version, d.format);
+        sst2->load(s->get_sharder(), sstable_open_config{ .unsealed_sstable = leave_unsealed }).get();
+        BOOST_REQUIRE(!file_exists(sst2->get_filename(sstables::component_type::TOC).format()).get());
+        BOOST_REQUIRE(file_exists(sst2->get_filename(sstables::component_type::TemporaryTOC).format()).get());
+
+        leave_unsealed = false;
+        d = sst->clone(gen_generator(), leave_unsealed).get();
+
+        auto sst3 = env.make_sstable(s, d.generation, d.version, d.format);
+        sst3->load(s->get_sharder(), sstable_open_config{ .unsealed_sstable = leave_unsealed }).get();
+        BOOST_REQUIRE(file_exists(sst3->get_filename(sstables::component_type::TOC).format()).get());
+        BOOST_REQUIRE(!file_exists(sst3->get_filename(sstables::component_type::TemporaryTOC).format()).get());
+    });
+}
+
+SEASTAR_TEST_CASE(failure_when_adding_new_sstable_test) {
+    return test_env::do_with_async([] (test_env& env) {
+        simple_schema ss;
+        auto s = ss.schema();
+        auto pk = ss.make_pkey();
+
+        auto mut1 = mutation(s, pk);
+        mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp());
+        auto sst = make_sstable_containing(env.make_sstable(s), {mut1});
+
+        auto table = env.make_table_for_tests(s);
+        auto close_table = deferred_stop(table);
+
+        auto on_add = [] (sstables::shared_sstable) { throw std::runtime_error("fail to seal"); return make_ready_future<>(); };
+        BOOST_REQUIRE_THROW(table->add_new_sstable_and_update_cache(sst, on_add).get(), std::runtime_error);
+
+        // Verify new sstable was unlinked on failure.
+        BOOST_REQUIRE(!file_exists(sst->get_filename(sstables::component_type::Data).format()).get());
+
+        auto sst2 = make_sstable_containing(env.make_sstable(s), {mut1});
+        auto sst3 = make_sstable_containing(env.make_sstable(s), {mut1});
+        BOOST_REQUIRE_THROW(table->add_new_sstables_and_update_cache({sst2, sst3}, on_add).get(), std::runtime_error);
+
+        // Verify both sstables are unlinked on failure.
+        BOOST_REQUIRE(!file_exists(sst2->get_filename(sstables::component_type::Data).format()).get());
+        BOOST_REQUIRE(!file_exists(sst3->get_filename(sstables::component_type::Data).format()).get());
+    });
+}
+
 BOOST_AUTO_TEST_SUITE_END()
--- a/test/cluster/conftest.py
+++ b/test/cluster/conftest.py
@@ -17,6 +17,7 @@ from concurrent.futures.thread import ThreadPoolExecutor
 from multiprocessing import Event
 from pathlib import Path
 from typing import TYPE_CHECKING
+from test import TOP_SRC_DIR, path_to
 from test.pylib.runner import testpy_test_fixture_scope
 from test.pylib.random_tables import RandomTables
 from test.pylib.util import unique_name
@@ -58,6 +59,20 @@ logger = logging.getLogger(__name__)
 print(f"Driver name {DRIVER_NAME}, version {DRIVER_VERSION}")


+async def decode_backtrace(build_mode: str, input: str):
+    executable = Path(path_to(build_mode, "scylla"))
+    proc = await asyncio.create_subprocess_exec(
+        (TOP_SRC_DIR / "seastar" / "scripts" / "seastar-addr2line").absolute(),
+        "-e",
+        executable.absolute(),
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    stdout, stderr = await proc.communicate(input=input.encode())
+    return f"{stdout.decode()}\n{stderr.decode()}"
+
+
 def pytest_addoption(parser):
    parser.addoption('--manager-api', action='store',
                     help='Manager unix socket path')
@@ -243,6 +258,11 @@ async def manager(request: pytest.FixtureRequest,
    # test failure.
    report = request.node.stash[PHASE_REPORT_KEY]
    failed = report.when == "call" and report.failed
+
+    # Check the nodes for errors; check_all_errors is enabled only if the test has the check_nodes_for_errors marker
+    found_errors = await manager_client.check_all_errors(check_all_errors=(request.node.get_closest_marker("check_nodes_for_errors") is not None))
+    failed = failed or found_errors
+
    if failed:
        # Save scylladb logs for failed tests in a separate directory and copy XML report to the same directory to have
        # all related logs in one dir.
@@ -266,10 +286,44 @@ async def manager(request: pytest.FixtureRequest,
    await manager_client.stop()  # Stop client session and close driver after each test
    if cluster_status["server_broken"]:
        pytest.fail(
-            f"test case {test_case_name} leave unfinished tasks on Scylla server. Server marked as broken,"
+            f"test case {test_case_name} left unfinished tasks on Scylla server. Server marked as broken,"
            f" server_broken_reason: {cluster_status["message"]}"
        )
+    if found_errors:
+        full_message = []
+        for server, data in found_errors.items():
+            summary = []
+            detailed = []

+            if criticals := data.get("critical", []):
+                summary.append(f"{len(criticals)} critical error(s)")
+                detailed.extend(map(str.rstrip, criticals))
+
+            if backtraces := data.get("backtraces", []):
+                summary.append(f"{len(backtraces)} backtrace(s)")
+                with open(failed_test_dir_path / f"scylla-{server.server_id}-backtraces.txt", "w") as bt_file:
+                    for backtrace in backtraces:
+                        bt_file.write(backtrace + "\n\n")
+                        decoded_bt = await decode_backtrace(build_mode, backtrace)
+                        bt_file.write(decoded_bt + "\n\n")
+                    detailed.append(f"{len(backtraces)} backtrace(s) saved in {Path(bt_file.name).name}")
+
+            if errors := data.get("error", []):
+                summary.append(f"{len(errors)} error(s)")
+                detailed.extend(map(str.rstrip, errors))
+
+            if cores := data.get("cores", []):
+                summary.append(f"{len(cores)} core(s): {', '.join(cores)}")
+
+            if summary:
+                summary_line = f"Server {server.server_id}: found {', '.join(summary)} (log: { data['log']})"
+                detailed = [f"  {line}" for line in detailed]
+                full_message.append(summary_line)
+                full_message.extend(detailed)
+
+        with open(failed_test_dir_path / "found_errors.txt", "w") as f:
+            f.write("\n".join(full_message))
+        pytest.fail(f"\n{'\n'.join(full_message)}")

 # "cql" fixture: set up client object for communicating with the CQL API.
 # Since connection is managed by manager just return that object
--- a/test/cluster/dtest/conftest.py
+++ b/test/cluster/dtest/conftest.py
@@ -110,6 +110,9 @@ def fixture_dtest_setup(request: FixtureRequest,
    except Exception as e:  # noqa: BLE001
        logger.error("Error stopping cluster: %s", str(e))

+    manager.ignore_log_patterns.extend(dtest_setup.ignore_log_patterns)
+    manager.ignore_cores_log_patterns.extend(dtest_setup.ignore_cores_log_patterns)
+
    try:
        if not dtest_setup.allow_log_errors:
            exclude_errors = []
--- a/test/cluster/lwt/lwt_common.py
+++ b/test/cluster/lwt/lwt_common.py
@@ -14,7 +14,7 @@ import time
 from collections import defaultdict
 from functools import cached_property
 from functools import wraps
-from typing import List, Dict, Callable
+from typing import List, Dict, Callable, Optional, Tuple

 from cassandra import ConsistencyLevel
 from cassandra import WriteTimeout, ReadTimeout, OperationTimedOut
@@ -64,6 +64,7 @@ class Worker:
    """
    A single worker increments its dedicated column `s{i}` via LWT:
      UPDATE .. SET s{i}=? WHERE pk=? IF <guards on other cols> AND s{i}=?
+    It reports each applied update via on_applied(), which bumps the global phase-ops counter.
    It checks for applied state and retries on "uncertainty" timeouts.
    """
    def __init__(
@@ -76,8 +77,10 @@ class Worker:
        other_columns: List[int],
        get_lower_bound: Callable[[int, int], int],
        on_applied: Callable[[int, int, int], None],
-        stop_event: asyncio.Event
-
+        stop_event: asyncio.Event,
+        counter_update_statement: Optional[PreparedStatement] = None,
+        counters_random_delta: bool = False,
+        counters_max_delta: int = 5,
    ):
        self.stop_event = stop_event
        self.success_counts: Dict[int, int] = {pk: 0 for pk in pks}
@@ -91,7 +94,11 @@ class Worker:
        self.cql = cql
        self.get_lower_bound = get_lower_bound
        self.on_applied = on_applied
-
+        # counters
+        self.counter_update_statement = counter_update_statement
+        self.counters_random_delta = counters_random_delta
+        self.counters_max_delta = max(1, counters_max_delta)
+        self.counter_deltas: Dict[int, int] = {pk: 0 for pk in pks}

    async def verify_update_through_select(self, pk, new_val, prev_val):
        """
@@ -106,6 +113,24 @@ class Worker:
        assert current_val == new_val or current_val == prev_val
        return current_val == new_val

+    def _next_counter_delta(self) -> int:
+        """
+        Compute the next delta to apply to the counter table.
+        If random mode is disabled -> always +1.
+        If random mode is enabled -> random value from
+            [-max_delta..-1] U [1..max_delta].
+        """
+        if not self.counters_random_delta:
+            return 1
+        # Avoid 0 by choosing magnitude in [1, max] and random sign.
+        mag = self.rng.randint(1, self.counters_max_delta)
+        sign = -1 if self.rng.random() < 0.5 else 1
+        return sign * mag
+
+    async def _inc_counter(self, pk: int, delta: int) -> None:
+        stmt = self.counter_update_statement.bind([delta, pk])
+        stmt.consistency_level = ConsistencyLevel.LOCAL_QUORUM
+        await self.cql.run_async(stmt)

    def stop(self) -> None:
        self.stop_event.set()
@@ -170,6 +195,11 @@ class Worker:
                    self.on_applied(pk, self.worker_id, new_val)
                    self.success_counts[pk] += 1

+                if self.counter_update_statement:
+                    delta = self._next_counter_delta()
+                    self.counter_deltas[pk] += delta
+                    await self._inc_counter(pk, delta)
+
                await asyncio.sleep(0.1)

            except Exception:
@@ -187,7 +217,8 @@ class BaseLWTTester:

    def __init__(
            self, manager: ManagerClient, ks: str, tbl: str,
-            num_workers: int = DEFAULT_WORKERS, num_keys: int = DEFAULT_NUM_KEYS
+            num_workers: int = DEFAULT_WORKERS, num_keys: int = DEFAULT_NUM_KEYS, use_counters: bool = False,
+            counters_random_delta: bool = False, counters_max_delta: int = 5, counter_tbl: Optional[str] = None
    ):
        self.ks = ks
        self.tbl = tbl
@@ -202,6 +233,12 @@ class BaseLWTTester:
        self.migrations = 0
        self.phase = "warmup"  # "warmup" -> "migrating" -> "post"
        self.phase_ops = defaultdict(int)
+        # counters config
+        self.use_counters = use_counters
+        self.counters_random_delta = counters_random_delta
+        self.counters_max_delta = counters_max_delta
+        self.counter_tbl = counter_tbl or (f"{tbl}_ctr" if use_counters else None)
+

    def _get_lower_bound(self, pk: int, col_idx: int) -> int:
        return self.lb_counts[pk][col_idx]
@@ -233,6 +270,14 @@ class BaseLWTTester:

    def create_workers(self, stop_event) -> List[Worker]:
        workers: List[Worker] = []
+
+        counter_stmt: Optional[PreparedStatement] = None
+        if self.use_counters:
+            counter_stmt = self.cql.prepare(
+                f"UPDATE {self.ks}.{self.counter_tbl} "
+                f"SET c = c + ? WHERE pk = ?"
+            )
+
        for i in range(self.num_workers):
            other_columns = [j for j in range(self.num_workers) if j != i]
            cond = " AND ".join([*(f"s{j} >= ?" for j in other_columns), f"s{i} = ?"])
@@ -247,6 +292,9 @@ class BaseLWTTester:
                other_columns=other_columns,
                get_lower_bound=self._get_lower_bound,
                on_applied=self._on_applied,
+                counter_update_statement=counter_stmt,
+                counters_random_delta=self.counters_random_delta,
+                counters_max_delta=self.counters_max_delta,
            )
            workers.append(worker)
        return workers
@@ -258,6 +306,11 @@ class BaseLWTTester:
            f"CREATE TABLE {self.ks}.{self.tbl} (pk int PRIMARY KEY, {cols_def})"
        )
        logger.info("Created table %s.%s with %d columns", self.ks, self.tbl, self.num_workers)
+        if self.use_counters:
+            await self.cql.run_async(
+                f"CREATE TABLE {self.ks}.{self.counter_tbl} (pk int PRIMARY KEY, c counter)"
+            )
+            logger.info("Created counter table %s.%s", self.ks, self.counter_tbl)

    async def initialize_rows(self):
        """
@@ -296,7 +349,7 @@ class BaseLWTTester:
            assert not errs, f"worker errors: {errs}"
        logger.info("All workers stopped")

-    async def verify_consistency(self):
+    async def _verify_base_table(self):
        """Ensure every (pk, column) reflects the number of successful CAS writes."""
        # Run SELECTs for all PKs in parallel using prepared statement
        tasks = []
@@ -320,6 +373,35 @@ class BaseLWTTester:
        total_ops = sum(sum(w.success_counts.values()) for w in self.workers)
        logger.info("Consistency verified – %d total successful CAS operations", total_ops)

+    async def _verify_counters(self):
+        if not self.use_counters:
+            return
+
+        stmt = SimpleStatement(
+            f"SELECT pk, c FROM {self.ks}.{self.counter_tbl}",
+            consistency_level=ConsistencyLevel.LOCAL_QUORUM,
+        )
+
+        rows = await self.cql.run_async(stmt)
+        db_values: Dict[int, int] = {row.pk: row.c for row in rows}
+
+        mismatches = []
+        for pk in self.pks:
+            actual = db_values.get(pk, 0)
+            expected = sum(worker.counter_deltas.get(pk, 0) for worker in self.workers)
+            if actual != expected:
+                mismatches.append(
+                    f"counter mismatch pk={pk} c={actual}, expected={expected}"
+                )
+
+        assert not mismatches, "Counter consistency violations: " + "; ".join(mismatches)
+        total_delta = sum(sum(worker.counter_deltas.values()) for worker in self.workers)
+        logger.info("Counter table consistency verified – total delta=%d", total_delta)
+
+    async def verify_consistency(self):
+        await self._verify_base_table()
+        await self._verify_counters()
+

 async def get_token_for_pk(cql, ks: str, tbl: str, pk: int) -> int:
    """Get the token for a given primary key"""
--- a/test/cluster/lwt/test_lwt_with_counters_during_tablets_resize_and_migrations.py
+++ b/test/cluster/lwt/test_lwt_with_counters_during_tablets_resize_and_migrations.py
@@ -0,0 +1,412 @@
+# Copyright (C) 2025-present ScyllaDB
+# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+
+import asyncio
+import logging
+import random
+import re
+import time
+from typing import Dict
+
+import pytest
+from test.cluster.conftest import skip_mode
+from test.cluster.lwt.lwt_common import (
+    BaseLWTTester,
+    DEFAULT_WORKERS,
+    DEFAULT_NUM_KEYS,
+    wait_for_tablet_count
+)
+from test.cluster.util import new_test_keyspace
+from test.pylib.manager_client import ManagerClient
+from test.pylib.rest_client import HTTPError
+from test.pylib.tablets import get_tablet_count
+from test.pylib.tablets import get_tablet_replicas
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+TARGET_RESIZE_COUNT = 20
+NUM_MIGRATIONS = 20
+WARMUP_LWT_CNT = 100
+POST_LWT_CNT = 100
+
+PHASE_WARMUP = "warmup"
+PHASE_RESIZE = "resize"
+PHASE_POST = "post"
+
+MIN_TABLETS = 1
+MAX_TABLETS = 20
+RESIZE_TIMEOUT = 240
+MIGRATE_ONE_TIMEOUT_S = 60
+NO_REPLICA_RE = re.compile(r"has no replica on", re.IGNORECASE)
+DST_REPLICA_RE = re.compile(r"has replica on", re.IGNORECASE)
+
+
+def _err_code(e: Exception):
+    return getattr(e, "code", None)
+
+def _err_text(e: Exception):
+    return getattr(e, "text", "") or str(e)
+
+def _is_tablet_in_transition_http_error(e: Exception) -> bool:
+    return isinstance(e, HTTPError) and _err_code(e) == 500 and "in transition" in _err_text(e).lower()
+
+def _is_no_replica_on_src_error(e: Exception) -> bool:
+    return isinstance(e, HTTPError) and _err_code(e) == 500  and NO_REPLICA_RE.search(_err_text(e)) is not None
+
+def _is_dst_already_replica_error(e: Exception) -> bool:
+    return isinstance(e, HTTPError) and _err_code(e) == 500 and DST_REPLICA_RE.search(_err_text(e)) is not None
+
+
+async def _move_tablet_with_retry(manager, src_server, ks, tbl,
+                                  src_host_id, src_shard, dst_host_id, dst_shard, token,
+                                  *, timeout_s=MIGRATE_ONE_TIMEOUT_S, base_sleep=0.1, max_sleep=2.0):
+    deadline = time.time() + timeout_s
+    sleep = base_sleep
+    while True:
+        try:
+            await manager.api.move_tablet(
+                src_server.ip_addr, ks, tbl,
+                src_host_id, src_shard, dst_host_id, dst_shard, token
+            )
+            return
+        except Exception as e:
+            if _is_tablet_in_transition_http_error(e) and time.time() + sleep < deadline:
+                logger.info("Token %s in transition, retry in %.2fs", token, sleep)
+                await asyncio.sleep(sleep + random.uniform(0, sleep))
+                sleep = min(sleep * 1.7, max_sleep)
+                continue
+            raise
+
+
+async def tablet_migration_ops(
+    stop_event: asyncio.Event,
+    manager: ManagerClient,
+    servers,
+    tester: BaseLWTTester,
+    table: str,
+    num_ops: int,
+    pause_range=(0.5, 2.0),
+    *,
+    server_properties,
+) -> None:
+    logger.info("Starting tablet migration ops for %s.%s: target=%d", tester.ks, table, num_ops)
+    migration_count = 0
+    intranode_ratio = 0.3
+
+    # server_id -> rack
+    server_id_to_rack: Dict[str, str] = {
+        s.server_id: prop["rack"] for s, prop in zip(servers, server_properties)
+    }
+    host_ids = await asyncio.gather(
+        *(manager.get_host_id(s.server_id) for s in servers)
+    )
+    # server_id -> host_id and host_id -> server
+    server_id_to_host_id: Dict[str, str] = {
+        s.server_id: hid for s, hid in zip(servers, host_ids)
+    }
+    host_id_to_server = {
+        hid: s for s, hid in zip(servers, host_ids)
+    }
+
+    attempt = 0
+    while not stop_event.is_set() and migration_count < num_ops:
+        attempt += 1
+        sample_pk = random.choice(tester.pks)
+        token = tester.pk_to_token[sample_pk]
+
+        replicas = await get_tablet_replicas(
+            manager, servers[0], tester.ks, table, token
+        )
+        src_host_id, src_shard = random.choice(replicas)
+        src_server = host_id_to_server.get(src_host_id)
+        assert src_server is not None, (
+            f"Source host_id {src_host_id} for token {token} not found in host_id_to_server (attempt {attempt})"
+        )
+
+        if random.random() < intranode_ratio:
+            dst_host_id = src_host_id
+            dst_server = src_server
+            dst_shard = 0 if src_shard != 0 else 1
+        else:
+            replica_hids = {h for (h, _sh) in replicas}
+            src_rack = server_id_to_rack[src_server.server_id]
+
+            same_rack_candidates = [
+                s for s in servers if server_id_to_rack[s.server_id] == src_rack
+                and server_id_to_host_id[s.server_id] not in replica_hids
+            ]
+
+            assert same_rack_candidates, (
+                f"No same-rack non-replica candidate for token {token} (attempt {attempt})"
+            )
+
+            dst_server = random.choice(same_rack_candidates)
+            dst_host_id = server_id_to_host_id[dst_server.server_id]
+            dst_shard = 0
+
+        try:
+            await _move_tablet_with_retry(
+                manager, src_server, tester.ks, table,
+                src_host_id, src_shard, dst_host_id, dst_shard, token,
+                timeout_s=60,
+            )
+
+            migration_count += 1
+            logger.info(
+                "Completed migration #%d (token=%s -> %s:%d) for %s.%s",
+                migration_count, token, dst_server.ip_addr, dst_shard, tester.ks, table,
+            )
+            await asyncio.sleep(random.uniform(*pause_range))
+            continue
+        except Exception as e:
+            if _is_tablet_in_transition_http_error(e):
+                logger.info("Token %s in transition, switching token (attempt %d)",
+                            token, attempt)
+                continue
+            if _is_no_replica_on_src_error(e) or _is_dst_already_replica_error(e):
+                logger.info("Src replica vanished for token %s, re-pick (attempt %d)",
+                            token, attempt)
+                continue
+            raise
+
+    assert migration_count == num_ops, f"Only completed {migration_count}/{num_ops} migrations for {tester.ks}.{table}"
+    logger.info("Completed tablet migration ops for %s.%s: %d/%d", tester.ks, table, migration_count, num_ops)
+
+
+def powers_of_two_in_range(lo: int, hi: int):
+    if lo > hi or hi < 1:
+        return []
+    lo = max(1, lo)
+    start_e = (lo - 1).bit_length()
+    end_e = hi.bit_length()
+    return [1 << e for e in range(start_e, end_e + 1) if (1 << e) <= hi]
+
+
+async def run_random_resizes(
+    stop_event_: asyncio.Event,
+    manager: ManagerClient,
+    servers,
+    tester: BaseLWTTester,
+    ks: str,
+    table: str,
+    counter_table: str,
+    target_steps: int = TARGET_RESIZE_COUNT,
+    pause_range=(0.5, 2.0),
+):
+    """
+    Perform randomized tablet count changes (splits/merges) on the main LWT table
+    and its counter table. Runs until target resize count is reached or stop_event_
+    is set. Returns a dict with simple stats.
+    """
+    split_count = 0
+    merge_count = 0
+    current_resize_count = 0
+    pow2_targets = powers_of_two_in_range(MIN_TABLETS, MAX_TABLETS)
+
+    while not stop_event_.is_set() and current_resize_count < target_steps:
+        # Drive resize direction from the main table.
+        current_main = await get_tablet_count(manager, servers[0], ks, table)
+
+        candidates = [t for t in pow2_targets if t != current_main]
+        target_cnt = random.choice(candidates)
+
+        direction = "split" if target_cnt > current_main else "merge"
+        logger.info(
+            "[%s] starting: %s.%s=%d, %s.%s -> target %d",
+            direction.upper(), ks, table, current_main, ks,
+            counter_table, target_cnt
+        )
+        tables = [table, counter_table]
+        # Apply ALTER TABLE to both tables.
+        for tbl in tables:
+            await tester.cql.run_async(
+                f"ALTER TABLE {ks}.{tbl} "
+                f"WITH tablets = {{'min_tablet_count': {target_cnt}}}"
+            )
+
+        if direction == "split":
+            predicate = lambda c, tgt=target_cnt: c >= tgt
+        else:
+            predicate = lambda c, tgt=target_cnt: c <= tgt
+
+        # Wait for both tables to converge.
+        main_after, counter_after = await asyncio.gather(
+            wait_for_tablet_count(
+                manager,
+                servers[0],
+                tester.ks,
+                table,
+                predicate=predicate,
+                target=target_cnt,
+                timeout_s=RESIZE_TIMEOUT,
+            ),
+            wait_for_tablet_count(
+                manager,
+                servers[0],
+                tester.ks,
+                counter_table,
+                predicate=predicate,
+                target=target_cnt,
+                timeout_s=RESIZE_TIMEOUT,
+            ),
+        )
+
+        # Sanity: both tables should end up with the same tablet count.
+        assert main_after == counter_after, (
+            f"Tablet counts diverged: {ks}.{table}={main_after}, "
+            f"{ks}.{counter_table}={counter_after}"
+        )
+
+        if direction == "split":
+            logger.info(
+                "[SPLIT] converged: %s.%s %d -> %d, %s.%s -> %d (target %d)",
+                ks, table, current_main, main_after, ks, counter_table,
+                counter_after, target_cnt
+            )
+            assert main_after >= current_main, (
+                f"Tablet count expected to increase during split "
+                f"(was {current_main}, now {main_after})"
+            )
+            split_count += 1
+        else:
+            logger.info(
+                "[MERGE] converged: %s.%s %d -> %d, %s.%s -> %d (target %d)",
+                ks, table, current_main, main_after, ks, counter_table,
+                counter_after, target_cnt
+            )
+            assert main_after <= current_main, (
+                f"Tablet count expected to decrease during merge "
+                f"(was {current_main}, now {main_after})"
+            )
+            merge_count += 1
+
+        current_resize_count += 1
+        await asyncio.sleep(random.uniform(*pause_range))
+
+    return {
+        "steps_done": current_resize_count,
+        "seen_split": split_count,
+        "seen_merge": merge_count,
+    }
+
+
+@pytest.mark.asyncio
+@skip_mode("debug", "debug mode is too slow for this test")
+async def test_multi_column_lwt_migrate_and_random_resizes(manager: ManagerClient):
+
+    cfg = {
+        "enable_tablets": True,
+        "tablet_load_stats_refresh_interval_in_seconds": 1,
+        "target-tablet-size-in-bytes": 1024 * 16,
+    }
+
+    properties = [
+        {"dc": "dc1", "rack": "r1"},
+        {"dc": "dc1", "rack": "r2"},
+        {"dc": "dc1", "rack": "r3"},
+        {"dc": "dc1", "rack": "r1"},
+        {"dc": "dc1", "rack": "r2"},
+        {"dc": "dc1", "rack": "r3"},
+    ]
+
+    cmdline = [
+        '--logger-log-level', 'paxos=trace', '--smp=2',
+    ]
+
+    servers = await manager.servers_add(6, config=cfg, property_file=properties, cmdline=cmdline)
+    
+    async with new_test_keyspace(
+        manager,
+        "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 3} "
+        "AND tablets = {'initial': 1}",
+    ) as ks:
+        stop_event_ = asyncio.Event()
+        table = "lwt_split_merge_table"
+        cnt_table = "lwt_split_merge_counters"
+        tester = BaseLWTTester(
+            manager,
+            ks,
+            table,
+            num_workers=DEFAULT_WORKERS,
+            num_keys=DEFAULT_NUM_KEYS,
+            use_counters=True,
+            counters_random_delta=True,
+            counters_max_delta=5,
+            counter_tbl=cnt_table,
+        )
+
+        await tester.create_schema()
+        await tester.initialize_rows()
+        await tester.start_workers(stop_event_)
+
+        try:
+            # PHASE: warmup
+            tester.set_phase(PHASE_WARMUP)
+            logger.info("LWT warmup: waiting for %d applied CAS", WARMUP_LWT_CNT)
+            await tester.wait_for_phase_ops(stop_event_, PHASE_WARMUP, WARMUP_LWT_CNT, timeout=180, poll=0.2)
+            logger.info("LWT warmup complete: %d ops", tester.get_phase_ops(PHASE_WARMUP))
+
+            # PHASE: resize + migrate
+            tester.set_phase(PHASE_RESIZE)
+            logger.info("Starting RESIZE (random powers-of-two) + %d migrations per table", NUM_MIGRATIONS)
+
+            resize_task = asyncio.create_task(
+                run_random_resizes(
+                    stop_event_=stop_event_,
+                    manager=manager,
+                    servers=servers,
+                    tester=tester,
+                    ks=ks,
+                    table=table,
+                    target_steps=TARGET_RESIZE_COUNT,
+                    counter_table=cnt_table,
+                )
+            )
+            migrate_task = asyncio.create_task(
+                tablet_migration_ops(
+                    stop_event_,
+                    manager, servers, tester,
+                    num_ops=NUM_MIGRATIONS,
+                    pause_range=(0.3, 1.0),
+                    server_properties=properties,
+                    table=table,
+                )
+            )
+            migrate_cnt_task = asyncio.create_task(
+                tablet_migration_ops(
+                    stop_event_,
+                    manager, servers, tester,
+                    num_ops=NUM_MIGRATIONS,
+                    pause_range=(0.3, 1.0),
+                    server_properties=properties,
+                    table=cnt_table
+                )
+            )
+
+            resize_stats = await resize_task
+            await asyncio.gather(migrate_task, migrate_cnt_task)
+
+            logger.info(
+                "Randomized resize stats: steps_done=%d, split=%d, merge=%d; LWT ops during resize=%d",
+                resize_stats["steps_done"], resize_stats["seen_split"], resize_stats["seen_merge"],
+                tester.get_phase_ops(PHASE_RESIZE),
+            )
+            assert resize_stats["steps_done"] >= 1, "Resize phase performed 0 steps"
+            assert tester.get_phase_ops(PHASE_RESIZE) > 0, "Expected LWT ops during RESIZE phase"
+
+            # PHASE: post
+            tester.set_phase(PHASE_POST)
+            logger.info("LWT post resize: waiting for %d applied CAS", POST_LWT_CNT)
+            await tester.wait_for_phase_ops(stop_event_, PHASE_POST, POST_LWT_CNT, timeout=180, poll=0.2)
+            logger.info("LWT post resize complete: %d ops", tester.get_phase_ops(PHASE_POST))
+
+            total_ops = sum(tester.phase_ops.values())
+            assert total_ops >= (WARMUP_LWT_CNT + POST_LWT_CNT), f"Too few total LWT ops: {total_ops}"
+
+        finally:
+            await tester.stop_workers()
+
+        await tester.verify_consistency()
+        logger.info("Combined LWT during random split/merge + migrations test completed successfully")
--- a/test/cluster/object_store/test_backup.py
+++ b/test/cluster/object_store/test_backup.py
@@ -131,8 +131,9 @@ async def test_backup_move(manager: ManagerClient, object_storage, move_files):


@pytest.mark.asyncio
-async def test_backup_to_non_existent_bucket(manager: ManagerClient, object_storage):
-    '''backup should fail if the destination bucket does not exist'''
+@pytest.mark.parametrize("ne_parameter", [ "endpoint", "bucket", "snapshot" ])
+async def test_backup_with_non_existing_parameters(manager: ManagerClient, object_storage, ne_parameter):
+    '''backup should fail if any of the parameters does not exist'''

    objconf = object_storage.create_endpoint_conf()
    cfg = {'enable_user_defined_functions': False,
@@ -142,7 +143,8 @@ async def test_backup_to_non_existent_bucket(manager: ManagerClient, object_stor
           }
    cmd = ['--logger-log-level', 'snapshots=trace:task_manager=trace:api=info']
    server = await manager.server_add(config=cfg, cmdline=cmd)
-    ks, cf = await prepare_snapshot_for_backup(manager, server)
+    backup_snap_name = 'backup'
+    ks, cf = await prepare_snapshot_for_backup(manager, server, snap_name = backup_snap_name)

    workdir = await manager.server_get_workdir(server.server_id)
    cf_dir = os.listdir(f'{workdir}/data/{ks}')[0]
@@ -150,39 +152,18 @@ async def test_backup_to_non_existent_bucket(manager: ManagerClient, object_stor
    assert len(files) > 0

    prefix = f'{cf}/backup'
-    tid = await manager.api.backup(server.ip_addr, ks, cf, 'backup', object_storage.address, "non-existant-bucket", prefix)
+    tid = await manager.api.backup(server.ip_addr, ks, cf,
+            backup_snap_name if ne_parameter != 'snapshot' else 'no-such-snapshot',
+            object_storage.address if ne_parameter != 'endpoint' else 'no-such-endpoint',
+            object_storage.bucket_name if ne_parameter != 'bucket' else 'no-such-bucket',
+            prefix)
    status = await manager.api.wait_task(server.ip_addr, tid)
    assert status is not None
    assert status['state'] == 'failed'
-    #assert 'S3 request failed. Code: 15. Reason: Access Denied.' in status['error']
+    if ne_parameter == 'endpoint':
+        assert status['error'] == 'std::invalid_argument (endpoint no-such-endpoint not found)'


-@pytest.mark.asyncio
-async def test_backup_to_non_existent_endpoint(manager: ManagerClient, object_storage):
-    '''backup should fail if the endpoint is invalid/inaccessible'''
-
-    objconf = object_storage.create_endpoint_conf()
-    cfg = {'enable_user_defined_functions': False,
-           'object_storage_endpoints': objconf,
-           'experimental_features': ['keyspace-storage-options'],
-           'task_ttl_in_seconds': 300
-           }
-    cmd = ['--logger-log-level', 'snapshots=trace:task_manager=trace']
-    server = await manager.server_add(config=cfg, cmdline=cmd)
-    ks, cf = await prepare_snapshot_for_backup(manager, server)
-
-    workdir = await manager.server_get_workdir(server.server_id)
-    cf_dir = os.listdir(f'{workdir}/data/{ks}')[0]
-    files = set(os.listdir(f'{workdir}/data/{ks}/{cf_dir}/snapshots/backup'))
-    assert len(files) > 0
-
-    prefix = f'{cf}/backup'
-    tid = await manager.api.backup(server.ip_addr, ks, cf, 'backup', "does_not_exist", object_storage.bucket_name, prefix)
-    status = await manager.api.wait_task(server.ip_addr, tid)
-    assert status is not None
-    assert status['state'] == 'failed'
-    assert status['error'] == 'std::invalid_argument (endpoint does_not_exist not found)'
-
 async def do_test_backup_abort(manager: ManagerClient, object_storage,
                               breakpoint_name, min_files, max_files = None):
    '''helper for backup abort testing'''
@@ -236,38 +217,6 @@ async def do_test_backup_abort(manager: ManagerClient, object_storage,
    assert max_files is None or uploaded_count < max_files


-@pytest.mark.asyncio
-async def test_backup_to_non_existent_snapshot(manager: ManagerClient, object_storage):
-    '''backup should fail if the snapshot does not exist'''
-
-    objconf = object_storage.create_endpoint_conf()
-    cfg = {'enable_user_defined_functions': False,
-           'object_storage_endpoints': objconf,
-           'experimental_features': ['keyspace-storage-options'],
-           'task_ttl_in_seconds': 300
-           }
-    cmd = ['--logger-log-level', 'snapshots=trace:task_manager=trace:api=info']
-    server = await manager.server_add(config=cfg, cmdline=cmd)
-    ks, cf = await prepare_snapshot_for_backup(manager, server)
-
-    prefix = f'{cf}/backup'
-    tid = await manager.api.backup(server.ip_addr, ks, cf, 'nonexistent-snapshot',
-                                   object_storage.address, object_storage.bucket_name, prefix)
-    # The task is expected to fail immediately due to invalid snapshot name.
-    # However, since internal implementation details may change, we'll wait for
-    # task completion if immediate failure doesn't occur.
-    actual_state = None
-    for status_api in [manager.api.get_task_status,
-                       manager.api.wait_task]:
-        status = await status_api(server.ip_addr, tid)
-        assert status is not None
-        actual_state = status['state']
-        if actual_state == 'failed':
-            break
-    else:
-        assert actual_state == 'failed'
-
-
@pytest.mark.asyncio
@skip_mode('release', 'error injections are not supported in release mode')
 async def test_backup_is_abortable(manager: ManagerClient, object_storage):
--- a/test/cluster/test_change_ip.py
+++ b/test/cluster/test_change_ip.py
@@ -121,7 +121,7 @@ async def test_change_two(manager, random_tables, build_mode):
        await manager.server_update_config(servers[1].server_id, "error_injections_at_startup", ['sleep_before_start_gossiping'])
        await manager.server_update_config(servers[2].server_id, "error_injections_at_startup", ['sleep_before_start_gossiping'])
    await manager.server_start(servers[1].server_id)
-    servers[1] = ServerInfo(servers[1].server_id, s1_new_ip, s1_new_ip, servers[1].datacenter, servers[1].rack)
+    servers[1] = servers[1]._replace(ip_addr=s1_new_ip, rpc_address=s1_new_ip)
    if build_mode != 'release':
        s0_logs = await manager.server_open_log(servers[0].server_id)
        await s0_logs.wait_for('crash-before-prev-ip-removed hit, killing the node')
@@ -132,7 +132,7 @@ async def test_change_two(manager, random_tables, build_mode):
    await wait_proper_ips([servers[0], servers[1]])

    await manager.server_start(servers[2].server_id)
-    servers[2] = ServerInfo(servers[2].server_id, s2_new_ip, s2_new_ip, servers[2].datacenter, servers[2].rack)
+    servers[2] = servers[2]._replace(ip_addr=s2_new_ip, rpc_address=s2_new_ip)
    await reconnect_driver(manager)
    await wait_proper_ips([servers[0], servers[1], servers[2]])

--- a/test/cluster/test_crash_coordinator_before_streaming.py
+++ b/test/cluster/test_crash_coordinator_before_streaming.py
@@ -51,6 +51,9 @@ async def test_kill_coordinator_during_op(manager: ManagerClient) -> None:
    coordinators_ids = await get_coordinator_host_ids(manager)
    assert len(coordinators_ids) == 1, "At least 1 coordinator id should be found"

+    # Configure manager to ignore crashes caused by crash_coordinator_before_stream injection
+    manager.ignore_cores_log_patterns.append("crash_coordinator_before_stream: aborting")
+
    # kill coordinator during decommission
    logger.debug("Kill coordinator during decommission")
    coordinator_host = await get_coordinator_host(manager)
--- a/test/cluster/test_no_removed_node_event_on_ip_change.py
+++ b/test/cluster/test_no_removed_node_event_on_ip_change.py
@@ -48,7 +48,7 @@ async def test_no_removed_node_event_on_ip_change(manager: ManagerClient, caplog
        with test_cluster.connect() as test_cql:
            logger.info(f"starting the follower node {servers[1]}")
            await manager.server_start(servers[1].server_id)
-            servers[1] = ServerInfo(servers[1].server_id, s1_new_ip, s1_new_ip, servers[1].datacenter, servers[1].rack)
+            servers[1] = servers[1]._replace(ip_addr=s1_new_ip, rpc_address=s1_new_ip)

            logger.info("waiting for cql and hosts")
            await wait_for_cql_and_get_hosts(test_cql, servers, time.time() + 30)
--- a/test/cluster/test_tablets2.py
+++ b/test/cluster/test_tablets2.py
@@ -10,7 +10,7 @@ from cassandra.policies import FallthroughRetryPolicy
 from test.pylib.internal_types import HostID, ServerInfo, ServerNum
 from test.pylib.manager_client import ManagerClient
 from test.pylib.rest_client import inject_error_one_shot, HTTPError, read_barrier
-from test.pylib.util import wait_for_cql_and_get_hosts, unique_name
+from test.pylib.util import wait_for_cql_and_get_hosts, unique_name, wait_for
 from test.pylib.tablets import get_tablet_replica, get_all_tablet_replicas, get_tablet_count, TabletReplicas
 from test.cluster.conftest import skip_mode
 from test.cluster.util import reconnect_driver, create_new_test_keyspace, new_test_keyspace
@@ -1981,3 +1981,171 @@ async def test_timed_out_reader_after_cleanup(manager: ManagerClient):

        rows = await cql.run_async(f"SELECT pk from {ks}.test")
        assert len(list(rows)) == 1
+
+# This is a test and reproducer for https://github.com/scylladb/scylladb/issues/26041
+@pytest.mark.asyncio
+@pytest.mark.parametrize("repair_before_split", [False, True])
+@skip_mode('release', 'error injections are not supported in release mode')
+async def test_split_and_incremental_repair_synchronization(manager: ManagerClient, repair_before_split: bool):
+    logger.info('Bootstrapping cluster')
+    cfg = { 'enable_tablets': True,
+            'tablet_load_stats_refresh_interval_in_seconds': 1
+            }
+    cmdline = [
+        '--logger-log-level', 'load_balancer=debug',
+        '--logger-log-level', 'debug_error_injection=debug',
+        '--logger-log-level', 'compaction=debug',
+    ]
+    servers = await manager.servers_add(2, cmdline=cmdline, config=cfg, auto_rack_dc="dc1")
+
+    cql = manager.get_cql()
+
+    await manager.api.disable_tablet_balancing(servers[0].ip_addr)
+
+    initial_tablets = 2
+
+    async with new_test_keyspace(manager, f"WITH replication = {{'class': 'NetworkTopologyStrategy', 'replication_factor': 2}}") as ks:
+        await cql.run_async(f"CREATE TABLE {ks}.test (pk int PRIMARY KEY, c int) WITH tablets = {{'min_tablet_count': {initial_tablets}}};")
+
+        # insert data
+        pks = range(256)
+        await asyncio.gather(*[cql.run_async(f"INSERT INTO {ks}.test (pk, c) VALUES ({k}, {k});") for k in pks])
+
+        # flush the table
+        for server in servers:
+            await manager.api.flush_keyspace(server.ip_addr, ks)
+
+        s0_log = await manager.server_open_log(servers[0].server_id)
+        s0_mark = await s0_log.mark()
+        s1_log = await manager.server_open_log(servers[1].server_id)
+        s1_mark = await s1_log.mark()
+        expected_tablet_count = 4 # expected tablet count post split
+
+        async def run_split_prepare():
+            await manager.api.enable_injection(servers[0].ip_addr, 'tablet_resize_finalization_postpone', one_shot=False)
+
+            # force split on the test table
+            await cql.run_async(f"ALTER TABLE {ks}.test WITH tablets = {{'min_tablet_count': {expected_tablet_count}}}")
+
+            await s0_log.wait_for('Finalizing resize decision for table', from_mark=s0_mark)
+
+        async def generate_repair_work():
+            insert_stmt = cql.prepare(f"INSERT INTO {ks}.test (pk, c) VALUES (?, ?)")
+            insert_stmt.consistency_level = ConsistencyLevel.ONE
+
+            await manager.api.enable_injection(servers[0].ip_addr, "database_apply", one_shot=False)
+            pks = range(256, 512)
+            await asyncio.gather(*[cql.run_async(insert_stmt, (k, k)) for k in pks])
+            await manager.api.disable_injection(servers[0].ip_addr, "database_apply")
+
+        token = 'all'
+
+        await manager.api.enable_tablet_balancing(servers[0].ip_addr)
+
+        if repair_before_split:
+            await generate_repair_work()
+            for server in servers:
+                await manager.api.enable_injection(server.ip_addr, "incremental_repair_prepare_wait", one_shot=True)
+            repair_task = asyncio.create_task(manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental'))
+            await s0_log.wait_for('incremental_repair_prepare_wait: waiting', from_mark=s0_mark)
+            await s1_log.wait_for('incremental_repair_prepare_wait: waiting', from_mark=s1_mark)
+
+            await run_split_prepare()
+
+            for server in servers:
+                await manager.api.message_injection(server.ip_addr, "incremental_repair_prepare_wait")
+            await repair_task
+        else:
+            await run_split_prepare()
+            await generate_repair_work()
+            await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental')
+
+        await manager.api.disable_injection(servers[0].ip_addr, "tablet_resize_finalization_postpone")
+
+        async def finished_splitting():
+            tablet_count = await get_tablet_count(manager, servers[0], ks, 'test')
+            return tablet_count >= expected_tablet_count or None
+        # Give enough time for split to happen in debug mode
+        await wait_for(finished_splitting, time.time() + 120)
+
+        await manager.server_stop(servers[0].server_id)
+        await manager.server_start(servers[0].server_id)
+        hosts = await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)
+        await manager.servers_see_each_other(servers)
+
+@pytest.mark.asyncio
+@skip_mode('release', 'error injections are not supported in release mode')
+async def test_split_and_intranode_synchronization(manager: ManagerClient):
+    logger.info('Bootstrapping cluster')
+    cfg = { 'enable_tablets': True,
+            'tablet_load_stats_refresh_interval_in_seconds': 1
+            }
+    cmdline = [
+        '--logger-log-level', 'load_balancer=debug',
+        '--logger-log-level', 'debug_error_injection=debug',
+        '--logger-log-level', 'compaction=debug',
+        '--smp', '2',
+    ]
+    servers = await manager.servers_add(1, cmdline=cmdline, config=cfg)
+    server = servers[0]
+
+    cql = manager.get_cql()
+
+    await manager.api.disable_tablet_balancing(servers[0].ip_addr)
+
+    initial_tablets = 1
+
+    async with new_test_keyspace(manager, f"WITH replication = {{'class': 'NetworkTopologyStrategy', 'replication_factor': 1}}") as ks:
+        await cql.run_async(f"CREATE TABLE {ks}.test (pk int PRIMARY KEY, c int) WITH tablets = {{'min_tablet_count': {initial_tablets}}};")
+
+        # insert data
+        pks = range(256)
+        await asyncio.gather(*[cql.run_async(f"INSERT INTO {ks}.test (pk, c) VALUES ({k}, {k});") for k in pks])
+
+        # flush the table
+        await manager.api.flush_keyspace(server.ip_addr, ks)
+
+        log = await manager.server_open_log(server.server_id)
+        mark = await log.mark()
+
+        tablet_token = 0 # Doesn't matter since there is one tablet
+        replica = await get_tablet_replica(manager, servers[0], ks, 'test', tablet_token)
+
+        host_id = await manager.get_host_id(server.server_id)
+        src_shard = replica[1]
+
+        # if tablet replica is at shard 0, move it to shard 1.
+        if src_shard == 0:
+            dst_shard = 1
+            await manager.api.move_tablet(server.ip_addr, ks, "test", replica[0], src_shard, replica[0], dst_shard, tablet_token)
+
+        await manager.api.enable_tablet_balancing(server.ip_addr)
+
+        await manager.api.enable_injection(server.ip_addr, 'tablet_resize_finalization_postpone', one_shot=False)
+        await manager.api.enable_injection(server.ip_addr, "split_sstable_force_stop_exception", one_shot=False)
+
+        # force split on the test table
+        expected_tablet_count = initial_tablets * 2
+        await cql.run_async(f"ALTER TABLE {ks}.test WITH tablets = {{'min_tablet_count': {expected_tablet_count}}}")
+
+        # Check that shard 0 ACKed split.
+        mark, _ = await log.wait_for('Setting split ready sequence number to', from_mark=mark)
+
+        # Move tablet replica back to shard 0, where split was already ACKed.
+        src_shard = 1
+        dst_shard = 0
+        migration_task = asyncio.create_task(manager.api.move_tablet(server.ip_addr, ks, "test", replica[0], src_shard, replica[0], dst_shard, tablet_token))
+
+        mark, _ = await log.wait_for("Finished intra-node streaming of tablet", from_mark=mark)
+
+        await manager.api.stop_compaction(server.ip_addr, "SPLIT")
+
+        await migration_task
+
+        await manager.api.disable_injection(server.ip_addr, "tablet_resize_finalization_postpone")
+
+        async def finished_splitting():
+            tablet_count = await get_tablet_count(manager, server, ks, 'test')
+            return tablet_count >= expected_tablet_count or None
+        # Give enough time for split to happen in debug mode
+        await wait_for(finished_splitting, time.time() + 120)
--- a/test/cqlpy/test_cast.py
+++ b/test/cqlpy/test_cast.py
@@ -74,7 +74,6 @@ def test_cast_int_literal_with_type_hint_to_blob(cql, table1, scylla_only):
 # An int can always be converted to a valid blob, but blobs might have wrong amount of bytes
 # and can't be converted to a valid int.
 def test_cast_blob_literal_to_int(cql, table1):
-    pk = unique_key_int()
    with pytest.raises(InvalidRequest, match='HEX'):
        cql.execute(f"INSERT INTO {table1} (pk) VALUES (0xBAAAAAAD)")
    with pytest.raises(InvalidRequest, match='blob'):
--- a/test/cqlpy/test_clustering_order.py
+++ b/test/cqlpy/test_clustering_order.py
@@ -61,7 +61,7 @@ def test_select_default_order(cql, table_int_desc):
 def test_multi_column_relation_desc(cql, table2):
    k = unique_key_int()
    stmt = cql.prepare(f'INSERT INTO {table2} (p, c1, c2) VALUES (?, ?, ?)')
-    cql.execute(stmt, [0, 1, 0])
-    cql.execute(stmt, [0, 1, 1])
-    cql.execute(stmt, [0, 1, 2])
-    assert [(1, 2), (1, 1)] == list(cql.execute(f'SELECT c1,c2 FROM {table2} WHERE p = 0 AND (c1, c2) >= (1, 1)'))
+    cql.execute(stmt, [k, 1, 0])
+    cql.execute(stmt, [k, 1, 1])
+    cql.execute(stmt, [k, 1, 2])
+    assert [(1, 2), (1, 1)] == list(cql.execute(f'SELECT c1,c2 FROM {table2} WHERE p = {k} AND (c1, c2) >= (1, 1)'))
--- a/test/cqlpy/test_keyspace.py
+++ b/test/cqlpy/test_keyspace.py
@@ -352,7 +352,7 @@ def test_storage_options_alter_type(cql, scylla_only):
        ksdef_local = "WITH REPLICATION = { 'class' : 'NetworkTopologyStrategy', 'replication_factor' : '1' } " \
            "AND STORAGE = { 'type' : 'S3', 'bucket' : '/b1', 'endpoint': 'localhost'}"
        with pytest.raises(InvalidRequest):
-            res = cql.execute(f"ALTER KEYSPACE {keyspace} {ksdef_local}")
+            cql.execute(f"ALTER KEYSPACE {keyspace} {ksdef_local}")

 # Reproducer for scylladb#14139
 def test_alter_keyspace_preserves_udt(cql):
--- a/test/cqlpy/test_permissions.py
+++ b/test/cqlpy/test_permissions.py
@@ -171,7 +171,6 @@ def test_grant_revoke_data_permissions(cql, test_keyspace):
 # Test that permissions for user-defined functions are serialized in a Cassandra-compatible way
 def test_udf_permissions_serialization(cql):
    schema = "a int primary key"
-    user = "cassandra"
    with new_test_keyspace(cql, "WITH REPLICATION = { 'class': 'NetworkTopologyStrategy', 'replication_factor': 1 }") as keyspace, new_user(cql) as user:
        with new_test_table(cql, keyspace, schema) as table:
            # Creating a bilingual function makes this test case work for both Scylla and Cassandra
@@ -247,7 +246,6 @@ def test_udf_permissions_quoted_names(cassandra_bug, cql):
 # permissions. Cassandra erroneously reports the unrelated missing permissions.
 # Reported to Cassandra as CASSANDRA-19005.
 def test_drop_udf_with_same_name(cql, cassandra_bug):
-    schema = "a int primary key"
    with new_test_keyspace(cql, "WITH REPLICATION = { 'class': 'NetworkTopologyStrategy', 'replication_factor': 1 }") as keyspace:
        body1_lua = "(i int) CALLED ON NULL INPUT RETURNS bigint LANGUAGE lua AS 'return 42;'"
        body1_java = "(i int) CALLED ON NULL INPUT RETURNS bigint LANGUAGE java AS 'return 42L;'"
@@ -288,7 +286,6 @@ def test_drop_udf_with_same_name(cql, cassandra_bug):
 # Tests for ALTER are separate, because they are qualified as cassandra_bug
 def test_grant_revoke_udf_permissions(cql):
    schema = "a int primary key, b list<int>"
-    user = "cassandra"
    with new_test_keyspace(cql, "WITH REPLICATION = { 'class': 'NetworkTopologyStrategy', 'replication_factor': 1 }") as keyspace:
        with new_test_table(cql, keyspace, schema) as table:
            fun_body_lua = "(i int, l list<int>) CALLED ON NULL INPUT RETURNS int LANGUAGE lua AS 'return 42;'"
@@ -335,7 +332,6 @@ def test_grant_revoke_udf_permissions(cql):
 # and yet it's not enforced
 def test_grant_revoke_alter_udf_permissions(cassandra_bug, cql):
    schema = "a int primary key"
-    user = "cassandra"
    with new_test_keyspace(cql, "WITH REPLICATION = { 'class': 'SimpleStrategy', 'replication_factor': 1 }") as keyspace:
        with new_test_table(cql, keyspace, schema) as table:
            fun_body_lua = "(i int) CALLED ON NULL INPUT RETURNS int LANGUAGE lua AS 'return 42;'"
--- a/test/cqlpy/test_service_levels.py
+++ b/test/cqlpy/test_service_levels.py
@@ -90,8 +90,6 @@ def test_attached_service_level(scylla_only, cql):
        assert res_one.role == cql.cluster.auth_provider.username and res_one.service_level == sl

 def test_list_effective_service_level(scylla_only, cql):
-    sl1 = "sl1"
-    sl2 = "sl2"
    timeout = "10s"
    workload_type = "batch"

@@ -120,8 +118,6 @@ def test_list_effective_service_level(scylla_only, cql):
                            assert row.value == "batch"

 def test_list_effective_service_level_shares(scylla_only, cql):
-    sl1 = "sl1"
-    sl2 = "sl2"
    shares1 = 500
    shares2 = 200

@@ -184,8 +180,6 @@ def test_default_shares_in_listings(scylla_only, cql):
 # and that the messages Scylla returns are informative.
 def test_manipulating_default_service_level(cql, scylla_only):
    default_sl = "default"
-    # Service levels are case-sensitive (if used with quotation marks).
-    fake_default_sl = '"DeFaUlT"'

    with new_user(cql) as role:
        # Creation.
--- a/test/cqlpy/test_virtual_tables.py
+++ b/test/cqlpy/test_virtual_tables.py
@@ -76,6 +76,7 @@ def test_clients(scylla_only, cql):
        'ssl_enabled',
        'ssl_protocol',
        'username',
+        'client_options',
    ])
    cls = list(cql.execute(f"SELECT {columns} FROM system.clients"))
    # There must be at least one connection - the one that sent this SELECT
@@ -84,6 +85,9 @@ def test_clients(scylla_only, cql):
    for cl in cls:
        assert(cl[0] == '127.0.0.1')
        assert(cl[2] == 'cql')
+        client_options = cl[13]
+        assert(client_options.get('DRIVER_NAME') == cl[4])
+        assert(client_options.get('DRIVER_VERSION') == cl[5])

 # We only want to check that the table exists with the listed columns, to assert
 # backwards compatibility.
--- a/test/cqlpy/test_wasm.py
+++ b/test/cqlpy/test_wasm.py
@@ -23,7 +23,7 @@ def scylla_with_wasm_only(scylla_only, cql, test_keyspace):
    try:
        f42 = unique_name()
        f42_body = f'(module(func ${f42} (param $n i64) (result i64)(return i64.const 42))(export "{f42}" (func ${f42})))'
-        res = cql.execute(f"CREATE FUNCTION {test_keyspace}.{f42} (input int) RETURNS NULL ON NULL INPUT RETURNS int LANGUAGE wasm AS '{f42_body}'")
+        cql.execute(f"CREATE FUNCTION {test_keyspace}.{f42} (input int) RETURNS NULL ON NULL INPUT RETURNS int LANGUAGE wasm AS '{f42_body}'")
        cql.execute(f"DROP FUNCTION {test_keyspace}.{f42}")
    except NoHostAvailable as err:
        if "not enabled" in str(err):
@@ -373,8 +373,7 @@ def test_pow(cql, test_keyspace, table1, scylla_with_wasm_only):
        assert len(res) == 1 and res[0].result == 177147

 # Test that only compilable input is accepted
-def test_compilable(cql, test_keyspace, table1, scylla_with_wasm_only):
-    table = table1
+def test_compilable(cql, test_keyspace, scylla_with_wasm_only):
    wrong_source = f"""
 Dear wasmtime compiler, please return a function which returns its float argument increased by 1
 """
@@ -384,8 +383,7 @@ Dear wasmtime compiler, please return a function which returns its float argumen

 # Test that not exporting a function with matching name
 # results in an error
-def test_not_exported(cql, test_keyspace, table1, scylla_with_wasm_only):
-    table = table1
+def test_not_exported(cql, test_keyspace, scylla_with_wasm_only):
    wrong_source = f"""
 (module
  (type (;0;) (func (param f32) (result f32)))
@@ -403,8 +401,7 @@ def test_not_exported(cql, test_keyspace, table1, scylla_with_wasm_only):
                f"AS '{wrong_source}'")

 # Test that trying to use something that is exported, but is not a function, won't work
-def test_not_a_function(cql, test_keyspace, table1, scylla_with_wasm_only):
-    table = table1
+def test_not_a_function(cql, test_keyspace, scylla_with_wasm_only):
    wrong_source = f"""
 (module
  (type (;0;) (func (param f32) (result f32)))
--- a/test/lib/CMakeLists.txt
+++ b/test/lib/CMakeLists.txt
@@ -1,5 +1,8 @@
 add_library(test-lib STATIC)
 target_sources(test-lib
+  PUBLIC
+    boost_test_tree_lister.cc
+    boost_tree_lister_injector.cc
  PRIVATE
    cql_assertions.cc
    dummy_sharder.cc
--- a/test/lib/boost_test_tree_lister.cc
+++ b/test/lib/boost_test_tree_lister.cc
@@ -0,0 +1,422 @@
+/*
+ * Copyright (C) 2025-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+ */
+
+#include "test/lib/boost_test_tree_lister.hh"
+
+#include <boost/algorithm/string/replace.hpp>
+#include <fmt/ranges.h>
+
+#include <memory>
+#include <ranges>
+
+namespace {
+
+using label_info = internal::label_info;
+
+using test_case_info = internal::test_case_info;
+using test_suite_info = internal::test_suite_info;
+using test_file_info = internal::test_file_info;
+
+} // anonymous namespace
+
+/// --------------------
+///
+/// Implementation notes
+///
+/// --------------------
+///
+/// The structure of the Boost.Test's test tree consists solely
+/// of nodes representing test suites and test cases. It ignores
+/// information like, for instance, the name of the file those
+/// entities reside in [1].
+///
+/// What's more, a test suite can span multiple files as long
+/// as it has the same name [2].
+///
+/// We'd like to re-visualize the tree in a different manner:
+/// have a forest, where each tree represents the internal structure
+/// of a specific file. The non-leaf nodes represent test suites,
+/// and the leaves -- test cases.
+///
+/// This type achieves that very goal (albeit in a bit ugly manner).
+///
+/// ---
+///
+/// Note that the implementation suffers from the same problems
+/// Boost.Test itself does. For instance, when parametrizing tests
+/// with `boost::unit_test::data`, the test will appear as a test suite,
+/// while cases for each of the data instances -- as test cases.
+/// There's no way to overcome that, so we're stuck with it.
+///
+/// -----------
+///
+/// Assumptions
+///
+/// -----------
+///
+/// We rely on the following assumptions:
+///
+/// 1. The tree traversal is performed pre-order. That's the case for
+///    Boost.Test 1.89.0.
+/// 2. If a test case TC belongs to a test suite TS (directly or indirectly),
+///    the following execution order holds:
+///    i.   `test_suite_start(TS)`,
+///    ii.  `visit(TC)`,
+///    iii. `test_suite_finish(TS)`.
+/// 3. If test suite TS2 is nested within test suite TS1, the following
+///    execution order holds:
+///    i.   `test_suite_start(TS1)`,
+///    ii.  `test_suite_start(TS2)`,
+///    iii. `test_suite_finish(TS2)`,
+///    iv.  `test_suite_finish(TS1)`.
+///
+/// ----------
+///
+/// References
+///
+/// ----------
+///
+/// [1] https://www.boost.org/doc/libs/1_89_0/libs/test/doc/html/boost_test/tests_organization/test_tree.html
+/// [2] https://www.boost.org/doc/libs/1_89_0/libs/test/doc/html/boost_test/tests_organization/test_tree/test_suite.html
+///
+/// ----------------------------
+///
+/// Example of high-level output
+///
+/// ----------------------------
+///
+/// Let's consider the following organization of tests.
+///
+/// TestFile1.cc:
+/// - Suite A:
+///   - Suite A1:
+///     - Test A1.1 (labels: L1)
+///     - Test A1.2
+///   - Suite A2:
+///     - Test A2.1
+///   - Test A.1
+/// - Suite B:
+///   - Test B1
+///   - Test B2 (labels: L2, L3)
+/// - Test 1
+///
+/// TestFile2.cc:
+/// - Suite A:
+///   - Suite A3
+///     - Test A3.1
+///   - Test A.2
+/// - Suite C:
+///   - Test C.1
+/// - Test 2
+///
+/// This structure will be translated into the following JSON (we're
+/// omitting some details to make it cleaner and easier to read):
+///
+/// [
+///   {
+///     "file": "TestFile1.cc",
+///     "content": {
+///       "suites": [
+///         {
+///           "name": "A",
+///           "suites": [
+///             {
+///               "name": "A1",
+///               "suites": [],
+///               "tests": [
+///                 {
+///                   "name": "Test A1.1",
+///                   "labels": "L1"
+///                 },
+///                 {
+///                   "name": "Test A1.2",
+///                   "labels": ""
+///                 }
+///               ]
+///             }
+///           ],
+///           "tests": [
+///             {
+///               "name": "Test A.1",
+///               "labels": ""
+///             }
+///           ]
+///         },
+///         {
+///           "name": "B",
+///           "suites": [],
+///           "tests": [
+///             {
+///               "name": "Test B1",
+///               "labels": ""
+///             },
+///             {
+///               "name": "Test B2",
+///               "labels": "L2,L3"
+///             }
+///           ]
+///         }
+///       ],
+///       "tests": [
+///         {
+///           "name": "Test 1",
+///           "labels": ""
+///         }
+///       ]
+///     }
+///   },
+///   {
+///     "file": "TestFile2.cc",
+///     "content": {
+///       "suites": [
+///         {
+///           "name": "A",
+///           "suites": [
+///             {
+///               "name": "A3",
+///               "suites": [],
+///               "tests": [
+///                 {
+///                   "name": "Test A3.1",
+///                   "labels": ""
+///                 }
+///               ]
+///             }
+///           ],
+///           "tests": [
+///             {
+///               "name": "Test A.2",
+///               "labels": ""
+///             }
+///           ]
+///         },
+///         {
+///           "name": "C",
+///           "suites": [],
+///           "tests": [
+///             {
+///               "name": "Test C.1",
+///               "labels": ""
+///             }
+///           ]
+///         }
+///       ],
+///       "tests": [
+///         {
+///           "name": "Test 2",
+///           "labels": ""
+///         }
+///       ]
+///     }
+///   }
+/// ]
+///
+/// Note that although Boost.Test treats Suite A in TestFile1.cc
+/// and Suite A in TestFile2.cc as the SAME suite, we consider it
+/// separately for each of the files it resides in.
+struct boost_test_tree_lister::impl {
+public:
+    /// The final result we're building while traversing the test tree.
+    test_file_forest file_forest;
+    /// The path from the root to the current suite.
+    std::vector<std::string> active_suites;
+
+public:
+    void process_test_case(const boost::unit_test::test_case& tc) {
+        const std::string_view filename = {tc.p_file_name.begin(), tc.p_file_name.end()};
+        test_file_info& test_file = get_file_info(filename);
+
+        std::string test_name = tc.p_name;
+        std::vector<label_info> labels = tc.p_labels.get();
+
+        test_case_info test_info {.name = std::move(test_name), .labels = std::move(labels)};
+
+        if (active_suites.empty()) {
+            test_file.free_tests.push_back(std::move(test_info));
+        } else {
+            test_suite_info& suite_info = get_active_suite(filename);
+            suite_info.tests.push_back(std::move(test_info));
+        }
+    }
+
+    bool test_suite_start(const boost::unit_test::test_suite& ts) {
+        // The suite is the master test suite, so let's ignore it
+        // because it doesn't represent any actual test suite.
+        if (ts.p_parent_id == boost::unit_test::INV_TEST_UNIT_ID) {
+            assert(active_suites.empty());
+            return true;
+        }
+
+        std::string suite_name = ts.p_name.value;
+        add_active_suite(std::move(suite_name));
+
+        return true;
+    }
+
+    void test_suite_finish(const boost::unit_test::test_suite& ts) {
+        // The suite is the master test suite, so let's ignore it
+        // because it doesn't represent any actual test suite.
+        if (ts.p_parent_id == boost::unit_test::INV_TEST_UNIT_ID) {
+            assert(active_suites.empty());
+            return;
+        }
+
+        drop_active_suite();
+    }
+
+private:
+    test_file_info& get_file_info(std::string_view filename) {
+        auto& test_files = file_forest.test_files;
+
+        auto it = test_files.find(filename);
+        if (it == test_files.end()) {
+            std::tie(it, std::ignore) = test_files.emplace(filename, std::vector<test_suite_info>{});
+        }
+
+        return it->second;
+    }
+
+    void add_active_suite(std::string suite_name) {
+        active_suites.push_back(std::move(suite_name));
+    }
+
+    void drop_active_suite() {
+        assert(!active_suites.empty());
+        active_suites.pop_back();
+    }
+
+    test_suite_info& get_active_suite(std::string_view filename) {
+        assert(!active_suites.empty());
+
+        test_file_info& file_info = get_file_info(filename);
+        test_suite_info* last = &get_root_suite(file_info, active_suites[0]);
+
+        for (const auto& suite_name : active_suites | std::views::drop(1)) {
+            last = &get_subsuite(*last, suite_name);
+        }
+
+        return *last;
+    }
+
+    test_suite_info& get_root_suite(test_file_info& file_info, std::string_view suite_name) {
+        auto suite_it = std::ranges::find(file_info.suites, suite_name, &test_suite_info::name);
+        if (suite_it != file_info.suites.end()) {
+            return *suite_it;
+        }
+
+        test_suite_info suite_info {.name = std::string(suite_name)};
+        file_info.suites.push_back(std::move(suite_info));
+
+        return *file_info.suites.rbegin();
+    }
+
+    test_suite_info& get_subsuite(test_suite_info& parent, std::string_view suite_name) {
+        auto suite_it = std::ranges::find(parent.subsuites, suite_name, [] (auto&& suite_ptr) -> std::string_view {
+            return suite_ptr->name;
+        });
+
+        if (suite_it != parent.subsuites.end()) {
+            return **suite_it;
+        }
+
+        auto suite = std::make_unique<test_suite_info>(std::string(suite_name));
+        parent.subsuites.push_back(std::move(suite));
+
+        return **parent.subsuites.rbegin();
+    }
+};
+
+boost_test_tree_lister::boost_test_tree_lister() : _impl(std::make_unique<impl>()) {}
+boost_test_tree_lister::~boost_test_tree_lister() noexcept = default;
+
+const test_file_forest& boost_test_tree_lister::get_result() const {
+    return _impl->file_forest;
+}
+
+void boost_test_tree_lister::visit(const boost::unit_test::test_case& tc) {
+    return _impl->process_test_case(tc);
+}
+
+bool boost_test_tree_lister::test_suite_start(const boost::unit_test::test_suite& ts) {
+    return _impl->test_suite_start(ts);
+}
+
+void boost_test_tree_lister::test_suite_finish(const boost::unit_test::test_suite& ts) {
+    return _impl->test_suite_finish(ts);
+}
+
+// Replace every occurrence of a double quotation mark (`"`) with a string `\"`.
+static std::string escape_quotation_marks(std::string_view str) {
+    const std::size_t double_quotation_count = std::ranges::count(str, '"');
+    std::string result(str.size() + double_quotation_count, '\\');
+
+    std::size_t offset = 0;
+    for (std::size_t i = 0; i < str.size(); ++i) {
+        if (str[i] == '"') {
+            result[i + offset] = '\\';
+            ++offset;
+        }
+        result[i + offset] = str[i];
+    }
+
+    return result;
+}
+
+auto fmt::formatter<internal::test_case_info>::format(
+        const internal::test_case_info& test_info,
+        fmt::format_context& ctx) const -> decltype(ctx.out())
+{
+    // Sanity check. The names of tests are expected to consist only of alphanumeric characters.
+    assert(std::ranges::count(test_info.name, '"') == 0);
+    auto label_range = test_info.labels | std::views::transform(escape_quotation_marks);
+
+    return fmt::format_to(ctx.out(), R"({{"name":"{}","labels":"{}"}})",
+            test_info.name, fmt::join(label_range, ","));
+}
+
+auto fmt::formatter<internal::test_suite_info>::format(
+        const internal::test_suite_info& suite_info,
+        fmt::format_context& ctx) const -> decltype(ctx.out())
+{
+    auto actual_suite_range = suite_info.subsuites | std::views::transform([] (auto&& ptr) -> const test_suite_info& {
+        return *ptr;
+    });
+    auto suite_range = fmt::join(actual_suite_range, ",");
+    auto test_range = fmt::join(suite_info.tests, ",");
+    return fmt::format_to(ctx.out(), R"({{"name":"{}","suites":[{}],"tests":[{}]}})",
+            suite_info.name, std::move(suite_range), std::move(test_range));
+}
+
+auto fmt::formatter<internal::test_file_info>::format(
+        const internal::test_file_info& file_info,
+        fmt::format_context& ctx) const -> decltype(ctx.out())
+{
+    auto suite_range = fmt::join(file_info.suites, ",");
+    auto test_range = fmt::join(file_info.free_tests, ",");
+    return fmt::format_to(ctx.out(), R"({{"suites":[{}],"tests":[{}]}})",
+            std::move(suite_range), std::move(test_range));
+}
+
+auto fmt::formatter<internal::test_file_forest>::format(
+        const internal::test_file_forest& forest_info,
+        fmt::format_context& ctx) const -> decltype(ctx.out())
+{
+    std::size_t files_left = forest_info.test_files.size();
+
+    fmt::format_to(ctx.out(), "[");
+    for (const auto& [file, content] : forest_info.test_files) {
+        fmt::format_to(ctx.out(), R"({{"file":"{}","content":{}}})",
+                file, content);
+        if (files_left > 1) {
+            fmt::format_to(ctx.out(), ",");
+        }
+        --files_left;
+
+    }
+    return fmt::format_to(ctx.out(), "]");
+}
--- a/test/lib/boost_test_tree_lister.hh
+++ b/test/lib/boost_test_tree_lister.hh
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2025-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+ */
+
+#pragma once
+
+#include <boost/test/tree/visitor.hpp>
+
+#include <fmt/base.h>
+
+#include <memory>
+
+namespace internal {
+
+using label_info = std::string;
+
+/// Type representing a single Boost test case.
+struct test_case_info {
+    /// The name of the test case.
+    std::string name;
+    /// The labels the test was marked with.
+    std::vector<label_info> labels;
+};
+
+/// Type representing a single Boost test suite within a single file.
+///
+/// Note that a single suite can span multiple files (as of Boost.Test 1.89.0); see:
+/// https://www.boost.org/doc/libs/1_89_0/libs/test/doc/html/boost_test/tests_organization/test_tree/test_suite.html.
+///
+/// We turn away from that convention and list suites from different files separately.
+/// However, that doesn't change the fact that it's still the same suite from the
+/// perspective of Boost.Test. In particular, if a suite is marked with a label,
+/// it's applied to it globally.
+struct test_suite_info {
+    std::string name;
+    std::vector<std::unique_ptr<test_suite_info>> subsuites;
+    /// The tests belonging directly to this suite.
+    std::vector<test_case_info> tests;
+};
+
+struct test_file_info {
+    std::vector<test_suite_info> suites;
+    std::vector<test_case_info> free_tests;
+};
+
+struct test_file_forest {
+    std::map<std::string, test_file_info, std::less<>> test_files;
+};
+
+} // namespace internal
+
+using test_file_forest = internal::test_file_forest;
+
+/// Implementation of the `boost::unit_test::test_tree_visitor` that
+/// produces a similar result to running a Boost.Test executable with
+/// `--list_content=HRF` or `--list_content=DOT`. The output of this
+/// type is formatted as JSON.
+///
+/// The crucial difference between this implementation and the built-in
+/// HRF and DOT ones is that the result obtained by a call to `get_result()`
+/// (after the traversal has finished) is going to have a different structure.
+///
+/// The type `boost_test_tree_lister` will treat the same suite from different
+/// files as separate ones, even if they share the name. Boost.Test would treat
+/// them as the same one and group the results by suites. In other words,
+/// this type groups results by (in order):
+///
+/// 1. File
+/// 2. Suite(s)
+/// 3. Test cases
+class boost_test_tree_lister : public boost::unit_test::test_tree_visitor {
+private:
+    struct impl;
+
+private:
+    std::unique_ptr<impl> _impl;
+
+public:
+    boost_test_tree_lister();
+    ~boost_test_tree_lister() noexcept;
+
+public:
+    const test_file_forest& get_result() const;
+
+private:
+    virtual void visit(const boost::unit_test::test_case&) override;
+    virtual bool test_suite_start(const boost::unit_test::test_suite&) override;
+    virtual void test_suite_finish(const boost::unit_test::test_suite&) override;
+};
+
+template <>
+struct fmt::formatter<internal::test_case_info> {
+    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+    auto format(const internal::test_case_info&, fmt::format_context& ctx) const -> decltype(ctx.out());
+};
+
+template <>
+struct fmt::formatter<internal::test_suite_info> {
+    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+    auto format(const internal::test_suite_info&, fmt::format_context& ctx) const -> decltype(ctx.out());
+};
+
+template <>
+struct fmt::formatter<internal::test_file_info> {
+    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+    auto format(const internal::test_file_info&, fmt::format_context& ctx) const -> decltype(ctx.out());
+};
+
+template <>
+struct fmt::formatter<internal::test_file_forest> {
+    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+    auto format(const internal::test_file_forest&, fmt::format_context& ctx) const -> decltype(ctx.out());
+};
--- a/test/lib/boost_tree_lister_injector.cc
+++ b/test/lib/boost_tree_lister_injector.cc
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2025-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+ */
+
+#include "test/lib/boost_test_tree_lister.hh"
+
+#include <boost/test/framework.hpp>
+#include <boost/test/tree/traverse.hpp>
+#include <boost/test/unit_test_suite.hpp>
+
+#include <fmt/core.h>
+
+namespace {
+
+/// Traverse the test tree and collect information about
+/// its structure and the tests.
+///
+/// The output is going to be in the JSON format.
+/// For more details, see the implementation of
+/// `boost_test_tree_lister`.
+void print_boost_tests() {
+    namespace but = boost::unit_test;
+
+    but::framework::finalize_setup_phase();
+
+    boost_test_tree_lister traverser;
+    but::traverse_test_tree(but::framework::master_test_suite().p_id, traverser, true);
+
+    fmt::print("{}", traverser.get_result());
+}
+
+/// --------
+/// Examples
+/// --------
+///
+/// # This will NOT list the tests because Boost.Test
+/// # will interpret it as an argument to the framework.
+/// $ ./path/to/my/test/exec --list_json_content
+///
+/// # This will NOT list the tests because Boost.Test requires
+/// # that all non-Boost.Test arguments be provided AFTER
+/// # a `--` sequence (cf. example below).
+/// $ ./path/to/my/test/exec list_json_content
+///
+/// # This will NOT list the tests because the option
+/// # simply doesn't match the expected one.
+/// $ ./path/to/my/test/exec -- list_json_content
+///
+/// # This DOES work and DOES what we expect, i.e. it lists the tests.
+/// $ ./path/to/my/test/exec -- --list_json_content
+bool list_tests(int argc, char** argv) {
+    for (int i = 1; i < argc; ++i) {
+        std::string_view option = argv[i];
+        if (option == "--list_json_content") {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+struct boost_tree_lister_injector {
+    boost_tree_lister_injector() {
+        const auto& master_suite = boost::unit_test::framework::master_test_suite();
+        /// The arguments here don't include Boost.Test-specific arguments.
+        /// Those present correspond to the path to the binary and options
+        /// specified for the "end-code".
+        ///
+        /// --------
+        /// Examples
+        /// --------
+        /// $ ./path/to/my/test/exec my_custom_arg
+        /// Arguments: [<path>, "my_custom_arg"]
+        ///
+        /// $ ./path/to/my/test/exec -- my_custom_arg
+        /// Arguments: [<path>, "my_custom_arg"]
+        ///
+        /// $ ./path/to/my/test/exec --auto_start_dbg=0 -- my_custom_arg
+        /// Arguments: [<path>, "my_custom_arg"]
+        ///
+        /// $ ./path/to/my/test/exec --auto_start_dbg=0 my_custom_arg
+        /// Arguments: [<path>, "my_custom_arg"]
+        ///
+        /// ------------------------------------------
+        /// Interaction with some Boost.Test arguments
+        /// ------------------------------------------
+        ///
+        /// Note, however, that some Boost.Test options may prevent us
+        /// from accessing this code. For instance, if the user runs
+        ///
+        /// $ ./path/to/my/test/exec --list_content -- my_custom_arg
+        ///
+        /// then Boost.Test will immediately move to its own code and not
+        /// execute this one (because it's only called by a global fixture).
+        auto&& [argc, argv] = std::make_pair(master_suite.argc, master_suite.argv);
+
+        if (list_tests(argc, argv)) {
+            print_boost_tests();
+
+            // At this point, it's impossible to prevent Boost.Test
+            // from executing the tests it collected. This is all
+            // we can do (at least without writing a lot more code;
+            // I don't know if it would still be possible to avoid it).
+            std::exit(0);
+        }
+    }
+};
+
+} // anonymous namespace
+
+BOOST_GLOBAL_FIXTURE(boost_tree_lister_injector);
--- a/test/lib/test_services.cc
+++ b/test/lib/test_services.cc
@@ -91,7 +91,7 @@ public:
    sstables::sstables_manager& get_sstables_manager() noexcept override {
        return _sstables_manager;
    }
-    sstables::shared_sstable make_sstable() const override {
+    sstables::shared_sstable make_sstable(sstables::sstable_state) const override {
        return table().make_sstable();
    }
    sstables::sstable_writer_config configure_writer(sstring origin) const override {
--- a/test/nodetool/test_cluster_repair.py
+++ b/test/nodetool/test_cluster_repair.py
@@ -385,7 +385,7 @@ def test_repair_options_hosts_and_dcs_tablets(nodetool, datacenter, hosts):
                                   [("--tablet-tokens", "1")],
                                   [("--tablet-tokens", "-1,2")],
                                   [("--tablet-tokens", "-1"), ("--tablet-tokens", "2")]])
-def test_repair_options_hosts_tablets(nodetool, tokens):
+def test_repair_options_tokens_tablets(nodetool, tokens):
    _do_test_repair_options_tablets(nodetool, tokens=tokens)

 def test_repair_all_with_vnode_keyspace(nodetool):
--- a/test/nodetool/test_repair.py
+++ b/test/nodetool/test_repair.py
@@ -623,7 +623,7 @@ Repair session 1
 Repair session 1 finished
 """

-def test_repair_keyspace(nodetool):
+def test_repair_keyspace_failure(nodetool):
    check_nodetool_fails_with(
        nodetool,
        ("repair", "ks"),
--- a/test/pylib/encryption_provider.py
+++ b/test/pylib/encryption_provider.py
@@ -29,9 +29,9 @@ class KeyProvider(Enum):

 class KeyProviderFactory:
    """Base class for provider factories"""
-    def __init__(self, key_provider : KeyProvider):
+    def __init__(self, key_provider : KeyProvider, tmpdir):
        self.key_provider = key_provider
-        self.system_keyfile = None
+        self.system_key_location = os.path.join(tmpdir, "resources/system_keys")

    async def __aenter__(self):
        return self
@@ -50,7 +50,7 @@ class KeyProviderFactory:

    def configuration_parameters(self) -> dict[str, str]:
        """scylla.conf entries for provider"""
-        return {}
+        return {"system_key_directory": self.system_key_location}

    def additional_cf_options(self) -> dict[str, str]:
        # pylint: disable=unused-argument
@@ -62,7 +62,7 @@ class KeyProviderFactory:
 class LocalFileSystemKeyProviderFactory(KeyProviderFactory):
    """LocalFileSystemKeyProviderFactory proxy"""
    def __init__(self, tmpdir):
-        super(LocalFileSystemKeyProviderFactory, self).__init__( KeyProvider.local)
+        super(LocalFileSystemKeyProviderFactory, self).__init__(KeyProvider.local, tmpdir)
        self.secret_file = os.path.join(tmpdir, "test/node1/conf/data_encryption_keys")

    def additional_cf_options(self) -> dict[str, str]:
@@ -72,8 +72,7 @@ class LocalFileSystemKeyProviderFactory(KeyProviderFactory):
 class ReplicatedKeyProviderFactory(KeyProviderFactory):
    """ReplicatedKeyProviderFactory proxy"""
    def __init__(self, tmpdir):
-        super(ReplicatedKeyProviderFactory, self).__init__( KeyProvider.replicated)
-        self.system_key_location = os.path.join(tmpdir, "resources/system_keys")
+        super(ReplicatedKeyProviderFactory, self).__init__(KeyProvider.replicated, tmpdir)
        self.system_key_file_name = "system_key"

    async def __aenter__(self):
@@ -88,17 +87,13 @@ class ReplicatedKeyProviderFactory(KeyProviderFactory):
            raise RuntimeError(f'Could not generate system key: {stderr.decode()}')
        return self

-    def configuration_parameters(self) -> dict[str, str]:
-        """scylla.conf entries for provider"""
-        return super().configuration_parameters() | {"system_key_directory": self.system_key_location}
-
    def additional_cf_options(self):
        return super().additional_cf_options() | {"system_key": self.system_key_file_name}

 class KmipKeyProviderFactory(KeyProviderFactory):
    """KmipKeyProviderFactory proxy"""
    def __init__(self, tmpdir):
-        super(KmipKeyProviderFactory, self).__init__( KeyProvider.kmip)
+        super(KmipKeyProviderFactory, self).__init__(KeyProvider.kmip, tmpdir)
        self.tmpdir = tmpdir
        self.kmip_server_wrapper = None
        self.kmip_host = "kmip_test"
@@ -178,7 +173,7 @@ class KmipKeyProviderFactory(KeyProviderFactory):
 class KMSKeyProviderFactory(KeyProviderFactory):
    """KMSKeyProviderFactory proxy"""
    def __init__(self, tmpdir):
-        super(KMSKeyProviderFactory, self).__init__( KeyProvider.kms)
+        super(KMSKeyProviderFactory, self).__init__(KeyProvider.kms, tmpdir)
        self.tmpdir = tmpdir
        self.master_key = "alias/Scylla-test"
        self.kms_host = "kms_test"
@@ -260,7 +255,7 @@ class KMSKeyProviderFactory(KeyProviderFactory):
 class AzureKeyProviderFactory(KeyProviderFactory):
    """AzureKeyProviderFactory proxy"""
    def __init__(self, tmpdir):
-        super(AzureKeyProviderFactory, self).__init__( KeyProvider.azure)
+        super(AzureKeyProviderFactory, self).__init__(KeyProvider.azure, tmpdir)
        self.tmpdir = tmpdir
        self.azure_host = "azure_test"
        self.azure_server = None
--- a/test/pylib/internal_types.py
+++ b/test/pylib/internal_types.py
@@ -22,12 +22,13 @@ class ServerInfo(NamedTuple):
    rpc_address: IPAddress
    datacenter: str
    rack: str
+    pid: int

    def __str__(self):
-        return f"Server({self.server_id}, {self.ip_addr}, {self.rpc_address}, {self.datacenter}, {self.rack})"
-    
+        return f"Server({self.server_id}, {self.ip_addr}, {self.rpc_address}, {self.datacenter}, {self.rack}, {self.pid})"
+
    def as_dict(self) -> dict[str, object]:
-        return {"server_id": self.server_id, "ip_addr": self.ip_addr, "rpc_address": self.rpc_address, "datacenter": self.datacenter, "rack": self.rack}
+        return {"server_id": self.server_id, "ip_addr": self.ip_addr, "rpc_address": self.rpc_address, "datacenter": self.datacenter, "rack": self.rack, "pid": self.pid}

    def property_file(self) -> dict[str, str]:
        return {"dc": self.datacenter, "rack": self.rack}
--- a/test/pylib/log_browsing.py
+++ b/test/pylib/log_browsing.py
@@ -167,3 +167,48 @@ class ScyllaLogFile:
                line = await self._run_in_executor(log_file.readline, loop=loop)

        return matches
+
+    async def find_backtraces(self, from_mark: int | None = None) -> list[str]:
+        """
+        Find and extract all backtraces from the log file.
+
+        Each backtrace starts with a line "Backtrace:" followed by lines that start with exactly 2 spaces.
+        If `from_mark` argument is given, the log is searched from that position, otherwise from the beginning.
+        Return a list of strings, where each string is a complete backtrace (all lines joined together).
+
+        A backtrace that runs up to the end of the file is returned as well.
+        """
+        loop = asyncio.get_running_loop()
+
+        backtraces = []
+
+        with self.file.open(encoding="utf-8") as log_file:
+            # A mark of None or 0 both mean "start of the file", so no seek is needed for them.
+            if from_mark:
+                await self._run_in_executor(log_file.seek, from_mark, loop=loop)
+
+            line = await self._run_in_executor(log_file.readline, loop=loop)
+            while line:
+                if line.strip() != "Backtrace:":
+                    # Not a backtrace header: move on to the next line.
+                    line = await self._run_in_executor(log_file.readline, loop=loop)
+                    continue
+
+                # Found a backtrace header; collect the entries that follow it.
+                backtrace_lines = [line]
+                while True:
+                    # Always advance `line`, even at end of file, where it becomes "" and ends the
+                    # outer loop. Keeping the header in `line` on EOF would make the outer loop
+                    # match the same header again and again without ever terminating.
+                    line = await self._run_in_executor(log_file.readline, loop=loop)
+                    if not line.startswith("  ") or line.startswith("   "):
+                        # EOF, or a line not indented by exactly 2 spaces: the backtrace is over.
+                        break
+                    backtrace_lines.append(line)
+
+                # Join all backtrace lines into a single string
+                backtraces.append(''.join(backtrace_lines))
+                # `line` now holds the first line after the backtrace (or "" at EOF) and is
+                # examined by the next iteration of the outer loop, so it is not read again here.
+
+        return backtraces
--- a/test/pylib/manager_client.py
+++ b/test/pylib/manager_client.py
@@ -8,7 +8,9 @@
   Provides helper methods to test cases.
   Manages driver refresh when cluster is cycled.
 """
+from collections import defaultdict
 import pathlib
+import re
 import shutil
 from concurrent.futures import ThreadPoolExecutor
 from pathlib import Path
@@ -64,6 +66,8 @@ class ManagerClient:
        self.metrics = ScyllaMetricsClient()
        self.thread_pool = ThreadPoolExecutor()
        self.test_finished_event = asyncio.Event()
+        self.ignore_log_patterns = []  # patterns to ignore in server logs when checking for errors
+        self.ignore_cores_log_patterns = []  # patterns to ignore in server logs when checking for core files

    @property
    def client(self):
@@ -179,6 +183,82 @@ class ManagerClient:
        logger.info("Cluster after test %s: %s", test_case_name, cluster_status)

        return cluster_status
+    
+    async def check_all_errors(self, check_all_errors=False) -> dict[ServerInfo, dict[str, Union[list[str], list[str], Path, list[str]]]]:
+        """Return, per server, found "critical"/"backtraces"/"error" log entries, "cores" and the "log" path; "error" only if check_all_errors."""
+        errors = defaultdict(dict)
+        # find errors in logs
+        for server in await self.all_servers():
+            log_file = await self.server_open_log(server_id=server.server_id)
+            # check if we should ignore cores on this server (grep is a coroutine: it must be awaited to get the matches)
+            ignore_cores = []
+            if self.ignore_cores_log_patterns:
+                if matches := await log_file.grep("|".join(f"({p})" for p in set(self.ignore_cores_log_patterns))):
+                    logger.debug(f"Will ignore cores on {server}. Found the following log messages: {matches}")
+                    ignore_cores.append(server)
+            critical_error_pattern = r"Assertion.*failed|AddressSanitizer"
+            if server not in ignore_cores:
+                critical_error_pattern += "|Aborting on shard"
+            if found_critical := await log_file.grep(critical_error_pattern):
+                errors[server]["critical"] = [e[0] for e in found_critical]
+                # Find the backtraces for the critical errors
+                if found_backtraces := await log_file.find_backtraces():
+                    errors[server]["backtraces"] = found_backtraces
+            if check_all_errors:
+                if found_errors := await log_file.grep_for_errors(distinct_errors=True):
+                    if filtered_errors := await self.filter_errors(found_errors):
+                        errors[server]["error"] = filtered_errors
+        # find core files
+        for server, cores in (await self.find_cores()).items():
+            errors[server]["cores"] = cores
+        # add log file path to the report for servers that had errors or cores
+        for server in await self.all_servers():
+            log_file = await self.server_open_log(server_id=server.server_id)
+            if server in errors:
+                errors[server]["log"] = log_file.file.name
+
+        return errors
+    
+    async def filter_errors(self, errors: list[str]):
+        exclude_errors_pattern = re.compile("|".join(f"{p}" for p in {
+            *self.ignore_log_patterns,
+            *self.ignore_cores_log_patterns,
+            # Built-in patterns below are always excluded, in addition to the per-test patterns above.
+            r"Compaction for .* deliberately stopped",
+            r"update compaction history failed:.*ignored",
+
+            # We may stop nodes that have not finished starting yet.
+            r"(Startup|start) failed:.*(seastar::sleep_aborted|raft::request_aborted)",
+            r"Timer callback failed: seastar::gate_closed_exception",
+
+            # Ignore expected RPC errors when nodes are stopped.
+            r"rpc - client .*(connection dropped|fail to connect)",
+
+            # We see benign RPC errors when nodes start/stop.
+            # If they cause system malfunction, it should be detected using higher-level tests.
+            r"rpc::unknown_verb_error",
+            r"raft_rpc - Failed to send",
+            r"raft_topology.*(seastar::broken_promise|rpc::closed_error)",
+
+            # Expected tablet migration stream failure where a node is stopped.
+            # Refs: https://github.com/scylladb/scylladb/issues/19640
+            r"Failed to handle STREAM_MUTATION_FRAGMENTS.*rpc::stream_closed",
+
+            # Expected Raft errors on decommission-abort or node restart with MV.
+            r"raft_topology - raft_topology_cmd.*failed with: raft::request_aborted",
+        }))
+        return [e for e in errors if not exclude_errors_pattern.search(e)]
+
+    async def find_cores(self) -> dict[ServerInfo, list[str]]:
+        """Find core files on all servers; servers without a matching core file are left out of the result"""
+        # find *.core files in current dir (non-recursive) and keep their absolute paths
+        cores = [str(core_file.absolute()) for core_file in pathlib.Path('.').glob('*.core')]
+        server_cores = dict()
+        # match core files to servers by pid: the path must contain ".<pid>."
+        for server in await self.all_servers():
+            if found_cores := [core for core in cores if f".{server.pid}." in core]:
+                server_cores[server] = found_cores
+        return server_cores

    async def gather_related_logs(self, failed_test_path_dir: Path, logs: Dict[str, Path]) -> None:
        for server in await self.all_servers():
@@ -212,8 +292,7 @@ class ManagerClient:
        except RuntimeError as exc:
            raise Exception("Failed to get list of running servers") from exc
        assert isinstance(server_info_list, list), "running_servers got unknown data type"
-        return [ServerInfo(ServerNum(int(info[0])), IPAddress(info[1]), IPAddress(info[2]), info[3], info[4])
-                for info in server_info_list]
+        return [ServerInfo(*info) for info in server_info_list]

    async def all_servers(self) -> list[ServerInfo]:
        """Get List of server info (id and IP address) of all servers"""
@@ -222,8 +301,7 @@ class ManagerClient:
        except RuntimeError as exc:
            raise Exception("Failed to get list of servers") from exc
        assert isinstance(server_info_list, list), "all_servers got unknown data type"
-        return [ServerInfo(ServerNum(int(info[0])), IPAddress(info[1]), IPAddress(info[2]), info[3], info[4])
-                for info in server_info_list]
+        return [ServerInfo(*info) for info in server_info_list]

    async def starting_servers(self) -> list[ServerInfo]:
        """Get List of server info (id and IP address) of servers currently
@@ -236,8 +314,7 @@ class ManagerClient:
        except RuntimeError as exc:
            raise Exception("Failed to get list of starting servers") from exc
        assert isinstance(server_info_list, list), "starting_servers got unknown data type"
-        return [ServerInfo(ServerNum(int(info[0])), IPAddress(info[1]), IPAddress(info[2]), info[3], info[4])
-                for info in server_info_list]
+        return [ServerInfo(*info) for info in server_info_list]

    async def mark_dirty(self) -> None:
        """Manually mark current cluster dirty.
@@ -276,6 +353,9 @@ class ManagerClient:
        Replace CLI options and environment variables with `cmdline_options_override` and `append_env_override`
        if provided.
        """
+        if expected_error is not None:
+            self.ignore_log_patterns.append(re.escape(expected_error))
+
        logger.debug("ManagerClient starting %s", server_id)
        data = {
            "expected_error": expected_error,
@@ -412,6 +492,9 @@ class ManagerClient:
                         expected_server_up_state: Optional[ServerUpState] = None,
                         connect_driver: bool = True) -> ServerInfo:
        """Add a new server"""
+        if expected_error is not None:
+            self.ignore_log_patterns.append(re.escape(expected_error))
+
        try:
            data = self._create_server_add_data(
                replace_cfg,
@@ -440,11 +523,7 @@ class ManagerClient:
        except Exception as exc:
            raise Exception("Failed to add server") from exc
        try:
-            s_info = ServerInfo(ServerNum(int(server_info["server_id"])),
-                                IPAddress(server_info["ip_addr"]),
-                                IPAddress(server_info["rpc_address"]),
-                                server_info["datacenter"],
-                                server_info["rack"])
+            s_info = ServerInfo(**server_info)
        except Exception as exc:
            raise RuntimeError(f"server_add got invalid server data {server_info}") from exc
        logger.debug("ManagerClient added %s", s_info)
@@ -473,6 +552,9 @@ class ManagerClient:
        assert servers_num > 0, f"servers_add: cannot add {servers_num} servers, servers_num must be positive"
        assert not (property_file and auto_rack_dc), f"Either property_file or auto_rack_dc can be provided, but not both"

+        if expected_error is not None:
+            self.ignore_log_patterns.append(re.escape(expected_error))
+
        if auto_rack_dc:
            property_file = [{"dc":auto_rack_dc, "rack":f"rack{i+1}"} for i in range(servers_num)]

@@ -489,11 +571,7 @@ class ManagerClient:
        s_infos = list[ServerInfo]()
        for server_info in server_infos:
            try:
-                s_info = ServerInfo(ServerNum(int(server_info["server_id"])),
-                                    IPAddress(server_info["ip_addr"]),
-                                    IPAddress(server_info["rpc_address"]),
-                                    server_info["datacenter"],
-                                    server_info["rack"])
+                s_info = ServerInfo(**server_info)
                s_infos.append(s_info)
            except Exception as exc:
                raise RuntimeError(f"servers_add got invalid server data {server_info}") from exc
@@ -512,6 +590,9 @@ class ManagerClient:
                          wait_removed_dead: bool = True,
                          timeout: Optional[float] = ScyllaServer.TOPOLOGY_TIMEOUT) -> None:
        """Invoke remove node Scylla REST API for a specified server"""
+        if expected_error is not None:
+            self.ignore_log_patterns.append(re.escape(expected_error))
+
        logger.debug("ManagerClient remove node %s on initiator %s", server_id, initiator_id)

        # If we remove a node, we should wait until other nodes see it as dead
@@ -532,6 +613,9 @@ class ManagerClient:
                                expected_error: str | None = None,
                                timeout: Optional[float] = ScyllaServer.TOPOLOGY_TIMEOUT) -> None:
        """Tell a node to decommission with Scylla REST API"""
+        if expected_error is not None:
+            self.ignore_log_patterns.append(re.escape(expected_error))
+
        logger.debug("ManagerClient decommission %s", server_id)
        data = {"expected_error": expected_error}
        await self.client.put_json(f"/cluster/decommission-node/{server_id}", data,
--- a/test/pylib/runner.py
+++ b/test/pylib/runner.py
@@ -49,6 +49,9 @@ RUN_ID = pytest.StashKey[int]()

 logger = logging.getLogger(__name__)

+# Store pytest config globally so we can access it in hooks that only receive report
+_pytest_config: pytest.Config | None = None
+

 def pytest_addoption(parser: pytest.Parser) -> None:
    parser.addoption('--mode', choices=ALL_MODES, action="append", dest="modes",
@@ -184,6 +187,52 @@ def pytest_sessionstart(session: pytest.Session) -> None:
        )


+@pytest.hookimpl(trylast=True)
+def pytest_runtest_logreport(report):
+    """Tag the JUnit testcase element of a test with a ``function_path`` attribute.
+
+    The node reporter's ``to_xml`` method is replaced by a wrapper that calls the
+    original and then sets the attribute on the element it returned, so the
+    attribute is applied at the moment the XML element is produced.
+
+    ``function_path`` is ``test/`` followed by the report's node id with its last
+    two dot-separated components removed, and then everything from the last ``[`` on.
+
+    Reports for phases other than "call" are ignored, and nothing is done when
+    the pytest config has not been stored yet or carries no JUnit XML reporter.
+    The hook is registered with trylast=True; the intent stated by the author is
+    to run after the JUnit XML plugin's own hook has created the node reporter.
+    """
+    from _pytest.junitxml import xml_key
+
+    # The hook only receives the report, so the config comes from the module global.
+    config = _pytest_config
+    if report.when != "call" or config is None:
+        return
+
+    # No JUnit XML reporter in the stash means there is nothing to decorate.
+    xml = config.stash.get(xml_key, None)
+    if xml is None:
+        return
+
+    node_reporter = xml.node_reporter(report)
+
+    # Strip the two trailing dot-separated components first, then the parameters.
+    base_id = report.nodeid.rsplit('.', 2)[0]
+    function_path = 'test/' + base_id.rsplit('[', 1)[0]
+
+    # Keep a handle on the unwrapped method so the wrapper can delegate to it.
+    wrapped_to_xml = node_reporter.to_xml
+
+    def to_xml_with_function_path():
+        """Build the element as usual, then add the custom attribute."""
+        testcase = wrapped_to_xml()
+        testcase.set("function_path", function_path)
+        return testcase
+
+    node_reporter.to_xml = to_xml_with_function_path
+
+
 def pytest_sessionfinish(session: pytest.Session) -> None:
    if not session.config.getoption("--test-py-init"):
        return
@@ -196,6 +245,9 @@ def pytest_sessionfinish(session: pytest.Session) -> None:


 def pytest_configure(config: pytest.Config) -> None:
+    global _pytest_config
+    _pytest_config = config
+
    config.build_modes = get_modes_to_run(config)

    if testpy_run_id := config.getoption("--run_id"):
--- a/test/pylib/scylla_cluster.py
+++ b/test/pylib/scylla_cluster.py
@@ -243,7 +243,7 @@ async def get_scylla_2025_1_executable(build_mode: str) -> str:
            if not unpacked_marker.exists():
                if not downloaded_marker.exists():
                    archive_path.unlink(missing_ok=True)
-                    await run_process(["curl", "--silent", "--show-error", "--output", archive_path, url])
+                    await run_process(["curl", "--retry", "10", "--fail", "--silent", "--show-error", "--output", archive_path, url])
                    downloaded_marker.touch()
                shutil.rmtree(unpack_dir, ignore_errors=True)
                unpack_dir.mkdir(exist_ok=True, parents=True)
@@ -477,7 +477,8 @@ class ScyllaServer:
        return "DEFAULT_RACK"
    
    def server_info(self) -> ServerInfo:
-        return ServerInfo(self.server_id, self.ip_addr, self.rpc_address, self.datacenter, self.rack)
+        pid = self.cmd.pid if self.cmd else None
+        return ServerInfo(self.server_id, self.ip_addr, self.rpc_address, self.datacenter, self.rack, pid)

    def change_rpc_address(self, rpc_address: IPAddress) -> None:
        """Change RPC IP address of the current server. Pre: the server is
--- a/test/raft/randomized_nemesis_test.cc
+++ b/test/raft/randomized_nemesis_test.cc
@@ -2930,6 +2930,18 @@ private:

    static constexpr elem_t magic = 54313;

+    static void check_digest_value(elem_t d) {
+        if (d < 0 || d >= magic) {
+            on_fatal_internal_error(tlogger, fmt::format("Digest value out of range: {}", d));
+        }
+    }
+
+    static void validate_digest_value(elem_t d_new, elem_t d_old, elem_t x) {
+        if (d_new < 0 || d_new >= magic) {
+            on_fatal_internal_error(tlogger, fmt::format("Digest value invalid after appending/removing element: d_new {}, d_old {}, x {}", d_new, d_old, x));
+        }
+    }
+
 public:
    append_seq(std::vector<elem_t> v) : _seq{make_lw_shared<std::vector<elem_t>>(std::move(v))}, _end{_seq->size()}, _digest{0} {
        for (auto x : *_seq) {
@@ -2938,20 +2950,26 @@ public:
    }

    static elem_t digest_append(elem_t d, elem_t x) {
-        BOOST_REQUIRE_LE(0, d);
-        BOOST_REQUIRE_LT(d, magic);
+        check_digest_value(d);

        auto y = (d + x) % magic;
        SCYLLA_ASSERT(digest_remove(y, x) == d);
+
+        validate_digest_value(y, d, x);
        return y;
    }

    static elem_t digest_remove(elem_t d, elem_t x) {
-        BOOST_REQUIRE_LE(0, d);
-        BOOST_REQUIRE_LT(d, magic);
+        check_digest_value(d);

        auto y = (d - x) % magic;
-        return y < 0 ? y + magic : y;
+
+        if (y < 0) {
+            y += magic;
+        }
+
+        validate_digest_value(y, d, x);
+        return y;
    }

    elem_t digest() const {
--- a/test/storage/test_out_of_space_prevention.py
+++ b/test/storage/test_out_of_space_prevention.py
@@ -397,3 +397,80 @@ async def test_node_restart_while_tablet_split(manager: ManagerClient, volumes_f
                    mark, _ = await log.wait_for("compaction_manager - Enabled", from_mark=mark)
                mark, _ = await log.wait_for(f"Detected tablet split for table {cf}, increasing from 1 to 2 tablets", from_mark=mark)
                await assert_resize_task_info(table_id, lambda response: len(response) == 2 and all(r.resize_task_info is None for r in response))
+
+# Verify that new sstable produced by repair cannot be split, if disk utilization level is critical.
+@pytest.mark.asyncio
+@skip_mode('release', 'error injections are not supported in release mode')
+async def test_repair_failure_on_split_rejection(manager: ManagerClient, volumes_factory: Callable) -> None:
+    cfg = {
+        'tablet_load_stats_refresh_interval_in_seconds': 1,
+    }
+    async with space_limited_servers(manager, volumes_factory, ["100M"]*3, cmdline=global_cmdline, config=cfg) as servers:
+        cql, _ = await manager.get_ready_cql(servers)
+        workdir = await manager.server_get_workdir(servers[0].server_id)
+        log = await manager.server_open_log(servers[0].server_id)
+        mark = await log.mark()
+
+        logger.info("Create and populate test table")
+        async with new_test_keyspace(manager, "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 3} AND tablets = {'initial': 2}") as ks:
+            async with new_test_table(manager, ks, "pk int PRIMARY KEY, t text") as cf:
+                table = cf.split('.')[-1]
+                table_id = (await cql.run_async(f"SELECT id FROM system_schema.tables WHERE keyspace_name = '{ks}' AND table_name = '{table}'"))[0].id
+
+                await asyncio.gather(*[cql.run_async(query) for query in write_generator(cf, 64)])
+                await manager.api.flush_keyspace(servers[0].ip_addr, ks)
+
+                coord = await get_topology_coordinator(manager)
+                coord_serv = await find_server_by_host_id(manager, servers, coord)
+                coord_log = await manager.server_open_log(coord_serv.server_id)
+
+                async def run_split():
+                    await manager.api.enable_injection(coord_serv.ip_addr, 'tablet_resize_finalization_postpone', one_shot=False)
+                    # NOTE(review): the injection presumably holds back split finalization until it is disabled - confirm
+                    # force split on the test table
+                    await cql.run_async(f"ALTER TABLE {cf} WITH tablets = {{'min_tablet_count': 4}}")
+                    # wait_for is a coroutine: without `await` it never runs and the split decision is not waited for
+                    await coord_log.wait_for(f"Generating resize decision for table {table_id} of type split")
+
+                async def generate_repair_work():
+                    insert_stmt = cql.prepare(f"INSERT INTO {cf} (pk, t) VALUES (?, ?)")
+                    insert_stmt.consistency_level = ConsistencyLevel.ONE
+
+                    await manager.api.enable_injection(servers[0].ip_addr, "database_apply", one_shot=False)
+                    pks = range(256, 512)
+                    await asyncio.gather(*[cql.run_async(insert_stmt, (k, f'{k}')) for k in pks])
+                    await manager.api.disable_injection(servers[0].ip_addr, "database_apply")
+
+                await generate_repair_work()
+                await manager.api.enable_injection(servers[0].ip_addr, "maybe_split_new_sstable_wait", one_shot=True)
+
+                token = 'all'
+                repair_task = asyncio.create_task(manager.api.tablet_repair(servers[0].ip_addr, ks, table, token))
+
+                # Emit split decision during repair.
+                await run_split()
+
+                await log.wait_for("maybe_split_new_sstable_wait: waiting", from_mark=mark)
+                await manager.api.disable_injection(coord_serv.ip_addr, "tablet_resize_finalization_postpone")
+
+                logger.info("Create a big file on the target node to reach critical disk utilization level")
+                disk_info = psutil.disk_usage(workdir)
+                with random_content_file(workdir, int(disk_info.total*0.85) - disk_info.used):
+                    for _ in range(2):
+                        mark, _ = await log.wait_for("compaction_manager - Drained", from_mark=mark)
+
+                    await manager.api.message_injection(servers[0].ip_addr, "maybe_split_new_sstable_wait")
+
+                    # Expect repair to fail when splitting new sstables
+                    await log.wait_for("Repair for tablet migration of .* failed", from_mark=mark)
+                    await log.wait_for("Cannot split .* because manager has compaction disabled", from_mark=mark)
+
+                    assert await log.grep(f"compaction .* Split {cf}", from_mark=mark) == []
+
+                logger.info("With blob file removed, wait for DB to drop below the critical disk utilization level")
+                for _ in range(2):
+                    mark, _ = await log.wait_for("compaction_manager - Enabled", from_mark=mark)
+
+                await repair_task
+
+                mark, _ = await log.wait_for(f"Detected tablet split for table {cf}", from_mark=mark)
--- a/test/vector_search_validator/Cargo.toml
+++ b/test/vector_search_validator/Cargo.toml
@@ -1,6 +1,10 @@
 ## Copyright 2025-present ScyllaDB
 # SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0

+# This file is generated by cargo-toml-template. Do not edit directly.
+# To make changes, edit the template and regenerate with command:
+# "$ cargo-toml-template > Cargo.toml".
+
 [workspace]
 members = ["crates/*"]
 default-members = ["crates/validator"]
@@ -12,13 +16,16 @@ edition = "2024"

 [workspace.dependencies]
 anyhow = "1.0.97"
+async-backtrace = "0.2.7"
 futures = "0.3.31"
 scylla = { version = "1.2.0", features = ["time-03"] }
 tokio = { version = "1.44.1", features = ["full"] }
 tracing = "0.1.41"
 uuid = "1.16.0"
-vector-search-validator-engine = { git = "https://github.com/scylladb/vector-store.git", rev = "3ee46a5" }
-vector-search-validator-tests = { git = "https://github.com/scylladb/vector-store.git", rev = "3ee46a5" }
+httpclient = { git = "https://github.com/scylladb/vector-store.git", rev = "d79ee80" }
+vector-search-validator-engine = { git = "https://github.com/scylladb/vector-store.git", rev = "d79ee80" }
+vector-search-validator-tests = { git = "https://github.com/scylladb/vector-store.git", rev = "d79ee80" }
+vector-store = { git = "https://github.com/scylladb/vector-store.git", rev = "d79ee80" }

-[patch.'https://github.com/scylladb/vector-store.git']
+[patch.'https://github.com/scylladb/scylladb.git']
 vector-search-validator-scylla = { path = "crates/validator-scylla" }
--- a/test/vector_search_validator/README.md
+++ b/test/vector_search_validator/README.md
@@ -8,6 +8,8 @@ namespace to separate it from the host environment. `vector-search-validator`
 contains DNS server and all tests in one binary. It uses external scylla and
 vector-store binaries.

+## Running tests
+
 The `test_validator.py::test_validator[test-case]` is the entry point for
 running the tests. It is parametrized with name of the test case.  Available
 test cases are taken dynamically from the `vector-search-validator` binary.
@@ -37,6 +39,22 @@ $ pytest --mode=dev test/vector_search_validator/test_validator.py --filters fil
 Logs are stored in
 `testlog/{mode}/vector_search_validator/{test-case}-{run_id}/` directory.

-Implementing new test cases on the Scylla repository side means adding new test
-in crate `crates/validator-scylla`.
+## Development of test cases
+
+`vector-search-validator` (in short `validator`) is divided into multiple
+crates:
+- `validator` - a main crate that contains only the entry point
+- `validator-scylla` - contains implementation of the validator tests on the
+  scylladb.git side. If you want to add/modify the tests implemented in the
+  scylladb.git, you will work in this crate.
+- `vector-store.git/validator-engine` - contains the core logic of the
+  validator - overall test runner and implementation of actors for tests (dns
+  server, scylla cluster, vector store cluster)
+- `vector-store.git/validator-tests` - contains the core logic of the framework
+  tests, provides base structures for tests and actor interfaces.  In the
+  future we should check if it is possible to merge it with `validator-engine`
+  crate.
+- `vector-store.git/validator-vector-store` - contains implementation of the
+  validator tests on the vector-store.git side. If you want to add/modify the
+  tests implemented in the vector-store.git, you will work in this crate.

--- a/test/vector_search_validator/build-env
+++ b/test/vector_search_validator/build-env
@@ -1,2 +1,2 @@
 VECTOR_STORE_GIT=https://github.com/scylladb/vector-store.git
-VECTOR_STORE_REV=3ee46a5
+VECTOR_STORE_REV=d79ee80
--- a/test/vector_search_validator/cargo-toml-template
+++ b/test/vector_search_validator/cargo-toml-template
@@ -11,6 +11,10 @@ cat << EOF
 ## Copyright 2025-present ScyllaDB
 # SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0

+# This file is generated by cargo-toml-template. Do not edit directly.
+# To make changes, edit the template and regenerate with command:
+# "\$ cargo-toml-template > Cargo.toml".
+
 [workspace]
 members = ["crates/*"]
 default-members = ["crates/validator"]
@@ -22,14 +26,17 @@ edition = "2024"

 [workspace.dependencies]
 anyhow = "1.0.97"
+async-backtrace = "0.2.7"
 futures = "0.3.31"
 scylla = { version = "1.2.0", features = ["time-03"] }
 tokio = { version = "1.44.1", features = ["full"] }
 tracing = "0.1.41"
 uuid = "1.16.0"
+httpclient = { git = "$VECTOR_STORE_GIT", rev = "$VECTOR_STORE_REV" }
 vector-search-validator-engine = { git = "$VECTOR_STORE_GIT", rev = "$VECTOR_STORE_REV" }
 vector-search-validator-tests = { git = "$VECTOR_STORE_GIT", rev = "$VECTOR_STORE_REV" }
+vector-store = { git = "$VECTOR_STORE_GIT", rev = "$VECTOR_STORE_REV" }

-[patch.'$VECTOR_STORE_GIT']
+[patch.'https://github.com/scylladb/scylladb.git']
 vector-search-validator-scylla = { path = "crates/validator-scylla" }
 EOF
--- a/test/vector_search_validator/crates/validator-scylla/Cargo.toml
+++ b/test/vector_search_validator/crates/validator-scylla/Cargo.toml
@@ -4,5 +4,11 @@ version = "0.1.0"
 edition = "2024"

 [dependencies]
+async-backtrace.workspace = true
+httpclient.workspace = true
+scylla.workspace = true
+tokio.workspace = true
 tracing.workspace = true
+uuid.workspace = true
 vector-search-validator-tests.workspace = true
+vector-store.workspace = true
--- a/test/vector_search_validator/crates/validator-scylla/src/common.rs
+++ b/test/vector_search_validator/crates/validator-scylla/src/common.rs
@@ -0,0 +1,302 @@
+/*
+ * Copyright 2025-present ScyllaDB
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+ */
+
+use async_backtrace::framed;
+use httpclient::HttpClient;
+use scylla::client::session::Session;
+use scylla::client::session_builder::SessionBuilder;
+use scylla::response::query_result::QueryRowsResult;
+use std::collections::HashMap;
+use std::net::Ipv4Addr;
+use std::sync::Arc;
+use std::time::Duration;
+use tokio::time;
+use tracing::info;
+use uuid::Uuid;
+use vector_search_validator_tests::DnsExt;
+use vector_search_validator_tests::ScyllaClusterExt;
+use vector_search_validator_tests::ScyllaNodeConfig;
+use vector_search_validator_tests::TestActors;
+use vector_search_validator_tests::VectorStoreClusterExt;
+use vector_search_validator_tests::VectorStoreNodeConfig;
+use vector_store::httproutes::IndexStatus;
+use vector_store::IndexInfo;
+
+pub(crate) const DEFAULT_TEST_TIMEOUT: Duration = Duration::from_secs(120); // timeout the test modules pass to TestCase init/cleanup/test steps
+
+pub(crate) const VS_NAMES: [&str; 3] = ["vs1", "vs2", "vs3"]; // host names registered in the test DNS for the vector-store nodes
+
+pub(crate) const VS_PORT: u16 = 6080; // port used in vector-store URLs and HTTP client addresses
+
+pub(crate) const DB_OCTET_1: u8 = 1; // DB_OCTET_*: arguments to `services_subnet.ip()` selecting the database node addresses
+pub(crate) const DB_OCTET_2: u8 = 2;
+pub(crate) const DB_OCTET_3: u8 = 3;
+pub(crate) const VS_OCTET_1: u8 = 128; // VS_OCTET_*: arguments to `services_subnet.ip()` selecting the vector-store node addresses
+pub(crate) const VS_OCTET_2: u8 = 129;
+pub(crate) const VS_OCTET_3: u8 = 130;
+
+#[framed] // Returns one "http://<name>.<domain>:<VS_PORT>" URL per VS_NAMES entry, in VS_NAMES order.
+pub(crate) async fn get_default_vs_urls(actors: &TestActors) -> Vec<String> {
+    let domain = actors.dns.domain().await; // domain reported by the DNS actor
+    VS_NAMES
+        .iter()
+        .map(|name| format!("http://{name}.{domain}:{VS_PORT}"))
+        .collect()
+}
+
+pub(crate) fn get_default_vs_ips(actors: &TestActors) -> Vec<Ipv4Addr> { // the three vector-store addresses inside `services_subnet`
+    vec![
+        actors.services_subnet.ip(VS_OCTET_1),
+        actors.services_subnet.ip(VS_OCTET_2),
+        actors.services_subnet.ip(VS_OCTET_3),
+    ]
+}
+
+pub(crate) fn get_default_db_ips(actors: &TestActors) -> Vec<Ipv4Addr> { // the three database addresses inside `services_subnet`
+    vec![
+        actors.services_subnet.ip(DB_OCTET_1),
+        actors.services_subnet.ip(DB_OCTET_2),
+        actors.services_subnet.ip(DB_OCTET_3),
+    ]
+}
+
+#[framed] // Builds one ScyllaNodeConfig per default DB IP: node i gets default URL i as primary and the other URLs as secondary.
+pub(crate) async fn get_default_scylla_node_configs(actors: &TestActors) -> Vec<ScyllaNodeConfig> {
+    let default_vs_urls = get_default_vs_urls(actors).await;
+    get_default_db_ips(actors)
+        .iter()
+        .enumerate()
+        .map(|(i, &ip)| {
+            let mut vs_urls = default_vs_urls.clone(); // fresh copy per node, because the primary is removed from it below
+            ScyllaNodeConfig {
+                db_ip: ip,
+                primary_vs_uris: vec![vs_urls.remove(i)], // `Vec::remove` panics when out of range; both lists are built with three entries
+                secondary_vs_uris: vs_urls, // whatever is left after taking out the primary
+            }
+        })
+        .collect()
+}
+
+pub(crate) fn get_default_vs_node_configs(actors: &TestActors) -> Vec<VectorStoreNodeConfig> { // pairs the i-th default VS IP with the i-th default DB IP
+    let db_ips = get_default_db_ips(actors);
+    get_default_vs_ips(actors)
+        .iter()
+        .zip(db_ips.iter())
+        .map(|(&vs_ip, &db_ip)| VectorStoreNodeConfig {
+            vs_ip,
+            db_ip,
+            envs: HashMap::new(), // no extra entries in `envs` by default
+        })
+        .collect()
+}
+
+#[framed] // Brings up the test environment using the default ScyllaDB and vector-store node configurations.
+pub(crate) async fn init(actors: TestActors) {
+    info!("started");
+
+    let scylla_configs = get_default_scylla_node_configs(&actors).await;
+    let vs_configs = get_default_vs_node_configs(&actors);
+    init_with_config(actors, scylla_configs, vs_configs).await; // takes `actors` by value, so it must come after the two borrows above
+
+    info!("finished");
+}
+
+#[framed] // Registers the VS host names in DNS, then starts the DB and VS actors with the given configs and asserts both become ready.
+pub(crate) async fn init_with_config(
+    actors: TestActors,
+    scylla_configs: Vec<ScyllaNodeConfig>,
+    vs_configs: Vec<VectorStoreNodeConfig>,
+) {
+    let vs_ips = get_default_vs_ips(&actors); // DNS entries always use the default VS IPs, regardless of `vs_configs`
+    for (name, ip) in VS_NAMES.iter().zip(vs_ips.iter()) {
+        actors.dns.upsert(name.to_string(), *ip).await;
+    }
+
+    actors.db.start(scylla_configs, None).await; // NOTE(review): meaning of the `None` argument is not visible here - confirm against ScyllaClusterExt
+    assert!(actors.db.wait_for_ready().await); // panics when the DB actor reports not ready
+    actors.vs.start(vs_configs).await; // vector store is started only after the DB readiness assertion passes
+    assert!(actors.vs.wait_for_ready().await); // panics when the VS actor reports not ready
+}
+
+#[framed] // Tears the environment down: removes the VS_NAMES DNS entries, then stops the VS actor and the DB actor.
+pub(crate) async fn cleanup(actors: TestActors) {
+    info!("started");
+    for name in VS_NAMES.iter() {
+        actors.dns.remove(name.to_string()).await;
+    }
+    actors.vs.stop().await; // vector store is stopped before the database
+    actors.db.stop().await;
+    info!("finished");
+}
+
+#[framed] // Returns a shared CQL session plus one HttpClient per entry of `vs_ips` (same order); panics if the session cannot be built.
+pub(crate) async fn prepare_connection_with_custom_vs_ips(
+    actors: &TestActors,
+    vs_ips: Vec<Ipv4Addr>,
+) -> (Arc<Session>, Vec<HttpClient>) {
+    let session = Arc::new(
+        SessionBuilder::new()
+            .known_node(actors.services_subnet.ip(DB_OCTET_1).to_string()) // only the DB_OCTET_1 node is given as a contact point
+            .build()
+            .await
+            .expect("failed to create session"),
+    );
+    let clients = vs_ips
+        .iter()
+        .map(|&ip| HttpClient::new((ip, VS_PORT).into())) // every client targets VS_PORT on its IP
+        .collect();
+    (session, clients)
+}
+
+#[framed] // Re-evaluates `condition` until it yields true; panics with "Timeout on: {msg}" when `timeout` elapses first.
+pub(crate) async fn wait_for<F, Fut>(mut condition: F, msg: &str, timeout: Duration)
+where
+    F: FnMut() -> Fut,
+    Fut: std::future::Future<Output = bool>,
+{
+    time::timeout(timeout, async {
+        while !condition().await {
+            time::sleep(Duration::from_millis(100)).await; // fixed 100 ms pause between attempts
+        }
+    })
+    .await
+    .unwrap_or_else(|_| panic!("Timeout on: {msg}")) // the Err case means the timeout fired before the loop finished
+}
+
+#[framed] // Calls `poll_fn` until it yields Some(value) and returns that value; panics with "Timeout on: {msg}" when `timeout` elapses first.
+pub(crate) async fn wait_for_value<F, Fut, T>(mut poll_fn: F, msg: &str, timeout: Duration) -> T
+where
+    F: FnMut() -> Fut,
+    Fut: std::future::Future<Output = Option<T>>,
+{
+    time::timeout(timeout, async {
+        loop {
+            if let Some(value) = poll_fn().await {
+                return value; // returns from the async block, which ends the polling
+            }
+            time::sleep(Duration::from_millis(100)).await; // fixed 100 ms pause between attempts
+        }
+    })
+    .await
+    .unwrap_or_else(|_| panic!("Timeout on: {msg}")) // the Err case means the timeout fired before a value was produced
+}
+
+#[framed] // Polls `client` for the status of `index` until it is Serving and returns that response; panics after 20 seconds.
+pub(crate) async fn wait_for_index(
+    client: &HttpClient,
+    index: &IndexInfo,
+) -> vector_store::httproutes::IndexStatusResponse {
+    wait_for_value(
+        || async {
+            match client.index_status(&index.keyspace, &index.index).await {
+                Ok(resp) if resp.status == IndexStatus::Serving => Some(resp),
+                _ => None, // request errors and every other status both mean "keep polling"
+            }
+        },
+        "Waiting for index to be SERVING",
+        Duration::from_secs(20), // hard-coded, independent of DEFAULT_TEST_TIMEOUT
+    )
+    .await
+}
+
+#[framed] // Runs `query` unpaged with no bound values and returns its rows result; panics if either step fails.
+pub(crate) async fn get_query_results(query: String, session: &Session) -> QueryRowsResult {
+    session
+        .query_unpaged(query, ())
+        .await
+        .expect("failed to run query")
+        .into_rows_result()
+        .expect("failed to get rows") // NOTE(review): presumably fails for responses that carry no rows - confirm against the driver docs
+}
+
+#[framed] // Creates a uniquely named keyspace, switches `session` to it and returns the name; panics if either step fails.
+pub(crate) async fn create_keyspace(session: &Session) -> String {
+    let keyspace = format!("ks_{}", Uuid::new_v4().simple()); // random name built from a fresh v4 UUID
+
+    // Create keyspace with replication factor of 3 for the 3-node cluster
+    session.query_unpaged(
+        format!("CREATE KEYSPACE {keyspace} WITH replication = {{'class': 'NetworkTopologyStrategy', 'replication_factor': 3}}"),
+        (),
+    ).await.expect("failed to create a keyspace");
+
+    // Use keyspace, so later statements on this session can omit the keyspace name
+    session
+        .use_keyspace(&keyspace, false) // NOTE(review): `false` is presumably the case-sensitivity flag - confirm against the driver docs
+        .await
+        .expect("failed to use a keyspace");
+
+    keyspace
+}
+
+#[framed] // Creates a uniquely named table with the given column definitions and optional WITH clause; returns the table name.
+pub(crate) async fn create_table(
+    session: &Session,
+    columns: &str,
+    options: Option<&str>,
+) -> String {
+    let table = format!("tbl_{}", Uuid::new_v4().simple()); // random name built from a fresh v4 UUID
+
+    let extra = if let Some(options) = options {
+        format!("WITH {options}")
+    } else {
+        String::new() // no options: nothing is appended after the column list
+    };
+
+    // Create table; the name is unqualified - presumably relies on a prior `use_keyspace` on this session (verify against callers)
+    session
+        .query_unpaged(format!("CREATE TABLE {table} ({columns}) {extra}"), ())
+        .await
+        .expect("failed to create a table");
+
+    table
+}
+
+#[framed] // Creates a uniquely named 'vector_index' on `table(column)`, waits until every client lists it, and returns the first client's IndexInfo for it.
+pub(crate) async fn create_index(
+    session: &Session,
+    clients: &[HttpClient],
+    table: &str,
+    column: &str,
+) -> IndexInfo {
+    let index = format!("idx_{}", Uuid::new_v4().simple()); // random name built from a fresh v4 UUID
+
+    // Create index
+    session
+        .query_unpaged(
+            format!("CREATE INDEX {index} ON {table}({column}) USING 'vector_index'"),
+            (),
+        )
+        .await
+        .expect("failed to create an index");
+
+    // Wait for the index to be created: it must appear in the index list of every client
+    wait_for(
+        || async {
+            for client in clients.iter() {
+                if !client
+                    .indexes()
+                    .await
+                    .iter()
+                    .any(|idx| idx.index.to_string() == index)
+                {
+                    return false; // at least one client does not list the index yet
+                }
+            }
+            true // also reached immediately when `clients` is empty
+        },
+        "Waiting for the first index to be created",
+        Duration::from_secs(10), // hard-coded, independent of DEFAULT_TEST_TIMEOUT
+    )
+    .await;
+
+    clients
+        .first()
+        .expect("No vector store clients provided") // an empty `clients` slice panics here
+        .indexes()
+        .await
+        .into_iter()
+        .find(|idx| idx.index.to_string() == index)
+        .expect("index not found")
+}
--- a/test/vector_search_validator/crates/validator-scylla/src/cql.rs
+++ b/test/vector_search_validator/crates/validator-scylla/src/cql.rs
@@ -3,12 +3,13 @@
 * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
 */

-use std::time::Duration;
-use vector_search_validator_tests::common;
-use vector_search_validator_tests::*;
+use crate::common;
+use async_backtrace::framed;
+use vector_search_validator_tests::TestCase;

+#[framed]
 pub(crate) async fn new() -> TestCase {
-    let timeout = Duration::from_secs(30);
+    let timeout = common::DEFAULT_TEST_TIMEOUT;
    TestCase::empty()
        .with_init(timeout, common::init)
        .with_cleanup(timeout, common::cleanup)
--- a/test/vector_search_validator/crates/validator-scylla/src/high_availability.rs
+++ b/test/vector_search_validator/crates/validator-scylla/src/high_availability.rs
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2025-present ScyllaDB
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+ */
+
+use crate::common;
+use async_backtrace::framed;
+use tracing::info;
+use vector_search_validator_tests::ScyllaClusterExt;
+use vector_search_validator_tests::ScyllaNodeConfig;
+use vector_search_validator_tests::TestActors;
+use vector_search_validator_tests::TestCase;
+use vector_search_validator_tests::VectorStoreNodeConfig;
+
+#[framed] // Builds the high-availability TestCase: shared cleanup plus one test, each bounded by DEFAULT_TEST_TIMEOUT.
+pub(crate) async fn new() -> TestCase {
+    let timeout = common::DEFAULT_TEST_TIMEOUT;
+    TestCase::empty()
+        .with_cleanup(timeout, common::cleanup) // no `with_init` step: the test below starts the cluster itself with a custom config
+        .with_test(
+            "secondary_uri_works_correctly",
+            timeout,
+            test_secondary_uri_works_correctly,
+        )
+}
+
+#[framed] // Runs an ANN query after taking down the only DB node that has the vector store configured as a primary URI.
+async fn test_secondary_uri_works_correctly(actors: TestActors) {
+    info!("started");
+
+    let vs_urls = common::get_default_vs_urls(&actors).await;
+    let vs_url = &vs_urls[0]; // only the first default vector-store URL is used in this test
+
+    let scylla_configs: Vec<ScyllaNodeConfig> = vec![
+        ScyllaNodeConfig {
+            db_ip: actors.services_subnet.ip(common::DB_OCTET_1),
+            primary_vs_uris: vec![vs_url.clone()], // node 1: the URL is configured as primary
+            secondary_vs_uris: vec![],
+        },
+        ScyllaNodeConfig {
+            db_ip: actors.services_subnet.ip(common::DB_OCTET_2),
+            primary_vs_uris: vec![],
+            secondary_vs_uris: vec![vs_url.clone()], // node 2: the same URL, but only as secondary
+        },
+        ScyllaNodeConfig {
+            db_ip: actors.services_subnet.ip(common::DB_OCTET_3),
+            primary_vs_uris: vec![],
+            secondary_vs_uris: vec![vs_url.clone()], // node 3: the same URL, but only as secondary
+        },
+    ];
+    let vs_configs = vec![VectorStoreNodeConfig {
+        vs_ip: actors.services_subnet.ip(common::VS_OCTET_1),
+        db_ip: actors.services_subnet.ip(common::DB_OCTET_1),
+        envs: Default::default(),
+    }]; // a single vector-store node, configured with the first DB node's IP
+    common::init_with_config(actors.clone(), scylla_configs, vs_configs).await;
+
+    let vs_ips = vec![actors.services_subnet.ip(common::VS_OCTET_1)];
+    let (session, clients) = common::prepare_connection_with_custom_vs_ips(&actors, vs_ips).await;
+
+    let keyspace = common::create_keyspace(&session).await;
+    let table =
+        common::create_table(&session, "pk INT PRIMARY KEY, v VECTOR<FLOAT, 3>", None).await;
+
+    // Insert vectors: row i gets the embedding [i, 2i, 3i]
+    for i in 0..100 {
+        let embedding = vec![i as f32, (i * 2) as f32, (i * 3) as f32];
+        session
+            .query_unpaged(
+                format!("INSERT INTO {table} (pk, v) VALUES (?, ?)"),
+                (i, &embedding),
+            )
+            .await
+            .expect("failed to insert data");
+    }
+
+    let index = common::create_index(&session, &clients, &table, "v").await;
+
+    for client in &clients {
+        let index_status = common::wait_for_index(&client, &index).await; // `client` is already a reference here
+
+        assert_eq!(
+            index_status.count, 100,
+            "Expected 100 vectors to be indexed"
+        );
+    }
+
+    // Down the first node with primary URI
+    let first_node_ip = actors.services_subnet.ip(common::DB_OCTET_1);
+    info!("Bringing down node {first_node_ip}");
+    actors.db.down_node(first_node_ip).await;
+
+    // Should work via secondary URIs
+    let results = common::get_query_results(
+        format!("SELECT pk FROM {table} ORDER BY v ANN OF [0.0, 0.0, 0.0] LIMIT 10"),
+        &session,
+    )
+    .await;
+
+    let rows = results
+        .rows::<(i32,)>()
+        .expect("failed to get rows after node down");
+    assert!(
+        rows.rows_remaining() <= 10, // NOTE(review): also true for an empty result - confirm whether a non-empty result should be required
+        "Expected at most 10 results from ANN query after node down"
+    );
+
+    // Drop keyspace
+    session
+        .query_unpaged(format!("DROP KEYSPACE {keyspace}"), ())
+        .await
+        .expect("failed to drop a keyspace");
+
+    info!("finished");
+}
--- a/test/vector_search_validator/crates/validator-scylla/src/lib.rs
+++ b/test/vector_search_validator/crates/validator-scylla/src/lib.rs
@@ -3,12 +3,19 @@
 * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
 */

+mod common;
 mod cql;
+mod high_availability;

+use async_backtrace::framed;
 use vector_search_validator_tests::TestCase;

+#[framed] // Yields every test case defined in this crate, paired with the name it is listed under.
 pub async fn test_cases() -> impl Iterator<Item = (String, TestCase)> {
-    vec![("cql", cql::new().await)]
-        .into_iter()
-        .map(|(name, test_case)| (name.to_string(), test_case))
+    vec![
+        ("scylla_cql", cql::new().await),
+        ("scylla_high_availability", high_availability::new().await),
+    ]
+    .into_iter()
+    .map(|(name, test_case)| (name.to_string(), test_case)) // convert the &str names to owned Strings
 }
--- a/tools/cqlsh
+++ b/tools/cqlsh
--- a/tools/scylla-sstable.cc
+++ b/tools/scylla-sstable.cc
@@ -819,7 +819,7 @@ public:
    virtual compaction::compaction_strategy_state& get_compaction_strategy_state() noexcept override { return _compaction_strategy_state; }
    virtual reader_permit make_compaction_reader_permit() const override { return _permit; }
    virtual sstables::sstables_manager& get_sstables_manager() noexcept override { return _sst_man; }
-    virtual sstables::shared_sstable make_sstable() const override { return do_make_sstable(); }
+    virtual sstables::shared_sstable make_sstable(sstables::sstable_state) const override { return do_make_sstable(); }
    virtual sstables::sstable_writer_config configure_writer(sstring origin) const override { return do_configure_writer(std::move(origin)); }
    virtual api::timestamp_type min_memtable_timestamp() const override { return api::min_timestamp; }
    virtual api::timestamp_type min_memtable_live_timestamp() const override { return api::min_timestamp; }
@@ -909,7 +909,7 @@ void scrub_operation(schema_ptr schema, reader_permit permit, const std::vector<

    auto compaction_descriptor = compaction::compaction_descriptor(std::move(sstables));
    compaction_descriptor.options = compaction::compaction_type_options::make_scrub(scrub_mode, compaction::compaction_type_options::scrub::quarantine_invalid_sstables::no);
-    compaction_descriptor.creator = [&compaction_group_view] (shard_id) { return compaction_group_view.make_sstable(); };
+    compaction_descriptor.creator = [&compaction_group_view] (shard_id) { return compaction_group_view.make_sstable(sstables::sstable_state::normal); };
    compaction_descriptor.replacer = [] (compaction::compaction_completion_desc) { };

    auto compaction_data = compaction::compaction_data{};
--- a/tools/toolchain/image
+++ b/tools/toolchain/image
@@ -1 +1 @@
-docker.io/scylladb/scylla-toolchain:fedora-43-20251208
+docker.io/scylladb/scylla-toolchain:fedora-43-20251217
--- a/transport/controller.cc
+++ b/transport/controller.cc
@@ -353,7 +353,7 @@ future<> controller::set_cql_ready(bool ready) {
    return _gossiper.local().add_local_application_state(gms::application_state::RPC_READY, gms::versioned_value::cql_ready(ready));
 }

-future<utils::chunked_vector<client_data>> controller::get_client_data() {
+future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> controller::get_client_data() {
    return _server ? _server->local().get_client_data() : protocol_server::get_client_data();
 }

--- a/transport/controller.hh
+++ b/transport/controller.hh
@@ -77,7 +77,7 @@ public:
    virtual future<> start_server() override;
    virtual future<> stop_server() override;
    virtual future<> request_stop_server() override;
-    virtual future<utils::chunked_vector<client_data>> get_client_data() override;
+    virtual future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> get_client_data() override;
    future<> update_connections_scheduling_group();

    future<std::vector<connection_service_level_params>> get_connections_service_level_params();
--- a/transport/protocol_server.hh
+++ b/transport/protocol_server.hh
@@ -10,6 +10,7 @@

 #include "seastarx.hh"
 #include <seastar/core/future.hh>
+#include <seastar/core/sharded.hh>
 #include <seastar/net/socket_defs.hh>
 #include <vector>
 #include "client_data.hh"
@@ -43,8 +44,8 @@ public:
    /// This variant is used by the REST API so failure is acceptable.
    virtual future<> request_stop_server() = 0;

-    virtual future<utils::chunked_vector<client_data>> get_client_data() {
-        return make_ready_future<utils::chunked_vector<client_data>>(utils::chunked_vector<client_data>());
+    virtual future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> get_client_data() {
+        return make_ready_future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>>();
    }

    protocol_server(seastar::scheduling_group sg) noexcept : _sched_group(std::move(sg)) {}
--- a/transport/server.cc
+++ b/transport/server.cc
@@ -691,6 +691,7 @@ client_data cql_server::connection::make_client_data() const {
        cd.connection_stage = client_connection_stage::authenticating;
    }
    cd.scheduling_group_name = _current_scheduling_group.name();
+    cd.client_options = _client_state.get_client_options();

    cd.ssl_enabled = _ssl_enabled;
    cd.ssl_protocol = _ssl_protocol;
@@ -958,12 +959,17 @@ future<std::unique_ptr<cql_server::response>> cql_server::connection::process_st
    }

    if (auto driver_ver_opt = options.find("DRIVER_VERSION"); driver_ver_opt != options.end()) {
-        _client_state.set_driver_version(driver_ver_opt->second);
+        co_await _client_state.set_driver_version(_server._connection_options_keys_and_values, driver_ver_opt->second);
    }
    if (auto driver_name_opt = options.find("DRIVER_NAME"); driver_name_opt != options.end()) {
-        _client_state.set_driver_name(driver_name_opt->second);
+        co_await _client_state.set_driver_name(_server._connection_options_keys_and_values, driver_name_opt->second);
    }

+    // Store all received client options for later exposure in the system.clients 'client_options' column
+    // (a frozen map<text, text>). Options are cached to reduce memory overhead by deduplicating
+    // identical key/value sets across multiple connections (e.g., same driver name/version).
+    co_await _client_state.set_client_options(_server._connection_options_keys_and_values, options);
+
    cql_protocol_extension_enum_set cql_proto_exts;
    for (cql_protocol_extension ext : supported_cql_protocol_extensions()) {
        if (options.contains(protocol_extension_name(ext))) {
@@ -1647,6 +1653,9 @@ std::unique_ptr<cql_server::response> cql_server::connection::make_supported(int
    opts.insert({"CQL_VERSION", cql3::query_processor::CQL_VERSION});
    opts.insert({"COMPRESSION", "lz4"});
    opts.insert({"COMPRESSION", "snappy"});
+    // CLIENT_OPTIONS value is a JSON string that can be used to pass client-specific configuration,
+    // e.g. CQL driver configuration.
+    opts.insert({"CLIENT_OPTIONS", ""});
    if (_server._config.allow_shard_aware_drivers) {
        opts.insert({"SCYLLA_SHARD", format("{:d}", this_shard_id())});
        opts.insert({"SCYLLA_NR_SHARDS", format("{:d}", smp::count)});
@@ -2308,11 +2317,11 @@ const cql3::cql_metadata_id_type& cql_metadata_id_wrapper::get_response_metadata
    return _response_metadata_id.value();
 }

-future<utils::chunked_vector<client_data>> cql_server::get_client_data() {
-    utils::chunked_vector<client_data> ret;
+future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> cql_server::get_client_data() {
+    utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>> ret;
    co_await for_each_gently([&ret] (const generic_server::connection& c) {
        const connection& conn = dynamic_cast<const connection&>(c);
-        ret.emplace_back(conn.make_client_data());
+        ret.emplace_back(make_foreign(std::make_unique<client_data>(conn.make_client_data())));
    });
    co_return ret;
 }
--- a/transport/server.hh
+++ b/transport/server.hh
@@ -206,6 +206,7 @@ private:
    seastar::metrics::metric_groups _metrics;
    std::unique_ptr<event_notifier> _notifier;
 private:
+    client_options_cache_type _connection_options_keys_and_values;
    transport_stats _stats;
    auth::service& _auth_service;
    qos::service_level_controller& _sl_controller;
@@ -234,7 +235,7 @@ public:
        return scheduling_group_get_specific<cql_sg_stats>(_stats_key).get_cql_opcode_stats(op);
    }

-    future<utils::chunked_vector<client_data>> get_client_data();
+    future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> get_client_data();
    future<> update_connections_scheduling_group();
    future<> update_connections_service_level_params();
    future<std::vector<connection_service_level_params>> get_connections_service_level_params();
--- a/utils/logalloc.cc
+++ b/utils/logalloc.cc
@@ -1547,8 +1547,8 @@ void reclaim_timer::report() const noexcept {
    auto time_level = _stall_detected ? log_level::warn : log_level::debug;
    auto info_level = _stall_detected ? log_level::info : log_level::debug;
    auto MiB = 1024*1024;
-    auto msg_extra = extra_msg_when_stall_detected(_stall_detected,
-                                                   _stall_detected ? current_backtrace() : saved_backtrace{});
+    auto msg_extra = extra_msg_when_stall_detected(_stall_detected && !_preemptible,
+                                                   (_stall_detected && !_preemptible) ? current_backtrace() : saved_backtrace{});

    timing_logger.log(time_level, "{} took {} us, trying to release {:.3f} MiB {}preemptibly, reserve: {{goal: {}, max: {}}}{}",
                        _name, (_duration + 500ns) / 1us, (float)_memory_to_release / MiB, _preemptible ? "" : "non-",