db/batchlog_manager: bypass cache when scanning batchlog table

Scans should not pollute the cache with cold data, in general. In the case of the batchlog table, there is another reason to bypass the cache: this table can have a lot of partition tombstones, which currently are not purged from the cache. So in certain cases, using the cache can make batch replay very slow, because it has to scan past tombstones of already replayed batches.
2024-06-19 09:43:01 -04:00
parent 29f610d861
commit 31c0fa07d8
1 changed files with 2 additions and 2 deletions
--- a/db/batchlog_manager.cc
+++ b/db/batchlog_manager.cc
@@ -134,7 +134,7 @@ future<> db::batchlog_manager::stop() {
 }

 future<size_t> db::batchlog_manager::count_all_batches() const {
-    sstring query = format("SELECT count(*) FROM {}.{}", system_keyspace::NAME, system_keyspace::BATCHLOG);
+    sstring query = format("SELECT count(*) FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG);
    return _qp.execute_internal(query, cql3::query_processor::cache_internal::yes).then([](::shared_ptr<cql3::untyped_result_set> rs) {
       return size_t(rs->one().get_as<int64_t>("count"));
    });
@@ -260,7 +260,7 @@ future<> db::batchlog_manager::replay_all_failed_batches() {
    return seastar::with_gate(_gate, [this, batch = std::move(batch)] () mutable {
        blogger.debug("Started replayAllFailedBatches (cpu {})", this_shard_id());
        return _qp.query_internal(
-                format("SELECT id, data, written_at, version FROM {}.{})", system_keyspace::NAME, system_keyspace::BATCHLOG),
+                format("SELECT id, data, written_at, version FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG),
                db::consistency_level::ONE,
                {},
                page_size,