service/storage_proxy: schedule_repair(): materialize the range into a vector

Said method passes down its `diff` input to `mutate_internal()`, after
some std::ranges massaging. Said massaging is destructive -- it moves
items from the diff. If the output range is iterated-over multiple
times, only the first time will see the actual output, further
iterations will get an empty range.
When trace-level logging is enabled, this is exactly what happens:
`mutate_internal()` iterates over the range multiple times, first to log
its content, then to pass it down the stack. This ends up resulting in
a range with moved-from elements being pased down and consequently write
handlers being created with nullopt mutations.

Make the range re-entrant by materializing it into a vector before
passing it to `mutate_internal()`.

Fixes: scylladb/scylladb#21907
Fixes: scylladb/scylladb#21714

Closes scylladb/scylladb#21910
This commit is contained in:
Botond Dénes
2024-12-12 23:37:29 -05:00
committed by Avi Kivity
parent 6e1fb2c74e
commit 7150442f6a
2 changed files with 36 additions and 1 deletions

View File

@@ -4401,7 +4401,13 @@ future<result<>> storage_proxy::schedule_repair(locator::effective_replication_m
if (diffs.empty()) {
return make_ready_future<result<>>(bo::success());
}
return mutate_internal(diffs | std::views::values | std::views::transform([ermp] (auto& v) { return read_repair_mutation{std::move(v), ermp}; }), cl, false, std::move(trace_state), std::move(permit));
return mutate_internal(
diffs |
std::views::values |
std::views::transform([ermp] (auto& v) { return read_repair_mutation{std::move(v), ermp}; }) |
// The transform above is destructive, materialize into a vector to make the range re-iterable.
std::ranges::to<std::vector<read_repair_mutation>>()
, cl, false, std::move(trace_state), std::move(permit));
}
class abstract_read_resolver {

View File

@@ -328,3 +328,32 @@ async def test_incremental_read_repair(data_class, workdir, manager):
logger.info("Check rows with CL=ONE after read-repair")
check_rows(cql, host1, all_rows)
check_rows(cql, host2, all_rows)
@pytest.mark.asyncio
async def test_read_repair_with_trace_logging(request, manager):
logger.info("Creating a new cluster")
cmdline = ["--hinted-handoff-enabled", "0", "--logger-log-level", "mutation_data=trace"]
for i in range(2):
await manager.server_add(cmdline=cmdline)
cql = manager.get_cql()
srvs = await manager.running_servers()
await wait_for_cql_and_get_hosts(cql, srvs, time.time() + 60)
await cql.run_async("CREATE KEYSPACE ks WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 2};")
await cql.run_async("CREATE TABLE ks.t (pk bigint PRIMARY KEY, c int);")
await cql.run_async("INSERT INTO ks.t (pk, c) VALUES (0, 0)")
await manager.server_stop(srvs[0].server_id)
prepared = cql.prepare("INSERT INTO ks.t (pk, c) VALUES (0, 1)")
prepared.consistency_level = ConsistencyLevel.ONE
await cql.run_async(prepared)
await manager.server_start(srvs[0].server_id)
prepared = cql.prepare("SELECT * FROM ks.t WHERE pk = 0")
prepared.consistency_level = ConsistencyLevel.ALL
await cql.run_async(prepared)