repair: Prevent CPU stall during cross-shard row copy and destruction
When handling `repair_stream_cmd::end_of_current_rows`, passing the foreign list directly to `put_row_diff_handler` triggered a massive synchronous deep copy on the destination shard. Additionally, destroying the list triggered a synchronous deallocation on the source shard. Both operations blocked the reactor and triggered the CPU stall detector.

This commit fixes the issue by introducing `clone_gently()` to copy the list elements one by one, and by leveraging the existing `utils::clear_gently()` to destroy them. Both use `seastar::coroutine::maybe_yield()` to let the reactor run other tasks during large cross-shard transfers and cleanups.

Fixes SCYLLADB-403

Closes scylladb/scylladb#28979
This commit is contained in:
@@ -2362,6 +2362,15 @@ static future<> repair_get_row_diff_with_rpc_stream_process_op_slow_path(
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a copy of `rows` one element at a time, offering the reactor a
// chance to run other tasks after every copied element so that copying a
// large list does not monopolize the CPU.
//
// `rows` is taken by reference and is read across suspension points, so the
// caller must keep it alive until the returned future resolves.
//
// Returns the newly built list holding a copy of every element of `rows`,
// in the same order.
static future<repair_rows_on_wire> clone_gently(const repair_rows_on_wire& rows) {
    repair_rows_on_wire result;
    auto it = rows.begin();
    const auto last = rows.end();
    while (it != last) {
        result.push_back(*it);
        ++it;
        // Yield point between elements; may resume immediately if the
        // reactor has nothing more urgent to do.
        co_await seastar::coroutine::maybe_yield();
    }
    co_return result;
}
|
||||
|
||||
static future<> repair_put_row_diff_with_rpc_stream_process_op(
|
||||
sharded<repair_service>& repair,
|
||||
locator::host_id from,
|
||||
@@ -2388,7 +2397,9 @@ static future<> repair_put_row_diff_with_rpc_stream_process_op(
|
||||
co_await rm->put_row_diff_handler(std::move(*fp));
|
||||
rm->set_repair_state_for_local_node(repair_state::put_row_diff_with_rpc_stream_finished);
|
||||
} else {
|
||||
co_await rm->put_row_diff_handler(*fp);
|
||||
// Gently clone to avoid copy stall on destination shard
|
||||
repair_rows_on_wire local_rows = co_await clone_gently(*fp);
|
||||
co_await seastar::when_all_succeed(rm->put_row_diff_handler(std::move(local_rows)), utils::clear_gently(fp));
|
||||
rm->set_repair_state_for_local_node(repair_state::put_row_diff_with_rpc_stream_finished);
|
||||
}
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user