From 43b48c59fdefa5e261fc645f0967816e9f003a78 Mon Sep 17 00:00:00 2001 From: Kamil Braun Date: Wed, 7 Jun 2023 12:11:42 +0200 Subject: [PATCH 1/3] raft: replication test: print a warning when handling `commit_status_unknown` `commit_status_unknown` may lead to double application and then a hard-to-debug failure. But some tests actually rely on retrying it, so print a warning and leave a FIXME for maybe a better future solution. Ref: #14029 --- test/raft/replication.hh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/raft/replication.hh b/test/raft/replication.hh index 9e6c4852e8..88ea308759 100644 --- a/test/raft/replication.hh +++ b/test/raft/replication.hh @@ -950,6 +950,13 @@ future<> raft_cluster::add_entry(size_t val, std::optional server co_await at->add_entry(create_command(val), raft::wait_type::committed); break; } catch (raft::commit_status_unknown& e) { + // FIXME: in some cases when we get `commit_status_unknown` the entry may have been applied. + // Retrying it could lead to double application which causes hard to debug failures, e.g. #14029. + // For now we leave a warning so the logs give a hint if such a failure happens and we need + // to debug it. Ideally we would never have to handle `commit_status_unknown` but some replication + // tests rely on retrying it during leader changes etc. + tlogger.warn("replication_test: got `commit_status_unknown` from `add_entry`" + ", val: {}, server: {}", val, server); } catch (raft::dropped_entry& e) { // retry if an entry is dropped because the leader have changed after it was submitted } From 2fea2fc19c1a1f41a0533fdd3c6e30296eb5f3ac Mon Sep 17 00:00:00 2001 From: Kamil Braun Date: Wed, 7 Jun 2023 14:14:39 +0200 Subject: [PATCH 2/3] raft: replication test: don't hang if `_seen` overshots `_apply_entries` As in the previous commit, if a command gets doubly applied due to `commit_status_unknown`, this will could lead to hard-to-debug failures; one of them was the test hanging because we would never call `_done.set_value()` in `state_machine::apply` due to `_seen` overshooting `_apply_entries`. Fix the problem and print a warning if we apply too many commands. Fixes: #14072 --- test/raft/replication.hh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/raft/replication.hh b/test/raft/replication.hh index 88ea308759..f91204ffdd 100644 --- a/test/raft/replication.hh +++ b/test/raft/replication.hh @@ -407,7 +407,11 @@ public: future<> apply(const std::vector commands) override { auto n = _apply(_id, commands, hasher); _seen += n; - if (n && _seen == _apply_entries) { + if (n && _seen >= _apply_entries) { + if (_seen > _apply_entries) { + // Retrying `commit_status_unknown` may lead to this. Ref: #14072 + tlogger.warn("sm::apply[{}]: _seen ({}) overshot _apply_entries ({})", _id, _seen, _apply_entries); + } _done.set_value(); } tlogger.debug("sm::apply[{}] got {}/{} entries", _id, _seen, _apply_entries); From 5504da374538061e7414eb512c6001e4a8487a6f Mon Sep 17 00:00:00 2001 From: Kamil Braun Date: Wed, 7 Jun 2023 11:21:39 +0200 Subject: [PATCH 3/3] raft: server: throw fewer `commit_status_unknown`s from `wait_for_entry` There are some cases where we can deduce that the entry was committed, but we were throwing `commit_status_unknown`. Handle one more such case. The added comment explains it in detail. Also add a FIXME for another case where we throw `commit_status_unknown` but we could do better. Fixes: #14029 --- raft/server.cc | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/raft/server.cc b/raft/server.cc index d29f47b01c..06268bc96a 100644 --- a/raft/server.cc +++ b/raft/server.cc @@ -432,6 +432,30 @@ future<> server_impl::wait_for_entry(entry_id eid, wait_type type, seastar::abor _stats.waiters_awoken++; if (!term) { + // The entry at index `eid.idx` got truncated away. + // Still, if the last snapshot's term is the same as `eid.term`, we can deduce + // that our entry `eid` got committed at index `eid.idx` and not some different entry. + // Indeed, let `snp_idx` be the last snapshot index (`snp_idx >= eid.idx`). Consider + // the entry that was committed at `snp_idx`; it had the same term as the snapshot's term, + // `snp_term`. If `eid.term == snp_term`, then we know that the entry at `snp_idx` was + // created by the same leader as the entry `eid`. A leader doesn't replace an entry + // that it previously appended, so when it appended the `snp_idx` entry, the entry at + // `eid.idx` was still `eid`. By the Log Matching Property, every log that had the entry + // `(snp_idx, snp_term)` also had the entry `eid`. Thus when the snapshot at `snp_idx` + // was created, it included the entry `eid`. + auto snap_idx = _fsm->log_last_snapshot_idx(); + auto snap_term = _fsm->log_term_for(snap_idx); + assert(snap_term); + assert(snap_idx >= eid.idx); + if (type == wait_type::committed && snap_term == eid.term) { + logger.trace("[{}] wait_for_entry {}.{}: entry got truncated away, but has the snapshot's term" + " (snapshot index: {})", id(), eid.term, eid.idx, snap_idx); + co_return; + + // We don't do this for `wait_type::applied` - see below why. + } + + logger.trace("[{}] wait_for_entry {}.{}: entry got truncated away", id(), eid.term, eid.idx); throw commit_status_unknown(); } @@ -444,6 +468,12 @@ future<> server_impl::wait_for_entry(entry_id eid, wait_type type, seastar::abor // and we don't know if the entry was applied with `state_machine::apply` // (we may've loaded a snapshot before we managed to apply the entry). // As specified by `add_entry`, throw `commit_status_unknown` in this case. + // + // FIXME: replace this with a different exception type - `commit_status_unknown` + // gives too much uncertainty while we know that the entry was committed + // and had to be applied on at least one server. Some callers of `add_entry` + // need to know only that the current state includes that entry, whether it was done + // through `apply` on this server or through receiving a snapshot. throw commit_status_unknown(); }