Merge 'raft: server: throw fewer commit_status_unknowns from wait_for_entry' from Kamil Braun
There are some cases where we can deduce that the entry was committed, but we were throwing `commit_status_unknown`. Handle one more such case. The added comment explains it in detail. Also add a FIXME for another case where we throw `commit_status_unknown` but we could do better. Fixes: #14029 Fixes: #14072 Closes #14167 * github.com:scylladb/scylladb: raft: server: throw fewer `commit_status_unknown`s from `wait_for_entry` raft: replication test: don't hang if `_seen` overshots `_apply_entries` raft: replication test: print a warning when handling `commit_status_unknown`
This commit is contained in:
@@ -432,6 +432,30 @@ future<> server_impl::wait_for_entry(entry_id eid, wait_type type, seastar::abor
|
||||
_stats.waiters_awoken++;
|
||||
|
||||
if (!term) {
|
||||
// The entry at index `eid.idx` got truncated away.
|
||||
// Still, if the last snapshot's term is the same as `eid.term`, we can deduce
|
||||
// that our entry `eid` got committed at index `eid.idx` and not some different entry.
|
||||
// Indeed, let `snp_idx` be the last snapshot index (`snp_idx >= eid.idx`). Consider
|
||||
// the entry that was committed at `snp_idx`; it had the same term as the snapshot's term,
|
||||
// `snp_term`. If `eid.term == snp_term`, then we know that the entry at `snp_idx` was
|
||||
// created by the same leader as the entry `eid`. A leader doesn't replace an entry
|
||||
// that it previously appended, so when it appended the `snp_idx` entry, the entry at
|
||||
// `eid.idx` was still `eid`. By the Log Matching Property, every log that had the entry
|
||||
// `(snp_idx, snp_term)` also had the entry `eid`. Thus when the snapshot at `snp_idx`
|
||||
// was created, it included the entry `eid`.
|
||||
auto snap_idx = _fsm->log_last_snapshot_idx();
|
||||
auto snap_term = _fsm->log_term_for(snap_idx);
|
||||
assert(snap_term);
|
||||
assert(snap_idx >= eid.idx);
|
||||
if (type == wait_type::committed && snap_term == eid.term) {
|
||||
logger.trace("[{}] wait_for_entry {}.{}: entry got truncated away, but has the snapshot's term"
|
||||
" (snapshot index: {})", id(), eid.term, eid.idx, snap_idx);
|
||||
co_return;
|
||||
|
||||
// We don't do this for `wait_type::applied` - see below why.
|
||||
}
|
||||
|
||||
logger.trace("[{}] wait_for_entry {}.{}: entry got truncated away", id(), eid.term, eid.idx);
|
||||
throw commit_status_unknown();
|
||||
}
|
||||
|
||||
@@ -444,6 +468,12 @@ future<> server_impl::wait_for_entry(entry_id eid, wait_type type, seastar::abor
|
||||
// and we don't know if the entry was applied with `state_machine::apply`
|
||||
// (we may've loaded a snapshot before we managed to apply the entry).
|
||||
// As specified by `add_entry`, throw `commit_status_unknown` in this case.
|
||||
//
|
||||
// FIXME: replace this with a different exception type - `commit_status_unknown`
|
||||
// gives too much uncertainty while we know that the entry was committed
|
||||
// and had to be applied on at least one server. Some callers of `add_entry`
|
||||
// need to know only that the current state includes that entry, whether it was done
|
||||
// through `apply` on this server or through receiving a snapshot.
|
||||
throw commit_status_unknown();
|
||||
}
|
||||
|
||||
|
||||
@@ -407,7 +407,11 @@ public:
|
||||
future<> apply(const std::vector<raft::command_cref> commands) override {
|
||||
auto n = _apply(_id, commands, hasher);
|
||||
_seen += n;
|
||||
if (n && _seen == _apply_entries) {
|
||||
if (n && _seen >= _apply_entries) {
|
||||
if (_seen > _apply_entries) {
|
||||
// Retrying `commit_status_unknown` may lead to this. Ref: #14072
|
||||
tlogger.warn("sm::apply[{}]: _seen ({}) overshot _apply_entries ({})", _id, _seen, _apply_entries);
|
||||
}
|
||||
_done.set_value();
|
||||
}
|
||||
tlogger.debug("sm::apply[{}] got {}/{} entries", _id, _seen, _apply_entries);
|
||||
@@ -950,6 +954,13 @@ future<> raft_cluster<Clock>::add_entry(size_t val, std::optional<size_t> server
|
||||
co_await at->add_entry(create_command(val), raft::wait_type::committed);
|
||||
break;
|
||||
} catch (raft::commit_status_unknown& e) {
|
||||
// FIXME: in some cases when we get `commit_status_unknown` the entry may have been applied.
|
||||
// Retrying it could lead to double application which causes hard to debug failures, e.g. #14029.
|
||||
// For now we leave a warning so the logs give a hint if such a failure happens and we need
|
||||
// to debug it. Ideally we would never have to handle `commit_status_unknown` but some replication
|
||||
// tests rely on retrying it during leader changes etc.
|
||||
tlogger.warn("replication_test: got `commit_status_unknown` from `add_entry`"
|
||||
", val: {}, server: {}", val, server);
|
||||
} catch (raft::dropped_entry& e) {
|
||||
// retry if an entry is dropped because the leader have changed after it was submitted
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user