From 43b48c59fdefa5e261fc645f0967816e9f003a78 Mon Sep 17 00:00:00 2001
From: Kamil Braun <kbraun@scylladb.com>
Date: Wed, 7 Jun 2023 12:11:42 +0200
Subject: [PATCH 1/3] raft: replication test: print a warning when handling
 `commit_status_unknown`

`commit_status_unknown` may lead to double application and then a
hard-to-debug failure. But some tests actually rely on retrying it, so
print a warning and leave a FIXME for maybe a better future solution.

Ref: #14029
---
 test/raft/replication.hh | 7 +++++++
 1 file changed, 7 insertions(+)
diff --git a/test/raft/replication.hh b/test/raft/replication.hh
index 9e6c4852e8..88ea308759 100644
--- a/test/raft/replication.hh
+++ b/test/raft/replication.hh
@@ -950,6 +950,13 @@ future<> raft_cluster<Clock>::add_entry(size_t val, std::optional<size_t> server
             co_await at->add_entry(create_command(val), raft::wait_type::committed);
             break;
         } catch (raft::commit_status_unknown& e) {
+            // FIXME: in some cases when we get `commit_status_unknown` the entry may have been applied.
+            // Retrying it could lead to double application which causes hard to debug failures, e.g. #14029.
+            // For now we leave a warning so the logs give a hint if such a failure happens and we need
+            // to debug it. Ideally we would never have to handle `commit_status_unknown` but some replication
+            // tests rely on retrying it during leader changes etc.
+            tlogger.warn("replication_test: got `commit_status_unknown` from `add_entry`"
+                    ", val: {}, server: {}", val, server);
         } catch (raft::dropped_entry& e) {
             // retry if an entry is dropped because the leader have changed after it was submitted
         }

From 2fea2fc19c1a1f41a0533fdd3c6e30296eb5f3ac Mon Sep 17 00:00:00 2001
From: Kamil Braun <kbraun@scylladb.com>
Date: Wed, 7 Jun 2023 14:14:39 +0200
Subject: [PATCH 2/3] raft: replication test: don't hang if `_seen` overshots
 `_apply_entries`

As in the previous commit, if a command gets doubly applied due to
`commit_status_unknown`, this will could lead to hard-to-debug failures;
one of them was the test hanging because we would never call
`_done.set_value()` in `state_machine::apply` due to `_seen`
overshooting `_apply_entries`.

Fix the problem and print a warning if we apply too many commands.

Fixes: #14072
---
 test/raft/replication.hh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/test/raft/replication.hh b/test/raft/replication.hh
index 88ea308759..f91204ffdd 100644
--- a/test/raft/replication.hh
+++ b/test/raft/replication.hh
@@ -407,7 +407,11 @@ public:
     future<> apply(const std::vector<raft::command_cref> commands) override {
         auto n = _apply(_id, commands, hasher);
         _seen += n;
-        if (n && _seen == _apply_entries) {
+        if (n && _seen >= _apply_entries) {
+            if (_seen > _apply_entries) {
+                // Retrying `commit_status_unknown` may lead to this. Ref: #14072
+                tlogger.warn("sm::apply[{}]: _seen ({}) overshot _apply_entries ({})", _id, _seen, _apply_entries);
+            }
             _done.set_value();
         }
         tlogger.debug("sm::apply[{}] got {}/{} entries", _id, _seen, _apply_entries);

From 5504da374538061e7414eb512c6001e4a8487a6f Mon Sep 17 00:00:00 2001
From: Kamil Braun <kbraun@scylladb.com>
Date: Wed, 7 Jun 2023 11:21:39 +0200
Subject: [PATCH 3/3] raft: server: throw fewer `commit_status_unknown`s from
 `wait_for_entry`

There are some cases where we can deduce that the entry was committed,
but we were throwing `commit_status_unknown`. Handle one more such case.
The added comment explains it in detail.

Also add a FIXME for another case where we throw `commit_status_unknown`
but we could do better.

Fixes: #14029
---
 raft/server.cc | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/raft/server.cc b/raft/server.cc
index d29f47b01c..06268bc96a 100644
--- a/raft/server.cc
+++ b/raft/server.cc
@@ -432,6 +432,30 @@ future<> server_impl::wait_for_entry(entry_id eid, wait_type type, seastar::abor
             _stats.waiters_awoken++;
 
             if (!term) {
+                // The entry at index `eid.idx` got truncated away.
+                // Still, if the last snapshot's term is the same as `eid.term`, we can deduce
+                // that our entry `eid` got committed at index `eid.idx` and not some different entry.
+                // Indeed, let `snp_idx` be the last snapshot index (`snp_idx >= eid.idx`). Consider
+                // the entry that was committed at `snp_idx`; it had the same term as the snapshot's term,
+                // `snp_term`. If `eid.term == snp_term`, then we know that the entry at `snp_idx` was
+                // created by the same leader as the entry `eid`. A leader doesn't replace an entry
+                // that it previously appended, so when it appended the `snp_idx` entry, the entry at
+                // `eid.idx` was still `eid`. By the Log Matching Property, every log that had the entry
+                // `(snp_idx, snp_term)` also had the entry `eid`. Thus when the snapshot at `snp_idx`
+                // was created, it included the entry `eid`.
+                auto snap_idx = _fsm->log_last_snapshot_idx();
+                auto snap_term = _fsm->log_term_for(snap_idx);
+                assert(snap_term);
+                assert(snap_idx >= eid.idx);
+                if (type == wait_type::committed && snap_term == eid.term) {
+                    logger.trace("[{}] wait_for_entry {}.{}: entry got truncated away, but has the snapshot's term"
+                                 " (snapshot index: {})", id(), eid.term, eid.idx, snap_idx);
+                    co_return;
+
+                    // We don't do this for `wait_type::applied` - see below why.
+                }
+
+                logger.trace("[{}] wait_for_entry {}.{}: entry got truncated away", id(), eid.term, eid.idx);
                 throw commit_status_unknown();
             }
 
@@ -444,6 +468,12 @@ future<> server_impl::wait_for_entry(entry_id eid, wait_type type, seastar::abor
                 // and we don't know if the entry was applied with `state_machine::apply`
                 // (we may've loaded a snapshot before we managed to apply the entry).
                 // As specified by `add_entry`, throw `commit_status_unknown` in this case.
+                //
+                // FIXME: replace this with a different exception type - `commit_status_unknown`
+                // gives too much uncertainty while we know that the entry was committed
+                // and had to be applied on at least one server. Some callers of `add_entry`
+                // need to know only that the current state includes that entry, whether it was done
+                // through `apply` on this server or through receiving a snapshot.
                 throw commit_status_unknown();
             }