Merge 'Add more logging for gossiper::lock_endpoint and storage_service::handle_state_normal' from Kamil Braun

In a longevity test reported in scylladb/scylladb#16668 we observed that NORMAL state is not being properly handled for a node that replaced another node. Either handle_state_normal is not being called, or it is but getting stuck in the middle. Which is the case couldn't be determined from the logs, and attempts at creating a local reproducer failed. Thus the plan is to continue debugging using the longevity test, but we need more logs. To check whether `handle_state_normal` was called and which branches were taken, include some INFO level logs there. Also, detect deadlocks inside `gossiper::lock_endpoint` by reporting an error message if `lock_endpoint` waits for the lock for too long. Ref: scylladb/scylladb#16668 Closes scylladb/scylladb#16733 * github.com:scylladb/scylladb: gossiper: report error when waiting too long for endpoint lock gossiper: store source_location instead of string in endpoint_permit storage_service: more verbose logging in handle_state_normal
2024-01-12 10:51:21 +02:00
parent cd9e027047 cf646022cb
commit 5f44ae8371
3 changed files with 61 additions and 17 deletions
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -775,14 +775,18 @@ future<> gossiper::do_status_check() {
    }
 }

-gossiper::endpoint_permit::endpoint_permit(endpoint_locks_map::entry_ptr&& ptr, inet_address addr, std::string caller) noexcept
+gossiper::endpoint_permit::endpoint_permit(endpoint_locks_map::entry_ptr&& ptr, inet_address addr, seastar::compat::source_location caller) noexcept
    : _ptr(std::move(ptr))
    , _permit_id(_ptr->pid)
    , _addr(std::move(addr))
    , _caller(std::move(caller))
 {
    ++_ptr->holders;
-    logger.debug("{}: lock_endpoint {}: acquired: permit_id={} holders={}", _caller, _addr, _permit_id, _ptr->holders);
+    if (!_ptr->first_holder) {
+        _ptr->first_holder = _caller;
+    }
+    _ptr->last_holder = _caller;
+    logger.debug("{}: lock_endpoint {}: acquired: permit_id={} holders={}", _caller.function_name(), _addr, _permit_id, _ptr->holders);
 }

 gossiper::endpoint_permit::endpoint_permit(endpoint_permit&& o) noexcept
@@ -799,11 +803,12 @@ gossiper::endpoint_permit::~endpoint_permit() {
 bool gossiper::endpoint_permit::release() noexcept {
    if (auto ptr = std::exchange(_ptr, nullptr)) {
        assert(ptr->pid == _permit_id);
-        logger.debug("{}: lock_endpoint {}: released: permit_id={} holders={}", _caller, _addr, _permit_id, ptr->holders);
+        logger.debug("{}: lock_endpoint {}: released: permit_id={} holders={}", _caller.function_name(), _addr, _permit_id, ptr->holders);
        if (!--ptr->holders) {
-            logger.debug("{}: lock_endpoint {}: released: permit_id={}", _caller, _addr, _permit_id);
+            logger.debug("{}: lock_endpoint {}: released: permit_id={}", _caller.function_name(), _addr, _permit_id);
            ptr->units.return_all();
            ptr->pid = null_permit_id;
+            ptr->first_holder = ptr->last_holder = std::nullopt;
            _permit_id = null_permit_id;
            return true;
        }
@@ -820,27 +825,58 @@ future<gossiper::endpoint_permit> gossiper::lock_endpoint(inet_address ep, permi
    if (this_shard_id() != 0) {
        on_internal_error(logger, "lock_endpoint must be called on shard 0");
    }
-    std::string caller = l.function_name();
    auto eptr = co_await _endpoint_locks.get_or_load(ep, [] (const inet_address& ep) { return endpoint_lock_entry(); });
    if (pid) {
        if (eptr->pid == pid) {
            // Already locked with the same permit
-            co_return endpoint_permit(std::move(eptr), std::move(ep), std::move(caller));
+            co_return endpoint_permit(std::move(eptr), std::move(ep), std::move(l));
        } else {
            // permit_id mismatch means either that the endpoint lock was released,
            // or maybe we're passed a permit_id that was acquired for a different endpoint.
-            on_internal_error_noexcept(logger, fmt::format("{}: lock_endpoint {}: permit_id={}: endpoint_lock_entry has mismatching permit_id={}", caller, ep, pid, eptr->pid));
+            on_internal_error_noexcept(logger, fmt::format("{}: lock_endpoint {}: permit_id={}: endpoint_lock_entry has mismatching permit_id={}", l.function_name(), ep, pid, eptr->pid));
        }
    }
    pid = permit_id::create_random_id();
-    logger.debug("{}: lock_endpoint {}: waiting: permit_id={}", caller, ep, pid);
-    eptr->units = co_await get_units(eptr->sem, 1, _abort_source);
+    logger.debug("{}: lock_endpoint {}: waiting: permit_id={}", l.function_name(), ep, pid);
+    while (true) {
+        _abort_source.check();
+        static constexpr auto duration = std::chrono::minutes{1};
+        abort_on_expiry aoe(lowres_clock::now() + duration);
+        auto sub = _abort_source.subscribe([&aoe] () noexcept {
+            aoe.abort_source().request_abort();
+        });
+        assert(sub); // due to check() above
+        try {
+            eptr->units = co_await get_units(eptr->sem, 1, aoe.abort_source());
+            break;
+        } catch (const abort_requested_exception&) {
+            if (_abort_source.abort_requested()) {
+                throw;
+            }
+
+            // If we didn't rethrow above, the abort had to come from `abort_on_expiry`'s timer.
+
+            static constexpr auto fmt_loc = [] (const seastar::compat::source_location& l) {
+                return fmt::format("{}({}:{}) `{}`", l.file_name(), l.line(), l.column(), l.function_name());
+            };
+            static constexpr auto fmt_loc_opt = [] (const std::optional<seastar::compat::source_location>& l) {
+                if (!l) {
+                    return "null"s;
+                }
+                return fmt_loc(*l);
+            };
+            logger.error(
+                "{}: waiting for endpoint lock (ep={}) took more than {}, signifying possible deadlock;"
+                " holders: {}, first holder: {}, last holder (might not be current): {}",
+                fmt_loc(l), ep, duration, eptr->holders, fmt_loc_opt(eptr->first_holder), fmt_loc_opt(eptr->last_holder));
+        }
+    }
    eptr->pid = pid;
    if (eptr->holders) {
-        on_internal_error_noexcept(logger, fmt::format("{}: lock_endpoint {}: newly held endpoint_lock_entry has {} holders", caller, ep, eptr->holders));
+        on_internal_error_noexcept(logger, fmt::format("{}: lock_endpoint {}: newly held endpoint_lock_entry has {} holders", l.function_name(), ep, eptr->holders));
    }
    _abort_source.check();
-    co_return endpoint_permit(std::move(eptr), std::move(ep), std::move(caller));
+    co_return endpoint_permit(std::move(eptr), std::move(ep), std::move(l));
 }

 void gossiper::permit_internal_error(const inet_address& addr, permit_id pid) {
--- a/gms/gossiper.hh
+++ b/gms/gossiper.hh
@@ -152,6 +152,10 @@ public:
        permit_id pid;
        semaphore_units<> units;
        size_t holders = 0;
+        std::optional<seastar::compat::source_location> first_holder;
+        // last_holder is the caller of endpoint_permit who last took this entry,
+        // it might not be a current holder (the permit might've been destroyed)
+        std::optional<seastar::compat::source_location> last_holder;

        endpoint_lock_entry() noexcept;
    };
@@ -160,9 +164,9 @@ public:
        endpoint_locks_map::entry_ptr _ptr;
        permit_id _permit_id;
        inet_address _addr;
-        std::string _caller;
+        seastar::compat::source_location _caller;
    public:
-        endpoint_permit(endpoint_locks_map::entry_ptr&& ptr, inet_address addr, std::string caller) noexcept;
+        endpoint_permit(endpoint_locks_map::entry_ptr&& ptr, inet_address addr, seastar::compat::source_location caller) noexcept;
        endpoint_permit(endpoint_permit&&) noexcept;
        ~endpoint_permit();
        bool release() noexcept;
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -3702,7 +3702,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit

    auto tokens = get_tokens_for(endpoint);

-    slogger.debug("Node {} state normal, token {}", endpoint, tokens);
+    slogger.info("Node {} is in normal state, tokens: {}", endpoint, tokens);

    auto tmlock = std::make_unique<token_metadata_lock>(co_await get_token_metadata_lock());
    auto tmptr = co_await get_mutable_token_metadata_ptr();
@@ -3723,7 +3723,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
    // Order Matters, TM.updateHostID() should be called before TM.updateNormalToken(), (see CASSANDRA-4300).
    auto host_id = _gossiper.get_host_id(endpoint);
    if (tmptr->is_normal_token_owner(host_id)) {
-        slogger.info("Node {}/{} state jump to normal", endpoint, host_id);
+        slogger.info("handle_state_normal: node {}/{} was already a normal token owner", endpoint, host_id);
    }
    auto existing = tmptr->get_endpoint_for_host_id_if_known(host_id);

@@ -3775,7 +3775,10 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
        // We do this after update_normal_tokens, allowing for tokens to be properly
        // migrated to the new host_id.

+        slogger.info("Host ID {} continues to be owned by {}", host_id, endpoint);
        if (const auto old_host_id = tmptr->get_host_id_if_known(endpoint); old_host_id && *old_host_id != host_id) {
+            // Replace with same IP scenario
+            slogger.info("The IP {} previously owned host ID {}", endpoint, *old_host_id);
            replaced_id = *old_host_id;
        }
    } else {
@@ -3827,7 +3830,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
        }
        auto current_owner = current->second;
        if (endpoint == current_owner) {
-            slogger.debug("handle_state_normal: endpoint={} == current_owner={} token {}", endpoint, current_owner, t);
+            slogger.info("handle_state_normal: endpoint={} == current_owner={} token {}", endpoint, current_owner, t);
            // set state back to normal, since the node may have tried to leave, but failed and is now back up
            owned_tokens.insert(t);
        } else if (_gossiper.compare_endpoint_startup(endpoint, current_owner) > 0) {
@@ -3854,7 +3857,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
    if (!candidates_for_removal.empty()) {
        for (const auto& [t, ep] : token_to_endpoint_map) {
            if (candidates_for_removal.contains(ep)) {
-                slogger.debug("handle_state_normal: endpoint={} still owns tokens, will not be removed", ep);
+                slogger.info("handle_state_normal: endpoint={} still owns tokens, will not be removed", ep);
                candidates_for_removal.erase(ep);
                if (candidates_for_removal.empty()) {
                    break;
@@ -3930,6 +3933,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
        }
    }
    _normal_state_handled_on_boot.insert(endpoint);
+    slogger.info("handle_state_normal for {}/{} finished", endpoint, host_id);
 }

 future<> storage_service::handle_state_left(inet_address endpoint, std::vector<sstring> pieces, gms::permit_id pid) {