Merge 'Add more logging for gossiper::lock_endpoint and storage_service::handle_state_normal' from Kamil Braun

In a longevity test reported in scylladb/scylladb#16668 we observed that
NORMAL state is not being properly handled for a node that replaced
another node. Either handle_state_normal is not being called, or it is
but getting stuck in the middle. Which is the case couldn't be
determined from the logs, and attempts at creating a local reproducer
failed.

Thus the plan is to continue debugging using the longevity test, but we need
more logs. To check whether `handle_state_normal` was called and which branches
were taken, include some INFO level logs there. Also, detect deadlocks inside
`gossiper::lock_endpoint` by reporting an error message if `lock_endpoint`
waits for the lock for too long.

Ref: scylladb/scylladb#16668

Closes scylladb/scylladb#16733

* github.com:scylladb/scylladb:
  gossiper: report error when waiting too long for endpoint lock
  gossiper: store source_location instead of string in endpoint_permit
  storage_service: more verbose logging in handle_state_normal
This commit is contained in:
Botond Dénes
2024-01-12 10:51:21 +02:00
3 changed files with 61 additions and 17 deletions

View File

@@ -775,14 +775,18 @@ future<> gossiper::do_status_check() {
}
}
gossiper::endpoint_permit::endpoint_permit(endpoint_locks_map::entry_ptr&& ptr, inet_address addr, std::string caller) noexcept
gossiper::endpoint_permit::endpoint_permit(endpoint_locks_map::entry_ptr&& ptr, inet_address addr, seastar::compat::source_location caller) noexcept
: _ptr(std::move(ptr))
, _permit_id(_ptr->pid)
, _addr(std::move(addr))
, _caller(std::move(caller))
{
++_ptr->holders;
logger.debug("{}: lock_endpoint {}: acquired: permit_id={} holders={}", _caller, _addr, _permit_id, _ptr->holders);
if (!_ptr->first_holder) {
_ptr->first_holder = _caller;
}
_ptr->last_holder = _caller;
logger.debug("{}: lock_endpoint {}: acquired: permit_id={} holders={}", _caller.function_name(), _addr, _permit_id, _ptr->holders);
}
gossiper::endpoint_permit::endpoint_permit(endpoint_permit&& o) noexcept
@@ -799,11 +803,12 @@ gossiper::endpoint_permit::~endpoint_permit() {
bool gossiper::endpoint_permit::release() noexcept {
if (auto ptr = std::exchange(_ptr, nullptr)) {
assert(ptr->pid == _permit_id);
logger.debug("{}: lock_endpoint {}: released: permit_id={} holders={}", _caller, _addr, _permit_id, ptr->holders);
logger.debug("{}: lock_endpoint {}: released: permit_id={} holders={}", _caller.function_name(), _addr, _permit_id, ptr->holders);
if (!--ptr->holders) {
logger.debug("{}: lock_endpoint {}: released: permit_id={}", _caller, _addr, _permit_id);
logger.debug("{}: lock_endpoint {}: released: permit_id={}", _caller.function_name(), _addr, _permit_id);
ptr->units.return_all();
ptr->pid = null_permit_id;
ptr->first_holder = ptr->last_holder = std::nullopt;
_permit_id = null_permit_id;
return true;
}
@@ -820,27 +825,58 @@ future<gossiper::endpoint_permit> gossiper::lock_endpoint(inet_address ep, permi
if (this_shard_id() != 0) {
on_internal_error(logger, "lock_endpoint must be called on shard 0");
}
std::string caller = l.function_name();
auto eptr = co_await _endpoint_locks.get_or_load(ep, [] (const inet_address& ep) { return endpoint_lock_entry(); });
if (pid) {
if (eptr->pid == pid) {
// Already locked with the same permit
co_return endpoint_permit(std::move(eptr), std::move(ep), std::move(caller));
co_return endpoint_permit(std::move(eptr), std::move(ep), std::move(l));
} else {
// permit_id mismatch means either that the endpoint lock was released,
// or maybe we're passed a permit_id that was acquired for a different endpoint.
on_internal_error_noexcept(logger, fmt::format("{}: lock_endpoint {}: permit_id={}: endpoint_lock_entry has mismatching permit_id={}", caller, ep, pid, eptr->pid));
on_internal_error_noexcept(logger, fmt::format("{}: lock_endpoint {}: permit_id={}: endpoint_lock_entry has mismatching permit_id={}", l.function_name(), ep, pid, eptr->pid));
}
}
pid = permit_id::create_random_id();
logger.debug("{}: lock_endpoint {}: waiting: permit_id={}", caller, ep, pid);
eptr->units = co_await get_units(eptr->sem, 1, _abort_source);
logger.debug("{}: lock_endpoint {}: waiting: permit_id={}", l.function_name(), ep, pid);
while (true) {
_abort_source.check();
static constexpr auto duration = std::chrono::minutes{1};
abort_on_expiry aoe(lowres_clock::now() + duration);
auto sub = _abort_source.subscribe([&aoe] () noexcept {
aoe.abort_source().request_abort();
});
assert(sub); // due to check() above
try {
eptr->units = co_await get_units(eptr->sem, 1, aoe.abort_source());
break;
} catch (const abort_requested_exception&) {
if (_abort_source.abort_requested()) {
throw;
}
// If we didn't rethrow above, the abort had to come from `abort_on_expiry`'s timer.
static constexpr auto fmt_loc = [] (const seastar::compat::source_location& l) {
return fmt::format("{}({}:{}) `{}`", l.file_name(), l.line(), l.column(), l.function_name());
};
static constexpr auto fmt_loc_opt = [] (const std::optional<seastar::compat::source_location>& l) {
if (!l) {
return "null"s;
}
return fmt_loc(*l);
};
logger.error(
"{}: waiting for endpoint lock (ep={}) took more than {}, signifying possible deadlock;"
" holders: {}, first holder: {}, last holder (might not be current): {}",
fmt_loc(l), ep, duration, eptr->holders, fmt_loc_opt(eptr->first_holder), fmt_loc_opt(eptr->last_holder));
}
}
eptr->pid = pid;
if (eptr->holders) {
on_internal_error_noexcept(logger, fmt::format("{}: lock_endpoint {}: newly held endpoint_lock_entry has {} holders", caller, ep, eptr->holders));
on_internal_error_noexcept(logger, fmt::format("{}: lock_endpoint {}: newly held endpoint_lock_entry has {} holders", l.function_name(), ep, eptr->holders));
}
_abort_source.check();
co_return endpoint_permit(std::move(eptr), std::move(ep), std::move(caller));
co_return endpoint_permit(std::move(eptr), std::move(ep), std::move(l));
}
void gossiper::permit_internal_error(const inet_address& addr, permit_id pid) {

View File

@@ -152,6 +152,10 @@ public:
permit_id pid;
semaphore_units<> units;
size_t holders = 0;
std::optional<seastar::compat::source_location> first_holder;
// last_holder is the caller of endpoint_permit who last took this entry,
// it might not be a current holder (the permit might've been destroyed)
std::optional<seastar::compat::source_location> last_holder;
endpoint_lock_entry() noexcept;
};
@@ -160,9 +164,9 @@ public:
endpoint_locks_map::entry_ptr _ptr;
permit_id _permit_id;
inet_address _addr;
std::string _caller;
seastar::compat::source_location _caller;
public:
endpoint_permit(endpoint_locks_map::entry_ptr&& ptr, inet_address addr, std::string caller) noexcept;
endpoint_permit(endpoint_locks_map::entry_ptr&& ptr, inet_address addr, seastar::compat::source_location caller) noexcept;
endpoint_permit(endpoint_permit&&) noexcept;
~endpoint_permit();
bool release() noexcept;

View File

@@ -3702,7 +3702,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
auto tokens = get_tokens_for(endpoint);
slogger.debug("Node {} state normal, token {}", endpoint, tokens);
slogger.info("Node {} is in normal state, tokens: {}", endpoint, tokens);
auto tmlock = std::make_unique<token_metadata_lock>(co_await get_token_metadata_lock());
auto tmptr = co_await get_mutable_token_metadata_ptr();
@@ -3723,7 +3723,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
// Order Matters, TM.updateHostID() should be called before TM.updateNormalToken(), (see CASSANDRA-4300).
auto host_id = _gossiper.get_host_id(endpoint);
if (tmptr->is_normal_token_owner(host_id)) {
slogger.info("Node {}/{} state jump to normal", endpoint, host_id);
slogger.info("handle_state_normal: node {}/{} was already a normal token owner", endpoint, host_id);
}
auto existing = tmptr->get_endpoint_for_host_id_if_known(host_id);
@@ -3775,7 +3775,10 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
// We do this after update_normal_tokens, allowing for tokens to be properly
// migrated to the new host_id.
slogger.info("Host ID {} continues to be owned by {}", host_id, endpoint);
if (const auto old_host_id = tmptr->get_host_id_if_known(endpoint); old_host_id && *old_host_id != host_id) {
// Replace with same IP scenario
slogger.info("The IP {} previously owned host ID {}", endpoint, *old_host_id);
replaced_id = *old_host_id;
}
} else {
@@ -3827,7 +3830,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
}
auto current_owner = current->second;
if (endpoint == current_owner) {
slogger.debug("handle_state_normal: endpoint={} == current_owner={} token {}", endpoint, current_owner, t);
slogger.info("handle_state_normal: endpoint={} == current_owner={} token {}", endpoint, current_owner, t);
// set state back to normal, since the node may have tried to leave, but failed and is now back up
owned_tokens.insert(t);
} else if (_gossiper.compare_endpoint_startup(endpoint, current_owner) > 0) {
@@ -3854,7 +3857,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
if (!candidates_for_removal.empty()) {
for (const auto& [t, ep] : token_to_endpoint_map) {
if (candidates_for_removal.contains(ep)) {
slogger.debug("handle_state_normal: endpoint={} still owns tokens, will not be removed", ep);
slogger.info("handle_state_normal: endpoint={} still owns tokens, will not be removed", ep);
candidates_for_removal.erase(ep);
if (candidates_for_removal.empty()) {
break;
@@ -3930,6 +3933,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
}
}
_normal_state_handled_on_boot.insert(endpoint);
slogger.info("handle_state_normal for {}/{} finished", endpoint, host_id);
}
future<> storage_service::handle_state_left(inet_address endpoint, std::vector<sstring> pieces, gms::permit_id pid) {