`get_rpc_client` calculates a `topology_ignored` field when creating a client, which says whether the client's endpoint had topology information when this client was created. This is later used to check if that client needs to be dropped and replaced with a new client which uses the correct topology information. The `topology_ignored` field was incorrectly calculated as `true` for pending endpoints even though we had topology information for them. This would lead to unnecessary drops of RPC clients later. Fix this. Remove the default parameter for `with_pending` from `topology::has_endpoint` to avoid similar bugs in the future. Apparently this fixes #11780. The verbs used by the decommission operation use RPC client index 1 (see `do_get_rpc_client_idx` in message/messaging_service.cc). From local testing with additional logging I found that by the time this client is created (i.e. the first verb in this group is used), we already know the topology. The node is pending at that point - hence the bug would cause us to assume we don't know the topology, leading us to drop the RPC client later, possibly in the middle of a decommission operation. Fixes: #11780 Closes #11942 * github.com:scylladb/scylladb: message: messaging_service: check for known topology before calling is_same_dc/rack; test: reenable test_topology::test_decommission_node_add_column; test/pylib: util: configurable period in wait_for; message: messaging_service: fix topology_ignored for pending endpoints in get_rpc_client; message: messaging_service: topology independent connection settings for GOSSIP verbs
125 lines
3.5 KiB
C++
125 lines
3.5 KiB
C++
/*
|
|
*
|
|
* Modified by ScyllaDB
|
|
* Copyright (C) 2022-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: (AGPL-3.0-or-later and Apache-2.0)
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <unordered_set>
|
|
#include <unordered_map>
|
|
|
|
#include <seastar/core/future.hh>
|
|
#include <seastar/core/sstring.hh>
|
|
#include <seastar/util/bool_class.hh>
|
|
|
|
#include "locator/types.hh"
|
|
#include "inet_address_vectors.hh"
|
|
|
|
using namespace seastar;
|
|
|
|
namespace locator {
|
|
|
|
class topology {
|
|
public:
|
|
struct config {
|
|
endpoint_dc_rack local_dc_rack;
|
|
bool disable_proximity_sorting = false;
|
|
};
|
|
topology(config cfg);
|
|
topology(topology&&) = default;
|
|
|
|
topology& operator=(topology&&) = default;
|
|
|
|
future<topology> clone_gently() const;
|
|
future<> clear_gently() noexcept;
|
|
|
|
using pending = bool_class<struct pending_tag>;
|
|
|
|
/**
|
|
* Stores current DC/rack assignment for ep
|
|
*/
|
|
void update_endpoint(const inet_address& ep, endpoint_dc_rack dr, pending pend);
|
|
|
|
/**
|
|
* Removes current DC/rack assignment for ep
|
|
*/
|
|
void remove_endpoint(inet_address ep);
|
|
|
|
/**
|
|
* Returns true iff contains given endpoint.
|
|
* Excludes pending endpoints if `with_pending == pending::no`.
|
|
*/
|
|
bool has_endpoint(inet_address, pending with_pending) const;
|
|
|
|
const std::unordered_map<sstring,
|
|
std::unordered_set<inet_address>>&
|
|
get_datacenter_endpoints() const {
|
|
return _dc_endpoints;
|
|
}
|
|
|
|
const std::unordered_map<sstring,
|
|
std::unordered_map<sstring,
|
|
std::unordered_set<inet_address>>>&
|
|
get_datacenter_racks() const {
|
|
return _dc_racks;
|
|
}
|
|
|
|
const endpoint_dc_rack& get_location(const inet_address& ep) const;
|
|
sstring get_rack() const;
|
|
sstring get_rack(inet_address ep) const;
|
|
sstring get_datacenter() const;
|
|
sstring get_datacenter(inet_address ep) const;
|
|
|
|
auto get_local_dc_filter() const noexcept {
|
|
return [ this, local_dc = get_datacenter() ] (inet_address ep) {
|
|
return get_datacenter(ep) == local_dc;
|
|
};
|
|
};
|
|
|
|
template <std::ranges::range Range>
|
|
inline size_t count_local_endpoints(const Range& endpoints) const {
|
|
return std::count_if(endpoints.begin(), endpoints.end(), get_local_dc_filter());
|
|
}
|
|
|
|
/**
|
|
* This method will sort the <tt>List</tt> by proximity to the given
|
|
* address.
|
|
*/
|
|
void sort_by_proximity(inet_address address, inet_address_vector_replica_set& addresses) const;
|
|
|
|
private:
|
|
// default constructor for cloning purposes
|
|
topology() = default;
|
|
|
|
/**
|
|
* compares two endpoints in relation to the target endpoint, returning as
|
|
* Comparator.compare would
|
|
*/
|
|
int compare_endpoints(const inet_address& address, const inet_address& a1, const inet_address& a2) const;
|
|
void remove_pending_location(const inet_address& ep);
|
|
|
|
/** multi-map: DC -> endpoints in that DC */
|
|
std::unordered_map<sstring,
|
|
std::unordered_set<inet_address>>
|
|
_dc_endpoints;
|
|
|
|
/** map: DC -> (multi-map: rack -> endpoints in that rack) */
|
|
std::unordered_map<sstring,
|
|
std::unordered_map<sstring,
|
|
std::unordered_set<inet_address>>>
|
|
_dc_racks;
|
|
|
|
/** reverse-lookup map: endpoint -> current known dc/rack assignment */
|
|
std::unordered_map<inet_address, endpoint_dc_rack> _current_locations;
|
|
std::unordered_map<inet_address, endpoint_dc_rack> _pending_locations;
|
|
|
|
bool _sort_by_proximity = true;
|
|
};
|
|
|
|
} // namespace locator
|