Merge 'Support for sending tablet info to the drivers' from Sylwia Szunejko

There is a need for sending tablet info to the drivers so they can be tablet aware. For the best performance we want to get this info lazily only when it is needed.

The info is sent when the driver asks for data that a specific tablet contains and the request is directed to the wrong node/shard, so that the driver can use that information for every subsequent query. If the query is sent to the wrong node/shard, we want to send the RESULT message with additional information about the tablet (replicas and token range) in custom_payload.

Mechanism for sending custom_payload added.

Sending custom_payload was tested using a three-node cluster and cqlsh queries. I used RF=1 so that choosing the wrong node was testable.

I also manually tested it with the python-driver and confirmed that the tablet info can be deserialized properly.

Automatic tests added.

Closes scylladb/scylladb#15410

* github.com:scylladb/scylladb:
  docs: add documentation about sending tablet info to protocol extensions
  Add tests for sending tablet info
  cql3: send tablet if wrong node/shard is used during modification statement
  cql3: send tablet if wrong node/shard is used during select statement
  locator: add function to check locality
  locator: add function to check if host is local
  transport: add function to add tablet info to the result_message
  transport: add support for setting custom payload
This commit is contained in:
Tomasz Grabiec
2023-11-22 12:04:14 +01:00
committed by Botond Dénes
18 changed files with 359 additions and 26 deletions

View File

@@ -120,6 +120,10 @@ inet_address_vector_replica_set vnode_effective_replication_map::get_endpoints_f
return inet_address_vector_replica_set(endpoints->begin(), endpoints->end());
}
std::optional<tablet_routing_info> vnode_effective_replication_map::check_locality(const token& token) const {
return {};
}
bool vnode_effective_replication_map::has_pending_ranges(inet_address endpoint) const {
for (const auto& item : _pending_endpoints) {
const auto& nodes = item.second;

View File

@@ -24,6 +24,7 @@
#include "utils/maybe_yield.hh"
#include "utils/sequenced_set.hh"
#include "utils/simple_hashers.hh"
#include "tablets.hh"
// forward declaration since replica/database.hh includes this file
namespace replica {
@@ -215,6 +216,9 @@ public:
/// Returns a list of nodes to which a read request should be directed.
virtual inet_address_vector_replica_set get_endpoints_for_reading(const token& search_token) const = 0;
/// Checks whether a request for the given token reached a replica that owns it.
/// Returns std::nullopt when there is nothing to report (the request was routed
/// correctly, or the implementation has no tablet routing info to offer).
/// Otherwise returns the routing info (replicas and token range) of the tablet
/// owning the token.
virtual std::optional<tablet_routing_info> check_locality(const token& token) const = 0;
/// Returns true if there are any pending ranges for this endpoint.
/// This operation is expensive, for vnode_erm it iterates
/// over all pending ranges which is O(number of tokens).
@@ -290,6 +294,7 @@ public: // effective_replication_map
inet_address_vector_replica_set get_natural_endpoints_without_node_being_replaced(const token& search_token) const override;
inet_address_vector_topology_change get_pending_endpoints(const token& search_token) const override;
inet_address_vector_replica_set get_endpoints_for_reading(const token& search_token) const override;
std::optional<tablet_routing_info> check_locality(const token& token) const override;
bool has_pending_ranges(inet_address endpoint) const override;
std::unique_ptr<token_range_splitter> make_splitter() const override;
const dht::sharder& get_sharder(const schema& s) const override;

View File

@@ -429,6 +429,42 @@ public:
return result;
}
std::optional<tablet_routing_info> check_locality(const token& search_token) const override {
auto&& tablets = get_tablet_map();
auto tid = tablets.get_tablet_id(search_token);
auto&& info = tablets.get_tablet_info(tid);
auto host = get_token_metadata().get_my_id();
auto shard = this_shard_id();
auto make_tablet_routing_info = [&] {
dht::token first_token;
if (tid == tablets.first_tablet()) {
first_token = dht::minimum_token();
} else {
first_token = tablets.get_last_token(tablet_id(size_t(tid) - 1));
}
auto token_range = std::make_pair(first_token, tablets.get_last_token(tid));
return tablet_routing_info{info.replicas, token_range};
};
for (auto&& r : info.replicas) {
if (r.host == host) {
if (r.shard == shard) {
return std::nullopt; // routed correctly
} else {
return make_tablet_routing_info();
}
}
}
auto tinfo = tablets.get_tablet_transition_info(tid);
if (tinfo && tinfo->pending_replica.host == host && tinfo->pending_replica.shard == shard) {
return std::nullopt; // routed correctly
}
return make_tablet_routing_info();
}
virtual bool has_pending_ranges(inet_address endpoint) const override {
const auto host_id = _tmptr->get_host_id_if_known(endpoint);
if (!host_id.has_value()) {

View File

@@ -114,6 +114,16 @@ tablet_replica_set replace_replica(const tablet_replica_set& rs, tablet_replica
return result;
}
/// Returns true if at least one replica in `rs` has a `host` field equal to
/// the given `host`, false otherwise (including when `rs` is empty).
inline
bool contains(const tablet_replica_set& rs, host_id host) {
    bool found = false;
    for (const auto& candidate : rs) {
        if (candidate.host == host) {
            found = true;
            break;
        }
    }
    return found;
}
/// Stores information about a single tablet.
struct tablet_info {
tablet_replica_set replicas;
@@ -339,6 +349,11 @@ public:
friend std::ostream& operator<<(std::ostream&, const tablet_metadata&);
};
/// Routing description of a single tablet, returned by check_locality()
/// when a request did not reach a replica of the tablet owning the token.
struct tablet_routing_info {
    /// Replica set of the tablet.
    tablet_replica_set tablet_replicas;
    /// Token bounds of the tablet. As filled in by the tablet-based
    /// check_locality(): `first` is the minimum token for the first tablet,
    /// or the last token of the preceding tablet otherwise; `second` is the
    /// last token of the tablet itself.
    std::pair<dht::token, dht::token> token_range;
};
}
template <>