Merge 'service: storage_proxy: make hint write handlers cancellable' from Kamil Braun
The `view_update_write_response_handler` class, which is a subclass of `abstract_write_response_handler`, was created for a single purpose: to make it possible to cancel a handler for a view update write, which means we stop waiting for a response to the write, timing out the handler immediately. This was done to solve issue with node shutdown hanging because it was waiting for a view update to finish; view updates were configured with 5 minute timeout. See #3966, #4028. Now we're having a similar problem with hint updates causing shutdown to hang in tests (#8079). `view_update_write_response_handler` implements cancelling by adding itself to an intrusive list which we then iterate over to timeout each handler when we shutdown or when gossiper notifies `storage_proxy` that a node is down. To make it possible to reuse this algorithm for other handlers, move the functionality into `abstract_write_response_handler`. We inherit from `bi::list_base_hook` so it introduces small memory overhead to each write handler (2 pointers) which was only present for view update handlers before. But those handlers are already quite large, the overhead is small compared to their size. Use this new functionality to also cancel hint write handlers when we shutdown. This fixes #8079. Closes #14047 * github.com:scylladb/scylladb: test: reproducer for hints manager shutdown hang test: pylib: ScyllaCluster: generalize config type for `server_add` test: pylib: scylla_cluster: add explicit timeout for graceful server stop service: storage_proxy: make hint write handlers cancellable service: storage_proxy: rename `view_update_handlers_list` service: storage_proxy: make it possible to cancel all write handler types
This commit is contained in:
@@ -44,7 +44,7 @@ static logging::logger manager_logger("hints_manager");
|
|||||||
const std::string manager::FILENAME_PREFIX("HintsLog" + commitlog::descriptor::SEPARATOR);
|
const std::string manager::FILENAME_PREFIX("HintsLog" + commitlog::descriptor::SEPARATOR);
|
||||||
|
|
||||||
const std::chrono::seconds manager::hint_file_write_timeout = std::chrono::seconds(2);
|
const std::chrono::seconds manager::hint_file_write_timeout = std::chrono::seconds(2);
|
||||||
const std::chrono::seconds manager::hints_flush_period = std::chrono::seconds(10);
|
std::chrono::seconds manager::hints_flush_period = std::chrono::seconds(10);
|
||||||
|
|
||||||
manager::manager(sstring hints_directory, host_filter filter, int64_t max_hint_window_ms, resource_manager& res_manager, distributed<replica::database>& db)
|
manager::manager(sstring hints_directory, host_filter filter, int64_t max_hint_window_ms, resource_manager& res_manager, distributed<replica::database>& db)
|
||||||
: _hints_dir(fs::path(hints_directory) / format("{:d}", this_shard_id()))
|
: _hints_dir(fs::path(hints_directory) / format("{:d}", this_shard_id()))
|
||||||
@@ -52,7 +52,11 @@ manager::manager(sstring hints_directory, host_filter filter, int64_t max_hint_w
|
|||||||
, _max_hint_window_us(max_hint_window_ms * 1000)
|
, _max_hint_window_us(max_hint_window_ms * 1000)
|
||||||
, _local_db(db.local())
|
, _local_db(db.local())
|
||||||
, _resource_manager(res_manager)
|
, _resource_manager(res_manager)
|
||||||
{}
|
{
|
||||||
|
if (utils::get_local_injector().enter("decrease_hints_flush_period")) {
|
||||||
|
hints_flush_period = std::chrono::seconds{1};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
manager::~manager() {
|
manager::~manager() {
|
||||||
assert(_ep_managers.empty());
|
assert(_ep_managers.empty());
|
||||||
|
|||||||
@@ -501,7 +501,8 @@ private:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
static const std::string FILENAME_PREFIX;
|
static const std::string FILENAME_PREFIX;
|
||||||
static const std::chrono::seconds hints_flush_period;
|
// Non-const - can be modified with an error injection.
|
||||||
|
static std::chrono::seconds hints_flush_period;
|
||||||
static const std::chrono::seconds hint_file_write_timeout;
|
static const std::chrono::seconds hint_file_write_timeout;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
@@ -1605,7 +1605,8 @@ static future<> apply_to_remote_endpoints(service::storage_proxy& proxy, gms::in
|
|||||||
std::move(pending_endpoints),
|
std::move(pending_endpoints),
|
||||||
db::write_type::VIEW,
|
db::write_type::VIEW,
|
||||||
std::move(tr_state),
|
std::move(tr_state),
|
||||||
allow_hints);
|
allow_hints,
|
||||||
|
service::is_cancellable::yes);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool should_update_synchronously(const schema& s) {
|
static bool should_update_synchronously(const schema& s) {
|
||||||
|
|||||||
@@ -1165,7 +1165,7 @@ public:
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
class abstract_write_response_handler : public seastar::enable_shared_from_this<abstract_write_response_handler> {
|
class abstract_write_response_handler : public seastar::enable_shared_from_this<abstract_write_response_handler>, public bi::list_base_hook<bi::link_mode<bi::auto_unlink>> {
|
||||||
protected:
|
protected:
|
||||||
using error = storage_proxy::error;
|
using error = storage_proxy::error;
|
||||||
storage_proxy::response_id_type _id;
|
storage_proxy::response_id_type _id;
|
||||||
@@ -1211,7 +1211,7 @@ public:
|
|||||||
db::consistency_level cl, db::write_type type,
|
db::consistency_level cl, db::write_type type,
|
||||||
std::unique_ptr<mutation_holder> mh, inet_address_vector_replica_set targets, tracing::trace_state_ptr trace_state,
|
std::unique_ptr<mutation_holder> mh, inet_address_vector_replica_set targets, tracing::trace_state_ptr trace_state,
|
||||||
storage_proxy::write_stats& stats, service_permit permit, db::per_partition_rate_limit::info rate_limit_info, size_t pending_endpoints = 0,
|
storage_proxy::write_stats& stats, service_permit permit, db::per_partition_rate_limit::info rate_limit_info, size_t pending_endpoints = 0,
|
||||||
inet_address_vector_topology_change dead_endpoints = {})
|
inet_address_vector_topology_change dead_endpoints = {}, is_cancellable cancellable = is_cancellable::no)
|
||||||
: _id(p->get_next_response_id()), _proxy(std::move(p))
|
: _id(p->get_next_response_id()), _proxy(std::move(p))
|
||||||
, _effective_replication_map_ptr(std::move(erm))
|
, _effective_replication_map_ptr(std::move(erm))
|
||||||
, _trace_state(trace_state), _cl(cl), _type(type), _mutation_holder(std::move(mh)), _targets(std::move(targets)),
|
, _trace_state(trace_state), _cl(cl), _type(type), _mutation_holder(std::move(mh)), _targets(std::move(targets)),
|
||||||
@@ -1222,6 +1222,10 @@ public:
|
|||||||
// or we may fail the consistency level guarantees (see #833, #8058)
|
// or we may fail the consistency level guarantees (see #833, #8058)
|
||||||
_total_block_for = db::block_for(*_effective_replication_map_ptr, _cl) + pending_endpoints;
|
_total_block_for = db::block_for(*_effective_replication_map_ptr, _cl) + pending_endpoints;
|
||||||
++_stats.writes;
|
++_stats.writes;
|
||||||
|
|
||||||
|
if (cancellable) {
|
||||||
|
register_cancellable();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
virtual ~abstract_write_response_handler() {
|
virtual ~abstract_write_response_handler() {
|
||||||
--_stats.writes;
|
--_stats.writes;
|
||||||
@@ -1249,6 +1253,8 @@ public:
|
|||||||
_cdc_operation_result_tracker->on_mutation_failed();
|
_cdc_operation_result_tracker->on_mutation_failed();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
update_cancellable_live_iterators();
|
||||||
}
|
}
|
||||||
bool is_counter() const {
|
bool is_counter() const {
|
||||||
return _type == db::write_type::COUNTER;
|
return _type == db::write_type::COUNTER;
|
||||||
@@ -1456,6 +1462,11 @@ public:
|
|||||||
return _stats;
|
return _stats;
|
||||||
}
|
}
|
||||||
friend storage_proxy;
|
friend storage_proxy;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void register_cancellable();
|
||||||
|
// Called on destruction
|
||||||
|
void update_cancellable_live_iterators();
|
||||||
};
|
};
|
||||||
|
|
||||||
class datacenter_write_response_handler : public abstract_write_response_handler {
|
class datacenter_write_response_handler : public abstract_write_response_handler {
|
||||||
@@ -1488,37 +1499,25 @@ public:
|
|||||||
db::consistency_level cl, db::write_type type,
|
db::consistency_level cl, db::write_type type,
|
||||||
std::unique_ptr<mutation_holder> mh, inet_address_vector_replica_set targets,
|
std::unique_ptr<mutation_holder> mh, inet_address_vector_replica_set targets,
|
||||||
const inet_address_vector_topology_change& pending_endpoints, inet_address_vector_topology_change dead_endpoints, tracing::trace_state_ptr tr_state,
|
const inet_address_vector_topology_change& pending_endpoints, inet_address_vector_topology_change dead_endpoints, tracing::trace_state_ptr tr_state,
|
||||||
storage_proxy::write_stats& stats, service_permit permit, db::per_partition_rate_limit::info rate_limit_info) :
|
storage_proxy::write_stats& stats, service_permit permit, db::per_partition_rate_limit::info rate_limit_info, is_cancellable cancellable) :
|
||||||
abstract_write_response_handler(std::move(p), std::move(ermp), cl, type, std::move(mh),
|
abstract_write_response_handler(std::move(p), std::move(ermp), cl, type, std::move(mh),
|
||||||
std::move(targets), std::move(tr_state), stats, std::move(permit), rate_limit_info, pending_endpoints.size(), std::move(dead_endpoints)) {
|
std::move(targets), std::move(tr_state), stats, std::move(permit), rate_limit_info, pending_endpoints.size(), std::move(dead_endpoints), cancellable) {
|
||||||
_total_endpoints = _targets.size();
|
_total_endpoints = _targets.size();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class view_update_write_response_handler : public write_response_handler, public bi::list_base_hook<bi::link_mode<bi::auto_unlink>> {
|
// This list contains `abstract_write_response_handler`s which were constructed as `cancellable`.
|
||||||
public:
|
// When a `cancellable` handler is constructed, it adds itself to the list (see `register_cancellable`).
|
||||||
view_update_write_response_handler(shared_ptr<storage_proxy> p,
|
// We use the list to cancel handlers - as if the write timed out - on certain events, such as when
|
||||||
locator::effective_replication_map_ptr ermp,
|
// we shutdown a node so that shutdown is not blocked.
|
||||||
db::consistency_level cl,
|
// We don't add normal data path writes to the list, only background work such as hints and view updates.
|
||||||
std::unique_ptr<mutation_holder> mh, inet_address_vector_replica_set targets,
|
class storage_proxy::cancellable_write_handlers_list : public bi::list<abstract_write_response_handler, bi::base_hook<abstract_write_response_handler>, bi::constant_time_size<false>> {
|
||||||
const inet_address_vector_topology_change& pending_endpoints, inet_address_vector_topology_change dead_endpoints, tracing::trace_state_ptr tr_state,
|
|
||||||
storage_proxy::write_stats& stats, service_permit permit, db::per_partition_rate_limit::info rate_limit_info):
|
|
||||||
write_response_handler(p, std::move(ermp), cl, db::write_type::VIEW, std::move(mh),
|
|
||||||
std::move(targets), pending_endpoints, std::move(dead_endpoints), std::move(tr_state), stats, std::move(permit), rate_limit_info) {
|
|
||||||
register_in_intrusive_list(*p);
|
|
||||||
}
|
|
||||||
~view_update_write_response_handler();
|
|
||||||
private:
|
|
||||||
void register_in_intrusive_list(storage_proxy& p);
|
|
||||||
};
|
|
||||||
|
|
||||||
class storage_proxy::view_update_handlers_list : public bi::list<view_update_write_response_handler, bi::base_hook<view_update_write_response_handler>, bi::constant_time_size<false>> {
|
|
||||||
// _live_iterators holds all iterators that point into the bi:list in the base class of this object.
|
// _live_iterators holds all iterators that point into the bi:list in the base class of this object.
|
||||||
// If we remove a view_update_write_response_handler from the list, and an iterator happens to point
|
// If we remove a abstract_write_response_handler from the list, and an iterator happens to point
|
||||||
// into it, we advance the iterator so it doesn't point at a removed object. See #4912.
|
// into it, we advance the iterator so it doesn't point at a removed object. See #4912.
|
||||||
std::vector<iterator*> _live_iterators;
|
std::vector<iterator*> _live_iterators;
|
||||||
public:
|
public:
|
||||||
view_update_handlers_list() {
|
cancellable_write_handlers_list() {
|
||||||
_live_iterators.reserve(10); // We only expect 1.
|
_live_iterators.reserve(10); // We only expect 1.
|
||||||
}
|
}
|
||||||
void register_live_iterator(iterator* itp) noexcept { // We don't tolerate failure, so abort instead
|
void register_live_iterator(iterator* itp) noexcept { // We don't tolerate failure, so abort instead
|
||||||
@@ -1527,36 +1526,38 @@ public:
|
|||||||
void unregister_live_iterator(iterator* itp) {
|
void unregister_live_iterator(iterator* itp) {
|
||||||
_live_iterators.erase(boost::remove(_live_iterators, itp), _live_iterators.end());
|
_live_iterators.erase(boost::remove(_live_iterators, itp), _live_iterators.end());
|
||||||
}
|
}
|
||||||
void update_live_iterators(view_update_write_response_handler* vuwrh) {
|
void update_live_iterators(abstract_write_response_handler* handler) {
|
||||||
// vuwrh is being removed from the b::list, so if any live iterator points at it,
|
// handler is being removed from the b::list, so if any live iterator points at it,
|
||||||
// move it to the next object (this requires that the list is traversed in the forward
|
// move it to the next object (this requires that the list is traversed in the forward
|
||||||
// direction).
|
// direction).
|
||||||
for (auto& itp : _live_iterators) {
|
for (auto& itp : _live_iterators) {
|
||||||
if (&**itp == vuwrh) {
|
if (&**itp == handler) {
|
||||||
++*itp;
|
++*itp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
class iterator_guard {
|
class iterator_guard {
|
||||||
view_update_handlers_list& _vuhl;
|
cancellable_write_handlers_list& _handlers;
|
||||||
iterator* _itp;
|
iterator* _itp;
|
||||||
public:
|
public:
|
||||||
iterator_guard(view_update_handlers_list& vuhl, iterator& it) : _vuhl(vuhl), _itp(&it) {
|
iterator_guard(cancellable_write_handlers_list& handlers, iterator& it) : _handlers(handlers), _itp(&it) {
|
||||||
_vuhl.register_live_iterator(_itp);
|
_handlers.register_live_iterator(_itp);
|
||||||
}
|
}
|
||||||
~iterator_guard() {
|
~iterator_guard() {
|
||||||
_vuhl.unregister_live_iterator(_itp);
|
_handlers.unregister_live_iterator(_itp);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
void view_update_write_response_handler::register_in_intrusive_list(storage_proxy& p) {
|
void abstract_write_response_handler::register_cancellable() {
|
||||||
p.get_view_update_handlers_list().push_back(*this);
|
_proxy->_cancellable_write_handlers_list->push_back(*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
view_update_write_response_handler::~view_update_write_response_handler() {
|
void abstract_write_response_handler::update_cancellable_live_iterators() {
|
||||||
_proxy->_view_update_handlers_list->update_live_iterators(this);
|
if (is_linked()) {
|
||||||
|
_proxy->_cancellable_write_handlers_list->update_live_iterators(this);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class datacenter_sync_write_response_handler : public abstract_write_response_handler {
|
class datacenter_sync_write_response_handler : public abstract_write_response_handler {
|
||||||
@@ -2304,7 +2305,7 @@ future<result<>> storage_proxy::response_wait(storage_proxy::response_id_type id
|
|||||||
result<storage_proxy::response_id_type> storage_proxy::create_write_response_handler(locator::effective_replication_map_ptr ermp,
|
result<storage_proxy::response_id_type> storage_proxy::create_write_response_handler(locator::effective_replication_map_ptr ermp,
|
||||||
db::consistency_level cl, db::write_type type, std::unique_ptr<mutation_holder> m,
|
db::consistency_level cl, db::write_type type, std::unique_ptr<mutation_holder> m,
|
||||||
inet_address_vector_replica_set targets, const inet_address_vector_topology_change& pending_endpoints, inet_address_vector_topology_change dead_endpoints, tracing::trace_state_ptr tr_state,
|
inet_address_vector_replica_set targets, const inet_address_vector_topology_change& pending_endpoints, inet_address_vector_topology_change dead_endpoints, tracing::trace_state_ptr tr_state,
|
||||||
storage_proxy::write_stats& stats, service_permit permit, db::per_partition_rate_limit::info rate_limit_info)
|
storage_proxy::write_stats& stats, service_permit permit, db::per_partition_rate_limit::info rate_limit_info, is_cancellable cancellable)
|
||||||
{
|
{
|
||||||
shared_ptr<abstract_write_response_handler> h;
|
shared_ptr<abstract_write_response_handler> h;
|
||||||
auto& rs = ermp->get_replication_strategy();
|
auto& rs = ermp->get_replication_strategy();
|
||||||
@@ -2313,10 +2314,8 @@ result<storage_proxy::response_id_type> storage_proxy::create_write_response_han
|
|||||||
h = ::make_shared<datacenter_write_response_handler>(shared_from_this(), std::move(ermp), cl, type, std::move(m), std::move(targets), pending_endpoints, std::move(dead_endpoints), std::move(tr_state), stats, std::move(permit), rate_limit_info);
|
h = ::make_shared<datacenter_write_response_handler>(shared_from_this(), std::move(ermp), cl, type, std::move(m), std::move(targets), pending_endpoints, std::move(dead_endpoints), std::move(tr_state), stats, std::move(permit), rate_limit_info);
|
||||||
} else if (cl == db::consistency_level::EACH_QUORUM && rs.get_type() == locator::replication_strategy_type::network_topology){
|
} else if (cl == db::consistency_level::EACH_QUORUM && rs.get_type() == locator::replication_strategy_type::network_topology){
|
||||||
h = ::make_shared<datacenter_sync_write_response_handler>(shared_from_this(), std::move(ermp), cl, type, std::move(m), std::move(targets), pending_endpoints, std::move(dead_endpoints), std::move(tr_state), stats, std::move(permit), rate_limit_info);
|
h = ::make_shared<datacenter_sync_write_response_handler>(shared_from_this(), std::move(ermp), cl, type, std::move(m), std::move(targets), pending_endpoints, std::move(dead_endpoints), std::move(tr_state), stats, std::move(permit), rate_limit_info);
|
||||||
} else if (type == db::write_type::VIEW) {
|
|
||||||
h = ::make_shared<view_update_write_response_handler>(shared_from_this(), std::move(ermp), cl, std::move(m), std::move(targets), pending_endpoints, std::move(dead_endpoints), std::move(tr_state), stats, std::move(permit), rate_limit_info);
|
|
||||||
} else {
|
} else {
|
||||||
h = ::make_shared<write_response_handler>(shared_from_this(), std::move(ermp), cl, type, std::move(m), std::move(targets), pending_endpoints, std::move(dead_endpoints), std::move(tr_state), stats, std::move(permit), rate_limit_info);
|
h = ::make_shared<write_response_handler>(shared_from_this(), std::move(ermp), cl, type, std::move(m), std::move(targets), pending_endpoints, std::move(dead_endpoints), std::move(tr_state), stats, std::move(permit), rate_limit_info, cancellable);
|
||||||
}
|
}
|
||||||
return bo::success(register_response_handler(std::move(h)));
|
return bo::success(register_response_handler(std::move(h)));
|
||||||
}
|
}
|
||||||
@@ -2724,7 +2723,7 @@ storage_proxy::storage_proxy(distributed<replica::database>& db, gms::gossiper&
|
|||||||
, _background_write_throttle_threahsold(cfg.available_memory / 10)
|
, _background_write_throttle_threahsold(cfg.available_memory / 10)
|
||||||
, _mutate_stage{"storage_proxy_mutate", &storage_proxy::do_mutate}
|
, _mutate_stage{"storage_proxy_mutate", &storage_proxy::do_mutate}
|
||||||
, _max_view_update_backlog(max_view_update_backlog)
|
, _max_view_update_backlog(max_view_update_backlog)
|
||||||
, _view_update_handlers_list(std::make_unique<view_update_handlers_list>()) {
|
, _cancellable_write_handlers_list(std::make_unique<cancellable_write_handlers_list>()) {
|
||||||
namespace sm = seastar::metrics;
|
namespace sm = seastar::metrics;
|
||||||
_metrics.add_group(storage_proxy_stats::COORDINATOR_STATS_CATEGORY, {
|
_metrics.add_group(storage_proxy_stats::COORDINATOR_STATS_CATEGORY, {
|
||||||
sm::make_queue_length("current_throttled_writes", [this] { return _throttled_writes.size(); },
|
sm::make_queue_length("current_throttled_writes", [this] { return _throttled_writes.size(); },
|
||||||
@@ -2841,7 +2840,7 @@ storage_proxy::mutate_counter_on_leader_and_replicate(const schema_ptr& s, froze
|
|||||||
|
|
||||||
result<storage_proxy::response_id_type>
|
result<storage_proxy::response_id_type>
|
||||||
storage_proxy::create_write_response_handler_helper(schema_ptr s, const dht::token& token, std::unique_ptr<mutation_holder> mh,
|
storage_proxy::create_write_response_handler_helper(schema_ptr s, const dht::token& token, std::unique_ptr<mutation_holder> mh,
|
||||||
db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state, service_permit permit, db::allow_per_partition_rate_limit allow_limit) {
|
db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state, service_permit permit, db::allow_per_partition_rate_limit allow_limit, is_cancellable cancellable) {
|
||||||
replica::table& table = _db.local().find_column_family(s->id());
|
replica::table& table = _db.local().find_column_family(s->id());
|
||||||
auto erm = table.get_effective_replication_map();
|
auto erm = table.get_effective_replication_map();
|
||||||
inet_address_vector_replica_set natural_endpoints = erm->get_natural_endpoints_without_node_being_replaced(token);
|
inet_address_vector_replica_set natural_endpoints = erm->get_natural_endpoints_without_node_being_replaced(token);
|
||||||
@@ -2905,7 +2904,7 @@ storage_proxy::create_write_response_handler_helper(schema_ptr s, const dht::tok
|
|||||||
db::assure_sufficient_live_nodes(cl, *erm, live_endpoints, pending_endpoints);
|
db::assure_sufficient_live_nodes(cl, *erm, live_endpoints, pending_endpoints);
|
||||||
|
|
||||||
return create_write_response_handler(std::move(erm), cl, type, std::move(mh), std::move(live_endpoints), pending_endpoints,
|
return create_write_response_handler(std::move(erm), cl, type, std::move(mh), std::move(live_endpoints), pending_endpoints,
|
||||||
std::move(dead_endpoints), std::move(tr_state), get_stats(), std::move(permit), rate_limit_info);
|
std::move(dead_endpoints), std::move(tr_state), get_stats(), std::move(permit), rate_limit_info, cancellable);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -2918,13 +2917,13 @@ storage_proxy::create_write_response_handler_helper(schema_ptr s, const dht::tok
|
|||||||
result<storage_proxy::response_id_type>
|
result<storage_proxy::response_id_type>
|
||||||
storage_proxy::create_write_response_handler(const mutation& m, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state, service_permit permit, db::allow_per_partition_rate_limit allow_limit) {
|
storage_proxy::create_write_response_handler(const mutation& m, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state, service_permit permit, db::allow_per_partition_rate_limit allow_limit) {
|
||||||
return create_write_response_handler_helper(m.schema(), m.token(), std::make_unique<shared_mutation>(m), cl, type, tr_state,
|
return create_write_response_handler_helper(m.schema(), m.token(), std::make_unique<shared_mutation>(m), cl, type, tr_state,
|
||||||
std::move(permit), allow_limit);
|
std::move(permit), allow_limit, is_cancellable::no);
|
||||||
}
|
}
|
||||||
|
|
||||||
result<storage_proxy::response_id_type>
|
result<storage_proxy::response_id_type>
|
||||||
storage_proxy::create_write_response_handler(const hint_wrapper& h, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state, service_permit permit, db::allow_per_partition_rate_limit allow_limit) {
|
storage_proxy::create_write_response_handler(const hint_wrapper& h, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state, service_permit permit, db::allow_per_partition_rate_limit allow_limit) {
|
||||||
return create_write_response_handler_helper(h.mut.schema(), h.mut.token(), std::make_unique<hint_mutation>(h.mut), cl, type, tr_state,
|
return create_write_response_handler_helper(h.mut.schema(), h.mut.token(), std::make_unique<hint_mutation>(h.mut), cl, type, tr_state,
|
||||||
std::move(permit), allow_limit);
|
std::move(permit), allow_limit, is_cancellable::yes);
|
||||||
}
|
}
|
||||||
|
|
||||||
result<storage_proxy::response_id_type>
|
result<storage_proxy::response_id_type>
|
||||||
@@ -2939,7 +2938,7 @@ storage_proxy::create_write_response_handler(const read_repair_mutation& mut, db
|
|||||||
tracing::trace(tr_state, "Creating write handler for read repair token: {} endpoint: {}", mh->token(), endpoints);
|
tracing::trace(tr_state, "Creating write handler for read repair token: {} endpoint: {}", mh->token(), endpoints);
|
||||||
|
|
||||||
// No rate limiting for read repair
|
// No rate limiting for read repair
|
||||||
return create_write_response_handler(std::move(mut.ermp), cl, type, std::move(mh), std::move(endpoints), inet_address_vector_topology_change(), inet_address_vector_topology_change(), std::move(tr_state), get_stats(), std::move(permit), std::monostate());
|
return create_write_response_handler(std::move(mut.ermp), cl, type, std::move(mh), std::move(endpoints), inet_address_vector_topology_change(), inet_address_vector_topology_change(), std::move(tr_state), get_stats(), std::move(permit), std::monostate(), is_cancellable::no);
|
||||||
}
|
}
|
||||||
|
|
||||||
result<storage_proxy::response_id_type>
|
result<storage_proxy::response_id_type>
|
||||||
@@ -2948,7 +2947,7 @@ storage_proxy::create_write_response_handler(const std::tuple<lw_shared_ptr<paxo
|
|||||||
auto& [commit, s, h, t] = meta;
|
auto& [commit, s, h, t] = meta;
|
||||||
|
|
||||||
return create_write_response_handler_helper(s, t, std::make_unique<cas_mutation>(std::move(commit), s, std::move(h)), cl,
|
return create_write_response_handler_helper(s, t, std::make_unique<cas_mutation>(std::move(commit), s, std::move(h)), cl,
|
||||||
db::write_type::CAS, tr_state, std::move(permit), allow_limit);
|
db::write_type::CAS, tr_state, std::move(permit), allow_limit, is_cancellable::no);
|
||||||
}
|
}
|
||||||
|
|
||||||
result<storage_proxy::response_id_type>
|
result<storage_proxy::response_id_type>
|
||||||
@@ -2965,7 +2964,7 @@ storage_proxy::create_write_response_handler(const std::tuple<lw_shared_ptr<paxo
|
|||||||
|
|
||||||
// No rate limiting for paxos (yet)
|
// No rate limiting for paxos (yet)
|
||||||
return create_write_response_handler(std::move(ermp), cl, db::write_type::CAS, std::make_unique<cas_mutation>(std::move(commit), s, nullptr), std::move(endpoints),
|
return create_write_response_handler(std::move(ermp), cl, db::write_type::CAS, std::make_unique<cas_mutation>(std::move(commit), s, nullptr), std::move(endpoints),
|
||||||
inet_address_vector_topology_change(), inet_address_vector_topology_change(), std::move(tr_state), get_stats(), std::move(permit), std::monostate());
|
inet_address_vector_topology_change(), inet_address_vector_topology_change(), std::move(tr_state), get_stats(), std::move(permit), std::monostate(), is_cancellable::no);
|
||||||
}
|
}
|
||||||
|
|
||||||
void storage_proxy::register_cdc_operation_result_tracker(const storage_proxy::unique_response_handler_vector& ids, lw_shared_ptr<cdc::operation_result_tracker> tracker) {
|
void storage_proxy::register_cdc_operation_result_tracker(const storage_proxy::unique_response_handler_vector& ids, lw_shared_ptr<cdc::operation_result_tracker> tracker) {
|
||||||
@@ -3493,7 +3492,7 @@ storage_proxy::mutate_atomically_result(std::vector<mutation> mutations, db::con
|
|||||||
return _p.mutate_prepare<>(std::array<mutation, 1>{std::move(m)}, cl, db::write_type::BATCH_LOG, _permit, [this] (const mutation& m, db::consistency_level cl, db::write_type type, service_permit permit) {
|
return _p.mutate_prepare<>(std::array<mutation, 1>{std::move(m)}, cl, db::write_type::BATCH_LOG, _permit, [this] (const mutation& m, db::consistency_level cl, db::write_type type, service_permit permit) {
|
||||||
auto& table = _p._db.local().find_column_family(m.schema()->id());
|
auto& table = _p._db.local().find_column_family(m.schema()->id());
|
||||||
auto ermp = table.get_effective_replication_map();
|
auto ermp = table.get_effective_replication_map();
|
||||||
return _p.create_write_response_handler(std::move(ermp), cl, type, std::make_unique<shared_mutation>(m), _batchlog_endpoints, {}, {}, _trace_state, _stats, std::move(permit), std::monostate());
|
return _p.create_write_response_handler(std::move(ermp), cl, type, std::make_unique<shared_mutation>(m), _batchlog_endpoints, {}, {}, _trace_state, _stats, std::move(permit), std::monostate(), is_cancellable::no);
|
||||||
}).then(utils::result_wrap([this, cl] (unique_response_handler_vector ids) {
|
}).then(utils::result_wrap([this, cl] (unique_response_handler_vector ids) {
|
||||||
_p.register_cdc_operation_result_tracker(ids, _cdc_tracker);
|
_p.register_cdc_operation_result_tracker(ids, _cdc_tracker);
|
||||||
return _p.mutate_begin(std::move(ids), cl, _trace_state, _timeout);
|
return _p.mutate_begin(std::move(ids), cl, _trace_state, _timeout);
|
||||||
@@ -3598,7 +3597,8 @@ future<> storage_proxy::send_to_endpoint(
|
|||||||
db::write_type type,
|
db::write_type type,
|
||||||
tracing::trace_state_ptr tr_state,
|
tracing::trace_state_ptr tr_state,
|
||||||
write_stats& stats,
|
write_stats& stats,
|
||||||
allow_hints allow_hints) {
|
allow_hints allow_hints,
|
||||||
|
is_cancellable cancellable) {
|
||||||
utils::latency_counter lc;
|
utils::latency_counter lc;
|
||||||
lc.start();
|
lc.start();
|
||||||
|
|
||||||
@@ -3610,7 +3610,7 @@ future<> storage_proxy::send_to_endpoint(
|
|||||||
timeout = clock_type::now() + 5min;
|
timeout = clock_type::now() + 5min;
|
||||||
}
|
}
|
||||||
return mutate_prepare(std::array{std::move(m)}, cl, type, /* does view building should hold a real permit */ empty_service_permit(),
|
return mutate_prepare(std::array{std::move(m)}, cl, type, /* does view building should hold a real permit */ empty_service_permit(),
|
||||||
[this, tr_state, target = std::array{target}, pending_endpoints = std::move(pending_endpoints), &stats] (
|
[this, tr_state, target = std::array{target}, pending_endpoints = std::move(pending_endpoints), &stats, cancellable] (
|
||||||
std::unique_ptr<mutation_holder>& m,
|
std::unique_ptr<mutation_holder>& m,
|
||||||
db::consistency_level cl,
|
db::consistency_level cl,
|
||||||
db::write_type type, service_permit permit) mutable {
|
db::write_type type, service_permit permit) mutable {
|
||||||
@@ -3637,7 +3637,8 @@ future<> storage_proxy::send_to_endpoint(
|
|||||||
tr_state,
|
tr_state,
|
||||||
stats,
|
stats,
|
||||||
std::move(permit),
|
std::move(permit),
|
||||||
std::monostate()); // TODO: Pass the correct enforcement type
|
std::monostate(), // TODO: Pass the correct enforcement type
|
||||||
|
cancellable);
|
||||||
}).then(utils::result_wrap([this, cl, tr_state = std::move(tr_state), timeout = std::move(timeout)] (unique_response_handler_vector ids) mutable {
|
}).then(utils::result_wrap([this, cl, tr_state = std::move(tr_state), timeout = std::move(timeout)] (unique_response_handler_vector ids) mutable {
|
||||||
return mutate_begin(std::move(ids), cl, std::move(tr_state), std::move(timeout));
|
return mutate_begin(std::move(ids), cl, std::move(tr_state), std::move(timeout));
|
||||||
})).then_wrapped([p = shared_from_this(), lc, &stats] (future<result<>> f) {
|
})).then_wrapped([p = shared_from_this(), lc, &stats] (future<result<>> f) {
|
||||||
@@ -3651,7 +3652,8 @@ future<> storage_proxy::send_to_endpoint(
|
|||||||
inet_address_vector_topology_change pending_endpoints,
|
inet_address_vector_topology_change pending_endpoints,
|
||||||
db::write_type type,
|
db::write_type type,
|
||||||
tracing::trace_state_ptr tr_state,
|
tracing::trace_state_ptr tr_state,
|
||||||
allow_hints allow_hints) {
|
allow_hints allow_hints,
|
||||||
|
is_cancellable cancellable) {
|
||||||
return send_to_endpoint(
|
return send_to_endpoint(
|
||||||
std::make_unique<shared_mutation>(std::move(fm_a_s)),
|
std::make_unique<shared_mutation>(std::move(fm_a_s)),
|
||||||
std::move(target),
|
std::move(target),
|
||||||
@@ -3659,7 +3661,8 @@ future<> storage_proxy::send_to_endpoint(
|
|||||||
type,
|
type,
|
||||||
std::move(tr_state),
|
std::move(tr_state),
|
||||||
get_stats(),
|
get_stats(),
|
||||||
allow_hints);
|
allow_hints,
|
||||||
|
cancellable);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> storage_proxy::send_to_endpoint(
|
future<> storage_proxy::send_to_endpoint(
|
||||||
@@ -3669,7 +3672,8 @@ future<> storage_proxy::send_to_endpoint(
|
|||||||
db::write_type type,
|
db::write_type type,
|
||||||
tracing::trace_state_ptr tr_state,
|
tracing::trace_state_ptr tr_state,
|
||||||
write_stats& stats,
|
write_stats& stats,
|
||||||
allow_hints allow_hints) {
|
allow_hints allow_hints,
|
||||||
|
is_cancellable cancellable) {
|
||||||
return send_to_endpoint(
|
return send_to_endpoint(
|
||||||
std::make_unique<shared_mutation>(std::move(fm_a_s)),
|
std::make_unique<shared_mutation>(std::move(fm_a_s)),
|
||||||
std::move(target),
|
std::move(target),
|
||||||
@@ -3677,7 +3681,8 @@ future<> storage_proxy::send_to_endpoint(
|
|||||||
type,
|
type,
|
||||||
std::move(tr_state),
|
std::move(tr_state),
|
||||||
stats,
|
stats,
|
||||||
allow_hints);
|
allow_hints,
|
||||||
|
cancellable);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> storage_proxy::send_hint_to_endpoint(frozen_mutation_and_schema fm_a_s, gms::inet_address target) {
|
future<> storage_proxy::send_hint_to_endpoint(frozen_mutation_and_schema fm_a_s, gms::inet_address target) {
|
||||||
@@ -3689,7 +3694,8 @@ future<> storage_proxy::send_hint_to_endpoint(frozen_mutation_and_schema fm_a_s,
|
|||||||
db::write_type::SIMPLE,
|
db::write_type::SIMPLE,
|
||||||
tracing::trace_state_ptr(),
|
tracing::trace_state_ptr(),
|
||||||
get_stats(),
|
get_stats(),
|
||||||
allow_hints::no);
|
allow_hints::no,
|
||||||
|
is_cancellable::yes);
|
||||||
}
|
}
|
||||||
|
|
||||||
return send_to_endpoint(
|
return send_to_endpoint(
|
||||||
@@ -3699,7 +3705,8 @@ future<> storage_proxy::send_hint_to_endpoint(frozen_mutation_and_schema fm_a_s,
|
|||||||
db::write_type::SIMPLE,
|
db::write_type::SIMPLE,
|
||||||
tracing::trace_state_ptr(),
|
tracing::trace_state_ptr(),
|
||||||
get_stats(),
|
get_stats(),
|
||||||
allow_hints::no);
|
allow_hints::no,
|
||||||
|
is_cancellable::yes);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> storage_proxy::send_hint_to_all_replicas(frozen_mutation_and_schema fm_a_s) {
|
future<> storage_proxy::send_hint_to_all_replicas(frozen_mutation_and_schema fm_a_s) {
|
||||||
@@ -6239,24 +6246,24 @@ void storage_proxy::on_leave_cluster(const gms::inet_address& endpoint) {
|
|||||||
|
|
||||||
void storage_proxy::on_up(const gms::inet_address& endpoint) {};
|
void storage_proxy::on_up(const gms::inet_address& endpoint) {};
|
||||||
|
|
||||||
void storage_proxy::retire_view_response_handlers(noncopyable_function<bool(const abstract_write_response_handler&)> filter_fun) {
|
void storage_proxy::cancel_write_handlers(noncopyable_function<bool(const abstract_write_response_handler&)> filter_fun) {
|
||||||
assert(thread::running_in_thread());
|
assert(thread::running_in_thread());
|
||||||
auto it = _view_update_handlers_list->begin();
|
auto it = _cancellable_write_handlers_list->begin();
|
||||||
while (it != _view_update_handlers_list->end()) {
|
while (it != _cancellable_write_handlers_list->end()) {
|
||||||
auto guard = it->shared_from_this();
|
auto guard = it->shared_from_this();
|
||||||
if (filter_fun(*it) && _response_handlers.contains(it->id())) {
|
if (filter_fun(*it) && _response_handlers.contains(it->id())) {
|
||||||
it->timeout_cb();
|
it->timeout_cb();
|
||||||
}
|
}
|
||||||
++it;
|
++it;
|
||||||
if (need_preempt()) {
|
if (need_preempt()) {
|
||||||
view_update_handlers_list::iterator_guard ig{*_view_update_handlers_list, it};
|
cancellable_write_handlers_list::iterator_guard ig{*_cancellable_write_handlers_list, it};
|
||||||
seastar::thread::yield();
|
seastar::thread::yield();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void storage_proxy::on_down(const gms::inet_address& endpoint) {
|
void storage_proxy::on_down(const gms::inet_address& endpoint) {
|
||||||
return retire_view_response_handlers([endpoint] (const abstract_write_response_handler& handler) {
|
return cancel_write_handlers([endpoint] (const abstract_write_response_handler& handler) {
|
||||||
const auto& targets = handler.get_targets();
|
const auto& targets = handler.get_targets();
|
||||||
return boost::find(targets, endpoint) != targets.end();
|
return boost::find(targets, endpoint) != targets.end();
|
||||||
});
|
});
|
||||||
@@ -6266,7 +6273,7 @@ future<> storage_proxy::drain_on_shutdown() {
|
|||||||
//NOTE: the thread is spawned here because there are delicate lifetime issues to consider
|
//NOTE: the thread is spawned here because there are delicate lifetime issues to consider
|
||||||
// and writing them down with plain futures is error-prone.
|
// and writing them down with plain futures is error-prone.
|
||||||
return async([this] {
|
return async([this] {
|
||||||
retire_view_response_handlers([] (const abstract_write_response_handler&) { return true; });
|
cancel_write_handlers([] (const abstract_write_response_handler&) { return true; });
|
||||||
_hints_resource_manager.stop().get();
|
_hints_resource_manager.stop().get();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -99,6 +99,8 @@ struct view_update_backlog_timestamped {
|
|||||||
struct allow_hints_tag {};
|
struct allow_hints_tag {};
|
||||||
using allow_hints = bool_class<allow_hints_tag>;
|
using allow_hints = bool_class<allow_hints_tag>;
|
||||||
|
|
||||||
|
using is_cancellable = bool_class<struct cancellable_tag>;
|
||||||
|
|
||||||
struct storage_proxy_coordinator_query_result {
|
struct storage_proxy_coordinator_query_result {
|
||||||
foreign_ptr<lw_shared_ptr<query::result>> query_result;
|
foreign_ptr<lw_shared_ptr<query::result>> query_result;
|
||||||
replicas_per_token_range last_replicas;
|
replicas_per_token_range last_replicas;
|
||||||
@@ -273,8 +275,8 @@ private:
|
|||||||
std::unordered_map<gms::inet_address, view_update_backlog_timestamped> _view_update_backlogs;
|
std::unordered_map<gms::inet_address, view_update_backlog_timestamped> _view_update_backlogs;
|
||||||
|
|
||||||
//NOTICE(sarna): This opaque pointer is here just to avoid moving write handler class definitions from .cc to .hh. It's slow path.
|
//NOTICE(sarna): This opaque pointer is here just to avoid moving write handler class definitions from .cc to .hh. It's slow path.
|
||||||
class view_update_handlers_list;
|
class cancellable_write_handlers_list;
|
||||||
std::unique_ptr<view_update_handlers_list> _view_update_handlers_list;
|
std::unique_ptr<cancellable_write_handlers_list> _cancellable_write_handlers_list;
|
||||||
|
|
||||||
/* This is a pointer to the shard-local part of the sharded cdc_service:
|
/* This is a pointer to the shard-local part of the sharded cdc_service:
|
||||||
* storage_proxy needs access to cdc_service to augument mutations.
|
* storage_proxy needs access to cdc_service to augument mutations.
|
||||||
@@ -307,9 +309,9 @@ private:
|
|||||||
::shared_ptr<abstract_write_response_handler>& get_write_response_handler(storage_proxy::response_id_type id);
|
::shared_ptr<abstract_write_response_handler>& get_write_response_handler(storage_proxy::response_id_type id);
|
||||||
result<response_id_type> create_write_response_handler_helper(schema_ptr s, const dht::token& token,
|
result<response_id_type> create_write_response_handler_helper(schema_ptr s, const dht::token& token,
|
||||||
std::unique_ptr<mutation_holder> mh, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state,
|
std::unique_ptr<mutation_holder> mh, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state,
|
||||||
service_permit permit, db::allow_per_partition_rate_limit allow_limit);
|
service_permit permit, db::allow_per_partition_rate_limit allow_limit, is_cancellable);
|
||||||
result<response_id_type> create_write_response_handler(locator::effective_replication_map_ptr ermp, db::consistency_level cl, db::write_type type, std::unique_ptr<mutation_holder> m, inet_address_vector_replica_set targets,
|
result<response_id_type> create_write_response_handler(locator::effective_replication_map_ptr ermp, db::consistency_level cl, db::write_type type, std::unique_ptr<mutation_holder> m, inet_address_vector_replica_set targets,
|
||||||
const inet_address_vector_topology_change& pending_endpoints, inet_address_vector_topology_change, tracing::trace_state_ptr tr_state, storage_proxy::write_stats& stats, service_permit permit, db::per_partition_rate_limit::info rate_limit_info);
|
const inet_address_vector_topology_change& pending_endpoints, inet_address_vector_topology_change, tracing::trace_state_ptr tr_state, storage_proxy::write_stats& stats, service_permit permit, db::per_partition_rate_limit::info rate_limit_info, is_cancellable);
|
||||||
result<response_id_type> create_write_response_handler(const mutation&, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state, service_permit permit, db::allow_per_partition_rate_limit allow_limit);
|
result<response_id_type> create_write_response_handler(const mutation&, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state, service_permit permit, db::allow_per_partition_rate_limit allow_limit);
|
||||||
result<response_id_type> create_write_response_handler(const hint_wrapper&, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state, service_permit permit, db::allow_per_partition_rate_limit allow_limit);
|
result<response_id_type> create_write_response_handler(const hint_wrapper&, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state, service_permit permit, db::allow_per_partition_rate_limit allow_limit);
|
||||||
result<response_id_type> create_write_response_handler(const read_repair_mutation&, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state, service_permit permit, db::allow_per_partition_rate_limit allow_limit);
|
result<response_id_type> create_write_response_handler(const read_repair_mutation&, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state, service_permit permit, db::allow_per_partition_rate_limit allow_limit);
|
||||||
@@ -416,7 +418,8 @@ private:
|
|||||||
db::write_type type,
|
db::write_type type,
|
||||||
tracing::trace_state_ptr tr_state,
|
tracing::trace_state_ptr tr_state,
|
||||||
write_stats& stats,
|
write_stats& stats,
|
||||||
allow_hints allow_hints = allow_hints::yes);
|
allow_hints,
|
||||||
|
is_cancellable);
|
||||||
|
|
||||||
db::view::update_backlog get_view_update_backlog() const;
|
db::view::update_backlog get_view_update_backlog() const;
|
||||||
|
|
||||||
@@ -427,7 +430,8 @@ private:
|
|||||||
template<typename Range>
|
template<typename Range>
|
||||||
future<> mutate_counters(Range&& mutations, db::consistency_level cl, tracing::trace_state_ptr tr_state, service_permit permit, clock_type::time_point timeout);
|
future<> mutate_counters(Range&& mutations, db::consistency_level cl, tracing::trace_state_ptr tr_state, service_permit permit, clock_type::time_point timeout);
|
||||||
|
|
||||||
void retire_view_response_handlers(noncopyable_function<bool(const abstract_write_response_handler&)> filter_fun);
|
// Retires (times out) write response handlers which were constructed as `cancellable` and pass the given filter.
|
||||||
|
void cancel_write_handlers(noncopyable_function<bool(const abstract_write_response_handler&)> filter_fun);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns whether for a range query doing a query against merged is likely
|
* Returns whether for a range query doing a query against merged is likely
|
||||||
@@ -470,10 +474,6 @@ public:
|
|||||||
return _cdc;
|
return _cdc;
|
||||||
}
|
}
|
||||||
|
|
||||||
view_update_handlers_list& get_view_update_handlers_list() {
|
|
||||||
return *_view_update_handlers_list;
|
|
||||||
}
|
|
||||||
|
|
||||||
response_id_type get_next_response_id() {
|
response_id_type get_next_response_id() {
|
||||||
auto next = _next_response_id++;
|
auto next = _next_response_id++;
|
||||||
if (next == 0) { // 0 is reserved for unique_response_handler
|
if (next == 0) { // 0 is reserved for unique_response_handler
|
||||||
@@ -568,9 +568,9 @@ public:
|
|||||||
// hinted handoff support, and just one target. See also
|
// hinted handoff support, and just one target. See also
|
||||||
// send_to_live_endpoints() - another take on the same original function.
|
// send_to_live_endpoints() - another take on the same original function.
|
||||||
future<> send_to_endpoint(frozen_mutation_and_schema fm_a_s, gms::inet_address target, inet_address_vector_topology_change pending_endpoints, db::write_type type,
|
future<> send_to_endpoint(frozen_mutation_and_schema fm_a_s, gms::inet_address target, inet_address_vector_topology_change pending_endpoints, db::write_type type,
|
||||||
tracing::trace_state_ptr tr_state, write_stats& stats, allow_hints allow_hints = allow_hints::yes);
|
tracing::trace_state_ptr tr_state, write_stats& stats, allow_hints, is_cancellable);
|
||||||
future<> send_to_endpoint(frozen_mutation_and_schema fm_a_s, gms::inet_address target, inet_address_vector_topology_change pending_endpoints, db::write_type type,
|
future<> send_to_endpoint(frozen_mutation_and_schema fm_a_s, gms::inet_address target, inet_address_vector_topology_change pending_endpoints, db::write_type type,
|
||||||
tracing::trace_state_ptr tr_state, allow_hints allow_hints = allow_hints::yes);
|
tracing::trace_state_ptr tr_state, allow_hints, is_cancellable);
|
||||||
|
|
||||||
// Send a mutation to a specific remote target as a hint.
|
// Send a mutation to a specific remote target as a hint.
|
||||||
// Unlike regular mutations during write operations, hints are sent on the streaming connection
|
// Unlike regular mutations during write operations, hints are sent on the streaming connection
|
||||||
|
|||||||
@@ -161,7 +161,7 @@ class ManagerClient():
|
|||||||
await self.server_sees_others(server_id, wait_others, interval = wait_interval)
|
await self.server_sees_others(server_id, wait_others, interval = wait_interval)
|
||||||
self._driver_update()
|
self._driver_update()
|
||||||
|
|
||||||
async def server_add(self, replace_cfg: Optional[ReplaceConfig] = None, cmdline: Optional[List[str]] = None, config: Optional[dict[str, str]] = None, start: bool = True) -> ServerInfo:
|
async def server_add(self, replace_cfg: Optional[ReplaceConfig] = None, cmdline: Optional[List[str]] = None, config: Optional[dict[str, Any]] = None, start: bool = True) -> ServerInfo:
|
||||||
"""Add a new server"""
|
"""Add a new server"""
|
||||||
try:
|
try:
|
||||||
data: dict[str, Any] = {'start': start}
|
data: dict[str, Any] = {'start': start}
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ import shutil
|
|||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
from typing import Optional, Dict, List, Set, Tuple, Callable, AsyncIterator, NamedTuple, Union
|
from typing import Any, Optional, Dict, List, Set, Tuple, Callable, AsyncIterator, NamedTuple, Union
|
||||||
import uuid
|
import uuid
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from io import BufferedWriter
|
from io import BufferedWriter
|
||||||
@@ -203,7 +203,7 @@ class ScyllaServer:
|
|||||||
logger: Union[logging.Logger, logging.LoggerAdapter],
|
logger: Union[logging.Logger, logging.LoggerAdapter],
|
||||||
cluster_name: str, ip_addr: str, seeds: List[str],
|
cluster_name: str, ip_addr: str, seeds: List[str],
|
||||||
cmdline_options: List[str],
|
cmdline_options: List[str],
|
||||||
config_options: Dict[str, str]) -> None:
|
config_options: Dict[str, Any]) -> None:
|
||||||
# pylint: disable=too-many-arguments
|
# pylint: disable=too-many-arguments
|
||||||
self.server_id = ServerNum(ScyllaServer.newid())
|
self.server_id = ServerNum(ScyllaServer.newid())
|
||||||
self.exe = pathlib.Path(exe).resolve()
|
self.exe = pathlib.Path(exe).resolve()
|
||||||
@@ -502,9 +502,15 @@ class ScyllaServer:
|
|||||||
except ProcessLookupError:
|
except ProcessLookupError:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
# FIXME: add timeout, fail the test and mark cluster as dirty
|
STOP_TIMEOUT_SECONDS = 60
|
||||||
# if we timeout.
|
wait_task = self.cmd.wait()
|
||||||
await self.cmd.wait()
|
try:
|
||||||
|
await asyncio.wait_for(wait_task, timeout=STOP_TIMEOUT_SECONDS)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
self.cmd.kill()
|
||||||
|
await self.cmd.wait()
|
||||||
|
raise RuntimeError(
|
||||||
|
f"Stopping server {self} gracefully took longer than {STOP_TIMEOUT_SECONDS}s")
|
||||||
finally:
|
finally:
|
||||||
if self.cmd:
|
if self.cmd:
|
||||||
self.logger.info("gracefully stopped %s", self)
|
self.logger.info("gracefully stopped %s", self)
|
||||||
@@ -559,7 +565,7 @@ class ScyllaCluster:
|
|||||||
cluster_name: str
|
cluster_name: str
|
||||||
ip_addr: IPAddress
|
ip_addr: IPAddress
|
||||||
seeds: List[str]
|
seeds: List[str]
|
||||||
config_from_test: dict[str, str]
|
config_from_test: dict[str, Any]
|
||||||
cmdline_from_test: List[str]
|
cmdline_from_test: List[str]
|
||||||
|
|
||||||
def __init__(self, logger: Union[logging.Logger, logging.LoggerAdapter],
|
def __init__(self, logger: Union[logging.Logger, logging.LoggerAdapter],
|
||||||
@@ -646,11 +652,11 @@ class ScyllaCluster:
|
|||||||
def _seeds(self) -> List[str]:
|
def _seeds(self) -> List[str]:
|
||||||
return [server.ip_addr for server in self.running.values()]
|
return [server.ip_addr for server in self.running.values()]
|
||||||
|
|
||||||
async def add_server(self, replace_cfg: Optional[ReplaceConfig] = None, cmdline: Optional[List[str]] = None, config: Optional[dict[str, str]] = None, start: bool = True) -> ServerInfo:
|
async def add_server(self, replace_cfg: Optional[ReplaceConfig] = None, cmdline: Optional[List[str]] = None, config: Optional[dict[str, Any]] = None, start: bool = True) -> ServerInfo:
|
||||||
"""Add a new server to the cluster"""
|
"""Add a new server to the cluster"""
|
||||||
self.is_dirty = True
|
self.is_dirty = True
|
||||||
|
|
||||||
extra_config: dict[str, str] = config.copy() if config else {}
|
extra_config: dict[str, Any] = config.copy() if config else {}
|
||||||
if replace_cfg:
|
if replace_cfg:
|
||||||
replaced_id = replace_cfg.replaced_id
|
replaced_id = replace_cfg.replaced_id
|
||||||
assert replaced_id in self.servers, \
|
assert replaced_id in self.servers, \
|
||||||
|
|||||||
@@ -5,3 +5,7 @@ cluster:
|
|||||||
extra_scylla_config_options:
|
extra_scylla_config_options:
|
||||||
authenticator: AllowAllAuthenticator
|
authenticator: AllowAllAuthenticator
|
||||||
authorizer: AllowAllAuthorizer
|
authorizer: AllowAllAuthorizer
|
||||||
|
skip_in_release:
|
||||||
|
- test_shutdown_hang
|
||||||
|
skip_in_debug:
|
||||||
|
- test_shutdown_hang
|
||||||
|
|||||||
63
test/topology_custom/test_shutdown_hang.py
Normal file
63
test/topology_custom/test_shutdown_hang.py
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from cassandra.query import SimpleStatement # type: ignore
|
||||||
|
from cassandra.cluster import ConsistencyLevel # type: ignore
|
||||||
|
from cassandra.protocol import WriteTimeout # type: ignore
|
||||||
|
|
||||||
|
from test.pylib.manager_client import ManagerClient
|
||||||
|
from test.topology.util import wait_for_token_ring_and_group0_consistency
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_hints_manager_shutdown_hang(manager: ManagerClient) -> None:
|
||||||
|
"""Reproducer for #8079"""
|
||||||
|
s1 = await manager.server_add(config={
|
||||||
|
'error_injections_at_startup': ['decrease_hints_flush_period']
|
||||||
|
})
|
||||||
|
s2 = await manager.server_add()
|
||||||
|
await wait_for_token_ring_and_group0_consistency(manager, time.time() + 30)
|
||||||
|
|
||||||
|
cql = manager.get_cql()
|
||||||
|
|
||||||
|
logger.info("Create keyspace and table")
|
||||||
|
await cql.run_async("create keyspace ks with replication = {'class': 'SimpleStrategy', 'replication_factor': 2}")
|
||||||
|
await cql.run_async("create table ks.t (pk int primary key)")
|
||||||
|
|
||||||
|
logger.info(f"Stop {s2}")
|
||||||
|
await manager.server_stop(s2.server_id)
|
||||||
|
|
||||||
|
logger.info("Write data with small timeout")
|
||||||
|
# We're using a small timeout for the insert so it's not unexpected that it would fail on slow
|
||||||
|
# CI machines. To avoid flakiness we disable the test in debug mode (as well as release since
|
||||||
|
# it requires an error injection - so it will run only in dev mode) and we retry the write 10 times.
|
||||||
|
passed = False
|
||||||
|
for _ in range(10):
|
||||||
|
try:
|
||||||
|
await cql.run_async(SimpleStatement("insert into ks.t (pk) values (0) using timeout 500ms",
|
||||||
|
consistency_level=ConsistencyLevel.ONE))
|
||||||
|
except WriteTimeout:
|
||||||
|
logger.info("write timeout, retrying")
|
||||||
|
else:
|
||||||
|
passed = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if not passed:
|
||||||
|
pytest.fail("Write timed out on each attempt")
|
||||||
|
|
||||||
|
# The write succeeded but a background task was left to finish the write to the other node
|
||||||
|
# (which is dead but the first node didn't mark it as dead yet).
|
||||||
|
# The background task will timeout shortly because of 'using timeout' in the statement.
|
||||||
|
# This will cause a hint to get created.
|
||||||
|
# The hints manager starts sending the hint soon after (hint flushing happens every
|
||||||
|
# ~1 second with the error injection).
|
||||||
|
logger.info("Sleep")
|
||||||
|
await asyncio.sleep(2)
|
||||||
|
|
||||||
|
logger.info(f"Stop {s1} gracefully")
|
||||||
|
await manager.server_stop_gracefully(s1.server_id)
|
||||||
Reference in New Issue
Block a user