api: Add force_remove_endpoint for gossip
It is used to force remove a node from gossip membership if something goes wrong. Note: run the force_remove_endpoint API at the same time on _all_ the nodes in the cluster in order to prevent the removed node from coming back. This is necessary because any node that has not run the force_remove_endpoint API command can still gossip the removed node's information to the other nodes within 2 * ring_delay (2 * 30 seconds by default). For instance, in a 3-node cluster where node 3 has been decommissioned, to remove node 3 from gossip membership prior to the automatic removal (3 days by default), run the API command on both node 1 and node 2 at the same time. $ curl -X POST --header "Accept: application/json" "http://127.0.0.1:10000/gossiper/force_remove_endpoint/127.0.0.3" $ curl -X POST --header "Accept: application/json" "http://127.0.0.2:10000/gossiper/force_remove_endpoint/127.0.0.3" Then run 'nodetool gossipinfo' on all the nodes to check that the removed node is no longer present. Fixes #2134 Closes #5436
This commit is contained in:
@@ -148,6 +148,30 @@
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/gossiper/force_remove_endpoint/{addr}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Force remove an endpoint from gossip",
|
||||
"type":"void",
|
||||
"nickname":"force_remove_endpoint",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"addr",
|
||||
"description":"The endpoint address",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -66,6 +66,13 @@ void set_gossiper(http_context& ctx, routes& r) {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
});
|
||||
|
||||
// Register the handler for the force_remove_endpoint gossiper API operation.
// The "addr" path parameter names the endpoint to remove.
httpd::gossiper_json::force_remove_endpoint.set(r, [](std::unique_ptr<request> req) {
|
||||
// Build the endpoint address from the raw path parameter string.
// NOTE(review): presumably inet_address throws on a malformed address; confirm how
// the HTTP layer reports that to the client.
gms::inet_address ep(req->param["addr"]);
|
||||
// Delegate to the local gossiper and reply with an empty (void) JSON body once
// the returned future resolves.
return gms::get_local_gossiper().force_remove_endpoint(ep).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -677,6 +677,21 @@ future<> gossiper::apply_state_locally(std::map<inet_address, endpoint_state> ma
|
||||
});
|
||||
}
|
||||
|
||||
// Force-removes `endpoint` from gossip: calls remove_endpoint() followed by
// evict_from_membership() for it.
//
// Returns a failed future (std::runtime_error) when `endpoint` is this node's own
// broadcast address. Otherwise returns the future of the removal work.
future<> gossiper::force_remove_endpoint(inet_address endpoint) {
|
||||
// A node is not allowed to force-remove itself.
if (endpoint == get_broadcast_address()) {
|
||||
return make_exception_future<>(std::runtime_error(format("Can not force remove node {} itself", endpoint)));
|
||||
}
|
||||
// Run the removal through invoke_on(0, ...) -- presumably shard 0 of the sharded
// gossiper service; TODO confirm.
return get_gossiper().invoke_on(0, [endpoint] (auto& gossiper) mutable {
|
||||
// remove_endpoint() is documented as running inside a seastar::async context,
// hence the async wrapper. `g` is not used in the body; presumably it is
// captured only to keep the gossiper instance alive until the work finishes.
return seastar::async([&gossiper, g = gossiper.shared_from_this(), endpoint] () mutable {
|
||||
gossiper.remove_endpoint(endpoint);
|
||||
gossiper.evict_from_membership(endpoint);
|
||||
logger.info("Finished to force remove node {}", endpoint);
|
||||
// The handler only logs the failure and does not rethrow.
// NOTE(review): this looks like it makes the caller (and the REST API) see
// success even when the removal failed -- confirm this best-effort behavior
// is intended.
}).handle_exception([endpoint] (auto ep) {
|
||||
logger.warn("Failed to force remove node {}: {}", endpoint, ep);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Runs inside seastar::async context
|
||||
void gossiper::remove_endpoint(inet_address endpoint) {
|
||||
// do subscribers first so anything in the subscriber that depends on gossiper state won't get confused
|
||||
|
||||
@@ -301,6 +301,7 @@ public:
|
||||
* Removes the endpoint from Gossip but retains endpoint state
|
||||
*/
|
||||
void remove_endpoint(inet_address endpoint);
|
||||
/**
 * Force removes the endpoint from gossip by calling remove_endpoint() and then
 * evict_from_membership() for it. Returns a failed future if the endpoint is
 * this node's own broadcast address.
 */
future<> force_remove_endpoint(inet_address endpoint);
|
||||
private:
|
||||
/**
|
||||
* Quarantines the endpoint for QUARANTINE_DELAY
|
||||
|
||||
Reference in New Issue
Block a user