nodetool: status: Show excluded nodes as having status 'X'

Example:

$ build/dev/scylla nodetool status
Datacenter: dc1
===============
Status=Up/Down/eXcluded
|/ State=Normal/Leaving/Joining/Moving
-- Address   Load      Tokens Owns Host ID                              Rack
UN 127.0.0.1 783.42 KB 1      ?    753cb7b0-1b90-4614-ae17-2cfe470f5104 rack1
XN 127.0.0.2 785.10 KB 1      ?    92ccdd23-5526-4863-844a-5c8e8906fa55 rack2
UN 127.0.0.3 708.91 KB 1      ?    781646ad-c85b-4d77-b7e3-8d50c34f1f17 rack3
This commit is contained in:
Tomasz Grabiec
2025-10-22 01:40:51 +02:00
parent 87492d3073
commit 2bd173da97
6 changed files with 135 additions and 82 deletions

View File

@@ -220,6 +220,25 @@
}
]
},
{
"path":"/storage_service/nodes/excluded",
"operations":[
{
"method":"GET",
"summary":"Retrieve host ids of nodes which are marked as excluded",
"type":"array",
"items":{
"type":"string"
},
"nickname":"get_excluded_nodes",
"produces":[
"application/json"
],
"parameters":[
]
}
]
},
{
"path":"/storage_service/nodes/joining",
"operations":[

View File

@@ -62,6 +62,17 @@ void set_token_metadata(http_context& ctx, routes& r, sharded<locator::shared_to
return addr | std::ranges::to<std::vector>();
});
// Handler for get_excluded_nodes: returns the host ids, rendered as strings,
// of every topology node for which is_excluded() is true. The result is an
// empty vector when no node is excluded.
ss::get_excluded_nodes.set(r, [&tm](const_req req) {
// Read the topology through the local instance of the sharded token metadata.
const auto& local_tm = *tm.local().get();
std::vector<sstring> eps;
local_tm.get_topology().for_each_node([&] (auto& node) {
if (node.is_excluded()) {
// Report the node by host id (not by address).
eps.push_back(node.host_id().to_sstring());
}
});
return eps;
});
ss::get_joining_nodes.set(r, [&tm, &g](const_req req) {
const auto& local_tm = *tm.local().get();
const auto& points = local_tm.get_bootstrap_tokens();
@@ -130,6 +141,7 @@ void unset_token_metadata(http_context& ctx, routes& r) {
ss::get_leaving_nodes.unset(r);
ss::get_moving_nodes.unset(r);
ss::get_joining_nodes.unset(r);
ss::get_excluded_nodes.unset(r);
ss::get_host_id_map.unset(r);
httpd::endpoint_snitch_info_json::get_datacenter.unset(r);
httpd::endpoint_snitch_info_json::get_rack.unset(r);

View File

@@ -18,84 +18,93 @@ Example output:
Datacenter: datacenter1
=======================
Status=Up/Down
Status=Up/Down/eXcluded
|/ State=Normal/Leaving/Joining/Moving
-- Address Load Tokens Owns (effective) Host ID Rack
UN 127.0.0.1 394.97 MB 256 33.4% 292a6c7f-2063-484c-b54d-9015216f1750 rack1
UN 127.0.0.2 151.07 MB 256 34.3% 102b6ecd-2081-4073-8172-bf818c35e27b rack1
UN 127.0.0.3 249.07 MB 256 32.3% 20db6ecd-2981-447s-l172-jf118c17o27y rack1
XN 127.0.0.4 149.07 MB 256 32.3% dd961642-c7c6-4962-9f5a-ea774dbaed77 rack1
+----------+---------------------------------------+
|Parameter |Description |
| | |
| | |
| | |
| | |
+==========+=======================================+
|Datacenter|The data center that holds |
| |the information. |
| | |
| | |
| | |
| | |
+----------+---------------------------------------+
|Status |``U`` - The node is up. |
| | |
| |``D`` - The node is down. |
+----------+---------------------------------------+
|State |``N`` - Normal |
| | |
| |``L`` - Leaving |
| | |
| |``J`` - Joining |
| | |
| |``M`` - Moving |
+----------+---------------------------------------+
|Address |The IP address of the node. |
| | |
+----------+---------------------------------------+
|Load |The size on disk the ScyllaDB data |
| | takes up (updates every 60 seconds). |
| | |
| | |
| | |
| | |
+----------+---------------------------------------+
|Tokens |The number of tokens per node. |
| | |
| | |
| | |
+----------+---------------------------------------+
|Owns |The percentage of data owned by |
| |the node (per datacenter) multiplied by|
| |the replication factor you are using. |
| | |
| |For example, if the node owns 25% of |
| |the data and the replication factor |
| |is 4, the value will equal 100%. |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
+----------+---------------------------------------+
|Host ID |The unique identifier (UUID) |
| |automatically assigned to the node. |
| | |
+----------+---------------------------------------+
|Rack |The name of the rack. |
+----------+---------------------------------------+
+----------+---------------------------------------------------------------+
|Parameter |Description |
| | |
| | |
| | |
| | |
+==========+===============================================================+
|Datacenter|The data center that holds |
| |the information. |
| | |
| | |
| | |
| | |
+----------+---------------------------------------------------------------+
|Status |``U`` - The node is up. |
| | |
| |``D`` - The node is down. |
| | |
| |``X`` - The node is :ref:`excluded <status-excluded>`. |
+----------+---------------------------------------------------------------+
|State |``N`` - Normal |
| | |
| |``L`` - Leaving |
| | |
| |``J`` - Joining |
| | |
| |``M`` - Moving |
+----------+---------------------------------------------------------------+
|Address |The IP address of the node. |
| | |
+----------+---------------------------------------------------------------+
|Load |The size on disk the ScyllaDB data |
| | takes up (updates every 60 seconds). |
| | |
| | |
| | |
| | |
+----------+---------------------------------------------------------------+
|Tokens |The number of tokens per node. |
| | |
| | |
| | |
+----------+---------------------------------------------------------------+
|Owns |The percentage of data owned by |
| |the node (per datacenter) multiplied by |
| |the replication factor you are using. |
| | |
| |For example, if the node owns 25% of |
| |the data and the replication factor |
| |is 4, the value will equal 100%. |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
+----------+---------------------------------------------------------------+
|Host ID |The unique identifier (UUID) |
| |automatically assigned to the node. |
| | |
+----------+---------------------------------------------------------------+
|Rack |The name of the rack. |
+----------+---------------------------------------------------------------+
.. _status-excluded:
Nodes in the excluded status (``X``) are down nodes which were marked as excluded
by ``removenode``, ``excludenode`` or node replace, which means that they are considered permanently lost.
See :doc:`nodetool excludenode </operating-scylla/nodetool-commands/excludenode>` for more information.
.. include:: nodetool-index.rst

View File

@@ -24,7 +24,7 @@ async def validate_status_operation(result: str, live_eps: list, down_eps: list,
assert lines[i] == "=" * dc_line_len
i += 1
assert lines[i] == "Status=Up/Down"
assert lines[i].startswith("Status=Up/Down")
i += 1
assert lines[i] == "|/ State=Normal/Leaving/Joining/Moving"
@@ -47,7 +47,10 @@ async def validate_status_operation(result: str, live_eps: list, down_eps: list,
assert ep in (live_eps + down_eps)
assert status_state[0] == ('U' if ep in live_eps else 'D')
if ep in live_eps:
assert status_state[0] == 'U'
else:
assert status_state[0] in ['D', 'X']
if ep in joining:
assert status_state[1] == 'J'

View File

@@ -16,6 +16,7 @@ import pytest
class NodeStatus(Enum):
Up = 'U'
Down = 'D'
Excluded = 'X'
Unknown = '?'
@@ -64,7 +65,7 @@ def validate_status_output(res, keyspace, nodes, ownership, resolve, effective_o
assert lines[i] == "=" * dc_line_len
i += 1
assert lines[i] == "Status=Up/Down"
# Prefix match so the check accepts headers that list extra statuses after "Up/Down".
assert lines[i].startswith("Status=Up/Down")
i += 1
assert lines[i] == "|/ State=Normal/Leaving/Joining/Moving"
@@ -200,6 +201,7 @@ def _do_test_status(request, nodetool, status_query_target, node_list, resolve=N
moving = [n.endpoint for n in node_list if n.state == NodeState.Moving]
live = [n.endpoint for n in node_list if n.status == NodeStatus.Up]
down = [n.endpoint for n in node_list if n.status == NodeStatus.Down]
excluded = [n.host_id for n in node_list if n.status == NodeStatus.Excluded]
load_map = [{"key": ep, "value": node.load} for ep, node in nodes.items() if node.load is not None]
@@ -223,6 +225,7 @@ def _do_test_status(request, nodetool, status_query_target, node_list, resolve=N
expected_request("GET", "/storage_service/nodes/joining", response=joining),
expected_request("GET", "/storage_service/nodes/leaving", response=leaving),
expected_request("GET", "/storage_service/nodes/moving", response=moving),
expected_request("GET", "/storage_service/nodes/excluded", response=excluded),
expected_request("GET", "/storage_service/load_map", response=load_map),
expected_request("GET", "/storage_service/tokens_endpoint", params=tokens_endpoint_params,
response=tokens_endpoint),

View File

@@ -2412,6 +2412,7 @@ void status_operation(scylla_rest_client& client, const bpo::variables_map& vm)
const auto joining = get_nodes_of_state(client, "joining");
const auto leaving = get_nodes_of_state(client, "leaving");
const auto moving = get_nodes_of_state(client, "moving");
const auto excluded = get_nodes_of_state(client, "excluded");
const auto endpoint_load = rjson_to_map<ssize_t>(client.get("/storage_service/load_map"));
const auto tablets_keyspace = keyspace && keyspace_uses_tablets(client, *keyspace);
@@ -2456,7 +2457,7 @@ void status_operation(scylla_rest_client& client, const bpo::variables_map& vm)
const auto dc_header = fmt::format("Datacenter: {}", dc);
fmt::print("{}\n", dc_header);
fmt::print("{}\n", std::string(dc_header.size(), '='));
fmt::print("Status=Up/Down\n");
fmt::print("Status=Up/Down/eXcluded\n");
fmt::print("|/ State=Normal/Leaving/Joining/Moving\n");
Tabulate table;
if (keyspace) {
@@ -2465,13 +2466,19 @@ void status_operation(scylla_rest_client& client, const bpo::variables_map& vm)
table.add("--", "Address", "Load", "Tokens", "Owns", "Host ID", "Rack");
}
for (const auto& ep : endpoints) {
char status, state;
if (live.contains(ep)) {
status = 'U';
char state;
sstring status;
if (endpoint_host_id.contains(ep) && excluded.contains(endpoint_host_id.at(ep))) {
status = "X";
if (live.contains(ep)) {
status = "XU"; // Should not happen, but when it does, we better know.
}
} else if (live.contains(ep)) {
status = "U";
} else if (down.contains(ep)) {
status = 'D';
status = "D";
} else {
status = '?';
status = "?";
}
if (joining.contains(ep)) {
state = 'J';