tools: add scylla-sstable-scripts directory
To be the home of example scripts for scylla-sstable. For now only a README.md is added describing the directory's purpose and with links to useful resources. One example script is added in this patch, more will come later.
This commit is contained in:
9
tools/scylla-sstable-scripts/README.md
Normal file
9
tools/scylla-sstable-scripts/README.md
Normal file
@@ -0,0 +1,9 @@
|
||||
Scripts for scylla-sstable
|
||||
==========================
|
||||
|
||||
This directory contains various example scripts for `scylla-sstable script`.
|
||||
It serves the dual purpose of being a repository of example scripts, so people can see the Lua API in action, and a collection of the most commonly used and useful scripts.
|
||||
|
||||
For more details on the Lua API, see https://docs.scylladb.com/operating-scylla/admin-tools/scylla-sstable#script.
|
||||
|
||||
For more details on Lua, see the [Lua manual](http://www.lua.org/manual/).
|
||||
254
tools/scylla-sstable-scripts/dump.lua
Normal file
254
tools/scylla-sstable-scripts/dump.lua
Normal file
@@ -0,0 +1,254 @@
|
||||
--
|
||||
-- Copyright (C) 2022-present ScyllaDB
|
||||
--
|
||||
-- SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
--
|
||||
|
||||
-- Dumps the content of the sstable(s).
|
||||
--
|
||||
-- Mirrors the dump-data operation. Useful for testing the lua bindings and
|
||||
-- showcasing how to use the lua API to traverse all corners of the data, as well
|
||||
-- as how to generate custom JSON.
|
||||
-- For dumping the content of sstables, prefer the dump-data operation, it is
|
||||
-- much more performant.
|
||||
|
||||
-- JSON writer shared by all functions in this script.
writer = Scylla.new_json_writer()
|
||||
|
||||
-- True once the "clustering_elements" array has been opened for the current
-- partition; reset at the start of every partition.
clustering_array_created = false
|
||||
|
||||
-- Writes a JSON object describing the key of `obj`.
--
-- The object holds the optional "token" (only when `obj.token` is truthy),
-- followed by the "raw" (hex) and "value" (string) forms of `obj.key`.
function write_key(obj)
    writer:start_object()

    local token = obj.token
    if token then
        writer:key("token")
        writer:string(tostring(token))
    end

    local key = obj.key
    writer:key("raw")
    writer:string(key:to_hex())
    writer:key("value")
    writer:string(tostring(key))

    writer:end_object()
end
|
||||
|
||||
-- Writes `tombstone` as a JSON object with "timestamp" and "deletion_time".
-- A nil/false tombstone is written as an empty object.
function write_tombstone(tombstone)
    writer:start_object()

    if not tombstone then
        writer:end_object()
        return
    end

    writer:key("timestamp")
    writer:int(tombstone.timestamp)
    writer:key("deletion_time")
    writer:string(tostring(tombstone.deletion_time))

    writer:end_object()
end
|
||||
|
||||
-- Writes the "ttl" (formatted as "<n>s") and "expiry" members of `obj` into
-- the JSON object the caller has already opened.
function write_ttl(obj)
    writer:key("ttl")
    local ttl_text = string.format("%is", obj.ttl)
    writer:string(ttl_text)

    writer:key("expiry")
    local expiry_text = tostring(obj.expiry)
    writer:string(expiry_text)
end
|
||||
|
||||
-- Opens the "clustering_elements" array the first time it is called for a
-- partition; later calls do nothing until the flag is reset.
function maybe_start_clustering_array()
    if not clustering_array_created then
        writer:key("clustering_elements")
        writer:start_array()
        clustering_array_created = true
    end
end
|
||||
|
||||
-- Writes the members of an atomic (non-collection) cell into the JSON object
-- the caller has already opened.
--
-- Always written: "is_live", "type", "timestamp".
-- "value" is an array of shards for "counter-shards" cells, otherwise a string
-- that is only written for live cells.
-- Live cells with a TTL also get "ttl"/"expiry"; dead cells get
-- "deletion_time".
function write_atomic_cell(cell)
    local live = cell.is_live
    local kind = cell.type

    writer:key("is_live")
    writer:bool(live)

    writer:key("type")
    writer:string(kind)

    writer:key("timestamp")
    writer:int(cell.timestamp)

    if kind == "counter-shards" then
        writer:key("value")
        writer:start_array()
        for _, shard in ipairs(cell.value.shards) do
            writer:start_object()
            writer:key("id")
            writer:string(shard.id)
            writer:key("value")
            writer:int(shard.value)
            writer:key("clock")
            writer:int(shard.clock)
            writer:end_object()
        end
        writer:end_array()
    elseif live then
        -- type == "regular" | "frozen-collection" | "counter-update"
        writer:key("value")
        writer:string(tostring(cell.value))
    end

    if live then
        if cell.has_ttl then
            write_ttl(cell)
        end
    else
        writer:key("deletion_time")
        writer:string(tostring(cell.deletion_time))
    end
end
|
||||
|
||||
-- Writes a collection cell into the JSON object the caller has already
-- opened: an optional "tombstone" followed by a "cells" array of
-- {key, value} objects, where each value is an atomic cell.
function write_collection(cell)
    local tomb = cell.tombstone
    if tomb then
        writer:key("tombstone")
        write_tombstone(tomb)
    end

    writer:key("cells")
    writer:start_array()
    for _, entry in ipairs(cell.values) do
        writer:start_object()

        writer:key("key")
        writer:string(tostring(entry.key))

        writer:key("value")
        writer:start_object()
        write_atomic_cell(entry.value)
        writer:end_object()

        writer:end_object()
    end
    writer:end_array()
end
|
||||
|
||||
-- Writes `cells` (a name -> cell map) as a JSON object with one member per
-- column. Collection cells and atomic cells use their own writers.
function write_cells(cells)
    writer:start_object()

    for column_name, column in pairs(cells) do
        writer:key(column_name)
        writer:start_object()

        local emit = column.type == "collection" and write_collection or write_atomic_cell
        emit(column)

        writer:end_object()
    end

    writer:end_object()
end
|
||||
|
||||
-- Stream callback: opens the root JSON object and the "sstables" object
-- inside it. Both are closed in consume_stream_end().
function consume_stream_start()
    local w = writer
    w:start_object()
    w:key("sstables")
    w:start_object()
end
|
||||
|
||||
-- Sstable callback: opens the array of partitions for this sstable, keyed by
-- the sstable's filename, or by "anonymous" when `sst` is nil.
function consume_sstable_start(sst)
    local name = "anonymous"
    if sst ~= nil then
        name = sst.filename
    end
    writer:key(name)
    writer:start_array()
end
|
||||
|
||||
-- Partition callback: opens the partition's JSON object, resets the
-- clustering-array flag, then writes the partition key and, if present, the
-- partition tombstone.
function consume_partition_start(ps)
    writer:start_object()

    -- No clustering element has been written yet for this partition.
    clustering_array_created = false

    writer:key("key")
    write_key(ps)

    local tomb = ps.tombstone
    if tomb then
        writer:key("tombstone")
        write_tombstone(tomb)
    end
end
|
||||
|
||||
-- Static-row callback: writes the row's cells under the "static_row" key.
function consume_static_row(sr)
    local cells = sr.cells
    writer:key("static_row")
    write_cells(cells)
end
|
||||
|
||||
-- Clustering-row callback: appends one object to "clustering_elements".
--
-- The object contains "type", "key", the row tombstones (both the regular
-- and the shadowable one, written only when `cr.tombstone` is set), the
-- optional row "marker" and the row's "columns".
function consume_clustering_row(cr)
    maybe_start_clustering_array()

    writer:start_object()

    writer:key("type")
    writer:string("clustering-row")

    writer:key("key")
    write_key(cr)

    if cr.tombstone then
        writer:key("tombstone")
        write_tombstone(cr.tombstone)
        writer:key("shadowable_tombstone")
        write_tombstone(cr.shadowable_tombstone)
    end

    local marker = cr.marker
    if marker then
        writer:key("marker")
        writer:start_object()

        writer:key("timestamp")
        writer:int(marker.timestamp)

        -- TTL details are only written for live markers.
        if marker.is_live and marker.has_ttl then
            write_ttl(marker)
        end

        writer:end_object()
    end

    writer:key("columns")
    write_cells(cr.cells)

    writer:end_object()
end
|
||||
|
||||
-- Range-tombstone-change callback: appends one object to
-- "clustering_elements" with "type", the optional "key", "weight" and
-- "tombstone".
function consume_range_tombstone_change(crt)
    maybe_start_clustering_array()

    writer:start_object()

    writer:key("type")
    writer:string("range-tombstone-change")

    -- The key is only written when the change carries one.
    if crt.key then
        writer:key("key")
        write_key(crt)
    end

    writer:key("weight")
    writer:int(crt.key_weight)

    writer:key("tombstone")
    write_tombstone(crt.tombstone)

    writer:end_object()
end
|
||||
|
||||
-- Partition-end callback: closes the "clustering_elements" array if it was
-- opened, then closes the partition object.
function consume_partition_end()
    local array_open = clustering_array_created
    if array_open then
        writer:end_array()
    end
    writer:end_object()
end
|
||||
|
||||
-- Sstable-end callback: closes the array of partitions opened in
-- consume_sstable_start().
function consume_sstable_end()
    local w = writer
    w:end_array()
end
|
||||
|
||||
-- Stream-end callback: closes the "sstables" object and then the root object,
-- both opened in consume_stream_start().
function consume_stream_end()
    local w = writer
    w:end_object() -- "sstables"
    w:end_object() -- root
end
|
||||
@@ -0,0 +1,34 @@
|
||||
--
|
||||
-- Copyright (C) 2022-present ScyllaDB
|
||||
--
|
||||
-- SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
--
|
||||
|
||||
-- Finds clustering rows which have incomplete (prefix) keys.
|
||||
--
|
||||
-- Such keys can be created in tables created with the `WITH COMPACT STORAGE`
|
||||
-- legacy CQL option.
|
||||
-- Found keys are printed to the standard output.
|
||||
|
||||
-- Formatted key of the partition currently being consumed; set in
-- consume_partition_start() and used when reporting incomplete keys.
partition_key = nil
|
||||
|
||||
-- Formats a key as its components joined with ":".
--
-- `key.components` is iterated with ipairs() and every component is converted
-- with tostring(). Returns the empty string for a key without components.
--
-- The result is built in a local table: the previous implementation
-- accumulated it in the global `key_str`, leaking (and clobbering) a global
-- on every call.
function format_key(key)
    local parts = {}
    for i, component in ipairs(key.components) do
        parts[i] = tostring(component)
    end
    return table.concat(parts, ":")
end
|
||||
|
||||
-- Partition callback: remembers the formatted partition key so that
-- consume_clustering_row() can include it in its report.
function consume_partition_start(ps)
    local formatted = format_key(ps.key)
    partition_key = formatted
end
|
||||
|
||||
-- Clustering-row callback: prints a line for every row whose key has fewer
-- components than the schema has clustering key columns.
function consume_clustering_row(cr)
    local key = cr.key
    if #key.components >= #schema.clustering_key_columns then
        return -- complete key, nothing to report
    end

    local message = string.format("Incomplete key in partition %s: %s (%s)", partition_key, format_key(key), key:to_hex())
    print(message)
end
|
||||
82
tools/scylla-sstable-scripts/fragment-stats.lua
Normal file
82
tools/scylla-sstable-scripts/fragment-stats.lua
Normal file
@@ -0,0 +1,82 @@
|
||||
--
|
||||
-- Copyright (C) 2022-present ScyllaDB
|
||||
--
|
||||
-- SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
--
|
||||
|
||||
-- Creates simple statistics of the fragments in the sstable
|
||||
--
|
||||
-- Prints the number of each fragment type as well as the total fragment count
|
||||
-- and stats for the partition with the most fragments.
|
||||
|
||||
-- Creates a fresh statistics table for the partition identified by `key`
-- (nil for aggregate stats), with every counter set to zero.
function new_stats(key)
    local stats = { partition_key = key }
    stats.total = 0
    stats.partition = 0
    stats.static_row = 0
    stats.clustering_row = 0
    stats.range_tombstone_change = 0
    return stats
end
|
||||
|
||||
-- Aggregate counters; updated by every inc_stat() call in addition to the
-- per-partition stats passed to it.
total_stats = new_stats(nil)
|
||||
|
||||
-- Adds one to `counters[field]` and to `counters.total`.
local function bump_counters(counters, field)
    counters[field] = counters[field] + 1
    counters.total = counters.total + 1
end

-- Records one fragment of kind `field` in both `stats` and the global
-- `total_stats`.
function inc_stat(stats, field)
    bump_counters(stats, field)
    bump_counters(total_stats, field)
end
|
||||
|
||||
-- Sstable callback: resets the per-sstable statistics and remembers the
-- sstable's filename (nil when `sst` is nil).
--
-- `total_stats` is reset here together with `max_partition_stats`: the
-- report printed in consume_sstable_end() is labelled per sstable, so its
-- totals must not include fragments counted for previously processed
-- sstables.
function consume_sstable_start(sst)
    total_stats = new_stats(nil)
    max_partition_stats = new_stats(nil)
    current_sst_filename = sst and sst.filename or nil
end
|
||||
|
||||
-- Partition callback: starts a fresh stats table for this partition and
-- counts the partition itself as a fragment.
function consume_partition_start(ps)
    local stats = new_stats(ps.key)
    current_partition_stats = stats
    inc_stat(stats, "partition")
end
|
||||
|
||||
-- Static-row callback: counts one static row for the current partition.
-- The row itself (`sr`) is not inspected.
function consume_static_row(sr)
    local stats = current_partition_stats
    inc_stat(stats, "static_row")
end
|
||||
|
||||
-- Clustering-row callback: counts one clustering row for the current
-- partition. The row itself (`cr`) is not inspected.
function consume_clustering_row(cr)
    local stats = current_partition_stats
    inc_stat(stats, "clustering_row")
end
|
||||
|
||||
-- Range-tombstone-change callback: counts one range tombstone change for the
-- current partition. The fragment itself (`crt`) is not inspected.
function consume_range_tombstone_change(crt)
    local stats = current_partition_stats
    inc_stat(stats, "range_tombstone_change")
end
|
||||
|
||||
-- Partition-end callback: keeps the current partition's stats as the new
-- maximum when it has strictly more fragments than the previous maximum.
function consume_partition_end()
    local current = current_partition_stats
    if current.total <= max_partition_stats.total then
        return
    end
    max_partition_stats = current
end
|
||||
|
||||
-- Sstable-end callback: prints the report.
--
-- Three lines are printed: a header naming the sstable (or "stream" when no
-- filename is known), the counters in `total_stats`, and the counters of the
-- partition with the most fragments.
function consume_sstable_end()
    local header = "Stats for stream:"
    if current_sst_filename then
        header = string.format("Stats for sstable %s:", current_sst_filename)
    end
    print(header)

    local totals = total_stats
    local totals_line = string.format("\t%d fragments in %d partitions - %d static rows, %d clustering rows and %d range tombstone changes",
            totals.total,
            totals.partition,
            totals.static_row,
            totals.clustering_row,
            totals.range_tombstone_change)
    print(totals_line)

    local largest = max_partition_stats
    local largest_line = string.format("\tPartition with max number of fragments (%d): %s - %d static rows, %d clustering rows and %d range tombstone changes",
            largest.total,
            largest.partition_key,
            largest.static_row,
            largest.clustering_row,
            largest.range_tombstone_change)
    print(largest_line)
end
|
||||
183
tools/scylla-sstable-scripts/slice.lua
Normal file
183
tools/scylla-sstable-scripts/slice.lua
Normal file
@@ -0,0 +1,183 @@
|
||||
--
|
||||
-- Copyright (C) 2022-present ScyllaDB
|
||||
--
|
||||
-- SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
--
|
||||
|
||||
-- Filters and dumps the content of the sstable(s).
|
||||
--
|
||||
-- With no arguments, this script is identical to the dump-data operation.
|
||||
-- It demonstrates how to use the high-level JSON write API, as well as how to
|
||||
-- work with partition and clustering keys and how to accept arguments from the
|
||||
-- command-line.
|
||||
-- The script accepts two kinds of arguments from the command-line: partition
|
||||
-- ranges and clustering ranges.
|
||||
-- Partition ranges are expected to have keys with the format of `prN`, where N
|
||||
-- is an integer. N can have any value but it should be unique across all other
|
||||
-- partition-ranges passed to the script. A simple scheme is to use a running
|
||||
-- counter to number them.
|
||||
-- Clustering ranges are expected to have keys with the format of `crN`. The same
|
||||
-- restrictions apply to `N`.
|
||||
-- The ranges themselves have the format:
|
||||
-- * [X,Y] - inclusive range from X to Y
|
||||
-- * [X,Y) - from X (inclusive) to Y (exclusive)
|
||||
-- * (X,Y] - from X (exclusive) to Y (inclusive)
|
||||
-- * (X,Y) - exclusive range from X to Y
|
||||
--
|
||||
-- The key values should be hex encoded serialized keys. For partition ranges, it
|
||||
-- is possible to pass tokens, as `tTOKEN`. The special values of `-inf` and
|
||||
-- `+inf` can be used to denote infinity, but note that infinity should always be
|
||||
-- an exclusive bound.
|
||||
--
|
||||
-- Examples:
|
||||
--
|
||||
-- # a single partition range, from key 000400000005 to inf
|
||||
-- $ scylla sstable script --script-file slice.lua --script-args "pr0=[000400000005,+inf)"
|
||||
--
|
||||
-- # two partition ranges
|
||||
-- $ scylla sstable script --script-file slice.lua --script-args "pr0=(-inf,000400000002):pr1=[000400000005,+inf)"
|
||||
--
|
||||
-- # token-range
|
||||
-- $ scylla sstable script --script-file slice.lua --script-args "pr0=(t-1000,t1000)"
|
||||
--
|
||||
-- # a single clustering range, from key 000400000005 to inf
|
||||
-- $ scylla sstable script --script-file slice.lua --script-args "cr0=[000400000005,+inf)"
|
||||
--
|
||||
-- # partition (mixed key and token) and clustering range
|
||||
-- $ scylla sstable script --script-file slice.lua --script-args "pr0=(000400000001,t1000):cr0=[000400000005,+inf)"
|
||||
|
||||
-- JSON writer used to emit the filtered output.
wr = Scylla.new_json_writer()

-- Ranges parsed from the command-line arguments by parse_ranges(); every
-- entry is a {start_position, end_position} pair.
partition_ranges = {}
clustering_ranges = {}

-- Argument key: "pr" or "cr", followed by optional digits.
arg_key_pattern = "^([pc]r)(%d*)$"
-- Argument value: opening paren, start bound, comma, end bound, closing paren.
arg_value_pattern = "^([%(%[])(.+),(.+)([%]%)])$"
-- Hex-encoded serialized key (digits and lower-case a-f only).
key_pattern = "^([0-9a-f]+)$"
-- Token bound: "t" followed by an optionally negative integer.
token_pattern = "^t(-?%d+)$"
|
||||
|
||||
-- Weight passed to the position constructors for a key bound, indexed by the
-- bracket that delimits that bound.
paren_to_key_weight = {
    ["["] = 0,  -- inclusive start
    ["]"] = 0,  -- inclusive end
    ["("] = 1,  -- exclusive start
    [")"] = -1, -- exclusive end
}
|
||||
|
||||
-- Weight passed to Scylla.new_ring_position() for a token (or infinity)
-- bound, indexed by the bracket that delimits that bound.
paren_to_token_weight = {
    ["["] = -1, -- inclusive start
    ["]"] = 1,  -- inclusive end
    ["("] = 1,  -- exclusive start
    [")"] = -1, -- exclusive end
}
|
||||
|
||||
-- Builds a ring position from one bound of a partition range.
--
-- `paren` is the bracket delimiting the bound, `bound` its textual value:
-- "-inf"/"inf"/"+inf", a hex-encoded serialized partition key, or a token
-- written as "t<integer>". Raises an error for any other text.
function make_ring_position(paren, bound)
    local infinities = { ["-inf"] = true, ["inf"] = true, ["+inf"] = true }
    if infinities[bound] then
        return Scylla.new_ring_position(paren_to_token_weight[paren], nil)
    end

    local hex_key = string.match(bound, key_pattern)
    if hex_key ~= nil then
        local key = Scylla.unserialize_partition_key(hex_key)
        return Scylla.new_ring_position(paren_to_key_weight[paren], key)
    end

    local token_text = string.match(bound, token_pattern)
    if token_text ~= nil then
        return Scylla.new_ring_position(paren_to_token_weight[paren], tonumber(token_text))
    end

    error(string.format("failed to parse %s as a partition-range bound, expected t$TOKEN, +-inf or a serialized key value", bound))
end
|
||||
|
||||
-- Builds a position-in-partition from one bound of a clustering range.
--
-- `paren` is the bracket delimiting the bound, `bound` its textual value:
-- "-inf"/"inf"/"+inf" or a hex-encoded serialized clustering key. Raises an
-- error for any other text.
function make_position_in_partition(paren, bound)
    local weight = paren_to_key_weight[paren]

    local infinities = { ["-inf"] = true, ["inf"] = true, ["+inf"] = true }
    if infinities[bound] then
        return Scylla.new_position_in_partition(weight, nil)
    end

    local hex_key = string.match(bound, key_pattern)
    if hex_key == nil then
        error(string.format("failed to parse %s as a clustering-range bound, expected +-inf or a serialized key value", bound))
    end

    return Scylla.new_position_in_partition(weight, Scylla.unserialize_clustering_key(hex_key))
end
|
||||
|
||||
-- Parses the script arguments into `partition_ranges` and
-- `clustering_ranges`.
--
-- Every key of `args` must match `arg_key_pattern` and every value must
-- match `arg_value_pattern`; an error is raised otherwise. "pr" keys add a
-- partition range, all other accepted keys add a clustering range.
function parse_ranges(args)
    for name, text in pairs(args) do
        local kind = string.match(name, arg_key_pattern)
        if kind == nil then
            error(string.format("failed to parse command line argument key: %s", name))
        end

        local open_paren, low, high, close_paren = string.match(text, arg_value_pattern)
        if open_paren == nil then
            error(string.format("failed to parse command line argument value for key %s: %s", name, text))
        end

        -- Pick the bound constructor and destination list by range kind.
        local make_bound, target
        if kind == 'pr' then
            make_bound, target = make_ring_position, partition_ranges
        else
            make_bound, target = make_position_in_partition, clustering_ranges
        end

        local range_start = make_bound(open_paren, low)
        local range_end = make_bound(close_paren, high)
        target[#target + 1] = { range_start, range_end }
    end
end
|
||||
|
||||
-- Returns true when `point` should be kept.
--
-- With no ranges everything is kept. Otherwise `point` must lie inside at
-- least one {start, end} pair, i.e. start:tri_cmp(point) <= 0 and
-- end:tri_cmp(point) >= 0.
function filter(point, ranges)
    if #ranges == 0 then
        return true
    end

    for _, range in ipairs(ranges) do
        local lower, upper = range[1], range[2]
        if lower:tri_cmp(point) <= 0 then
            if upper:tri_cmp(point) >= 0 then
                return true
            end
        end
    end

    return false
end
|
||||
|
||||
-- Stream callback: parses the command-line ranges from `args`, then starts
-- the output stream.
function consume_stream_start(args)
    parse_ranges(args)
    local out = wr
    out:start_stream()
end
|
||||
|
||||
-- Sstable callback: forwards the sstable to the JSON writer.
function consume_sstable_start(sst)
    local out = wr
    out:start_sstable(sst)
end
|
||||
|
||||
-- Set by consume_partition_start(): true when the current partition was
-- rejected by the partition-range filter.
skip_partition = false
|
||||
|
||||
-- Partition callback: checks the partition against `partition_ranges`.
--
-- Returns false (and writes nothing) when the partition is filtered out;
-- otherwise starts the partition in the output and returns nothing.
function consume_partition_start(ps)
    local position = Scylla.new_ring_position(0, ps.key, ps.token)
    skip_partition = not filter(position, partition_ranges)

    if skip_partition then
        return false
    end

    wr:start_partition(ps)
end
|
||||
|
||||
-- Static-row callback: forwards the static row to the JSON writer.
function consume_static_row(sr)
    local out = wr
    out:static_row(sr)
end
|
||||
|
||||
-- Clustering-row callback: writes the row only when its key passes the
-- `clustering_ranges` filter.
function consume_clustering_row(cr)
    local position = Scylla.new_position_in_partition(0, cr.key)
    if not filter(position, clustering_ranges) then
        return
    end
    wr:clustering_row(cr)
end
|
||||
|
||||
-- Range-tombstone-change callback: forwards the fragment to the JSON writer.
-- No clustering-range filtering is applied here.
function consume_range_tombstone_change(rtc)
    local out = wr
    out:range_tombstone_change(rtc)
end
|
||||
|
||||
-- Partition-end callback: closes the partition in the output unless the
-- partition was skipped, in which case it was never started.
function consume_partition_end()
    if not skip_partition then
        wr:end_partition()
    end
end
|
||||
|
||||
-- Sstable-end callback: closes the current sstable in the output.
function consume_sstable_end()
    local out = wr
    out:end_sstable()
end
|
||||
|
||||
-- Stream-end callback: closes the output stream.
function consume_stream_end()
    local out = wr
    out:end_stream()
end
|
||||
|
||||
Reference in New Issue
Block a user