tools: add scylla-sstable-scripts directory

To be the home of example scripts for scylla-sstable. For now only a
README.md is added describing the directory's purpose and with links to
useful resources.
One example script is added in this patch, more will come later.
This commit is contained in:
Botond Dénes
2022-09-23 07:49:51 +03:00
parent 7b40463f29
commit ace42202df
5 changed files with 562 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
Scripts for scylla-sstable
==========================
This directory contains various example scripts for `scylla-sstable script`.
It serves the dual purpose of being a repository of example scripts, so people can see the Lua API in action, and a collection of the most commonly used and useful scripts.
For more details on the Lua API, see https://docs.scylladb.com/operating-scylla/admin-tools/scylla-sstable#script.
For more details on Lua, see the [Lua manual](http://www.lua.org/manual/).

View File

@@ -0,0 +1,254 @@
--
-- Copyright (C) 2022-present ScyllaDB
--
-- SPDX-License-Identifier: AGPL-3.0-or-later
--
-- Dumps the content of the sstable(s).
--
-- Mirrors the dump-data operation. Useful for testing the lua bindings and
-- showcasing how to use the lua API to traverse all corners of the data, as well
-- as how to generate custom JSON.
-- For dumping the content of sstables, prefer the dump-data operation, it is
-- much more performant.
-- JSON writer shared by all the functions below; all output goes through it.
writer = Scylla.new_json_writer()
-- Tracks whether the "clustering_elements" array has already been opened for
-- the partition currently being dumped.
clustering_array_created = false
-- Writes the key of `obj` as a JSON object.
--
-- The object has the members "raw" (hex form of obj.key) and "value" (string
-- form of obj.key), preceded by "token" when obj.token is set.
function write_key(obj)
    writer:start_object()

    if obj.token then
        writer:key("token")
        writer:string(tostring(obj.token))
    end

    writer:key("raw")
    local key = obj.key
    writer:string(key:to_hex())

    writer:key("value")
    writer:string(tostring(key))

    writer:end_object()
end
-- Writes a tombstone as a JSON object.
--
-- A nil (or false) tombstone yields an empty object, otherwise the object has
-- the members "timestamp" and "deletion_time".
function write_tombstone(tombstone)
    writer:start_object()
    if not tombstone then
        writer:end_object()
        return
    end
    writer:key("timestamp")
    writer:int(tombstone.timestamp)
    writer:key("deletion_time")
    writer:string(tostring(tombstone.deletion_time))
    writer:end_object()
end
-- Writes the "ttl" and "expiry" members of `obj` into the currently open JSON
-- object. The ttl is formatted as an integer followed by an "s" suffix.
function write_ttl(obj)
    writer:key("ttl")
    local ttl_text = string.format("%is", obj.ttl)
    writer:string(ttl_text)

    writer:key("expiry")
    local expiry_text = tostring(obj.expiry)
    writer:string(expiry_text)
end
-- Opens the "clustering_elements" array, unless it was already opened for the
-- current partition (as recorded in clustering_array_created).
function maybe_start_clustering_array()
    if not clustering_array_created then
        writer:key("clustering_elements")
        writer:start_array()
        clustering_array_created = true
    end
end
-- Writes the members of an atomic (non-collection) cell into the currently
-- open JSON object.
--
-- "is_live", "type" and "timestamp" are always written. The "value" is an
-- array of {id, value, clock} objects for "counter-shards" cells and a string
-- for any other live cell; dead cells of other types have no "value".
-- Live cells with a TTL also get "ttl"/"expiry", dead cells get
-- "deletion_time".
function write_atomic_cell(cell)
    writer:key("is_live")
    writer:bool(cell.is_live)
    writer:key("type")
    writer:string(cell.type)
    writer:key("timestamp")
    writer:int(cell.timestamp)

    if cell.type == "counter-shards" then
        writer:key("value")
        writer:start_array()
        for _, counter_shard in ipairs(cell.value.shards) do
            writer:start_object()
            writer:key("id")
            writer:string(counter_shard.id)
            writer:key("value")
            writer:int(counter_shard.value)
            writer:key("clock")
            writer:int(counter_shard.clock)
            writer:end_object()
        end
        writer:end_array()
    elseif cell.is_live then
        -- type == "regular" | "frozen-collection" | "counter-update"
        writer:key("value")
        writer:string(tostring(cell.value))
    end

    if cell.is_live then
        if cell.has_ttl then
            write_ttl(cell)
        end
    else
        writer:key("deletion_time")
        writer:string(tostring(cell.deletion_time))
    end
end
-- Writes the members of a collection cell into the currently open JSON
-- object: an optional "tombstone", followed by a "cells" array holding one
-- {key, value} object per entry of cell.values.
function write_collection(cell)
    local collection_tombstone = cell.tombstone
    if collection_tombstone then
        writer:key("tombstone")
        write_tombstone(collection_tombstone)
    end

    writer:key("cells")
    writer:start_array()
    for _, entry in ipairs(cell.values) do
        writer:start_object()

        writer:key("key")
        writer:string(tostring(entry.key))

        -- each entry's value is itself an atomic cell
        writer:key("value")
        writer:start_object()
        write_atomic_cell(entry.value)
        writer:end_object()

        writer:end_object()
    end
    writer:end_array()
end
-- Writes a JSON object with one member per entry of `cells`, keyed by the
-- entry's name. Cells of type "collection" are written with
-- write_collection(), all others with write_atomic_cell().
-- The entries are visited with pairs(), so their order is unspecified.
function write_cells(cells)
    writer:start_object()
    for column_name, column_cell in pairs(cells) do
        writer:key(column_name)
        writer:start_object()
        local is_collection = column_cell.type == "collection"
        if is_collection then
            write_collection(column_cell)
        else
            write_atomic_cell(column_cell)
        end
        writer:end_object()
    end
    writer:end_object()
end
-- Opens the top-level JSON object and, inside it, the "sstables" object.
-- Both are closed by consume_stream_end().
function consume_stream_start()
    writer:start_object()

    writer:key("sstables")
    writer:start_object()
end
-- Opens the array holding the partitions of one sstable.
-- The array is keyed by the sstable's filename, or by "anonymous" when `sst`
-- is nil.
function consume_sstable_start(sst)
    local name
    if sst ~= nil then
        name = sst.filename
    else
        name = "anonymous"
    end
    writer:key(name)
    writer:start_array()
end
-- Opens the JSON object of a partition and writes its "key" and, when
-- present, its "tombstone". Also resets clustering_array_created, as no
-- clustering element was written yet for this partition.
function consume_partition_start(ps)
    writer:start_object()
    clustering_array_created = false

    writer:key("key")
    write_key(ps)

    local partition_tombstone = ps.tombstone
    if partition_tombstone then
        writer:key("tombstone")
        write_tombstone(partition_tombstone)
    end
end
-- Writes the cells of the static row under the "static_row" member of the
-- current partition object.
function consume_static_row(sr)
    writer:key("static_row")
    write_cells(sr.cells)
end
-- Writes a clustering row as an element of the "clustering_elements" array,
-- opening the array first if needed.
--
-- The element has "type" set to "clustering-row" and a "key", optionally
-- followed by "tombstone" + "shadowable_tombstone" (both written only when
-- cr.tombstone is set) and "marker", and finally "columns" with the cells.
function consume_clustering_row(cr)
    maybe_start_clustering_array()

    writer:start_object()
    writer:key("type")
    writer:string("clustering-row")
    writer:key("key")
    write_key(cr)

    if cr.tombstone then
        writer:key("tombstone")
        write_tombstone(cr.tombstone)
        writer:key("shadowable_tombstone")
        write_tombstone(cr.shadowable_tombstone)
    end

    local marker = cr.marker
    if marker then
        writer:key("marker")
        writer:start_object()
        writer:key("timestamp")
        writer:int(marker.timestamp)
        if marker.is_live and marker.has_ttl then
            write_ttl(marker)
        end
        writer:end_object()
    end

    writer:key("columns")
    write_cells(cr.cells)
    writer:end_object()
end
-- Writes a range tombstone change as an element of the "clustering_elements"
-- array, opening the array first if needed.
--
-- The element has "type" set to "range-tombstone-change", a "key" (only when
-- crt.key is set), the "weight" and the "tombstone".
function consume_range_tombstone_change(crt)
    maybe_start_clustering_array()

    writer:start_object()
    writer:key("type")
    writer:string("range-tombstone-change")

    local has_key = crt.key
    if has_key then
        writer:key("key")
        write_key(crt)
    end

    writer:key("weight")
    writer:int(crt.key_weight)

    writer:key("tombstone")
    write_tombstone(crt.tombstone)

    writer:end_object()
end
-- Closes the partition object, first closing the "clustering_elements" array
-- if one was opened for this partition.
function consume_partition_end()
    local array_is_open = clustering_array_created
    if array_is_open then
        writer:end_array()
    end
    writer:end_object()
end
-- Closes the partition array opened by consume_sstable_start().
function consume_sstable_end()
    writer:end_array()
end
-- Closes the two objects opened by consume_stream_start(): first the
-- "sstables" object, then the top-level one.
function consume_stream_end()
    writer:end_object() -- "sstables"
    writer:end_object() -- top-level object
end

View File

@@ -0,0 +1,34 @@
--
-- Copyright (C) 2022-present ScyllaDB
--
-- SPDX-License-Identifier: AGPL-3.0-or-later
--
-- Finds clustering rows which have incomplete (prefix) keys.
--
-- Such keys can be created in tables created with the `WITH COMPACT STORAGE`
-- legacy CQL option.
-- Found keys are printed to the standard output.
-- Formatted key of the partition currently being read; set in
-- consume_partition_start() and used when reporting incomplete keys.
partition_key = nil
-- Formats a key as its components joined with ":".
--
-- key: an object with a `components` sequence; each component is converted
--      with tostring().
-- Returns the formatted string, which is empty for a key with no components.
function format_key(key)
    -- Collect into a local table: the previous implementation built the
    -- result in an undeclared (hence global) variable, leaking it on each call.
    local parts = {}
    for i, component in ipairs(key.components) do
        parts[i] = tostring(component)
    end
    return table.concat(parts, ":")
end
-- Remembers the formatted key of the partition being entered, so that
-- consume_clustering_row() can include it in its report.
function consume_partition_start(ps)
    local formatted = format_key(ps.key)
    partition_key = formatted
end
-- Prints a line for each clustering row whose key has fewer components than
-- the schema has clustering key columns (i.e. the key is a prefix).
function consume_clustering_row(cr)
    local key = cr.key
    if #key.components >= #schema.clustering_key_columns then
        -- full key, nothing to report
        return
    end
    print(string.format("Incomplete key in partition %s: %s (%s)", partition_key, format_key(key), key:to_hex()))
end

View File

@@ -0,0 +1,82 @@
--
-- Copyright (C) 2022-present ScyllaDB
--
-- SPDX-License-Identifier: AGPL-3.0-or-later
--
-- Creates simple statistics of the fragments in the sstable
--
-- Prints the number of each fragment type as well as the total fragment count
-- and stats for the partition with the most fragments.
-- Creates a stats table with all the counters zeroed.
--
-- key: stored in the `partition_key` field; may be nil.
-- Returns the new table, with a `total` counter and one counter per fragment
-- type.
function new_stats(key)
    local stats = {
        partition_key = key,
        total = 0,
        partition = 0,
        static_row = 0,
        clustering_row = 0,
        range_tombstone_change = 0,
    }
    return stats
end
-- Fragment counters that inc_stat() bumps in addition to the per-partition
-- ones; printed by consume_sstable_end().
total_stats = new_stats(nil)
-- Counts one fragment of kind `field`.
--
-- Both the `field` counter and the `total` counter are incremented, first in
-- `stats` and then in the global total_stats.
function inc_stat(stats, field)
    local function bump(counters)
        counters[field] = counters[field] + 1
        counters.total = counters.total + 1
    end
    bump(stats)
    bump(total_stats)
end
-- Resets the per-sstable state before a new sstable (or stream) is read.
--
-- Both the fragment totals and the max-partition stats are reset here, so the
-- report printed by consume_sstable_end() covers only the current sstable
-- instead of accumulating the counts of the sstables processed before it.
--
-- sst: the sstable about to be read, or nil; when nil the report is labelled
--      as belonging to the stream rather than to a file.
function consume_sstable_start(sst)
    total_stats = new_stats(nil)
    max_partition_stats = new_stats(nil)
    if sst then
        current_sst_filename = sst.filename
    else
        current_sst_filename = nil
    end
end
-- Starts a fresh set of counters for the partition being entered and counts
-- the partition start itself as a fragment.
function consume_partition_start(ps)
    local partition_stats = new_stats(ps.key)
    current_partition_stats = partition_stats
    inc_stat(current_partition_stats, "partition")
end
-- Counts a static row for the current partition; the row content is unused.
function consume_static_row(sr)
    local kind = "static_row"
    inc_stat(current_partition_stats, kind)
end
-- Counts a clustering row for the current partition; the row content is
-- unused.
function consume_clustering_row(cr)
    local kind = "clustering_row"
    inc_stat(current_partition_stats, kind)
end
-- Counts a range tombstone change for the current partition; the fragment
-- content is unused.
function consume_range_tombstone_change(crt)
    local kind = "range_tombstone_change"
    inc_stat(current_partition_stats, kind)
end
-- Keeps the stats of the finished partition as max_partition_stats if it has
-- strictly more fragments than the current maximum (ties keep the earlier
-- partition).
function consume_partition_end()
    local finished = current_partition_stats
    if finished.total > max_partition_stats.total then
        max_partition_stats = finished
    end
end
-- Prints the report: a header naming the sstable (or the stream, when no
-- filename is known), the counters from total_stats and the counters of the
-- partition with the most fragments.
function consume_sstable_end()
    local header = "Stats for stream:"
    if current_sst_filename then
        header = string.format("Stats for sstable %s:", current_sst_filename)
    end
    print(header)

    local totals = total_stats
    print(string.format("\t%d fragments in %d partitions - %d static rows, %d clustering rows and %d range tombstone changes",
        totals.total,
        totals.partition,
        totals.static_row,
        totals.clustering_row,
        totals.range_tombstone_change))

    local biggest = max_partition_stats
    print(string.format("\tPartition with max number of fragments (%d): %s - %d static rows, %d clustering rows and %d range tombstone changes",
        biggest.total,
        biggest.partition_key,
        biggest.static_row,
        biggest.clustering_row,
        biggest.range_tombstone_change))
end

View File

@@ -0,0 +1,183 @@
--
-- Copyright (C) 2022-present ScyllaDB
--
-- SPDX-License-Identifier: AGPL-3.0-or-later
--
-- Filters and dumps the content of the sstable(s).
--
-- With no arguments, this script is identical to the dump-data operation.
-- It demonstrates how to use the high-level JSON write API, as well as how to
-- work with partition and clustering keys and how to accept arguments from the
-- command-line.
-- The script accepts two kinds of arguments from the command-line: partition
-- ranges and clustering ranges.
-- Partition ranges are expected to have keys with the format of `prN`, where N
-- is an integer. N can have any value but it should be unique across all other
-- partition-ranges passed to the script. A simple scheme is to use a running
-- counter to number them.
-- Clustering ranges are expected to have keys with the format of `crN`. The same
-- restrictions apply to `N`.
-- The ranges themselves have the format:
-- * [X,Y] - inclusive range from X to Y
-- * [X,Y) - from X (inclusive) to Y (exclusive)
-- * (X,Y] - from X (exclusive) to Y (inclusive)
-- * (X,Y) - exclusive range from X to Y
--
-- The key values should be hex encoded serialized keys. For partition ranges, it
-- is possible to pass tokens, as `tTOKEN`. The special values of `-inf` and
-- `+inf` can be used to denote infinity, but note that infinity should always be
-- an exclusive bound.
--
-- Examples:
--
-- # a single partition range, from key 000400000005 to inf
-- $ scylla sstable script --script-file slice.lua --script-args "pr0=[000400000005,+inf)"
--
-- # two partition ranges
-- $ scylla sstable script --script-file slice.lua --script-args "pr0=(-inf,000400000002):pr1=[000400000005,+inf)"
--
-- # token-range
-- $ scylla sstable script --script-file slice.lua --script-args "pr0=(t-1000,t1000)"
--
-- # a single clustering range, from key 000400000005 to inf
-- $ scylla sstable script --script-file slice.lua --script-args "cr0=[000400000005,+inf)"
--
-- # partition (mixed key and token) and clustering range
-- $ scylla sstable script --script-file slice.lua --script-args "pr0=(000400000001,t1000):cr0=[000400000005,+inf)"
-- JSON writer used to emit the output.
wr = Scylla.new_json_writer()
-- Ranges parsed from the script arguments by parse_ranges(); each entry is a
-- {start, end} pair of position objects. An empty list means "no filtering".
partition_ranges = {}
clustering_ranges = {}
-- Argument key: "pr" or "cr", optionally followed by digits.
arg_key_pattern = "^([pc]r)(%d*)$"
-- Argument value: opening bracket, start bound, comma, end bound, closing
-- bracket.
arg_value_pattern = "^([%(%[])(.+),(.+)([%]%)])$"
-- Bound given as a serialized key: lowercase hex digits only.
key_pattern = "^([0-9a-f]+)$"
-- Bound given as a token: "t" followed by an optionally negative integer.
token_pattern = "^t(-?%d+)$"
-- Weight passed to the Scylla position constructors, indexed by the bracket
-- character next to the bound. Used for serialized-key bounds and for all
-- clustering-range bounds.
paren_to_key_weight = {
["["] = 0,
["]"] = 0,
["("] = 1,
[")"] = -1,
}
-- Same, but used for the token and infinity bounds of partition ranges.
paren_to_token_weight = {
["["] = -1,
["]"] = 1,
["("] = 1,
[")"] = -1,
}
-- Converts one bound of a partition range into a ring position.
--
-- paren: the bracket character next to the bound, used to look up the weight.
-- bound: the bound text; tried, in this order, as infinity ('-inf', 'inf' or
--        '+inf'), as a serialized key (key_pattern) and as a token
--        (token_pattern).
-- Returns the result of Scylla.new_ring_position().
-- Raises an error if the bound matches none of the accepted formats.
function make_ring_position(paren, bound)
    local is_infinity = bound == '-inf' or bound == 'inf' or bound == '+inf'
    if is_infinity then
        return Scylla.new_ring_position(paren_to_token_weight[paren], nil)
    end

    local hex_key = string.match(bound, key_pattern)
    if hex_key ~= nil then
        return Scylla.new_ring_position(paren_to_key_weight[paren], Scylla.unserialize_partition_key(hex_key))
    end

    local token_text = string.match(bound, token_pattern)
    if token_text ~= nil then
        return Scylla.new_ring_position(paren_to_token_weight[paren], tonumber(token_text))
    end

    error(string.format("failed to parse %s as a partition-range bound, expected t$TOKEN, +-inf or a serialized key value", bound))
end
-- Converts one bound of a clustering range into a position in partition.
--
-- paren: the bracket character next to the bound, used to look up the weight
--        in paren_to_key_weight.
-- bound: the bound text; either infinity ('-inf', 'inf' or '+inf') or a
--        serialized key (key_pattern).
-- Returns the result of Scylla.new_position_in_partition().
-- Raises an error if the bound matches none of the accepted formats.
function make_position_in_partition(paren, bound)
    local weight = paren_to_key_weight[paren]

    local is_infinity = bound == '-inf' or bound == 'inf' or bound == '+inf'
    if is_infinity then
        return Scylla.new_position_in_partition(weight, nil)
    end

    local hex_key = string.match(bound, key_pattern)
    if hex_key == nil then
        error(string.format("failed to parse %s as a clustering-range bound, expected +-inf or a serialized key value", bound))
    end
    return Scylla.new_position_in_partition(weight, Scylla.unserialize_clustering_key(hex_key))
end
-- Parses the script arguments into partition_ranges and clustering_ranges.
--
-- args: table of argument key -> value; keys must match arg_key_pattern and
--       values must match arg_value_pattern.
-- Each "pr" argument appends a {start, end} pair of ring positions to
-- partition_ranges, any other accepted key (i.e. "cr") appends a pair of
-- positions in partition to clustering_ranges.
-- Raises an error on a key or value that fails to parse.
function parse_ranges(args)
    for arg_key, arg_value in pairs(args) do
        -- only the kind is needed, the numeric suffix just makes keys unique
        local kind = string.match(arg_key, arg_key_pattern)
        if kind == nil then
            error(string.format("failed to parse command line argument key: %s", arg_key))
        end

        local open_paren, lower, upper, close_paren = string.match(arg_value, arg_value_pattern)
        if open_paren == nil then
            error(string.format("failed to parse command line argument value for key %s: %s", arg_key, arg_value))
        end

        if kind == 'pr' then
            local range = {make_ring_position(open_paren, lower), make_ring_position(close_paren, upper)}
            partition_ranges[#partition_ranges + 1] = range
        else
            local range = {make_position_in_partition(open_paren, lower), make_position_in_partition(close_paren, upper)}
            clustering_ranges[#clustering_ranges + 1] = range
        end
    end
end
-- Tells whether `point` passes the filter defined by `ranges`.
--
-- point:  the position to check, passed to the bounds' tri_cmp() method.
-- ranges: sequence of {start, end} pairs; an empty sequence accepts
--         everything.
-- Returns true if ranges is empty or if, for at least one range,
-- start:tri_cmp(point) <= 0 and end:tri_cmp(point) >= 0; false otherwise.
function filter(point, ranges)
    if #ranges == 0 then
        return true
    end
    for _, bounds in ipairs(ranges) do
        local not_before_start = bounds[1]:tri_cmp(point) <= 0
        if not_before_start and bounds[2]:tri_cmp(point) >= 0 then
            return true
        end
    end
    return false
end
-- Parses the ranges from the script arguments, then starts the output stream.
-- Parsing comes first, so a malformed argument raises an error before
-- anything is written.
function consume_stream_start(args)
    parse_ranges(args)

    wr:start_stream()
end
-- Forwards the start of an sstable to the JSON writer.
function consume_sstable_start(sst)
    wr:start_sstable(sst)
end
-- Whether the current partition was filtered out by consume_partition_start();
-- checked by consume_partition_end().
skip_partition = false
-- Writes the start of the partition if it falls into one of the requested
-- partition ranges.
--
-- Records the decision in skip_partition. Returns false when the partition
-- is filtered out, and nothing otherwise.
function consume_partition_start(ps)
    local position = Scylla.new_ring_position(0, ps.key, ps.token)
    skip_partition = not filter(position, partition_ranges)
    if skip_partition then
        return false
    end
    wr:start_partition(ps)
end
-- Forwards the static row to the JSON writer; static rows are not filtered.
function consume_static_row(sr)
    wr:static_row(sr)
end
-- Writes the clustering row if its key falls into one of the requested
-- clustering ranges (or if no clustering ranges were given).
function consume_clustering_row(cr)
    local position = Scylla.new_position_in_partition(0, cr.key)
    if filter(position, clustering_ranges) then
        wr:clustering_row(cr)
    end
end
-- Forwards the range tombstone change to the JSON writer; these fragments are
-- not filtered by the clustering ranges.
function consume_range_tombstone_change(rtc)
    wr:range_tombstone_change(rtc)
end
-- Writes the end of the partition, unless the partition was filtered out and
-- its start was therefore never written.
function consume_partition_end()
    if not skip_partition then
        wr:end_partition()
    end
end
-- Forwards the end of the sstable to the JSON writer.
function consume_sstable_end()
    wr:end_sstable()
end
-- Forwards the end of the stream to the JSON writer.
function consume_stream_end()
    wr:end_stream()
end