Merge "Use radix tree to store cells on a row" from Pavel E
" Current storage of cells in a row is a union of vector and set. The vector holds 5 cell_and_hash's inline, up to 32 in external storage, and then it's switched to std::set. Once switched, the whole union becomes a waste of space, as its size is sizeof(vector head) + 5 * sizeof(cell and hash) = 90+ bytes and only 3 pointers from it are used (std::set header). Also the overhead to keep cell_and_hash as a set entry is more than the size of the structure itself. Column ids are 32-bit integers that most likely come sequentially. For this kind of a search key a radix tree (with some care for non-sequential cases) can be beneficial. This set introduces a compact radix tree, that uses 7-bit sub values from the search key to index on each node and compacts the nodes themselves for better memory usage. Then the row::_storage is replaced with the new tree. The most notable result is the memory footprint decrease, by up to 2x for wide rows. The performance of micro-benchmarks is a bit lower for small rows and (!) higher for longer (8+ cells). The numbers are in patch #12 (spoiler: they are better than for v2) v3: - trimmed size of radix down to 7 bits - simplified the nodes layouts, now there are 2 of them (was 4) - enhanced perf_mutation to test N-cells schema - added AVX intra-nodes search for medium-sized nodes - added .clone_from() method that helped to improve perf_mutation - minor - changed functions not to return values via reference arguments - fixed nested classes to properly use language constructors - renamed index_to to key_t to distinguish from node_index_t - improved recurring variadic templates not to use sentinel argument - use standard concepts v2: - fixed potential mis-compilation due to strict-aliasing violation - added oracle test (radix tree is compared with std::map) - added radix to perf_collection - cosmetic changes (concepts, comments, names) A note on item 1 from v2 changelog. 
The nodes are no longer packed perfectly, each has grown by 3 bytes. But it turned out that when used as a cells container most of this growth drowned in LSA alignment. next todo: - aarch64 version of 16-keys node search tests: unit(dev), unit(debug for radix*), perf(dev) " * 'br-radix-tree-for-cells-3' of https://github.com/xemul/scylla: test/memory_footpring: Print radix tree node sizes row: Remove old storages row: Prepare row::equal for switch row: Prepare row::difference for switch row: Introduce radix tree storage type row-equal: Re-declare the cells_equal lambda test: Add tests for radix tree utils: Compact radix tree array-search: Add helpers to search for a byte in array test/perf_collection: Add callback to check the speed of clone test/perf_mutation: Add option to run with more than 1 columns test/perf_mutation: Prepare to have several regular columns test/perf_mutation: Use builder to build schema
This commit is contained in:
@@ -410,6 +410,7 @@ scylla_tests = set([
|
||||
'test/boost/virtual_reader_test',
|
||||
'test/boost/bptree_test',
|
||||
'test/boost/btree_test',
|
||||
'test/boost/radix_tree_test',
|
||||
'test/boost/double_decker_test',
|
||||
'test/boost/stall_free_test',
|
||||
'test/boost/raft_sys_table_storage_test',
|
||||
@@ -441,6 +442,8 @@ scylla_tests = set([
|
||||
'test/unit/btree_stress_test',
|
||||
'test/unit/bptree_compaction_test',
|
||||
'test/unit/btree_compaction_test',
|
||||
'test/unit/radix_tree_stress_test',
|
||||
'test/unit/radix_tree_compaction_test',
|
||||
])
|
||||
|
||||
perf_tests = set([
|
||||
@@ -1066,6 +1069,8 @@ tests_not_using_seastar_test_framework = set([
|
||||
'test/unit/btree_stress_test',
|
||||
'test/unit/bptree_compaction_test',
|
||||
'test/unit/btree_compaction_test',
|
||||
'test/unit/radix_tree_stress_test',
|
||||
'test/unit/radix_tree_compaction_test',
|
||||
'test/manual/sstable_scan_footprint_test',
|
||||
]) | pure_boost_tests
|
||||
|
||||
|
||||
@@ -399,8 +399,6 @@ database::database(const db::config& cfg, database_config dbcfg, service::migrat
|
||||
|
||||
_row_cache_tracker.set_compaction_scheduling_group(dbcfg.memory_compaction_scheduling_group);
|
||||
|
||||
dblog.debug("Row: max_vector_size: {}, internal_count: {}", size_t(row::max_vector_size), size_t(row::internal_count));
|
||||
|
||||
_infinite_bound_range_deletions_reg = _feat.cluster_supports_unbounded_range_tombstones().when_enabled([this] {
|
||||
dblog.debug("Enabling infinite bound range deletions");
|
||||
_supports_infinite_bound_range_deletions = true;
|
||||
|
||||
@@ -826,14 +826,8 @@ void appending_hash<row>::operator()<legacy_xx_hasher_without_null_digest>(legac
|
||||
}
|
||||
|
||||
cell_hash_opt row::cell_hash_for(column_id id) const {
|
||||
if (_type == storage_type::vector) {
|
||||
return id < max_vector_size && _storage.vector.present.test(id) ? _storage.vector.v[id].hash : cell_hash_opt();
|
||||
}
|
||||
auto it = _storage.set.find(id, cell_entry::compare());
|
||||
if (it != _storage.set.end()) {
|
||||
return it->hash();
|
||||
}
|
||||
return cell_hash_opt();
|
||||
const cell_and_hash* cah = _cells.get(id);
|
||||
return cah != nullptr ? cah->hash : cell_hash_opt();
|
||||
}
|
||||
|
||||
void row::prepare_hash(const schema& s, column_kind kind) const {
|
||||
@@ -925,24 +919,15 @@ static auto prefixed(const sstring& prefix, const RangeOfPrintable& r) {
|
||||
|
||||
std::ostream&
|
||||
operator<<(std::ostream& os, const row::printer& p) {
|
||||
auto add_printer = [&] (const auto& c) {
|
||||
auto& column_def = p._schema.column_at(p._kind, c.first);
|
||||
return std::pair<sstring, atomic_cell_or_collection::printer>(std::piecewise_construct,
|
||||
std::forward_as_tuple(column_def.name_as_text()),
|
||||
std::forward_as_tuple(column_def, c.second)
|
||||
);
|
||||
};
|
||||
auto& cells = p._row._cells;
|
||||
|
||||
sstring cells;
|
||||
switch (p._row._type) {
|
||||
case row::storage_type::set:
|
||||
cells = ::join(",", prefixed("\n ", p._row.get_range_set() | boost::adaptors::transformed(add_printer)));
|
||||
break;
|
||||
case row::storage_type::vector:
|
||||
cells = ::join(",", prefixed("\n ", p._row.get_range_vector() | boost::adaptors::transformed(add_printer)));
|
||||
break;
|
||||
}
|
||||
return fmt_print(os, "{{row: {}}}", cells);
|
||||
os << "{{row:";
|
||||
cells.walk([&] (column_id id, const cell_and_hash& cah) {
|
||||
auto& cdef = p._schema.column_at(p._kind, id);
|
||||
os << "\n " << cdef.name_as_text() << atomic_cell_or_collection::printer(cdef, cah.cell);
|
||||
return true;
|
||||
});
|
||||
return os << "}}";
|
||||
}
|
||||
|
||||
std::ostream&
|
||||
@@ -1184,24 +1169,11 @@ row::apply(const column_definition& column, atomic_cell_or_collection&& value, c
|
||||
|
||||
template<typename Func>
|
||||
void row::consume_with(Func&& func) {
|
||||
if (_type == storage_type::vector) {
|
||||
unsigned i = 0;
|
||||
for (; i < _storage.vector.v.size(); i++) {
|
||||
if (_storage.vector.present.test(i)) {
|
||||
func(i, _storage.vector.v[i]);
|
||||
_storage.vector.present.reset(i);
|
||||
--_size;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
auto del = current_deleter<cell_entry>();
|
||||
auto i = _storage.set.begin();
|
||||
while (i != _storage.set.end()) {
|
||||
func(i->id(), i->get_cell_and_hash());
|
||||
i = _storage.set.erase_and_dispose(i, del);
|
||||
--_size;
|
||||
}
|
||||
}
|
||||
_cells.weed([func, this] (column_id id, cell_and_hash& cah) {
|
||||
_size--;
|
||||
func(id, cah);
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
@@ -1212,68 +1184,26 @@ row::apply_monotonically(const column_definition& column, atomic_cell_or_collect
|
||||
|
||||
// our mutations are not yet immutable
|
||||
auto id = column.id;
|
||||
if (_type == storage_type::vector && id < max_vector_size) {
|
||||
if (id >= _storage.vector.v.size()) {
|
||||
_storage.vector.v.resize(id);
|
||||
_storage.vector.v.emplace_back(std::move(value), std::move(hash));
|
||||
_storage.vector.present.set(id);
|
||||
_size++;
|
||||
} else if (auto& cell_and_hash = _storage.vector.v[id]; !bool(cell_and_hash.cell)) {
|
||||
cell_and_hash = { std::move(value), std::move(hash) };
|
||||
_storage.vector.present.set(id);
|
||||
_size++;
|
||||
} else {
|
||||
::apply_monotonically(column, cell_and_hash, value, std::move(hash));
|
||||
}
|
||||
|
||||
cell_and_hash* cah = _cells.get(id);
|
||||
if (cah == nullptr) {
|
||||
// FIXME -- add .locate method to radix_tree to find or allocate a spot
|
||||
_cells.emplace(id, std::move(value), std::move(hash));
|
||||
_size++;
|
||||
} else {
|
||||
if (_type == storage_type::vector) {
|
||||
vector_to_set();
|
||||
}
|
||||
auto i = _storage.set.lower_bound(id, cell_entry::compare());
|
||||
if (i == _storage.set.end() || i->id() != id) {
|
||||
cell_entry* e = current_allocator().construct<cell_entry>(id);
|
||||
_storage.set.insert(i, *e);
|
||||
_size++;
|
||||
e->_cell_and_hash = { std::move(value), std::move(hash) };
|
||||
} else {
|
||||
::apply_monotonically(column, i->_cell_and_hash, value, std::move(hash));
|
||||
}
|
||||
::apply_monotonically(column, *cah, value, std::move(hash));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
row::append_cell(column_id id, atomic_cell_or_collection value) {
|
||||
if (_type == storage_type::vector && id < max_vector_size) {
|
||||
if (_storage.vector.v.size() > id) {
|
||||
on_internal_error(mplog, format("Attempted to append cell#{} to row already having {} cells", id, _storage.vector.v.size()));
|
||||
}
|
||||
_storage.vector.v.resize(id);
|
||||
_storage.vector.v.emplace_back(cell_and_hash{std::move(value), cell_hash_opt()});
|
||||
_storage.vector.present.set(id);
|
||||
} else {
|
||||
if (_type == storage_type::vector) {
|
||||
vector_to_set();
|
||||
}
|
||||
auto e = current_allocator().construct<cell_entry>(id, std::move(value));
|
||||
_storage.set.insert(_storage.set.end(), *e);
|
||||
}
|
||||
_cells.emplace(id, std::move(value), cell_hash_opt());
|
||||
_size++;
|
||||
}
|
||||
|
||||
const cell_and_hash*
|
||||
row::find_cell_and_hash(column_id id) const {
|
||||
if (_type == storage_type::vector) {
|
||||
if (id >= _storage.vector.v.size() || !_storage.vector.present.test(id)) {
|
||||
return nullptr;
|
||||
}
|
||||
return &_storage.vector.v[id];
|
||||
} else {
|
||||
auto i = _storage.set.find(id, cell_entry::compare());
|
||||
if (i == _storage.set.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
return &i->get_cell_and_hash();
|
||||
}
|
||||
return _cells.get(id);
|
||||
}
|
||||
|
||||
const atomic_cell_or_collection*
|
||||
@@ -1283,21 +1213,10 @@ row::find_cell(column_id id) const {
|
||||
}
|
||||
|
||||
size_t row::external_memory_usage(const schema& s, column_kind kind) const {
|
||||
size_t mem = 0;
|
||||
if (_type == storage_type::vector) {
|
||||
mem += _storage.vector.v.used_space_external_memory_usage();
|
||||
column_id id = 0;
|
||||
for (auto&& c_a_h : _storage.vector.v) {
|
||||
auto& cdef = s.column_at(kind, id++);
|
||||
mem += c_a_h.cell.external_memory_usage(*cdef.type);
|
||||
}
|
||||
} else {
|
||||
for (auto&& ce : _storage.set) {
|
||||
auto& cdef = s.column_at(kind, ce.id());
|
||||
mem += sizeof(cell_entry) + ce.cell().external_memory_usage(*cdef.type);
|
||||
}
|
||||
}
|
||||
return mem;
|
||||
return _cells.memory_usage([&] (column_id id, const cell_and_hash& cah) noexcept {
|
||||
auto& cdef = s.column_at(kind, id);
|
||||
return cah.cell.external_memory_usage(*cdef.type);
|
||||
});
|
||||
}
|
||||
|
||||
size_t rows_entry::memory_usage(const schema& s) const {
|
||||
@@ -1533,62 +1452,17 @@ void rows_entry::replace_with(rows_entry&& o) noexcept {
|
||||
_row = std::move(o._row);
|
||||
}
|
||||
|
||||
row::row(const schema& s, column_kind kind, const row& o)
|
||||
: _type(o._type)
|
||||
, _size(o._size)
|
||||
row::row(const schema& s, column_kind kind, const row& o) : _size(o._size)
|
||||
{
|
||||
if (_type == storage_type::vector) {
|
||||
auto& other_vec = o._storage.vector;
|
||||
auto& vec = *new (&_storage.vector) vector_storage;
|
||||
try {
|
||||
vec.present = other_vec.present;
|
||||
vec.v.reserve(other_vec.v.size());
|
||||
column_id id = 0;
|
||||
for (auto& cell : other_vec.v) {
|
||||
auto& cdef = s.column_at(kind, id++);
|
||||
vec.v.emplace_back(cell_and_hash{cell.cell.copy(*cdef.type), cell.hash});
|
||||
}
|
||||
} catch (...) {
|
||||
_storage.vector.~vector_storage();
|
||||
throw;
|
||||
}
|
||||
} else {
|
||||
auto cloner = [&] (const auto& x) {
|
||||
auto& cdef = s.column_at(kind, x.id());
|
||||
return current_allocator().construct<cell_entry>(*cdef.type, x);
|
||||
};
|
||||
new (&_storage.set) map_type;
|
||||
try {
|
||||
_storage.set.clone_from(o._storage.set, cloner, current_deleter<cell_entry>());
|
||||
} catch (...) {
|
||||
_storage.set.~map_type();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
auto clone_cell_and_hash = [&s, &kind] (column_id id, const cell_and_hash& cah) {
|
||||
auto& cdef = s.column_at(kind, id);
|
||||
return cell_and_hash(cah.cell.copy(*cdef.type), cah.hash);
|
||||
};
|
||||
|
||||
_cells.clone_from(o._cells, clone_cell_and_hash);
|
||||
}
|
||||
|
||||
row::~row() {
|
||||
if (_type == storage_type::vector) {
|
||||
_storage.vector.~vector_storage();
|
||||
} else {
|
||||
_storage.set.clear_and_dispose(current_deleter<cell_entry>());
|
||||
_storage.set.~map_type();
|
||||
}
|
||||
}
|
||||
|
||||
row::cell_entry::cell_entry(const abstract_type& type, const cell_entry& o)
|
||||
: _id(o._id)
|
||||
, _cell_and_hash{ o._cell_and_hash.cell.copy(type), o._cell_and_hash.hash }
|
||||
{ }
|
||||
|
||||
row::cell_entry::cell_entry(cell_entry&& o) noexcept
|
||||
: _link()
|
||||
, _id(o._id)
|
||||
, _cell_and_hash(std::move(o._cell_and_hash))
|
||||
{
|
||||
using container_type = row::map_type;
|
||||
container_type::node_algorithms::replace_node(o._link.this_ptr(), _link.this_ptr());
|
||||
container_type::node_algorithms::init(o._link.this_ptr());
|
||||
}
|
||||
|
||||
const atomic_cell_or_collection& row::cell_at(column_id id) const {
|
||||
@@ -1599,86 +1473,48 @@ const atomic_cell_or_collection& row::cell_at(column_id id) const {
|
||||
return *cell;
|
||||
}
|
||||
|
||||
void row::vector_to_set()
|
||||
{
|
||||
assert(_type == storage_type::vector);
|
||||
map_type set;
|
||||
try {
|
||||
for (auto i : bitsets::for_each_set(_storage.vector.present)) {
|
||||
auto& c_a_h = _storage.vector.v[i];
|
||||
auto e = current_allocator().construct<cell_entry>(i, std::move(c_a_h));
|
||||
set.insert(set.end(), *e);
|
||||
}
|
||||
} catch (...) {
|
||||
set.clear_and_dispose([this, del = current_deleter<cell_entry>()] (cell_entry* ce) noexcept {
|
||||
_storage.vector.v[ce->id()] = std::move(ce->get_cell_and_hash());
|
||||
del(ce);
|
||||
});
|
||||
throw;
|
||||
}
|
||||
_storage.vector.~vector_storage();
|
||||
new (&_storage.set) map_type(std::move(set));
|
||||
_type = storage_type::set;
|
||||
}
|
||||
|
||||
void row::reserve(column_id last_column)
|
||||
{
|
||||
if (_type == storage_type::vector && last_column >= internal_count) {
|
||||
if (last_column >= max_vector_size) {
|
||||
vector_to_set();
|
||||
} else {
|
||||
_storage.vector.v.reserve(last_column);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Func>
|
||||
auto row::with_both_ranges(const row& other, Func&& func) const {
|
||||
if (_type == storage_type::vector) {
|
||||
if (other._type == storage_type::vector) {
|
||||
return func(get_range_vector(), other.get_range_vector());
|
||||
} else {
|
||||
return func(get_range_vector(), other.get_range_set());
|
||||
}
|
||||
} else {
|
||||
if (other._type == storage_type::vector) {
|
||||
return func(get_range_set(), other.get_range_vector());
|
||||
} else {
|
||||
return func(get_range_set(), other.get_range_set());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool row::equal(column_kind kind, const schema& this_schema, const row& other, const schema& other_schema) const {
|
||||
if (size() != other.size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto cells_equal = [&] (std::pair<column_id, const atomic_cell_or_collection&> c1,
|
||||
std::pair<column_id, const atomic_cell_or_collection&> c2) {
|
||||
auto cells_equal = [&] (column_id id1, const atomic_cell_or_collection& c1,
|
||||
column_id id2, const atomic_cell_or_collection& c2) {
|
||||
static_assert(schema::row_column_ids_are_ordered_by_name::value, "Relying on column ids being ordered by name");
|
||||
auto& at1 = *this_schema.column_at(kind, c1.first).type;
|
||||
auto& at2 = *other_schema.column_at(kind, c2.first).type;
|
||||
auto& at1 = *this_schema.column_at(kind, id1).type;
|
||||
auto& at2 = *other_schema.column_at(kind, id2).type;
|
||||
return at1 == at2
|
||||
&& this_schema.column_at(kind, c1.first).name() == other_schema.column_at(kind, c2.first).name()
|
||||
&& c1.second.equals(at1, c2.second);
|
||||
&& this_schema.column_at(kind, id1).name() == other_schema.column_at(kind, id2).name()
|
||||
&& c1.equals(at1, c2);
|
||||
};
|
||||
return with_both_ranges(other, [&] (auto r1, auto r2) {
|
||||
return boost::equal(r1, r2, cells_equal);
|
||||
});
|
||||
|
||||
auto i1 = _cells.begin();
|
||||
auto i1_end = _cells.end();
|
||||
auto i2 = other._cells.begin();
|
||||
auto i2_end = other._cells.end();
|
||||
|
||||
while (true) {
|
||||
if (i1 == i1_end) {
|
||||
return i2 == i2_end;
|
||||
}
|
||||
if (i2 == i2_end) {
|
||||
return i1 == i1_end;
|
||||
}
|
||||
|
||||
if (!cells_equal(i1.key(), i1->cell, i2.key(), i2->cell)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
i1++;
|
||||
i2++;
|
||||
}
|
||||
}
|
||||
|
||||
row::row() {
|
||||
new (&_storage.vector) vector_storage;
|
||||
}
|
||||
|
||||
row::row(row&& other) noexcept
|
||||
: _type(other._type), _size(other._size) {
|
||||
if (_type == storage_type::vector) {
|
||||
new (&_storage.vector) vector_storage(std::move(other._storage.vector));
|
||||
} else {
|
||||
new (&_storage.set) map_type(std::move(other._storage.set));
|
||||
}
|
||||
: _size(other._size), _cells(std::move(other._cells)) {
|
||||
other._size = 0;
|
||||
}
|
||||
|
||||
@@ -1694,11 +1530,6 @@ void row::apply(const schema& s, column_kind kind, const row& other) {
|
||||
if (other.empty()) {
|
||||
return;
|
||||
}
|
||||
if (other._type == storage_type::vector) {
|
||||
reserve(other._storage.vector.v.size() - 1);
|
||||
} else {
|
||||
reserve(other._storage.set.rbegin()->id());
|
||||
}
|
||||
other.for_each_cell([&] (column_id id, const cell_and_hash& c_a_h) {
|
||||
apply(s.column_at(kind, id), c_a_h.cell, c_a_h.hash);
|
||||
});
|
||||
@@ -1712,11 +1543,6 @@ void row::apply_monotonically(const schema& s, column_kind kind, row&& other) {
|
||||
if (other.empty()) {
|
||||
return;
|
||||
}
|
||||
if (other._type == storage_type::vector) {
|
||||
reserve(other._storage.vector.v.size() - 1);
|
||||
} else {
|
||||
reserve(other._storage.set.rbegin()->id());
|
||||
}
|
||||
other.consume_with([&] (column_id id, cell_and_hash& c_a_h) {
|
||||
apply_monotonically(s.column_at(kind, id), std::move(c_a_h.cell), std::move(c_a_h.hash));
|
||||
});
|
||||
@@ -1853,33 +1679,38 @@ deletable_row deletable_row::difference(const schema& s, column_kind kind, const
|
||||
row row::difference(const schema& s, column_kind kind, const row& other) const
|
||||
{
|
||||
row r;
|
||||
with_both_ranges(other, [&] (auto this_range, auto other_range) {
|
||||
auto it = other_range.begin();
|
||||
for (auto&& c : this_range) {
|
||||
while (it != other_range.end() && it->first < c.first) {
|
||||
++it;
|
||||
|
||||
auto c = _cells.begin();
|
||||
auto c_end = _cells.end();
|
||||
auto it = other._cells.begin();
|
||||
auto it_end = other._cells.end();
|
||||
|
||||
while (c != c_end) {
|
||||
while (it != it_end && it.key() < c.key()) {
|
||||
++it;
|
||||
}
|
||||
auto& cdef = s.column_at(kind, c.key());
|
||||
if (it == it_end || it.key() != c.key()) {
|
||||
r.append_cell(c.key(), c->cell.copy(*cdef.type));
|
||||
} else if (cdef.is_counter()) {
|
||||
auto cell = counter_cell_view::difference(c->cell.as_atomic_cell(cdef), it->cell.as_atomic_cell(cdef));
|
||||
if (cell) {
|
||||
r.append_cell(c.key(), std::move(*cell));
|
||||
}
|
||||
auto& cdef = s.column_at(kind, c.first);
|
||||
if (it == other_range.end() || it->first != c.first) {
|
||||
r.append_cell(c.first, c.second.copy(*cdef.type));
|
||||
} else if (cdef.is_counter()) {
|
||||
auto cell = counter_cell_view::difference(c.second.as_atomic_cell(cdef), it->second.as_atomic_cell(cdef));
|
||||
if (cell) {
|
||||
r.append_cell(c.first, std::move(*cell));
|
||||
}
|
||||
} else if (s.column_at(kind, c.first).is_atomic()) {
|
||||
if (compare_atomic_cell_for_merge(c.second.as_atomic_cell(cdef), it->second.as_atomic_cell(cdef)) > 0) {
|
||||
r.append_cell(c.first, c.second.copy(*cdef.type));
|
||||
}
|
||||
} else {
|
||||
auto diff = ::difference(*s.column_at(kind, c.first).type,
|
||||
c.second.as_collection_mutation(), it->second.as_collection_mutation());
|
||||
if (!static_cast<collection_mutation_view>(diff).is_empty()) {
|
||||
r.append_cell(c.first, std::move(diff));
|
||||
}
|
||||
} else if (s.column_at(kind, c.key()).is_atomic()) {
|
||||
if (compare_atomic_cell_for_merge(c->cell.as_atomic_cell(cdef), it->cell.as_atomic_cell(cdef)) > 0) {
|
||||
r.append_cell(c.key(), c->cell.copy(*cdef.type));
|
||||
}
|
||||
} else {
|
||||
auto diff = ::difference(*s.column_at(kind, c.key()).type,
|
||||
c->cell.as_collection_mutation(), it->cell.as_collection_mutation());
|
||||
if (!static_cast<collection_mutation_view>(diff).is_empty()) {
|
||||
r.append_cell(c.key(), std::move(diff));
|
||||
}
|
||||
}
|
||||
});
|
||||
c++;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
@@ -47,6 +47,7 @@
|
||||
#include "utils/intrusive_btree.hh"
|
||||
#include "utils/preempt.hh"
|
||||
#include "utils/managed_ref.hh"
|
||||
#include "utils/compact-radix-tree.hh"
|
||||
|
||||
class mutation_fragment;
|
||||
|
||||
@@ -94,86 +95,13 @@ class compaction_garbage_collector;
|
||||
// for space-efficiency reasons. Whenever a method accepts a column_kind,
|
||||
// the caller must always supply the same column_kind.
|
||||
//
|
||||
// Can be used as a range of row::cell_entry.
|
||||
//
|
||||
class row {
|
||||
|
||||
class cell_entry {
|
||||
boost::intrusive::set_member_hook<> _link;
|
||||
column_id _id;
|
||||
cell_and_hash _cell_and_hash;
|
||||
friend class row;
|
||||
public:
|
||||
cell_entry(column_id id, cell_and_hash c_a_h)
|
||||
: _id(id)
|
||||
, _cell_and_hash(std::move(c_a_h))
|
||||
{ }
|
||||
cell_entry(column_id id, atomic_cell_or_collection cell)
|
||||
: cell_entry(id, cell_and_hash{std::move(cell), cell_hash_opt()})
|
||||
{ }
|
||||
cell_entry(column_id id)
|
||||
: _id(id)
|
||||
{ }
|
||||
cell_entry(cell_entry&&) noexcept;
|
||||
cell_entry(const abstract_type&, const cell_entry&);
|
||||
|
||||
column_id id() const { return _id; }
|
||||
const atomic_cell_or_collection& cell() const { return _cell_and_hash.cell; }
|
||||
atomic_cell_or_collection& cell() { return _cell_and_hash.cell; }
|
||||
const cell_hash_opt& hash() const { return _cell_and_hash.hash; }
|
||||
const cell_and_hash& get_cell_and_hash() const { return _cell_and_hash; }
|
||||
cell_and_hash& get_cell_and_hash() { return _cell_and_hash; }
|
||||
|
||||
struct compare {
|
||||
bool operator()(const cell_entry& e1, const cell_entry& e2) const {
|
||||
return e1._id < e2._id;
|
||||
}
|
||||
bool operator()(column_id id1, const cell_entry& e2) const {
|
||||
return id1 < e2._id;
|
||||
}
|
||||
bool operator()(const cell_entry& e1, column_id id2) const {
|
||||
return e1._id < id2;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
friend class size_calculator;
|
||||
using size_type = std::make_unsigned_t<column_id>;
|
||||
|
||||
enum class storage_type {
|
||||
vector,
|
||||
set,
|
||||
};
|
||||
storage_type _type = storage_type::vector;
|
||||
size_type _size = 0;
|
||||
|
||||
using map_type = boost::intrusive::set<cell_entry,
|
||||
boost::intrusive::member_hook<cell_entry, boost::intrusive::set_member_hook<>, &cell_entry::_link>,
|
||||
boost::intrusive::compare<cell_entry::compare>, boost::intrusive::constant_time_size<false>>;
|
||||
public:
|
||||
static constexpr size_t max_vector_size = 32;
|
||||
static constexpr size_t internal_count = 5;
|
||||
private:
|
||||
using vector_type = managed_vector<cell_and_hash, internal_count, size_type>;
|
||||
|
||||
struct vector_storage {
|
||||
std::bitset<max_vector_size> present;
|
||||
vector_type v;
|
||||
|
||||
vector_storage() = default;
|
||||
vector_storage(const vector_storage&) = default;
|
||||
vector_storage(vector_storage&& other) noexcept
|
||||
: present(other.present)
|
||||
, v(std::move(other.v)) {
|
||||
other.present = {};
|
||||
}
|
||||
};
|
||||
|
||||
union storage {
|
||||
storage() { }
|
||||
~storage() { }
|
||||
map_type set;
|
||||
vector_storage vector;
|
||||
} _storage;
|
||||
using sparse_array_type = compact_radix_tree::tree<cell_and_hash, column_id>;
|
||||
sparse_array_type _cells;
|
||||
public:
|
||||
row();
|
||||
~row();
|
||||
@@ -183,8 +111,6 @@ public:
|
||||
size_t size() const { return _size; }
|
||||
bool empty() const { return _size == 0; }
|
||||
|
||||
void reserve(column_id);
|
||||
|
||||
const atomic_cell_or_collection& cell_at(column_id id) const;
|
||||
|
||||
// Returns a pointer to cell's value or nullptr if column is not set.
|
||||
@@ -194,53 +120,17 @@ public:
|
||||
|
||||
template<typename Func>
|
||||
void remove_if(Func&& func) {
|
||||
if (_type == storage_type::vector) {
|
||||
for (unsigned i = 0; i < _storage.vector.v.size(); i++) {
|
||||
if (!_storage.vector.present.test(i)) {
|
||||
continue;
|
||||
}
|
||||
auto& c = _storage.vector.v[i].cell;
|
||||
if (func(i, c)) {
|
||||
c = atomic_cell_or_collection();
|
||||
_storage.vector.present.reset(i);
|
||||
_size--;
|
||||
}
|
||||
_cells.weed([func, this] (column_id id, cell_and_hash& cah) {
|
||||
if (!func(id, cah.cell)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
for (auto it = _storage.set.begin(); it != _storage.set.end();) {
|
||||
if (func(it->id(), it->cell())) {
|
||||
auto& entry = *it;
|
||||
it = _storage.set.erase(it);
|
||||
current_allocator().destroy(&entry);
|
||||
_size--;
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_size--;
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
private:
|
||||
auto get_range_vector() const {
|
||||
auto id_range = boost::irange<column_id>(0, _storage.vector.v.size());
|
||||
return boost::combine(id_range, _storage.vector.v)
|
||||
| boost::adaptors::filtered([this] (const boost::tuple<const column_id&, const cell_and_hash&>& t) {
|
||||
return _storage.vector.present.test(t.get<0>());
|
||||
}) | boost::adaptors::transformed([] (const boost::tuple<const column_id&, const cell_and_hash&>& t) {
|
||||
return std::pair<column_id, const atomic_cell_or_collection&>(t.get<0>(), t.get<1>().cell);
|
||||
});
|
||||
}
|
||||
auto get_range_set() const {
|
||||
auto range = boost::make_iterator_range(_storage.set.begin(), _storage.set.end());
|
||||
return range | boost::adaptors::transformed([] (const cell_entry& c) {
|
||||
return std::pair<column_id, const atomic_cell_or_collection&>(c.id(), c.cell());
|
||||
});
|
||||
}
|
||||
template<typename Func>
|
||||
auto with_both_ranges(const row& other, Func&& func) const;
|
||||
|
||||
void vector_to_set();
|
||||
|
||||
template<typename Func>
|
||||
void consume_with(Func&&);
|
||||
|
||||
@@ -260,45 +150,25 @@ public:
|
||||
// noexcept if Func doesn't throw.
|
||||
template<typename Func>
|
||||
void for_each_cell(Func&& func) {
|
||||
if (_type == storage_type::vector) {
|
||||
for (auto i : bitsets::for_each_set(_storage.vector.present)) {
|
||||
maybe_invoke_with_hash(func, i, _storage.vector.v[i]);
|
||||
}
|
||||
} else {
|
||||
for (auto& cell : _storage.set) {
|
||||
maybe_invoke_with_hash(func, cell.id(), cell.get_cell_and_hash());
|
||||
}
|
||||
}
|
||||
_cells.walk([func] (column_id id, cell_and_hash& cah) {
|
||||
maybe_invoke_with_hash(func, id, cah);
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
template<typename Func>
|
||||
void for_each_cell(Func&& func) const {
|
||||
if (_type == storage_type::vector) {
|
||||
for (auto i : bitsets::for_each_set(_storage.vector.present)) {
|
||||
maybe_invoke_with_hash(func, i, _storage.vector.v[i]);
|
||||
}
|
||||
} else {
|
||||
for (auto& cell : _storage.set) {
|
||||
maybe_invoke_with_hash(func, cell.id(), cell.get_cell_and_hash());
|
||||
}
|
||||
}
|
||||
_cells.walk([func] (column_id id, const cell_and_hash& cah) {
|
||||
maybe_invoke_with_hash(func, id, cah);
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
template<typename Func>
|
||||
void for_each_cell_until(Func&& func) const {
|
||||
if (_type == storage_type::vector) {
|
||||
for (auto i : bitsets::for_each_set(_storage.vector.present)) {
|
||||
if (maybe_invoke_with_hash(func, i, _storage.vector.v[i]) == stop_iteration::yes) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (auto& cell : _storage.set) {
|
||||
if (maybe_invoke_with_hash(func, cell.id(), cell.get_cell_and_hash()) == stop_iteration::yes) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
_cells.walk([func] (column_id id, const cell_and_hash& cah) {
|
||||
return maybe_invoke_with_hash(func, id, cah) != stop_iteration::yes;
|
||||
});
|
||||
}
|
||||
|
||||
// Merges cell's value into the row.
|
||||
@@ -434,7 +304,7 @@ public:
|
||||
|
||||
void reserve(column_id nr) {
|
||||
if (nr) {
|
||||
maybe_create().reserve(nr);
|
||||
maybe_create();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1368,10 +1368,6 @@ public:
|
||||
|
||||
virtual proceed consume_row_end() override {
|
||||
auto fill_cells = [this] (column_kind kind, row& cells) {
|
||||
auto max_id = boost::max_element(_cells, [](auto &&a, auto &&b) {
|
||||
return a.id < b.id;
|
||||
});
|
||||
cells.reserve(max_id->id);
|
||||
for (auto &&c : _cells) {
|
||||
cells.apply(_schema->column_at(kind, c.id), std::move(c.val));
|
||||
}
|
||||
|
||||
192
test/boost/radix_tree_test.cc
Normal file
192
test/boost/radix_tree_test.cc
Normal file
@@ -0,0 +1,192 @@
|
||||
|
||||
/*
|
||||
* Copyright (C) 2021 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <seastar/testing/test_case.hh>
|
||||
#include <seastar/testing/thread_test_case.hh>
|
||||
#include <fmt/core.h>
|
||||
|
||||
#include "utils/compact-radix-tree.hh"
|
||||
|
||||
using namespace compact_radix_tree;
|
||||
using namespace seastar;
|
||||
|
||||
// Payload type stored in the tree under test.
//
// The value is kept twice: inline in _val and in a separately allocated
// copy pointed to by _pval. Moving an object steals _pval, so reading a
// moved-from instance through value() yields a visibly wrong number
// (original + 1000000) instead of silently looking valid.
class test_data {
    unsigned long _val;
    unsigned long *_pval; // owned; nullptr after this object was moved from
public:
    test_data(unsigned long val) : _val(val), _pval(new unsigned long(val)) {}
    test_data(const test_data&) = delete;
    test_data(test_data&& o) noexcept : _val(o._val), _pval(std::exchange(o._pval, nullptr)) {}
    // Assignment was never available (the user-declared move constructor
    // suppresses it). State that explicitly so the ownership rules of the
    // raw pointer are visible in one place.
    test_data& operator=(const test_data&) = delete;
    test_data& operator=(test_data&&) = delete;
    ~test_data() {
        // delete on a null pointer is a no-op, no guard needed.
        delete _pval;
    }

    // Returns the stored value, or the stored value plus 1000000 when
    // called on a moved-from object.
    unsigned long value() const {
        return _pval != nullptr ? *_pval : _val + 1000000;
    }
};
|
||||
|
||||
// Streams a test_data as the number reported by its value() accessor.
std::ostream& operator<<(std::ostream& out, const test_data& d) {
    return out << d.value();
}
|
||||
|
||||
using test_tree = tree<test_data>;
|
||||
|
||||
// Inserts keys 0..1023 under memory::with_allocation_failures() and then
// checks that iteration sees exactly 1024 entries whose key equals the value.
SEASTAR_TEST_CASE(test_exception_safety_of_emplace) {
    return seastar::async([] {
        test_tree tree;

        // Declared outside the callback and advanced only after a successful
        // emplace. NOTE(review): presumably with_allocation_failures() runs
        // the callback more than once -- confirm against seastar.
        int next = 0;
        memory::with_allocation_failures([&] {
            for (; next < 1024; next++) {
                BOOST_REQUIRE(tree.get(next) == nullptr);
                tree.emplace(next, next);
            }
        });

        int count = 0;
        for (auto it = tree.begin(); it != tree.end(); it++) {
            BOOST_REQUIRE(it.key() == it->value());
            count++;
        }

        BOOST_REQUIRE(count == 1024);
    });
}
|
||||
|
||||
// Fills the tree with keys 0..999, weeds out every key divisible by 2 or 3
// and checks that exactly the remaining keys can still be looked up.
BOOST_AUTO_TEST_CASE(test_weed_from_tree) {
    test_tree tree;

    for (int key = 0; key < 1000; key++) {
        tree.emplace(key, key);
    }

    // true for the indices that must be removed
    auto filter = [] (unsigned idx) noexcept {
        const bool even = idx % 2 == 0;
        const bool third = idx % 3 == 0;
        return even || third;
    };

    tree.weed([&filter] (unsigned idx, test_data& d) noexcept {
        BOOST_REQUIRE(idx == d.value());
        return filter(idx);
    });

    for (int key = 0; key < 1000; key++) {
        test_data* d = tree.get(key);
        if (!filter(key)) {
            BOOST_REQUIRE(d != nullptr);
            BOOST_REQUIRE(d->value() == (unsigned long)key);
            continue;
        }
        BOOST_REQUIRE(d == nullptr);
    }
}
|
||||
|
||||
// Stores value k + 1 under every even key k in [0, 1998] and probes
// lower_bound() with consecutive indices until it reports nothing.
BOOST_AUTO_TEST_CASE(test_lower_bound) {
    test_tree tree;

    for (int n = 0; n < 1000; n++) {
        tree.emplace(n * 2, n * 2 + 1);
    }

    int i = 0;
    while (true) {
        test_data* d = tree.lower_bound(i);
        if (d == nullptr) {
            // 1998 is the largest key, so 1999 is the first index with no bound
            BOOST_REQUIRE(i == 1999);
            break;
        }

        // An even index hits its own key (value i + 1), an odd one lands on
        // the next even key i + 1, whose value is i + 2.
        const int delta = (i % 2 == 0) ? 1 : 2;
        BOOST_REQUIRE(d->value() == (unsigned long)(i + delta));
        i++;
    }
}
|
||||
|
||||
// After clear() a lower_bound(0) lookup must find nothing.
BOOST_AUTO_TEST_CASE(test_clear) {
    test_tree tree;

    // keys 0, 3, 6, ..., 2997
    for (int key = 0; key < 3000; key += 3) {
        tree.emplace(key, key);
    }

    tree.clear();
    BOOST_REQUIRE(tree.lower_bound(0) == nullptr);
}
|
||||
|
||||
static void do_test_clone(size_t sz) {
|
||||
test_tree t;
|
||||
|
||||
for (unsigned i = 0; i < sz; i++) {
|
||||
t.emplace(i, i);
|
||||
}
|
||||
|
||||
test_tree ct;
|
||||
|
||||
ct.clone_from(t, [] (unsigned idx, const test_data& td) {
|
||||
BOOST_REQUIRE(idx == td.value());
|
||||
return test_data(td.value());
|
||||
});
|
||||
|
||||
BOOST_REQUIRE(std::equal(t.begin(), t.end(), ct.begin(), ct.end(),
|
||||
[] (const test_data& a, const test_data& b) {
|
||||
return a.value() == b.value();
|
||||
}));
|
||||
}
|
||||
|
||||
// Runs the clone check for an empty tree and for several tree sizes.
BOOST_AUTO_TEST_CASE(test_clone) {
    static const size_t sizes[] = { 0, 2, 99, 1111, 333333 };

    for (size_t sz : sizes) {
        do_test_clone(sz);
    }
}
|
||||
|
||||
// Clones a 2345-element tree under memory::with_allocation_failures() and
// checks that the clone ends up holding the same values as the source.
SEASTAR_TEST_CASE(test_exception_safety_of_clone) {
    return seastar::async([] {
        test_tree t;
        for (unsigned key = 0; key < 2345; key++) {
            t.emplace(key, key);
        }

        test_tree ct;
        memory::with_allocation_failures([&] {
            ct.clone_from(t, [] (unsigned idx, const test_data& td) {
                return test_data(td.value());
            });
        });

        auto same_value = [] (const test_data& a, const test_data& b) {
            return a.value() == b.value();
        };
        const bool identical = std::equal(t.begin(), t.end(), ct.begin(), ct.end(), same_value);
        BOOST_REQUIRE(identical);
    });
}
|
||||
@@ -1692,7 +1692,7 @@ private:
|
||||
generate_uncompactable _uncompactable;
|
||||
const size_t _external_blob_size = 128; // Should be enough to force use of external bytes storage
|
||||
const size_t n_blobs = 1024;
|
||||
const column_id column_count = row::max_vector_size * 2;
|
||||
const column_id column_count = 64;
|
||||
std::mt19937 _gen;
|
||||
schema_ptr _schema;
|
||||
std::vector<bytes> _blobs;
|
||||
@@ -1716,7 +1716,6 @@ private:
|
||||
auto col_type = type == counter_type || _bool_dist(_gen) ? type : list_type_impl::get_instance(type, true);
|
||||
builder.with_column(to_bytes(column_name), col_type, kind);
|
||||
};
|
||||
// Create enough columns so that row can overflow its vector storage
|
||||
for (column_id i = 0; i < column_count; ++i) {
|
||||
add_column(format("v{:d}", i), column_kind::regular_column);
|
||||
add_column(format("s{:d}", i), column_kind::static_column);
|
||||
|
||||
@@ -40,6 +40,8 @@
|
||||
#include "test/lib/reader_permit.hh"
|
||||
|
||||
class size_calculator {
|
||||
using cells_type = row::sparse_array_type;
|
||||
|
||||
class nest {
|
||||
public:
|
||||
static thread_local int level;
|
||||
@@ -73,6 +75,19 @@ public:
|
||||
std::cout << prefix() << "sizeof(lru_link_type) = " << sizeof(rows_entry::lru_link_type) << "\n";
|
||||
std::cout << prefix() << "sizeof(deletable_row) = " << sizeof(deletable_row) << "\n";
|
||||
std::cout << prefix() << "sizeof(row) = " << sizeof(row) << "\n";
|
||||
std::cout << prefix() << "radix_tree::inner_node::node_sizes = ";
|
||||
for (int i = 4; i <= 128; i *= 2) {
|
||||
std::cout << " " << cells_type::inner_node::node_type::node_size(cells_type::layout::direct_dynamic, i);
|
||||
}
|
||||
std::cout << "\n";
|
||||
std::cout << prefix() << "radix_tree::leaf_node::node_sizes = ";
|
||||
std::cout << " " << cells_type::leaf_node::node_type::node_size(cells_type::layout::indirect_tiny, 0);
|
||||
std::cout << " " << cells_type::leaf_node::node_type::node_size(cells_type::layout::indirect_small, 0);
|
||||
std::cout << " " << cells_type::leaf_node::node_type::node_size(cells_type::layout::indirect_medium, 0);
|
||||
std::cout << " " << cells_type::leaf_node::node_type::node_size(cells_type::layout::indirect_large, 0);
|
||||
std::cout << " " << cells_type::leaf_node::node_type::node_size(cells_type::layout::direct_static, 0);
|
||||
std::cout << "\n";
|
||||
|
||||
std::cout << prefix() << "sizeof(atomic_cell_or_collection) = " << sizeof(atomic_cell_or_collection) << "\n";
|
||||
std::cout << prefix() << "btree::linear_node_size(1) = " << mutation_partition::rows_type::node::linear_node_size(1) << "\n";
|
||||
std::cout << prefix() << "btree::inner_node_size = " << mutation_partition::rows_type::node::inner_node_size << "\n";
|
||||
|
||||
@@ -82,6 +82,7 @@ public:
|
||||
virtual void erase(per_key_t k) = 0;
|
||||
virtual void drain(int batch) = 0;
|
||||
virtual void clear() = 0;
|
||||
virtual void clone() = 0;
|
||||
virtual void show_stats() = 0;
|
||||
virtual void insert_and_erase(per_key_t k) = 0;
|
||||
virtual ~collection_tester() {};
|
||||
@@ -128,6 +129,7 @@ public:
|
||||
}
|
||||
}
|
||||
virtual void clear() override { _t.clear(); }
|
||||
virtual void clone() override { }
|
||||
virtual void insert_and_erase(per_key_t k) override {
|
||||
auto i = _t.emplace(k, 0);
|
||||
assert(i.second);
|
||||
@@ -148,6 +150,56 @@ public:
|
||||
virtual ~bptree_tester() { clear(); }
|
||||
};
|
||||
|
||||
#include "utils/compact-radix-tree.hh"
|
||||
|
||||
class radix_tester : public collection_tester {
|
||||
public:
|
||||
using test_tree = compact_radix_tree::tree<unsigned long>;
|
||||
|
||||
private:
|
||||
test_tree _t;
|
||||
public:
|
||||
radix_tester() : _t() {}
|
||||
virtual void insert(per_key_t k) override { _t.emplace(k, 0); }
|
||||
virtual void lower_bound(per_key_t k) override {
|
||||
auto i = _t.get(k);
|
||||
assert(i != nullptr);
|
||||
}
|
||||
virtual void scan(int batch) override {
|
||||
scan_collection(_t, batch);
|
||||
}
|
||||
virtual void erase(per_key_t k) override { _t.erase(k); }
|
||||
virtual void drain(int batch) override {
|
||||
int x = 0;
|
||||
while (!_t.empty()) {
|
||||
_t.erase(_t.begin().key());
|
||||
if (++x % batch == 0) {
|
||||
seastar::thread::yield();
|
||||
}
|
||||
}
|
||||
}
|
||||
virtual void clear() override { _t.clear(); }
|
||||
virtual void clone() override {
|
||||
test_tree ct;
|
||||
ct.clone_from(_t, [] (unsigned, const unsigned long& data) { return data; });
|
||||
}
|
||||
virtual void insert_and_erase(per_key_t k) override {
|
||||
_t.emplace(k, 0);
|
||||
_t.erase(k);
|
||||
}
|
||||
void show_node_stats(std::string typ, typename test_tree::stats::node_stats& st) {
|
||||
fmt::print("{}: indirect: {}/{}/{}/{} direct: static {} dynamic {}\n", typ,
|
||||
st.indirect_tiny, st.indirect_small, st.indirect_medium, st.indirect_large,
|
||||
st.direct_static, st.direct_dynamic);
|
||||
}
|
||||
virtual void show_stats() {
|
||||
test_tree::stats st = _t.get_stats();
|
||||
show_node_stats("inner", st.inners);
|
||||
show_node_stats(" leaf", st.leaves);
|
||||
}
|
||||
virtual ~radix_tester() { clear(); }
|
||||
};
|
||||
|
||||
#include "intrusive_set_external_comparator.hh"
|
||||
|
||||
class isec_tester : public collection_tester {
|
||||
@@ -208,6 +260,7 @@ public:
|
||||
virtual void clear() override {
|
||||
_t.clear_and_dispose([] (isec_node* n) { delete n; });
|
||||
}
|
||||
virtual void clone() override { }
|
||||
virtual void insert_and_erase(per_key_t k) override {
|
||||
isec_node n(k);
|
||||
auto i = _t.insert_before(_t.end(), n);
|
||||
@@ -244,6 +297,7 @@ public:
|
||||
virtual void clear() override {
|
||||
_t.clear_and_dispose([] (perf_intrusive_key* k) noexcept { delete k; });
|
||||
}
|
||||
virtual void clone() override { }
|
||||
virtual void insert_and_erase(per_key_t k) override {
|
||||
perf_intrusive_key key(k);
|
||||
auto i = _t.insert_before(_t.end(), key);
|
||||
@@ -288,6 +342,7 @@ public:
|
||||
}
|
||||
}
|
||||
virtual void clear() override { _s.clear(); }
|
||||
virtual void clone() override { }
|
||||
virtual void insert_and_erase(per_key_t k) override {
|
||||
auto i = _s.insert(k);
|
||||
assert(i.second);
|
||||
@@ -320,6 +375,7 @@ public:
|
||||
}
|
||||
}
|
||||
virtual void clear() override { _m.clear(); }
|
||||
virtual void clone() override { }
|
||||
virtual void insert_and_erase(per_key_t k) override {
|
||||
auto i = _m.insert({k, 0});
|
||||
assert(i.second);
|
||||
@@ -361,6 +417,8 @@ int main(int argc, char **argv) {
|
||||
c = std::make_unique<map_tester>();
|
||||
} else if (col == "isec") {
|
||||
c = std::make_unique<isec_tester>();
|
||||
} else if (col == "radix") {
|
||||
c = std::make_unique<radix_tester>();
|
||||
} else {
|
||||
fmt::print("Unknown collection\n");
|
||||
return;
|
||||
@@ -456,6 +514,11 @@ int main(int argc, char **argv) {
|
||||
});
|
||||
|
||||
fmt::print("scan: {:.6f} ms\n", d.count() * 1000);
|
||||
} else if (tst == "clone") {
|
||||
d = duration_in_seconds([&] {
|
||||
c->clone();
|
||||
});
|
||||
fmt::print("clone: {:.6f} ms\n", d.count() * 1000);
|
||||
}
|
||||
|
||||
c->clear();
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
*/
|
||||
|
||||
#include "database.hh"
|
||||
#include "schema_builder.hh"
|
||||
#include "test/perf/perf.hh"
|
||||
#include <seastar/core/app-template.hh>
|
||||
#include <seastar/core/reactor.hh>
|
||||
@@ -30,10 +31,23 @@ static atomic_cell make_atomic_cell(data_type dt, bytes value) {
|
||||
};
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
return app_template().run_deprecated(argc, argv, [] {
|
||||
auto s = make_shared_schema({}, "ks", "cf",
|
||||
{{"p1", utf8_type}}, {{"c1", int32_type}}, {{"r1", int32_type}}, {}, utf8_type);
|
||||
namespace bpo = boost::program_options;
|
||||
app_template app;
|
||||
app.add_options()
|
||||
("column-count", bpo::value<size_t>()->default_value(1), "column count");
|
||||
return app.run_deprecated(argc, argv, [&] {
|
||||
size_t column_count = app.configuration()["column-count"].as<size_t>();
|
||||
auto builder = schema_builder("ks", "cf")
|
||||
.with_column("p1", utf8_type, column_kind::partition_key)
|
||||
.with_column("c1", int32_type, column_kind::clustering_key);
|
||||
|
||||
std::vector<sstring> cnames;
|
||||
for (int i = 0; i < column_count; i++) {
|
||||
cnames.push_back(fmt::format("r{}", i + 1));
|
||||
builder.with_column(to_bytes(cnames.back()), int32_type);
|
||||
}
|
||||
|
||||
auto s = builder.build();
|
||||
memtable mt(s);
|
||||
|
||||
std::cout << "Timing mutation of single column within one row...\n";
|
||||
@@ -44,7 +58,7 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
time_it([&] {
|
||||
mutation m(s, key);
|
||||
const column_definition& col = *s->get_column_definition("r1");
|
||||
const column_definition& col = *s->get_column_definition(to_bytes(cnames[std::rand() % column_count]));
|
||||
m.set_clustered_cell(c_key, col, make_atomic_cell(col.type, value));
|
||||
mt.apply(std::move(m));
|
||||
});
|
||||
|
||||
126
test/unit/radix_tree_compaction_test.cc
Normal file
126
test/unit/radix_tree_compaction_test.cc
Normal file
@@ -0,0 +1,126 @@
|
||||
/*
|
||||
* Copyright (C) 2021 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <seastar/core/app-template.hh>
|
||||
#include <seastar/core/thread.hh>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <fmt/core.h>
|
||||
#include "utils/logalloc.hh"
|
||||
|
||||
#include "utils/compact-radix-tree.hh"
|
||||
#include "radix_tree_printer.hh"
|
||||
#include "collection_stress.hh"
|
||||
|
||||
using namespace compact_radix_tree;
|
||||
using namespace seastar;
|
||||
|
||||
// Payload type stored in the tree under test. The value is kept twice:
// in a separately allocated unsigned long (_data) and inline in _val.
class test_data {
    unsigned long *_data;
    unsigned long _val;

public:
    test_data(unsigned long val) : _data(new unsigned long(val)), _val(val) {}
    test_data(const test_data&) = delete;
    // Moving steals the allocation and leaves the source with a null _data.
    test_data(test_data&& o) noexcept : _data(std::exchange(o._data, nullptr)), _val(o._val) {}

    ~test_data() {
        // delete on a null pointer is a no-op, so moved-from objects are fine
        delete _data;
    }

    // Returns the allocated value. An object with a null _data (i.e. a
    // moved-from one) returns _val + 0x80000000 instead.
    unsigned long value() const {
        if (_data != nullptr) {
            return *_data;
        }
        return _val + 0x80000000;
    }
};
|
||||
|
||||
// Prints a test_data as the number returned by value().
std::ostream& operator<<(std::ostream& out, const test_data& d) {
    return out << d.value();
}
|
||||
|
||||
using test_tree = tree<test_data>;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
namespace bpo = boost::program_options;
|
||||
app_template app;
|
||||
app.add_options()
|
||||
("count", bpo::value<int>()->default_value(132564), "number of indices to fill the tree with")
|
||||
("iters", bpo::value<int>()->default_value(32), "number of iterations")
|
||||
("verb", bpo::value<bool>()->default_value(false), "be verbose");
|
||||
|
||||
return app.run(argc, argv, [&app] {
|
||||
auto count = app.configuration()["count"].as<int>();
|
||||
auto iter = app.configuration()["iters"].as<int>();
|
||||
auto verb = app.configuration()["verb"].as<bool>();
|
||||
|
||||
return seastar::async([count, iter, verb] {
|
||||
tree_pointer<test_tree> t;
|
||||
|
||||
stress_config cfg;
|
||||
cfg.count = count;
|
||||
cfg.iters = 1;
|
||||
cfg.keys = "rand";
|
||||
cfg.verb = verb;
|
||||
|
||||
unsigned col_size = 0;
|
||||
|
||||
for (int i = 0; i < iter; i++) {
|
||||
stress_compact_collection(cfg,
|
||||
/* insert */ [&] (int key) {
|
||||
t->emplace(key, key);
|
||||
col_size++;
|
||||
},
|
||||
/* erase */ [&] (int key) {
|
||||
t->erase(key);
|
||||
col_size--;
|
||||
},
|
||||
/* validate */ [&] {
|
||||
if (verb) {
|
||||
compact_radix_tree::printer<test_data, unsigned>::show(*t);
|
||||
}
|
||||
|
||||
int nr = 0;
|
||||
auto ti = t->begin();
|
||||
while (ti != t->end()) {
|
||||
assert(ti->value() == ti.key());
|
||||
nr++;
|
||||
ti++;
|
||||
}
|
||||
assert(nr == col_size);
|
||||
},
|
||||
/* clear */ [&] {
|
||||
t->clear();
|
||||
col_size = 0;
|
||||
}
|
||||
);
|
||||
|
||||
if (cfg.count < 4) {
|
||||
cfg.count = count / 3;
|
||||
} else {
|
||||
cfg.count /= 2;
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
121
test/unit/radix_tree_printer.hh
Normal file
121
test/unit/radix_tree_printer.hh
Normal file
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
* Copyright (C) 2021 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <fmt/core.h>
|
||||
|
||||
namespace compact_radix_tree {
|
||||
|
||||
template <typename T, typename Idx>
|
||||
class printer {
|
||||
using tree_t = tree<T, Idx>;
|
||||
using node_head_t = typename tree_t::node_head;
|
||||
using leaf_node_t = typename tree_t::leaf_node;
|
||||
using inner_node_t = typename tree_t::inner_node;
|
||||
using layout = typename tree_t::layout;
|
||||
|
||||
static std::string node_id(const node_head_t& n) {
|
||||
return fmt::format("{:03x}", (reinterpret_cast<uintptr_t>(&n)>>3) & 0xfff);
|
||||
}
|
||||
|
||||
static std::string format(const T& val) noexcept { return fmt::format("{}", val); }
|
||||
static std::string format(const typename tree_t::node_head_ptr& p) noexcept { return node_id(*(p.raw())); }
|
||||
|
||||
template <typename Tbl>
|
||||
static void print_indirect(const node_head_t& head, const Tbl& table, unsigned depth, std::string id) {
|
||||
fmt::print("{:<{}}{}.ind{} nr={} depth={} prefix={}.{}:", " ", int(depth * 2), id, Tbl::size, head._size, depth,
|
||||
head._prefix & tree_t::prefix_mask, head.prefix_len());
|
||||
for (unsigned i = 0; i < Tbl::size; i++) {
|
||||
if (table.has(i)) {
|
||||
fmt::print(" [{}] {}:{}", i, table._idx[i], format(table._slots[i]));
|
||||
}
|
||||
}
|
||||
fmt::print("\n");
|
||||
}
|
||||
|
||||
template <typename Arr>
|
||||
static void print_direct(const node_head_t& head, const Arr& array, unsigned depth, std::string id) {
|
||||
unsigned cap = head._base_layout == layout::direct_static ? tree_t::node_index_limit : head._capacity;
|
||||
fmt::print("{:<{}}{}.dir{} nr={} depth={} prefix={}.{}:", " ", int(depth * 2), id, cap, array._data.count(head), depth,
|
||||
head._prefix & tree_t::prefix_mask, head.prefix_len());
|
||||
|
||||
for (unsigned i = 0; i < cap; i++) {
|
||||
if (array._data.has(i)) {
|
||||
fmt::print(" [{}] {}", i, format(array._data._slots[i]));
|
||||
}
|
||||
}
|
||||
fmt::print("\n");
|
||||
}
|
||||
|
||||
template <typename NT>
|
||||
static void print(const NT& n, unsigned depth) {
|
||||
switch (n._base._head._base_layout) {
|
||||
case layout::indirect_tiny: return print_indirect(n._base._head, n._base._layouts._this, depth, node_id(n._base._head));
|
||||
case layout::indirect_small: return print_indirect(n._base._head, n._base._layouts._other._this, depth, node_id(n._base._head));
|
||||
case layout::indirect_medium: return print_indirect(n._base._head, n._base._layouts._other._other._this, depth, node_id(n._base._head));
|
||||
case layout::indirect_large: return print_indirect(n._base._head, n._base._layouts._other._other._other._this, depth, node_id(n._base._head));
|
||||
case layout::direct_static: return print_direct(n._base._head, n._base._layouts._other._other._other._other._this, depth, node_id(n._base._head));
|
||||
default: break;
|
||||
}
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
template <>
|
||||
static void print<inner_node_t>(const inner_node_t& n, unsigned depth) {
|
||||
switch (n._base._head._base_layout) {
|
||||
case layout::direct_dynamic: return print_direct(n._base._head, n._base._layouts._this, depth, node_id(n._base._head));
|
||||
default: break;
|
||||
}
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
static void print(const node_head_t& n, unsigned depth) {
|
||||
if (depth == tree_t::leaf_depth) {
|
||||
print(n.template as_node<leaf_node_t>(), depth);
|
||||
} else {
|
||||
print(n.template as_node<inner_node_t>(), depth);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
static void show(const tree_t& t) {
|
||||
struct printing_visitor {
|
||||
bool sorted = false;
|
||||
|
||||
bool operator()(Idx idx, const T& val) {
|
||||
std::abort();
|
||||
}
|
||||
bool operator()(const node_head_t& n, unsigned depth, bool enter) {
|
||||
if (enter) {
|
||||
print(n, depth);
|
||||
}
|
||||
return depth != tree_t::leaf_depth;
|
||||
}
|
||||
};
|
||||
|
||||
fmt::print("tree:\n");
|
||||
t.visit(printing_visitor{});
|
||||
fmt::print("---\n");
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
170
test/unit/radix_tree_stress_test.cc
Normal file
170
test/unit/radix_tree_stress_test.cc
Normal file
@@ -0,0 +1,170 @@
|
||||
/*
|
||||
* Copyright (C) 2021 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <seastar/core/app-template.hh>
|
||||
#include <seastar/core/thread.hh>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <fmt/core.h>
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
#include "utils/compact-radix-tree.hh"
|
||||
#include "radix_tree_printer.hh"
|
||||
#include "collection_stress.hh"
|
||||
|
||||
using namespace compact_radix_tree;
|
||||
using namespace seastar;
|
||||
|
||||
// Payload type stored in the tree under test. The value is kept twice:
// in a separately allocated unsigned long (_data) and inline in _val.
class test_data {
    unsigned long *_data;
    unsigned long _val;

public:
    test_data(unsigned long val) : _data(new unsigned long(val)), _val(val) {}
    test_data(const test_data&) = delete;
    // Moving steals the allocation and leaves the source with a null _data.
    test_data(test_data&& o) noexcept : _data(std::exchange(o._data, nullptr)), _val(o._val) {}

    ~test_data() {
        // delete on a null pointer is a no-op, so moved-from objects are fine
        delete _data;
    }

    // Returns the allocated value. An object with a null _data (i.e. a
    // moved-from one) returns _val + 0x80000000 instead.
    unsigned long value() const {
        if (_data != nullptr) {
            return *_data;
        }
        return _val + 0x80000000;
    }
};
|
||||
|
||||
// Prints a test_data as the number returned by value().
std::ostream& operator<<(std::ostream& out, const test_data& d) {
    return out << d.value();
}
|
||||
|
||||
using test_tree = tree<test_data>;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
namespace bpo = boost::program_options;
|
||||
app_template app;
|
||||
app.add_options()
|
||||
("count", bpo::value<int>()->default_value(35642), "number of indices to fill the tree with")
|
||||
("iters", bpo::value<int>()->default_value(5), "number of iterations")
|
||||
("keys", bpo::value<std::string>()->default_value("rand"), "how to generate keys (rand, asc, desc)")
|
||||
("verb", bpo::value<bool>()->default_value(false), "be verbose");
|
||||
|
||||
return app.run(argc, argv, [&app] {
|
||||
auto count = app.configuration()["count"].as<int>();
|
||||
auto iters = app.configuration()["iters"].as<int>();
|
||||
auto ks = app.configuration()["keys"].as<std::string>();
|
||||
auto verb = app.configuration()["verb"].as<bool>();
|
||||
|
||||
return seastar::async([count, iters, ks, verb] {
|
||||
auto t = std::make_unique<test_tree>();
|
||||
std::map<unsigned, test_data> oracle;
|
||||
|
||||
unsigned col_size = 0;
|
||||
enum class validate {
|
||||
oracle, iterator, walk, lower_bound,
|
||||
};
|
||||
validate vld = validate::oracle;
|
||||
|
||||
stress_config cfg;
|
||||
cfg.count = count;
|
||||
cfg.iters = 1;
|
||||
cfg.keys = ks;
|
||||
cfg.verb = verb;
|
||||
|
||||
for (int i = 0; i < iters; i++) {
|
||||
stress_collection(cfg,
|
||||
/* insert */ [&] (int key) {
|
||||
t->emplace(key, key);
|
||||
oracle.emplace(std::make_pair(key, key));
|
||||
col_size++;
|
||||
},
|
||||
/* erase */ [&] (int key) {
|
||||
t->erase(key);
|
||||
oracle.erase(key);
|
||||
col_size--;
|
||||
},
|
||||
/* validate */ [&] {
|
||||
if (verb) {
|
||||
compact_radix_tree::printer<test_data, unsigned>::show(*t);
|
||||
}
|
||||
if (vld == validate::oracle) {
|
||||
for (auto&& d : oracle) {
|
||||
test_data* td = t->get(d.first);
|
||||
assert(td != nullptr);
|
||||
assert(td->value() == d.second.value());
|
||||
}
|
||||
vld = validate::iterator;
|
||||
} else if (vld == validate::iterator) {
|
||||
int nr = 0;
|
||||
auto ti = t->begin();
|
||||
while (ti != t->end()) {
|
||||
assert(ti->value() == ti.key());
|
||||
nr++;
|
||||
ti++;
|
||||
assert(nr <= col_size);
|
||||
}
|
||||
assert(nr == col_size);
|
||||
vld = validate::walk;
|
||||
} else if (vld == validate::walk) {
|
||||
int nr = 0;
|
||||
t->walk([&nr, col_size] (unsigned idx, test_data& td) {
|
||||
assert(idx == td.value());
|
||||
nr++;
|
||||
assert(nr <= col_size);
|
||||
return true;
|
||||
});
|
||||
assert(nr == col_size);
|
||||
vld = validate::lower_bound;
|
||||
} else if (vld == validate::lower_bound) {
|
||||
int nr = 0;
|
||||
unsigned idx = 0;
|
||||
while (true) {
|
||||
test_data* td = t->lower_bound(idx);
|
||||
if (td == nullptr) {
|
||||
break;
|
||||
}
|
||||
assert(td->value() >= idx);
|
||||
nr++;
|
||||
idx = td->value() + 1;
|
||||
assert(nr <= col_size);
|
||||
}
|
||||
assert(nr == col_size);
|
||||
vld = validate::oracle;
|
||||
}
|
||||
},
|
||||
/* step */ [] (stress_step step) { }
|
||||
);
|
||||
|
||||
if (cfg.count < 4) {
|
||||
cfg.count = count / 2;
|
||||
} else {
|
||||
cfg.count /= 3;
|
||||
}
|
||||
}
|
||||
|
||||
t->clear();
|
||||
oracle.clear();
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -41,6 +41,30 @@ arch_target("default") int array_search_gt_impl(int64_t val, const int64_t* arra
|
||||
return i;
|
||||
}
|
||||
|
||||
// Linear scan for val in arr[0..len). Returns the index of the first
// matching byte, or len when there is none.
static inline unsigned array_search_eq_impl(uint8_t val, const uint8_t* arr, unsigned len) {
    unsigned pos = 0;

    while (pos < len && arr[pos] != val) {
        pos++;
    }

    return pos;
}
|
||||
|
||||
// "default" target variant: plain linear scan over a 16-byte array.
// Returns the index of the first byte equal to val, or 16 if none matches.
arch_target("default") unsigned array_search_16_eq_impl(uint8_t val, const uint8_t* arr) {
    constexpr unsigned len = 16;
    return array_search_eq_impl(val, arr, len);
}
|
||||
|
||||
// "default" target variant: plain linear scan over a 32-byte array.
// Returns the index of the first byte equal to val, or 32 if none matches.
arch_target("default") unsigned array_search_32_eq_impl(uint8_t val, const uint8_t* arr) {
    constexpr unsigned len = 32;
    return array_search_eq_impl(val, arr, len);
}
|
||||
|
||||
// "default" target variant: plain linear scan over an array of 32 * nr
// bytes. Returns the index of the first byte equal to val, or 32 * nr if
// none matches.
arch_target("default") unsigned array_search_x32_eq_impl(uint8_t val, const uint8_t* arr, int nr) {
    const unsigned len = 32 * nr;
    return array_search_eq_impl(val, arr, len);
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
/*
|
||||
@@ -89,10 +113,58 @@ arch_target("avx2") int array_search_gt_impl(int64_t val, const int64_t* array,
|
||||
return size - cnt;
|
||||
}
|
||||
|
||||
/*
|
||||
* SSE4 version of searching in array for an exact match.
|
||||
*/
|
||||
arch_target("sse") unsigned array_search_16_eq_impl(uint8_t val, const uint8_t* arr) {
|
||||
auto a = _mm_set1_epi8(val);
|
||||
auto b = _mm_lddqu_si128((__m128i*)arr);
|
||||
auto c = _mm_cmpeq_epi8(a, b);
|
||||
unsigned int m = _mm_movemask_epi8(c);
|
||||
return __builtin_ctz(m | 0x10000);
|
||||
}
|
||||
|
||||
/*
|
||||
* AVX2 version of searching in array for an exact match.
|
||||
*/
|
||||
arch_target("avx2") unsigned array_search_32_eq_impl(uint8_t val, const uint8_t* arr) {
|
||||
auto a = _mm256_set1_epi8(val);
|
||||
auto b = _mm256_lddqu_si256((__m256i*)arr);
|
||||
auto c = _mm256_cmpeq_epi8(a, b);
|
||||
unsigned long long m = _mm256_movemask_epi8(c);
|
||||
return __builtin_ctzll(m | 0x100000000ull);
|
||||
}
|
||||
|
||||
/*
 * AVX2 version of searching for an exact match in an array of 32 * nr
 * bytes, processed in 32-byte chunks.
 *
 * Returns the index of the first byte equal to val, or 32 * nr if none
 * matches.
 */
arch_target("avx2") unsigned array_search_x32_eq_impl(uint8_t val, const uint8_t* arr, int nr) {
    unsigned len = 32 * nr;
    auto a = _mm256_set1_epi8(val);
    for (unsigned off = 0; off < len; off += 32) {
        // Load the chunk at the current offset. Loading from arr itself on
        // every iteration would only ever inspect the first 32 bytes.
        auto b = _mm256_lddqu_si256((__m256i*)(arr + off));
        auto c = _mm256_cmpeq_epi8(a, b);
        // One bit per byte of the chunk, set where the byte equals val
        unsigned m = _mm256_movemask_epi8(c);
        if (m != 0) {
            return __builtin_ctz(m) + off;
        }
    }
    return len;
}
|
||||
|
||||
#endif
|
||||
|
||||
int array_search_gt(int64_t val, const int64_t* array, const int capacity, const int size) {
|
||||
return array_search_gt_impl(val, array, capacity, size);
|
||||
}
|
||||
|
||||
unsigned array_search_16_eq(uint8_t val, const uint8_t* arr) {
|
||||
return array_search_16_eq_impl(val, arr);
|
||||
}
|
||||
|
||||
unsigned array_search_32_eq(uint8_t val, const uint8_t* array) {
|
||||
return array_search_32_eq_impl(val, array);
|
||||
}
|
||||
|
||||
unsigned array_search_x32_eq(uint8_t val, const uint8_t* array, int nr) {
|
||||
return array_search_x32_eq_impl(val, array, nr);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -42,4 +42,32 @@ static constexpr int64_t simple_key_unused_value = std::numeric_limits<int64_t>:
|
||||
*/
|
||||
int array_search_gt(int64_t val, const int64_t* array, const int capacity, const int size);
|
||||
|
||||
// Searches a 4-byte array for val. Returns the index of the first matching
// byte, or 4 if none matches.
static inline unsigned array_search_4_eq(uint8_t val, const uint8_t* array) {
    // Kept unrolled on purpose: it is a few percent faster than a loop
    if (array[0] == val) {
        return 0;
    }
    if (array[1] == val) {
        return 1;
    }
    if (array[2] == val) {
        return 2;
    }
    if (array[3] == val) {
        return 3;
    }
    return 4;
}
|
||||
|
||||
// Searches an 8-byte array for val. Returns the index of the first matching
// byte, or 8 if none matches.
static inline unsigned array_search_8_eq(uint8_t val, const uint8_t* array) {
    unsigned pos = 0;
    while (pos < 8 && array[pos] != val) {
        pos++;
    }
    return pos;
}
|
||||
|
||||
unsigned array_search_16_eq(uint8_t val, const uint8_t* array);
|
||||
unsigned array_search_32_eq(uint8_t val, const uint8_t* array);
|
||||
unsigned array_search_x32_eq(uint8_t val, const uint8_t* array, int nr);
|
||||
|
||||
}
|
||||
|
||||
2103
utils/compact-radix-tree.hh
Normal file
2103
utils/compact-radix-tree.hh
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user