Merge "Improve LSA descriptor encoding" from Pavel
" The LSA small-object allocation latency is greatly affected by the way this allocator encodes the object descriptor in front of each allocated slot. Nowadays it's one of the VLE variants implemented with the help of a loop. Re-implementing this piece with fewer instructions and without a loop allows greatly reducing the allocation latency. The speed-up mostly comes from loop-less code that doesn't confuse the branch predictor. Also the express encoder seems to benefit from writing 8 bytes of the encoded value in one go, rather than byte-by-byte. Perf measurements: 1. (new) logalloc test shows ~40% smaller times 2. perf_mutation in release mode shows ~2% increase in tps 3. the encoder itself is 2 - 4 times faster on x86_64 and 1.05 - 3 times faster on aarch64. The speed-up depends on the 'encoded length': the old encoder has linear time, the new one is constant. tests: unit(dev), perf(release), just encoder on Aarch64 " * 'br-lsa-alloc-latency-4' of https://github.com/xemul/scylla: lsa: Use express encoder uleb64: Add express encoding lsa: Extract uleb64 code into header test: LSA allocation perf test
This commit is contained in:
@@ -526,6 +526,7 @@ scylla_tests = set([
|
||||
'test/perf/perf_collection',
|
||||
'test/perf/perf_row_cache_update',
|
||||
'test/perf/perf_row_cache_reads',
|
||||
'test/perf/logalloc',
|
||||
'test/perf/perf_simple_query',
|
||||
'test/perf/perf_sstable',
|
||||
'test/unit/lsa_async_eviction_test',
|
||||
@@ -1179,6 +1180,7 @@ tests_not_using_seastar_test_framework = set([
|
||||
'test/perf/perf_mutation',
|
||||
'test/perf/perf_collection',
|
||||
'test/perf/perf_row_cache_update',
|
||||
'test/perf/logalloc',
|
||||
'test/unit/lsa_async_eviction_test',
|
||||
'test/unit/lsa_sync_eviction_test',
|
||||
'test/unit/row_cache_alloc_stress_test',
|
||||
|
||||
92
test/perf/logalloc.cc
Normal file
92
test/perf/logalloc.cc
Normal file
@@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright (C) 2021 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <seastar/core/distributed.hh>
|
||||
#include <seastar/core/app-template.hh>
|
||||
#include <seastar/core/sstring.hh>
|
||||
#include <seastar/core/thread.hh>
|
||||
#include <seastar/core/reactor.hh>
|
||||
|
||||
#include <fmt/core.h>
|
||||
#include <random>
|
||||
|
||||
#include "utils/allocation_strategy.hh"
|
||||
#include "utils/logalloc.hh"
|
||||
#include "log.hh"
|
||||
#include "test/perf/perf.hh"
|
||||
#include "test/lib/reader_permit.hh"
|
||||
|
||||
// Payload type used by the allocation benchmark: a fixed header followed by
// `sz` extra bytes that are accounted for but never touched by this class.
class piggie {
public:
    // Records how many extra bytes follow the object in its allocation.
    piggie(size_t sz) noexcept
        : _extra_size{sz}
    {}

    // Total footprint of the object: the header plus the recorded extra bytes.
    // NOTE(review): presumably queried by the LSA allocator to size the
    // object -- confirm against the allocation strategy.
    size_t storage_size() const noexcept {
        return _extra_size + sizeof(*this);
    }

private:
    size_t _extra_size;
};
|
||||
|
||||
// Number of objects allocated per iteration; also the capacity of the
// `objects` array in main().
static constexpr unsigned nr_seq_allocations = 1024;
|
||||
// Number of allocate / destroy / compact rounds performed by main().
static constexpr unsigned nr_iterations = 20000;
|
||||
// Number of distinct extra-payload sizes (0 .. nr_sizes - 1) cycled through
// when allocating.
static constexpr unsigned nr_sizes = 32;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
app_template app;
|
||||
return app.run(argc, argv, [&app] {
|
||||
return seastar::async([&] {
|
||||
logalloc::prime_segment_pool(memory::stats().total_memory(), memory::min_free_memory()).get();
|
||||
logalloc::region reg;
|
||||
|
||||
std::array<piggie*, nr_seq_allocations> objects;
|
||||
std::array<size_t, nr_sizes> sizes;
|
||||
|
||||
std::random_device rd;
|
||||
std::mt19937 g(rd());
|
||||
for (int i = 0; i < nr_sizes; i++) {
|
||||
sizes[i] = i;
|
||||
}
|
||||
|
||||
auto& allocator = reg.allocator();
|
||||
|
||||
std::chrono::duration<double> total;
|
||||
|
||||
for (int iter = 0; iter < nr_iterations; iter++) {
|
||||
std::shuffle(sizes.begin(), sizes.end(), g);
|
||||
|
||||
void* mem = allocator.alloc<piggie>(sizeof(piggie) + sizes[0]);
|
||||
objects[0] = new (mem) piggie(sizes[0]);
|
||||
|
||||
auto start = std::chrono::steady_clock::now();
|
||||
for (int i = 1; i < nr_seq_allocations; i++) {
|
||||
void* mem = allocator.alloc<piggie>(sizeof(piggie) + sizes[i % nr_sizes]);
|
||||
objects[i] = new (mem) piggie(sizes[i % nr_sizes]);
|
||||
}
|
||||
total += std::chrono::steady_clock::now() - start;
|
||||
|
||||
for (int i = 0; i < nr_seq_allocations; i++) {
|
||||
allocator.destroy(objects[i]);
|
||||
}
|
||||
|
||||
reg.full_compaction();
|
||||
}
|
||||
|
||||
fmt::print("Total time: {} s\n", total.count());
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -45,6 +45,7 @@
|
||||
#include "utils/dynamic_bitset.hh"
|
||||
#include "utils/log_heap.hh"
|
||||
#include "utils/preempt.hh"
|
||||
#include "utils/vle.hh"
|
||||
|
||||
#include <random>
|
||||
#include <chrono>
|
||||
@@ -210,6 +211,8 @@ uint32_t
|
||||
migrate_fn_type::register_migrator(migrate_fn_type* m) {
|
||||
auto& migrators = *debug::static_migrators;
|
||||
auto idx = migrators.add(m);
|
||||
// object_descriptor encodes 2 * index() + 1
|
||||
assert(idx * 2 + 1 < utils::uleb64_express_supreme);
|
||||
m->_migrators = migrators.shared_from_this();
|
||||
return idx;
|
||||
}
|
||||
@@ -1150,77 +1153,26 @@ class region_impl final : public basic_region_impl {
|
||||
}
|
||||
|
||||
segment::size_type encoded_size() const {
|
||||
return log2floor(_n) / 6 + 1; // 0 is illegal
|
||||
return utils::uleb64_encoded_size(_n); // 0 is illegal
|
||||
}
|
||||
|
||||
void encode(char*& pos) const {
|
||||
uint64_t b = 64;
|
||||
auto n = _n;
|
||||
auto start = pos;
|
||||
do {
|
||||
b |= n & 63;
|
||||
n >>= 6;
|
||||
if (!n) {
|
||||
b |= 128;
|
||||
}
|
||||
unpoison(pos, 1);
|
||||
*pos++ = b;
|
||||
b = 0;
|
||||
} while (n);
|
||||
poison(start, pos - start);
|
||||
utils::uleb64_encode(pos, _n, poison<char>, unpoison);
|
||||
}
|
||||
|
||||
// non-canonical encoding to allow padding (for alignment); encoded_size must be
|
||||
// sufficient (greater than this->encoded_size())
|
||||
void encode(char*& pos, size_t encoded_size) const {
|
||||
uint64_t b = 64;
|
||||
auto start = pos;
|
||||
unpoison(start, encoded_size);
|
||||
auto n = _n;
|
||||
do {
|
||||
b |= n & 63;
|
||||
n >>= 6;
|
||||
if (!--encoded_size) {
|
||||
b |= 128;
|
||||
}
|
||||
*pos++ = b;
|
||||
b = 0;
|
||||
} while (encoded_size);
|
||||
poison(start, pos - start);
|
||||
// sufficient (greater than this->encoded_size()), _n must be the migrator's
|
||||
// index() (i.e. -- suitable for express encoding)
|
||||
void encode(char*& pos, size_t encoded_size, size_t size) const {
|
||||
utils::uleb64_express_encode(pos, _n, encoded_size, size, poison<char>, unpoison);
|
||||
}
|
||||
|
||||
static object_descriptor decode_forwards(const char*& pos) {
|
||||
unsigned n = 0;
|
||||
unsigned shift = 0;
|
||||
auto p = pos; // avoid aliasing; p++ doesn't touch memory
|
||||
uint8_t b;
|
||||
do {
|
||||
unpoison(p, 1);
|
||||
b = *p++;
|
||||
if (shift < 32) {
|
||||
// non-canonical encoding can cause large shift; undefined in C++
|
||||
n |= uint32_t(b & 63) << shift;
|
||||
}
|
||||
shift += 6;
|
||||
} while ((b & 128) == 0);
|
||||
poison(pos, p - pos);
|
||||
pos = p;
|
||||
return object_descriptor(n);
|
||||
return object_descriptor(utils::uleb64_decode_forwards(pos, poison<char>, unpoison));
|
||||
}
|
||||
|
||||
static object_descriptor decode_backwards(const char*& pos) {
|
||||
unsigned n = 0;
|
||||
uint8_t b;
|
||||
auto p = pos; // avoid aliasing; --p doesn't touch memory
|
||||
do {
|
||||
--p;
|
||||
unpoison(p, 1);
|
||||
b = *p;
|
||||
n = (n << 6) | (b & 63);
|
||||
} while ((b & 64) == 0);
|
||||
poison(p, pos - p);
|
||||
pos = p;
|
||||
return object_descriptor(n);
|
||||
return object_descriptor(utils::uleb64_decode_bacwards(pos, poison<char>, unpoison));
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& out, const object_descriptor& desc) {
|
||||
@@ -1292,7 +1244,7 @@ private:
|
||||
auto old_active_offset = _active_offset;
|
||||
auto pos = _active->at<char>(_active_offset);
|
||||
// Use non-canonical encoding to allow for alignment pad
|
||||
desc.encode(pos, obj_offset - _active_offset);
|
||||
desc.encode(pos, obj_offset - _active_offset, size);
|
||||
unpoison(pos, size);
|
||||
_active_offset = obj_offset + size;
|
||||
|
||||
|
||||
146
utils/vle.hh
Normal file
146
utils/vle.hh
Normal file
@@ -0,0 +1,146 @@
|
||||
/*
|
||||
* Copyright (C) 2021 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <seastar/core/bitops.hh>
|
||||
#include <seastar/core/byteorder.hh>
|
||||
|
||||
namespace utils {
|
||||
|
||||
/*
 * The express encoder below is optimized to encode a value
 * that may only have non-zero bits among its lowest 12 bits,
 * i.e. a value strictly below uleb64_express_supreme
 */
|
||||
static constexpr size_t uleb64_express_bits = 12;
|
||||
static constexpr uint32_t uleb64_express_supreme = 1 << uleb64_express_bits;
|
||||
|
||||
// Returns the number of bytes needed to encode the value
|
||||
// The value cannot be 0 (not checked)
|
||||
static inline size_t uleb64_encoded_size(uint32_t val) noexcept {
|
||||
return seastar::log2floor(val) / 6 + 1;
|
||||
}
|
||||
|
||||
template <typename Poison, typename Unpoison>
|
||||
requires std::is_invocable<Poison, const char*, size_t>::value && std::is_invocable<Unpoison, const char*, size_t>::value
|
||||
static inline void uleb64_encode(char*& pos, uint32_t val, Poison&& poison, Unpoison&& unpoison) noexcept {
|
||||
uint64_t b = 64;
|
||||
auto start = pos;
|
||||
do {
|
||||
b |= val & 63;
|
||||
val >>= 6;
|
||||
if (!val) {
|
||||
b |= 128;
|
||||
}
|
||||
unpoison(pos, 1);
|
||||
*pos++ = b;
|
||||
b = 0;
|
||||
} while (val);
|
||||
poison(start, pos - start);
|
||||
}
|
||||
|
||||
template <typename Poison, typename Unpoison>
|
||||
requires std::is_invocable<Poison, const char*, size_t>::value && std::is_invocable<Unpoison, const char*, size_t>::value
|
||||
static inline void uleb64_encode(char*& pos, uint32_t val, size_t encoded_size, Poison&& poison, Unpoison&& unpoison) noexcept {
|
||||
uint64_t b = 64;
|
||||
auto start = pos;
|
||||
unpoison(start, encoded_size);
|
||||
do {
|
||||
b |= val & 63;
|
||||
val >>= 6;
|
||||
if (!--encoded_size) {
|
||||
b |= 128;
|
||||
}
|
||||
*pos++ = b;
|
||||
b = 0;
|
||||
} while (encoded_size);
|
||||
poison(start, pos - start);
|
||||
}
|
||||
|
||||
#if !defined(SEASTAR_ASAN_ENABLED)
|
||||
static inline void uleb64_express_encode_impl(char*& pos, uint64_t val, size_t size) noexcept {
|
||||
static_assert(uleb64_express_bits == 12);
|
||||
|
||||
if (size > sizeof(uint64_t)) {
|
||||
static uint64_t zero = 0;
|
||||
std::copy_n(reinterpret_cast<char*>(&zero), sizeof(zero), pos + size - sizeof(uint64_t));
|
||||
}
|
||||
seastar::write_le(pos, uint64_t(((val & 0xfc0) << 2) | ((val & 0x3f) | 64)));
|
||||
pos += size;
|
||||
pos[-1] |= 0x80;
|
||||
}
|
||||
|
||||
template <typename Poison, typename Unpoison>
|
||||
requires std::is_invocable<Poison, const char*, size_t>::value && std::is_invocable<Unpoison, const char*, size_t>::value
|
||||
static inline void uleb64_express_encode(char*& pos, uint32_t val, size_t encoded_size, size_t gap, Poison&& poison, Unpoison&& unpoison) noexcept {
|
||||
if (encoded_size + gap > sizeof(uint64_t)) {
|
||||
uleb64_express_encode_impl(pos, val, encoded_size);
|
||||
} else {
|
||||
uleb64_encode(pos, val, encoded_size, poison, unpoison);
|
||||
}
|
||||
}
|
||||
#else
|
||||
template <typename Poison, typename Unpoison>
|
||||
requires std::is_invocable<Poison, const char*, size_t>::value && std::is_invocable<Unpoison, const char*, size_t>::value
|
||||
static inline void uleb64_express_encode(char*& pos, uint32_t val, size_t encoded_size, size_t gap, Poison&& poison, Unpoison&& unpoison) noexcept {
|
||||
uleb64_encode(pos, val, encoded_size, poison, unpoison);
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename Poison, typename Unpoison>
|
||||
requires std::is_invocable<Poison, const char*, size_t>::value && std::is_invocable<Unpoison, const char*, size_t>::value
|
||||
static inline uint32_t uleb64_decode_forwards(const char*& pos, Poison&& poison, Unpoison&& unpoison) noexcept {
|
||||
uint32_t n = 0;
|
||||
unsigned shift = 0;
|
||||
auto p = pos; // avoid aliasing; p++ doesn't touch memory
|
||||
uint8_t b;
|
||||
do {
|
||||
unpoison(p, 1);
|
||||
b = *p++;
|
||||
if (shift < 32) {
|
||||
// non-canonical encoding can cause large shift; undefined in C++
|
||||
n |= uint32_t(b & 63) << shift;
|
||||
}
|
||||
shift += 6;
|
||||
} while ((b & 128) == 0);
|
||||
poison(pos, p - pos);
|
||||
pos = p;
|
||||
return n;
|
||||
}
|
||||
|
||||
template <typename Poison, typename Unpoison>
|
||||
requires std::is_invocable<Poison, const char*, size_t>::value && std::is_invocable<Unpoison, const char*, size_t>::value
|
||||
static inline uint32_t uleb64_decode_bacwards(const char*& pos, Poison&& poison, Unpoison&& unpoison) noexcept {
|
||||
uint32_t n = 0;
|
||||
uint8_t b;
|
||||
auto p = pos; // avoid aliasing; --p doesn't touch memory
|
||||
do {
|
||||
--p;
|
||||
unpoison(p, 1);
|
||||
b = *p;
|
||||
n = (n << 6) | (b & 63);
|
||||
} while ((b & 64) == 0);
|
||||
poison(p, pos - p);
|
||||
pos = p;
|
||||
return n;
|
||||
}
|
||||
|
||||
} // namespace utils
|
||||
Reference in New Issue
Block a user