vint: optimise deserialisation routine
At the moment, vint deserialisation uses a naive approach, reading each byte separately. In practice, vints will most often appear inside larger buffers. That means we can read 8 bytes at a time and then figure out the unneeded parts and mask them out. This way we avoid a loop and do fewer memory loads, which are much more expensive than arithmetic operations (even if they hit the cache).
This commit is contained in:
@@ -136,7 +136,9 @@ vint_size_type unsigned_vint::serialized_size(uint64_t value) noexcept {
|
||||
}
|
||||
|
||||
uint64_t unsigned_vint::deserialize(bytes_view v) {
|
||||
const int8_t first_byte = v[0];
|
||||
auto src = v.data();
|
||||
auto len = v.size();
|
||||
const int8_t first_byte = *src;
|
||||
|
||||
// No additional bytes, since the most significant bit is not set.
|
||||
if (first_byte >= 0) {
|
||||
@@ -148,11 +150,25 @@ uint64_t unsigned_vint::deserialize(bytes_view v) {
|
||||
// Extract the bits not used for counting bytes.
|
||||
auto result = uint64_t(first_byte) & first_byte_value_mask(extra_bytes_size);
|
||||
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
uint64_t value;
|
||||
// If we can overread do that. It is cheaper to have a single 64-bit read and
|
||||
// then mask out the unneeded part than to do 8x 1 byte reads.
|
||||
if (__builtin_expect(len >= sizeof(uint64_t) + 1, true)) {
|
||||
std::copy_n(src + 1, sizeof(uint64_t), reinterpret_cast<int8_t*>(&value));
|
||||
} else {
|
||||
value = 0;
|
||||
std::copy_n(src + 1, extra_bytes_size, reinterpret_cast<int8_t*>(&value));
|
||||
}
|
||||
value = be_to_cpu(value << (64 - (extra_bytes_size * 8)));
|
||||
result <<= (extra_bytes_size * 8) % 64;
|
||||
result |= value;
|
||||
#else
|
||||
for (vint_size_type index = 0; index < extra_bytes_size; ++index) {
|
||||
result <<= 8;
|
||||
result |= (uint64_t(v[index + 1]) & uint64_t(0xff));
|
||||
}
|
||||
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user