digest: add null values to row digest
With the new hashing routine, null values are taken into account when computing row digest. Previous behavior had a regression which stopped computing the hash after the first null value is encountered, but the original behavior was also prone to errors - e.g. row [1, NULL, 2] was not distinguishable from [1, 2, NULL], because their hashes were identical. This hashing is not yet active - it will only be used after the next commit introduces a proper cluster feature for it.
This commit is contained in:
@@ -28,7 +28,8 @@ namespace query {
|
||||
enum class digest_algorithm : uint8_t {
|
||||
none = 0, // digest not required
|
||||
MD5 = 1,
|
||||
xxHash = 2,// default algorithm
|
||||
legacy_xxHash_without_null_digest = 2,
|
||||
xxHash = 3, // default algorithm
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -36,7 +36,7 @@ struct noop_hasher {
|
||||
};
|
||||
|
||||
class digester final {
|
||||
std::variant<noop_hasher, md5_hasher, xx_hasher> _impl;
|
||||
std::variant<noop_hasher, md5_hasher, xx_hasher, legacy_xx_hasher_without_null_digest> _impl;
|
||||
|
||||
public:
|
||||
explicit digester(digest_algorithm algo) {
|
||||
@@ -47,6 +47,9 @@ public:
|
||||
case digest_algorithm::xxHash:
|
||||
_impl = xx_hasher();
|
||||
break;
|
||||
case digest_algorithm::legacy_xxHash_without_null_digest:
|
||||
_impl = legacy_xx_hasher_without_null_digest();
|
||||
break;
|
||||
case digest_algorithm ::none:
|
||||
_impl = noop_hasher();
|
||||
break;
|
||||
|
||||
@@ -733,7 +733,8 @@ void appending_hash<row>::operator()(Hasher& h, const row& cells, const schema&
|
||||
for (auto id : columns) {
|
||||
const cell_and_hash* cell_and_hash = cells.find_cell_and_hash(id);
|
||||
if (!cell_and_hash) {
|
||||
return;
|
||||
feed_hash(h, appending_hash<row>::null_hash_value);
|
||||
continue;
|
||||
}
|
||||
auto&& def = s.column_at(kind, id);
|
||||
if (def.is_atomic()) {
|
||||
@@ -766,6 +767,39 @@ void appending_hash<row>::operator()(Hasher& h, const row& cells, const schema&
|
||||
}
|
||||
}
|
||||
}
|
||||
// Instantiation for mutation_test.cc
|
||||
template void appending_hash<row>::operator()<xx_hasher>(xx_hasher& h, const row& cells, const schema& s, column_kind kind, const query::column_id_vector& columns, max_timestamp& max_ts) const;
|
||||
|
||||
template<>
|
||||
void appending_hash<row>::operator()<legacy_xx_hasher_without_null_digest>(legacy_xx_hasher_without_null_digest& h, const row& cells, const schema& s, column_kind kind, const query::column_id_vector& columns, max_timestamp& max_ts) const {
|
||||
for (auto id : columns) {
|
||||
const cell_and_hash* cell_and_hash = cells.find_cell_and_hash(id);
|
||||
if (!cell_and_hash) {
|
||||
return;
|
||||
}
|
||||
auto&& def = s.column_at(kind, id);
|
||||
if (def.is_atomic()) {
|
||||
max_ts.update(cell_and_hash->cell.as_atomic_cell(def).timestamp());
|
||||
if (cell_and_hash->hash) {
|
||||
feed_hash(h, *cell_and_hash->hash);
|
||||
} else {
|
||||
query::default_hasher cellh;
|
||||
feed_hash(cellh, cell_and_hash->cell.as_atomic_cell(def), def);
|
||||
feed_hash(h, cellh.finalize_uint64());
|
||||
}
|
||||
} else {
|
||||
auto cm = cell_and_hash->cell.as_collection_mutation();
|
||||
max_ts.update(cm.last_update(*def.type));
|
||||
if (cell_and_hash->hash) {
|
||||
feed_hash(h, *cell_and_hash->hash);
|
||||
} else {
|
||||
query::default_hasher cellh;
|
||||
feed_hash(cellh, cm, def);
|
||||
feed_hash(h, cellh.finalize_uint64());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cell_hash_opt row::cell_hash_for(column_id id) const {
|
||||
if (_type == storage_type::vector) {
|
||||
|
||||
@@ -659,6 +659,7 @@ struct max_timestamp {
|
||||
|
||||
template<>
|
||||
struct appending_hash<row> {
|
||||
static constexpr int null_hash_value = 0xbeefcafe;
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const row& cells, const schema& s, column_kind kind, const query::column_id_vector& columns, max_timestamp& max_ts) const;
|
||||
};
|
||||
|
||||
@@ -125,7 +125,7 @@ using fbu = utils::fb_utilities;
|
||||
static inline
|
||||
query::digest_algorithm digest_algorithm(service::storage_proxy& proxy) {
|
||||
return proxy.features().cluster_supports_xxhash_digest_algorithm()
|
||||
? query::digest_algorithm::xxHash
|
||||
? query::digest_algorithm::legacy_xxHash_without_null_digest
|
||||
: query::digest_algorithm::MD5;
|
||||
}
|
||||
|
||||
|
||||
@@ -67,3 +67,10 @@ private:
|
||||
serialize_int64(out, finalize_uint64());
|
||||
}
|
||||
};
|
||||
|
||||
// Used to specialize templates in order to fix a bug
|
||||
// in handling null values: #4567
|
||||
class legacy_xx_hasher_without_null_digest : public xx_hasher {
|
||||
public:
|
||||
explicit legacy_xx_hasher_without_null_digest(uint64_t seed = 0) noexcept : xx_hasher(seed) {}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user