utils: Use dedicated enum for Bloom filter format instead of a boolean.
Using an enum better reflects the purpose of the parameter and provides better type safety.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
Message-Id: <10a4fc16dafa0fb3234969041f68f9e7bfc61312.1525899669.git.vladimir@scylladb.com>
This commit is contained in:
committed by
Avi Kivity
parent
76c64e1f26
commit
e5477c6c6c
@@ -1413,7 +1413,10 @@ future<> sstable::read_filter(const io_priority_class& pc) {
|
||||
read_simple<component_type::Filter>(filter, pc).get();
|
||||
auto nr_bits = filter.buckets.elements.size() * std::numeric_limits<typename decltype(filter.buckets.elements)::value_type>::digits;
|
||||
large_bitset bs(nr_bits, std::move(filter.buckets.elements));
|
||||
_components->filter = utils::filter::create_filter(filter.hashes, std::move(bs), (_version != sstable_version_types::mc));
|
||||
utils::filter_format format = (_version == sstable_version_types::mc)
|
||||
? utils::filter_format::m_format
|
||||
: utils::filter_format::k_l_format;
|
||||
_components->filter = utils::filter::create_filter(filter.hashes, std::move(bs), format);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -2177,7 +2180,7 @@ components_writer::components_writer(sstable& sst, const schema& s, file_writer&
|
||||
, _range_tombstones(s)
|
||||
, _large_partition_handler(cfg.large_partition_handler)
|
||||
{
|
||||
_sst._components->filter = utils::i_filter::get_filter(estimated_partitions, _schema.bloom_filter_fp_chance(), true);
|
||||
_sst._components->filter = utils::i_filter::get_filter(estimated_partitions, _schema.bloom_filter_fp_chance(), utils::filter_format::k_l_format);
|
||||
_sst._pi_write.desired_block_size = cfg.promoted_index_block_size.value_or(get_config().column_index_size_in_kb() * 1024);
|
||||
_sst._correctly_serialize_non_compound_range_tombstones = cfg.correctly_serialize_non_compound_range_tombstones;
|
||||
_index_sampling_state.summary_byte_cost = summary_byte_cost();
|
||||
@@ -2685,7 +2688,7 @@ public:
|
||||
_sst._shards = { shard };
|
||||
|
||||
_cfg.monitor->on_write_started(_data_writer->offset_tracker());
|
||||
_sst._components->filter = utils::i_filter::get_filter(estimated_partitions, _schema.bloom_filter_fp_chance(), false);
|
||||
_sst._components->filter = utils::i_filter::get_filter(estimated_partitions, _schema.bloom_filter_fp_chance(), utils::filter_format::m_format);
|
||||
_pi_write_m.desired_block_size = cfg.promoted_index_block_size.value_or(get_config().column_index_size_in_kb() * 1024);
|
||||
_sst._correctly_serialize_non_compound_range_tombstones = _cfg.correctly_serialize_non_compound_range_tombstones;
|
||||
_index_sampling_state.summary_byte_cost = summary_byte_cost();
|
||||
|
||||
@@ -53,10 +53,10 @@ namespace utils {
|
||||
namespace filter {
|
||||
|
||||
template<typename Func>
|
||||
void for_each_index(hashed_key hk, int count, int64_t max, bool old_bf_hash_order, Func&& func) {
|
||||
void for_each_index(hashed_key hk, int count, int64_t max, filter_format format, Func&& func) {
|
||||
auto h = hk.hash();
|
||||
int64_t base = old_bf_hash_order ? h[0] : h[1];
|
||||
int64_t inc = old_bf_hash_order ? h[1] : h[0];
|
||||
int64_t base = (format == filter_format::k_l_format) ? h[0] : h[1];
|
||||
int64_t inc = (format == filter_format::k_l_format) ? h[1] : h[0];
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (func(std::abs(base % max)) == stop_iteration::yes) {
|
||||
break;
|
||||
@@ -67,7 +67,7 @@ void for_each_index(hashed_key hk, int count, int64_t max, bool old_bf_hash_orde
|
||||
|
||||
bool bloom_filter::is_present(hashed_key key) {
|
||||
bool result = true;
|
||||
for_each_index(key, _hash_count, _bitset.size(), _old_bf_hash_order, [this, &result] (auto i) {
|
||||
for_each_index(key, _hash_count, _bitset.size(), _format, [this, &result] (auto i) {
|
||||
if (!_bitset.test(i)) {
|
||||
result = false;
|
||||
return stop_iteration::yes;
|
||||
@@ -78,7 +78,7 @@ bool bloom_filter::is_present(hashed_key key) {
|
||||
}
|
||||
|
||||
void bloom_filter::add(const bytes_view& key) {
|
||||
for_each_index(make_hashed_key(key), _hash_count, _bitset.size(), _old_bf_hash_order, [this] (auto i) {
|
||||
for_each_index(make_hashed_key(key), _hash_count, _bitset.size(), _format, [this] (auto i) {
|
||||
_bitset.set(i);
|
||||
return stop_iteration::no;
|
||||
});
|
||||
@@ -88,15 +88,15 @@ bool bloom_filter::is_present(const bytes_view& key) {
|
||||
return is_present(make_hashed_key(key));
|
||||
}
|
||||
|
||||
filter_ptr create_filter(int hash, large_bitset&& bitset, bool old_bf_hash_order) {
|
||||
return std::make_unique<murmur3_bloom_filter>(hash, std::move(bitset), old_bf_hash_order);
|
||||
filter_ptr create_filter(int hash, large_bitset&& bitset, filter_format format) {
|
||||
return std::make_unique<murmur3_bloom_filter>(hash, std::move(bitset), format);
|
||||
}
|
||||
|
||||
filter_ptr create_filter(int hash, int64_t num_elements, int buckets_per, bool old_bf_hash_order) {
|
||||
filter_ptr create_filter(int hash, int64_t num_elements, int buckets_per, filter_format format) {
|
||||
int64_t num_bits = (num_elements * buckets_per) + bloom_calculations::EXCESS;
|
||||
num_bits = align_up<int64_t>(num_bits, 64); // Seems to be implied in origin
|
||||
large_bitset bitset(num_bits);
|
||||
return std::make_unique<murmur3_bloom_filter>(hash, std::move(bitset), old_bf_hash_order);
|
||||
return std::make_unique<murmur3_bloom_filter>(hash, std::move(bitset), format);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -58,15 +58,15 @@ public:
|
||||
private:
|
||||
bitmap _bitset;
|
||||
int _hash_count;
|
||||
bool _old_bf_hash_order;
|
||||
filter_format _format;
|
||||
public:
|
||||
int num_hashes() { return _hash_count; }
|
||||
bitmap& bits() { return _bitset; }
|
||||
|
||||
bloom_filter(int hashes, bitmap&& bs, bool old_bf_hash_order)
|
||||
bloom_filter(int hashes, bitmap&& bs, filter_format format)
|
||||
: _bitset(std::move(bs))
|
||||
, _hash_count(hashes)
|
||||
, _old_bf_hash_order(old_bf_hash_order)
|
||||
, _format(format)
|
||||
{}
|
||||
|
||||
virtual void add(const bytes_view& key) override;
|
||||
@@ -88,8 +88,8 @@ public:
|
||||
|
||||
struct murmur3_bloom_filter: public bloom_filter {
|
||||
|
||||
murmur3_bloom_filter(int hashes, bitmap&& bs, bool old_bf_hash_order)
|
||||
: bloom_filter(hashes, std::move(bs), old_bf_hash_order)
|
||||
murmur3_bloom_filter(int hashes, bitmap&& bs, filter_format format)
|
||||
: bloom_filter(hashes, std::move(bs), format)
|
||||
{}
|
||||
};
|
||||
|
||||
@@ -114,7 +114,7 @@ struct always_present_filter: public i_filter {
|
||||
}
|
||||
};
|
||||
|
||||
filter_ptr create_filter(int hash, large_bitset&& bitset, bool old_bf_hash_order);
|
||||
filter_ptr create_filter(int hash, int64_t num_elements, int buckets_per, bool old_bf_hash_order);
|
||||
filter_ptr create_filter(int hash, large_bitset&& bitset, filter_format format);
|
||||
filter_ptr create_filter(int hash, int64_t num_elements, int buckets_per, filter_format format);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
namespace utils {
|
||||
static logging::logger filterlog("bloom_filter");
|
||||
|
||||
filter_ptr i_filter::get_filter(int64_t num_elements, double max_false_pos_probability, bool old_bf_hash_order) {
|
||||
filter_ptr i_filter::get_filter(int64_t num_elements, double max_false_pos_probability, filter_format format) {
|
||||
assert(seastar::thread::running_in_thread());
|
||||
|
||||
if (max_false_pos_probability > 1.0) {
|
||||
@@ -41,7 +41,7 @@ filter_ptr i_filter::get_filter(int64_t num_elements, double max_false_pos_proba
|
||||
|
||||
int buckets_per_element = bloom_calculations::max_buckets_per_element(num_elements);
|
||||
auto spec = bloom_calculations::compute_bloom_spec(buckets_per_element, max_false_pos_probability);
|
||||
return filter::create_filter(spec.K, num_elements, spec.buckets_per_element, old_bf_hash_order);
|
||||
return filter::create_filter(spec.K, num_elements, spec.buckets_per_element, format);
|
||||
}
|
||||
|
||||
hashed_key make_hashed_key(bytes_view b) {
|
||||
|
||||
@@ -48,6 +48,11 @@ namespace utils {
|
||||
struct i_filter;
|
||||
using filter_ptr = std::unique_ptr<i_filter>;
|
||||
|
||||
// Identifies which Bloom filter format a filter uses. The value is passed to
// i_filter::get_filter() / filter::create_filter() and stored in the filter;
// for_each_index() consults it to decide which half of the two-part key hash
// is the base index and which is the per-iteration increment.
enum class filter_format {
|
||||
// Selected when the sstable version is not sstable_version_types::mc.
// for_each_index() takes h[0] as the base and h[1] as the increment.
k_l_format,
|
||||
// Selected when the sstable version is sstable_version_types::mc.
// for_each_index() takes h[1] as the base and h[0] as the increment,
// i.e. the hash halves are swapped relative to k_l_format.
m_format,
|
||||
};
|
||||
|
||||
class hashed_key {
|
||||
private:
|
||||
std::array<uint64_t, 2> _hash;
|
||||
@@ -78,6 +83,6 @@ struct i_filter {
|
||||
* Asserts that the given probability can be satisfied using this
|
||||
* filter.
|
||||
*/
|
||||
static filter_ptr get_filter(int64_t num_elements, double max_false_pos_prob, bool old_bf_hash_order);
|
||||
static filter_ptr get_filter(int64_t num_elements, double max_false_pos_prob, filter_format format);
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user