Files
scylla/utils/i_filter.hh
Michał Chojnowski c549afa1a9 utils/bloom_filter: add add(const hashed_key&)
In one of the next patches, we will want to use (in BTI partition
index writer) the same hash as used by the bloom filter,
and we'll also want to allow rebuilding the filter in a second
pass (after the whole sstable is written) from hashes (as opposed
to rebuilding from partition keys saved in Index.db, which is
something we sometimes do today) saved to a temporary file.

For those, we need an interface that allows us to compute the hash
externally, and only pass the hash to `add()`.
2025-09-29 13:01:21 +02:00

64 lines
1.6 KiB
C++

/*
* Copyright (C) 2015-present ScyllaDB
*
* Modified by ScyllaDB
*/
/*
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
*/
#pragma once
#include <memory>
#include "bytes_fwd.hh"
namespace utils {
struct i_filter;
using filter_ptr = std::unique_ptr<i_filter>;
enum class filter_format {
k_l_format,
m_format,
};
class hashed_key {
private:
std::array<uint64_t, 2> _hash;
public:
hashed_key(std::array<uint64_t, 2> h) : _hash(h) {}
std::array<uint64_t, 2> hash() const { return _hash; };
};
hashed_key make_hashed_key(bytes_view key);
// FIXME: serialize() and serialized_size() not implemented. We should only be serializing to
// disk, not in the wire.
struct i_filter {
virtual ~i_filter() {}
virtual void add(const bytes_view& key) = 0;
virtual void add(const hashed_key& key) = 0;
virtual bool is_present(const bytes_view& key) = 0;
virtual bool is_present(hashed_key) = 0;
virtual void clear() = 0;
virtual void close() = 0;
virtual size_t memory_size() = 0;
/**
* @return The smallest bloom_filter that can provide the given false
* positive probability rate for the given number of elements.
*
* Asserts that the given probability can be satisfied using this
* filter.
*/
static filter_ptr get_filter(int64_t num_elements, double max_false_pos_prob, filter_format format);
/**
* @return the size of the smallest filter (in bytes), according to the conditions described at get_filter()
*/
static size_t get_filter_size(int64_t num_elements, double max_false_pos_prob);
};
}