cql3: expr: add optimizer for LIKE with constant pattern
Compiling a pattern is expensive and so we should try to do it at prepare time, if the pattern is a constant. Add an optimizer that looks for such cases and replaces them with a unary function that embeds the compiled pattern. This isn't integrated yet with prepare_expr(), since the filtering code isn't ready for generic expressions. Its first user will be LWT, which contains the optimization already (filtering had it as well, but lost it sometime during the expression rewrite). A unit test is added.
This commit is contained in:
@@ -700,6 +700,9 @@ std::optional<expression> try_prepare_expression(const expression& expr, data_di
|
||||
// Does some basic type checks but no advanced validation.
|
||||
extern binary_operator prepare_binary_operator(binary_operator binop, data_dictionary::database db, const schema& table_schema);
|
||||
|
||||
// Pre-compile any constant LIKE patterns and return an equivalent expression.
|
||||
expression optimize_like(const expression& e);
|
||||
|
||||
|
||||
/**
|
||||
* @return whether this object can be assigned to the provided receiver. We distinguish
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#include "types/map.hh"
|
||||
#include "types/user.hh"
|
||||
#include "exceptions/unrecognized_entity_exception.hh"
|
||||
#include "utils/like_matcher.hh"
|
||||
|
||||
#include <boost/range/algorithm/count.hpp>
|
||||
|
||||
@@ -1242,6 +1243,86 @@ static lw_shared_ptr<column_specification> get_rhs_receiver(lw_shared_ptr<column
|
||||
}
|
||||
}
|
||||
|
||||
class like_constant_function : public cql3::functions::scalar_function {
|
||||
functions::function_name _name;
|
||||
like_matcher _matcher;
|
||||
std::vector<data_type> _lhs_types;
|
||||
public:
|
||||
like_constant_function(data_type arg_type, bytes_view pattern)
|
||||
: _name("system", fmt::format("like({})",
|
||||
std::string_view(reinterpret_cast<const char*>(pattern.data()), pattern.size())))
|
||||
, _matcher(pattern) {
|
||||
_lhs_types.push_back(std::move(arg_type));
|
||||
}
|
||||
|
||||
virtual const functions::function_name& name() const override {
|
||||
return _name;
|
||||
}
|
||||
|
||||
virtual const std::vector<data_type>& arg_types() const override {
|
||||
return _lhs_types;
|
||||
}
|
||||
|
||||
virtual const data_type& return_type() const override {
|
||||
return boolean_type;
|
||||
}
|
||||
|
||||
virtual bool is_pure() const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool is_native() const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool requires_thread() const override {
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual bool is_aggregate() const override {
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual void print(std::ostream& os) const override {
|
||||
os << "LIKE(compiled)";
|
||||
}
|
||||
|
||||
virtual sstring column_name(const std::vector<sstring>& column_names) const override {
|
||||
return "LIKE";
|
||||
}
|
||||
|
||||
virtual bytes_opt execute(const std::vector<bytes_opt>& parameters) override {
|
||||
auto& str_opt = parameters[0];
|
||||
if (!str_opt) {
|
||||
return std::nullopt;
|
||||
}
|
||||
bool match_result = _matcher(*str_opt);
|
||||
return data_value(match_result).serialize();
|
||||
}
|
||||
};
|
||||
|
||||
// Returns a copy of `e` in which every `<lhs> LIKE <constant>` whose constant
// is a non-null utf8/ascii value has been replaced by a call to a
// like_constant_function carrying the already-compiled pattern. Any other
// subexpression is left for search_and_replace() to handle unchanged.
expression
optimize_like(const expression& e) {
    return search_and_replace(e, [] (const expression& candidate) -> std::optional<expression> {
        // Only LIKE binary operators are of interest.
        const auto* like_op = as_if<binary_operator>(&candidate);
        if (!like_op || like_op->op != oper_t::LIKE) {
            return std::nullopt;
        }

        // The pattern must already be a constant; bind variables and other
        // expressions cannot be compiled ahead of time.
        const auto* pattern_const = as_if<constant>(&like_op->rhs);
        if (!pattern_const) {
            return std::nullopt;
        }

        const bool is_text_pattern = type_of(*pattern_const) == utf8_type || type_of(*pattern_const) == ascii_type;
        if (!is_text_pattern || pattern_const->is_null()) {
            return std::nullopt;
        }

        // Compile the pattern now and wrap it in an anonymous function that
        // takes the original left-hand side as its only argument.
        auto pattern_bytes = to_bytes(pattern_const->value.view());
        auto compiled_like = ::make_shared<like_constant_function>(type_of(like_op->lhs), pattern_bytes);
        std::vector<expression> call_args;
        call_args.push_back(like_op->lhs);
        return function_call{std::move(compiled_like), std::move(call_args)};
    });
}
|
||||
|
||||
binary_operator prepare_binary_operator(binary_operator binop, data_dictionary::database db, const schema& table_schema) {
|
||||
std::optional<expression> prepared_lhs_opt = try_prepare_expression(binop.lhs, db, table_schema.ks_name(), &table_schema, {});
|
||||
if (!prepared_lhs_opt) {
|
||||
|
||||
@@ -4040,4 +4040,36 @@ BOOST_AUTO_TEST_CASE(prepare_binary_operator_with_null_rhs) {
|
||||
table_schema);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks that optimize_like() rewrites LIKE only when the pattern is a
// constant, and that the rewritten expression evaluates to the same value as
// the original one.
BOOST_AUTO_TEST_CASE(optimized_constant_like) {
    // Runs optimize_like() on `e` and returns true when both hold:
    //  * the presence of a remaining binary_operator agrees with
    //    `expect_optimization` (none left means "optimized"), and
    //  * original and rewritten expressions evaluate equally for the bind
    //    values [target, pattern_arg (if given)]; a missing target is bound
    //    as null.
    auto check = [] (expression e, std::optional<sstring> target, bool expect_optimization, std::optional<sstring> pattern_arg = {}) {
        auto rewritten = optimize_like(e);

        auto accept_any_binop = [] (const binary_operator&) { return true; };
        const bool binop_remains = find_binop(rewritten, accept_any_binop) != nullptr;
        if (binop_remains == expect_optimization) {
            return false;
        }

        auto bind_values = std::vector({target ? make_text_raw(*target) : raw_value::make_null()});
        if (pattern_arg) {
            bind_values.push_back(make_text_raw(*pattern_arg));
        }

        auto rewritten_result = evaluate_with_bind_variables(rewritten, bind_values);
        auto original_result = evaluate_with_bind_variables(e, bind_values);
        return rewritten_result == original_result;
    };

    auto target_var = make_bind_variable(0, utf8_type);
    auto pattern_var = make_bind_variable(1, utf8_type);

    // Constant pattern: expected to be optimized (matching, non-matching, null target).
    BOOST_REQUIRE(check(binary_operator(target_var, oper_t::LIKE, make_text_const("xx%")), "xxyyz", true));
    BOOST_REQUIRE(check(binary_operator(target_var, oper_t::LIKE, make_text_const("xx%")), "qxyyz", true));
    BOOST_REQUIRE(check(binary_operator(target_var, oper_t::LIKE, make_text_const("xx%")), std::nullopt, true));

    // Pattern supplied as a bind variable: expected to be left alone.
    BOOST_REQUIRE(check(binary_operator(target_var, oper_t::LIKE, pattern_var), "xxyyz", false, "xx%"));
    BOOST_REQUIRE(check(binary_operator(target_var, oper_t::LIKE, pattern_var), "qxyyz", false, "xx%"));
    BOOST_REQUIRE(check(binary_operator(target_var, oper_t::LIKE, pattern_var), std::nullopt, false, "xx%"));

    // Verify that optimization works for subexpressions, not just top-level expressions
    auto complex = make_conjunction(
            binary_operator(target_var, oper_t::LIKE, make_text_const("xx%")),
            // repeated for simplicity
            binary_operator(target_var, oper_t::LIKE, make_text_const("xx%")));
    BOOST_REQUIRE(check(std::move(complex), "xxyyz", true));
}
|
||||
|
||||
Reference in New Issue
Block a user