cql3: expr: add optimizer for LIKE with constant pattern

Compiling a LIKE pattern is expensive, so when the pattern is a
constant we should compile it once, at prepare time. Add an optimizer
that looks for such cases and replaces each of them with a unary
function that embeds the compiled pattern.

This isn't integrated yet with prepare_expr(), since the filtering
code isn't ready for generic expressions. Its first user will be LWT,
which contains the optimization already (filtering had it as well,
but lost it sometime during the expression rewrite).

A unit test is added.
This commit is contained in:
Avi Kivity
2022-12-15 12:18:39 +02:00
parent 1959f9937c
commit db2fa44a9a
3 changed files with 117 additions and 1 deletions

View File

@@ -700,6 +700,9 @@ std::optional<expression> try_prepare_expression(const expression& expr, data_di
// Does some basic type checks but no advanced validation.
extern binary_operator prepare_binary_operator(binary_operator binop, data_dictionary::database db, const schema& table_schema);
// Pre-compile any constant LIKE patterns and return the equivalent expression.
// A LIKE whose right-hand side is a non-null utf8/ascii constant is replaced by
// a call to an internal one-argument function that holds the compiled pattern
// and is applied to the original left-hand side. LIKE operators with any other
// right-hand side (e.g. a bind variable) are not rewritten.
expression optimize_like(const expression& e);
/**
* @return whether this object can be assigned to the provided receiver. We distinguish

View File

@@ -19,6 +19,7 @@
#include "types/map.hh"
#include "types/user.hh"
#include "exceptions/unrecognized_entity_exception.hh"
#include "utils/like_matcher.hh"
#include <boost/range/algorithm/count.hpp>
@@ -1242,6 +1243,86 @@ static lw_shared_ptr<column_specification> get_rhs_receiver(lw_shared_ptr<column
}
}
// Scalar function of one argument that matches that argument against a LIKE
// pattern compiled once, at construction time. It is what optimize_like()
// substitutes for a LIKE operator whose pattern is a constant.
class like_constant_function : public cql3::functions::scalar_function {
    functions::function_name _name;
    like_matcher _matcher;              // the pre-compiled pattern
    std::vector<data_type> _lhs_types;  // exactly one entry: the type of the matched value

    // Builds the function name, which embeds the pattern text: system / "like(<pattern>)".
    static functions::function_name make_name(bytes_view pattern) {
        const auto* chars = reinterpret_cast<const char*>(pattern.data());
        return functions::function_name("system", fmt::format("like({})",
                std::string_view(chars, pattern.size())));
    }
public:
    // arg_type: type of the value that will be matched (the LIKE left-hand side).
    // pattern:  the raw bytes of the LIKE pattern; compiled here.
    like_constant_function(data_type arg_type, bytes_view pattern)
            : _name(make_name(pattern))
            , _matcher(pattern) {
        _lhs_types.emplace_back(std::move(arg_type));
    }

    const functions::function_name& name() const override {
        return _name;
    }

    const std::vector<data_type>& arg_types() const override {
        return _lhs_types;
    }

    // The result of a match is always a boolean.
    const data_type& return_type() const override {
        return boolean_type;
    }

    bool is_pure() const override {
        return true;
    }

    bool is_native() const override {
        return true;
    }

    bool requires_thread() const override {
        return false;
    }

    bool is_aggregate() const override {
        return false;
    }

    void print(std::ostream& os) const override {
        os << "LIKE(compiled)";
    }

    sstring column_name(const std::vector<sstring>& /*column_names*/) const override {
        return "LIKE";
    }

    // Matches the single argument against the compiled pattern.
    // A null argument yields a null result; otherwise the serialized boolean
    // match result is returned.
    bytes_opt execute(const std::vector<bytes_opt>& parameters) override {
        const bytes_opt& target = parameters.front();
        if (!target.has_value()) {
            return std::nullopt;
        }
        const bool matched = _matcher(*target);
        return data_value(matched).serialize();
    }
};
// Returns the result of running search_and_replace() over `e` with a rule that
// rewrites `lhs LIKE <constant pattern>` into a call to a like_constant_function
// holding the compiled pattern. The rule applies only when the right-hand side
// is a constant of utf8 or ascii type and is not null.
expression
optimize_like(const expression& e) {
    return search_and_replace(e, [] (const expression& candidate) -> std::optional<expression> {
        // Only LIKE binary operators are of interest.
        const auto* like_op = as_if<binary_operator>(&candidate);
        if (!like_op || like_op->op != oper_t::LIKE) {
            return std::nullopt;
        }

        // The pattern must be known now, i.e. be a constant.
        const auto* pattern_const = as_if<constant>(&like_op->rhs);
        if (!pattern_const) {
            return std::nullopt;
        }

        // ...and it must be a non-null string.
        const bool is_string_pattern = type_of(*pattern_const) == utf8_type
                || type_of(*pattern_const) == ascii_type;
        if (!is_string_pattern || pattern_const->is_null()) {
            return std::nullopt;
        }

        // Compile the pattern once (inside the function object) and apply the
        // resulting anonymous function to the original left-hand side.
        auto pattern_bytes = to_bytes(pattern_const->value.view());
        auto matcher_fn = ::make_shared<like_constant_function>(type_of(like_op->lhs), pattern_bytes);
        std::vector<expression> call_args;
        call_args.push_back(like_op->lhs);
        return function_call{std::move(matcher_fn), std::move(call_args)};
    });
}
binary_operator prepare_binary_operator(binary_operator binop, data_dictionary::database db, const schema& table_schema) {
std::optional<expression> prepared_lhs_opt = try_prepare_expression(binop.lhs, db, table_schema.ks_name(), &table_schema, {});
if (!prepared_lhs_opt) {

View File

@@ -4040,4 +4040,36 @@ BOOST_AUTO_TEST_CASE(prepare_binary_operator_with_null_rhs) {
table_schema);
}
}
}
}
// Checks that optimize_like() rewrites LIKE only when the pattern is a constant,
// and that the rewritten expression evaluates to the same value as the original.
BOOST_AUTO_TEST_CASE(optimized_constant_like) {
    // Runs optimize_like() on `original` and returns true iff
    //  - a rewrite happened exactly when `expect_optimization` says so (a rewrite
    //    is detected by no binary_operator remaining in the result), and
    //  - original and result evaluate equal with bind variable 0 = `target`
    //    (null when absent) and, if given, bind variable 1 = `pattern_arg`.
    auto same_after_optimization = [] (expression original, std::optional<sstring> target,
                                       bool expect_optimization, std::optional<sstring> pattern_arg = {}) {
        auto rewritten = optimize_like(original);

        const bool no_binop_left = find_binop(rewritten, [] (const binary_operator&) { return true; }) == nullptr;
        if (no_binop_left != expect_optimization) {
            return false;
        }

        auto bind_values = std::vector({target ? make_text_raw(*target) : raw_value::make_null()});
        if (pattern_arg) {
            bind_values.push_back(make_text_raw(*pattern_arg));
        }

        return evaluate_with_bind_variables(rewritten, bind_values) == evaluate_with_bind_variables(original, bind_values);
    };

    auto target_var = make_bind_variable(0, utf8_type);
    auto pattern_var = make_bind_variable(1, utf8_type);

    // Constant pattern: must be optimized.
    auto like_constant = binary_operator(target_var, oper_t::LIKE, make_text_const("xx%"));
    BOOST_REQUIRE(same_after_optimization(like_constant, "xxyyz", true));
    BOOST_REQUIRE(same_after_optimization(like_constant, "qxyyz", true));
    BOOST_REQUIRE(same_after_optimization(like_constant, std::nullopt, true));

    // Pattern supplied as a bind variable: must be left alone.
    auto like_bound = binary_operator(target_var, oper_t::LIKE, pattern_var);
    BOOST_REQUIRE(same_after_optimization(like_bound, "xxyyz", false, "xx%"));
    BOOST_REQUIRE(same_after_optimization(like_bound, "qxyyz", false, "xx%"));
    BOOST_REQUIRE(same_after_optimization(like_bound, std::nullopt, false, "xx%"));

    // Verify that optimization works for subexpressions, not just top-level expressions
    // (the same LIKE is used on both sides of the conjunction for simplicity).
    auto complex = make_conjunction(like_constant, like_constant);
    BOOST_REQUIRE(same_after_optimization(std::move(complex), "xxyyz", true));
}