Merge "lwt: support LIKE operator in conditional expressions" from Alejo

Support LIKE operator condition on column expressions.

NOTE: following the existing code, the LIKE pattern value is converted
      to raw bytes and passed straight as bytes_view to like_matcher
      without type checking; it should be checked/sanitized by caller.

Refs: #5777

Branch URL: https://github.com/alecco/scylla/tree/as_like_condition_2

Tests: unit ({dev}), unit ({debug})

NOTE: the unrelated test test_null_value_tuple_floating_types_and_uuids fails.
This commit is contained in:
Tomasz Grabiec
2020-03-02 17:36:57 +01:00
7 changed files with 304 additions and 14 deletions

View File

@@ -47,6 +47,7 @@
#include <boost/range/algorithm_ext/push_back.hpp>
#include "types/map.hh"
#include "types/list.hh"
#include "utils/like_matcher.hh"
namespace {
@@ -152,7 +153,7 @@ bool column_condition::applies_to(const data_value* cell_value, const query_opti
// - a predicate can operate on a column or a collection element, which must always be
// on the left side: "a = 3" or "collection['key'] IN (1,2,3)"
// - parameter markers are allowed on the right hand side only
// - only <, >, >=, <=, != and IN predicates are supported.
// - only <, >, >=, <=, !=, LIKE, and IN predicates are supported.
// - NULLs and missing values are treated differently from the WHERE clause:
// a term or cell in IF clause is allowed to be NULL or compared with NULL,
// and NULL value is treated just like any other value in the domain (there is no
@@ -245,6 +246,26 @@ bool column_condition::applies_to(const data_value* cell_value, const query_opti
// directly to compare.
return is_satisfied_by(_op, *cell_value->type(), *column.type, *cell_value, to_bytes(param));
}
if (_op == operator_type::LIKE) {
if (cell_value == nullptr) {
return false;
}
if (_matcher) {
return (*_matcher)(bytes_view(cell_value->serialize_nonnull()));
} else {
auto param = _value->bind_and_get(options); // LIKE pattern
if (param.is_unset_value()) {
throw exceptions::invalid_request_exception("Invalid 'unset' value in LIKE pattern");
}
if (param.is_null()) {
throw exceptions::invalid_request_exception("Invalid NULL value in LIKE pattern");
}
like_matcher matcher(to_bytes(param));
return matcher(bytes_view(cell_value->serialize_nonnull()));
}
}
assert(_op == operator_type::IN);
std::vector<bytes_opt> in_values;
@@ -306,8 +327,27 @@ column_condition::raw::prepare(database& db, const sstring& keyspace, const colu
if (_op.is_compare()) {
validate_operation_on_durations(*receiver.type, _op);
return column_condition::condition(receiver, collection_element_term, _value->prepare(db, keyspace, value_spec), _op);
return column_condition::condition(receiver, collection_element_term,
_value->prepare(db, keyspace, value_spec), nullptr, _op);
}
if (_op == operator_type::LIKE) {
auto literal_term = dynamic_pointer_cast<constants::literal>(_value);
if (literal_term) {
// Pass matcher object
const sstring& pattern = literal_term->get_raw_text();
return column_condition::condition(receiver, collection_element_term,
_value->prepare(db, keyspace, value_spec),
std::make_unique<like_matcher>(bytes_view(reinterpret_cast<const int8_t*>(pattern.begin()), pattern.size())),
_op);
} else {
// Pass through rhs value, matcher object built on execution
// TODO: caller should validate parametrized LIKE pattern
return column_condition::condition(receiver, collection_element_term,
_value->prepare(db, keyspace, value_spec), nullptr, _op);
}
}
if (_op != operator_type::IN) {
throw exceptions::invalid_request_exception(format("Unsupported operator type {} in a condition ", _op));
}

View File

@@ -44,6 +44,7 @@
#include "cql3/term.hh"
#include "cql3/abstract_marker.hh"
#include "cql3/operator.hh"
#include "utils/like_matcher.hh"
namespace cql3 {
@@ -65,14 +66,17 @@ private:
::shared_ptr<term> _value;
// List of terminals for "a IN (value, value, ...)"
std::vector<::shared_ptr<term>> _in_values;
const std::unique_ptr<like_matcher> _matcher;
const operator_type& _op;
public:
column_condition(const column_definition& column, ::shared_ptr<term> collection_element,
::shared_ptr<term> value, std::vector<::shared_ptr<term>> in_values, const operator_type& op)
::shared_ptr<term> value, std::vector<::shared_ptr<term>> in_values,
std::unique_ptr<like_matcher> matcher, const operator_type& op)
: column(column)
, _collection_element(std::move(collection_element))
, _value(std::move(value))
, _in_values(std::move(in_values))
, _matcher(std::move(matcher))
, _op(op)
{
if (op != operator_type::IN) {
@@ -94,18 +98,23 @@ public:
// and evaluate the condition.
bool applies_to(const data_value* cell_value, const query_options& options) const;
// Helper constructor wrapper for "IF col['key'] = 'foo'" or "IF col = 'foo'" */
/**
* Helper constructor wrapper for
* "IF col['key'] = 'foo'"
* "IF col = 'foo'"
* "IF col LIKE <pattern>"
*/
static ::shared_ptr<column_condition> condition(const column_definition& def, ::shared_ptr<term> collection_element,
::shared_ptr<term> value, const operator_type& op) {
::shared_ptr<term> value, std::unique_ptr<like_matcher> matcher, const operator_type& op) {
return ::make_shared<column_condition>(def, std::move(collection_element), std::move(value),
std::vector<::shared_ptr<term>>{}, op);
std::vector<::shared_ptr<term>>{}, std::move(matcher), op);
}
// Helper constructor wrapper for "IF col IN ... and IF col['key'] IN ... */
static ::shared_ptr<column_condition> in_condition(const column_definition& def, ::shared_ptr<term> collection_element,
::shared_ptr<term> in_marker, std::vector<::shared_ptr<term>> in_values) {
return ::make_shared<column_condition>(def, std::move(collection_element), std::move(in_marker),
std::move(in_values), operator_type::IN);
std::move(in_values), nullptr, operator_type::IN);
}
class raw final {
@@ -130,7 +139,13 @@ public:
, _op(op)
{ }
/** A condition on a column or collection element. For example: "IF col['key'] = 'foo'" or "IF col = 'foo'" */
/**
* A condition on a column or collection element.
* For example:
* "IF col['key'] = 'foo'"
* "IF col = 'foo'"
* "IF col LIKE 'foo%'"
*/
static ::shared_ptr<raw> simple_condition(::shared_ptr<term::raw> value, ::shared_ptr<term::raw> collection_element,
const operator_type& op) {
return ::make_shared<raw>(std::move(value), std::vector<::shared_ptr<term::raw>>{},

View File

@@ -3948,6 +3948,8 @@ void require_rows(cql_test_env& e,
}
}
// Shorthand encoders used when spelling out bound parameters and expected
// result cells in the tests below. Each one returns whatever the matching
// CQL type object's decompose() yields for the given native value.

// bool -> cell encoded through boolean_type.
auto B(bool x) {
    return boolean_type->decompose(x);
}

// int32_t -> cell encoded through int32_type.
auto I(int32_t x) {
    return int32_type->decompose(x);
}

// int64_t -> cell encoded through long_type.
auto L(int64_t x) {
    return long_type->decompose(x);
}
@@ -5064,3 +5066,66 @@ SEASTAR_TEST_CASE(test_null_value_tuple_floating_types_and_uuids) {
test_for_single_type(timeuuid_type, utils::UUID("00000000-0000-1000-0000-000000000000"));
});
}
static std::unique_ptr<cql3::query_options> q_serial_opts() {
const auto& so = cql3::query_options::specific_options::DEFAULT;
auto qo = std::make_unique<cql3::query_options>(
db::consistency_level::ONE,
infinite_timeout_config,
std::vector<cql3::raw_value>{},
// Ensure (optional) serial consistency is always specified.
cql3::query_options::specific_options{
so.page_size,
so.state,
db::consistency_level::SERIAL,
so.timestamp,
}
);
return qo;
}
// Prepares `query`, binds `params` to its markers and executes it, making sure
// the result is checked on the shard that actually handled the statement.
//
// The statement is first run on the calling shard. If the reply carries a
// move_to_shard() target, the very same closure is re-submitted to that shard
// with smp::submit_to(). The comparison against `expected_rows` (order of rows
// ignored) only happens for a reply that does not ask to be moved.
//
// Both lambdas capture by reference; this function blocks on .get() for every
// run, so the captured arguments are still in scope while the closure executes.
static void prepared_on_shard(cql_test_env& e, const sstring& query,
        std::vector<bytes> params,
        std::vector<std::vector<bytes_opt>> expected_rows) {
    auto run_once = [&] () mutable {
        return seastar::async([&] () mutable {
            auto prepared_id = e.prepare(query).get0();

            // Wrap each serialized parameter into a raw_value for binding.
            std::vector<cql3::raw_value> bound_values;
            for (auto& p : params) {
                bound_values.emplace_back(cql3::raw_value::make_value(p));
            }

            // NOTE(review): these options are built but never handed to
            // execute_prepared() below -- confirm whether they were meant to
            // be passed along with the bound values.
            auto serial_opts = q_serial_opts();

            auto reply = e.execute_prepared(prepared_id, bound_values).get0();
            if (!reply->move_to_shard()) {
                // Final answer from this shard: verify the returned rows.
                assert_that(reply).is_rows().with_rows_ignore_order(expected_rows);
            }
            return make_foreign(reply);
        });
    };

    auto first_reply = run_once().get0();
    if (!first_reply->move_to_shard()) {
        // Handled locally; rows were already checked inside run_once.
        return;
    }

    // The statement has to be executed elsewhere: repeat it on that shard.
    const unsigned target_shard = *first_reply->move_to_shard();
    smp::submit_to(target_shard, std::move(run_once)).get();
}
// Exercises the LWT condition "IF col LIKE ?" where the pattern (and the other
// values) are supplied through bind markers of a prepared statement.
//
// Each prepared_on_shard() call passes, in order, the new value for col, the
// pk to update and the LIKE pattern, followed by the expected result rows:
// the applied flag and then the col cell reported back for that row.
// The statements are order-dependent: a successful update changes the value
// later conditions are evaluated against.
SEASTAR_TEST_CASE(test_like_parameter_marker) {
return do_with_cql_env_thread([] (cql_test_env& e) {
// Fixture: three rows with pk 1..3 holding 'aaa', 'bbb' and 'ccc'.
cquery_nofail(e, "CREATE TABLE t (pk int PRIMARY KEY, col text)").get();
cquery_nofail(e, "INSERT INTO t (pk, col) VALUES (1, 'aaa')").get();
cquery_nofail(e, "INSERT INTO t (pk, col) VALUES (2, 'bbb')").get();
cquery_nofail(e, "INSERT INTO t (pk, col) VALUES (3, 'ccc')").get();
const sstring query("UPDATE t SET col = ? WHERE pk = ? IF col LIKE ?");
// pk = 9 was never inserted: the condition is not satisfied and the reported
// col cell is empty.
prepared_on_shard(e, query, {T("err"), I(9), T("e%")}, {{B(false), {}}});
// NOTE(review): identical to the previous statement; possibly intentional to
// check that a rejected update leaves nothing behind -- confirm.
prepared_on_shard(e, query, {T("err"), I(9), T("e%")}, {{B(false), {}}});
// 'aaa' matches 'a%': applied, previous value 'aaa' is reported, col becomes 'chg'.
prepared_on_shard(e, query, {T("chg"), I(1), T("a%")}, {{B(true), "aaa"}});
// col of pk = 1 is now 'chg', which no longer matches 'a%': not applied.
prepared_on_shard(e, query, {T("err"), I(1), T("a%")}, {{B(false), "chg"}});
// 'bbb' matches 'b%': applied, previous value 'bbb' is reported.
prepared_on_shard(e, query, {T("chg"), I(2), T("b%")}, {{B(true), "bbb"}});
// Repeats the earlier pk = 1 check: the value is still 'chg', so still not applied.
prepared_on_shard(e, query, {T("err"), I(1), T("a%")}, {{B(false), "chg"}});
});
}

View File

@@ -0,0 +1,32 @@
create table t (pk int primary key, c text);
insert into t (pk, c) values (1, 'abc');
insert into t (pk, c) values (2, 'bcd');
insert into t (pk, c) values (3, 'cde');
-- match
update t set c = 'chg' where pk = 1 if c like 'a%';
update t set c = 'chg' where pk = 2 if c like 'b%';
update t set c = 'chg' where pk = 3 if c like 'c%';
-- null value
insert into t (pk, c) values (3, null);
update t set c = 'error' where pk = 3 if c like 'a%';
-- unset value
insert into t json '{ "pk": 4 }' default unset;
update t set c = 'err' where pk = 4 if c like 'a%';
-- empty pattern
update t set c = 'err' where pk = 1 if c like '';
-- invalid pattern type
update t set c = 'err' where pk = 1 if c like 1;
update t set c = 'err' where pk = 1 if c like null;
update t set c = 'err' where pk = 1 if c like bigintAsBlob(1);
-- int column
create table ti (pk int primary key, c int);
insert into ti (pk, c) values (1, 1);
update ti set c = 2 where pk = 1 if c like 'a%';
-- map column
create table tm (pk int primary key, m map<int, text>);
insert into tm (pk, m) values (1, { 1: 'abc' });
update tm set m = { 2: 'error' } where pk = 1 if m like 'a%';
-- blob column
create table tb (pk int primary key, b blob);
insert into tb (pk, b) values (1, bigintAsBlob(1));
update tb set b = bigintAsBlob(2) where pk = 1 if b like 'a%';

View File

@@ -0,0 +1,144 @@
create table t (pk int primary key, c text);
{
"status" : "ok"
}
insert into t (pk, c) values (1, 'abc');
{
"status" : "ok"
}
insert into t (pk, c) values (2, 'bcd');
{
"status" : "ok"
}
insert into t (pk, c) values (3, 'cde');
{
"status" : "ok"
}
-- match
update t set c = 'chg' where pk = 1 if c like 'a%';
{
"rows" :
[
{
"[applied]" : "true",
"c" : "\"abc\""
}
]
}
update t set c = 'chg' where pk = 2 if c like 'b%';
{
"rows" :
[
{
"[applied]" : "true",
"c" : "\"bcd\""
}
]
}
update t set c = 'chg' where pk = 3 if c like 'c%';
{
"rows" :
[
{
"[applied]" : "true",
"c" : "\"cde\""
}
]
}
-- null value
insert into t (pk, c) values (3, null);
{
"status" : "ok"
}
update t set c = 'error' where pk = 3 if c like 'a%';
{
"rows" :
[
{
"[applied]" : "false"
}
]
}
-- unset value
insert into t json '{ "pk": 4 }' default unset;
{
"status" : "ok"
}
update t set c = 'err' where pk = 4 if c like 'a%';
{
"rows" :
[
{
"[applied]" : "false"
}
]
}
-- empty pattern
update t set c = 'err' where pk = 1 if c like '';
{
"rows" :
[
{
"[applied]" : "false",
"c" : "\"chg\""
}
]
}
-- invalid pattern type
update t set c = 'err' where pk = 1 if c like 1;
{
"message" : "exceptions::invalid_request_exception (Invalid INTEGER constant (1) for \"c\" of type text)",
"status" : "error"
}
update t set c = 'err' where pk = 1 if c like null;
{
"message" : "exceptions::invalid_request_exception (Invalid NULL value in LIKE pattern)",
"status" : "error"
}
update t set c = 'err' where pk = 1 if c like bigintAsBlob(1);
{
"message" : "exceptions::invalid_request_exception (Type error: cannot assign result of function system.bigintasblob (type blob) to c (type text))",
"status" : "error"
}
-- int column
create table ti (pk int primary key, c int);
{
"status" : "ok"
}
insert into ti (pk, c) values (1, 1);
{
"status" : "ok"
}
update ti set c = 2 where pk = 1 if c like 'a%';
{
"message" : "exceptions::invalid_request_exception (Invalid STRING constant (a%) for \"c\" of type int)",
"status" : "error"
}
-- map column
create table tm (pk int primary key, m map<int, text>);
{
"status" : "ok"
}
insert into tm (pk, m) values (1, { 1: 'abc' });
{
"status" : "ok"
}
update tm set m = { 2: 'error' } where pk = 1 if m like 'a%';
{
"message" : "exceptions::invalid_request_exception (Invalid STRING constant (a%) for \"m\" of type map<int, text>)",
"status" : "error"
}
-- blob column
create table tb (pk int primary key, b blob);
{
"status" : "ok"
}
insert into tb (pk, b) values (1, bigintAsBlob(1));
{
"status" : "ok"
}
update tb set b = bigintAsBlob(2) where pk = 1 if b like 'a%';
{
"message" : "exceptions::invalid_request_exception (Invalid STRING constant (a%) for \"b\" of type blob)",
"status" : "error"
}

View File

@@ -74,7 +74,6 @@ insert into lwt (a, b, c) values (1, {1:1, 2:2}, 3);
-- LWT restrictions are a superposition of modification statement
-- restrictions
update lwt set c=3 where a=1 and b contains 1 if c=1;
update lwt set c=3 where a=1 if c like 'asd';
drop table lwt;

View File

@@ -206,11 +206,6 @@ update lwt set c=3 where a=1 and b contains 1 if c=1;
"message" : "exceptions::invalid_request_exception (Cannot restrict clustering columns by a CONTAINS relation without a secondary index or filtering)",
"status" : "error"
}
update lwt set c=3 where a=1 if c like 'asd';
{
"message" : "exceptions::invalid_request_exception (Unsupported operator type LIKE in a condition )",
"status" : "error"
}
drop table lwt;
{