cql3: Reorganize orderings code

Reorganized the code that handles column ordering (ASC or DESC).
I feel that it's now clearer and easier to understand.

Added an enum that describes column ordering.
It has two possible values: ascending or descending.
It used to be a bool that was sometimes called 'reversed',
which could mean multiple things.

Instead of column.type->is_reversed() != <ordering bool>
there is now a function called are_column_select_results_reversed.

Split checking if ordering is reversed and verifying whether it's correct into two functions.
Before all of this was done by is_reversed()

This is a preparation to later allow skipping ORDER BY restrictions on some columns.
Adding this to the existing code caused it to get quite complex,
but this new version is better suited for the task.

The diff is a bit messy because I moved all ordering functions to one place,
it's better to read select_statement.cc lines 1495-1651 directly.

Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
This commit is contained in:
Jan Ciolek
2021-10-19 13:57:29 +02:00
parent 337906bc1c
commit f76a1cd4bf
3 changed files with 186 additions and 114 deletions

View File

@@ -486,9 +486,9 @@ whereClause returns [std::vector<cql3::relation_ptr> clause]
orderByClause[raw::select_statement::parameters::orderings_type& orderings]
@init{
bool reversed = false;
raw::select_statement::ordering ordering = raw::select_statement::ordering::ascending;
}
: c=cident (K_ASC | K_DESC { reversed = true; })? { orderings.emplace_back(c, reversed); }
: c=cident (K_ASC | K_DESC { ordering = raw::select_statement::ordering::descending; })? { orderings.emplace_back(c, ordering); }
;
jsonValue returns [expression value]

View File

@@ -71,9 +71,15 @@ namespace raw {
class select_statement : public cf_statement
{
public:
// Ordering of selected values as defined by the basic comparison order.
// Even for a column that by default has ordering 4, 3, 2, 1 ordering it in ascending order will result in 1, 2, 3, 4.
enum class ordering {
ascending,
descending
};
class parameters final {
public:
using orderings_type = std::vector<std::pair<shared_ptr<column_identifier::raw>, bool>>;
using orderings_type = std::vector<std::pair<shared_ptr<column_identifier::raw>, ordering>>;
private:
const orderings_type _orderings;
const bool _is_distinct;
@@ -101,6 +107,8 @@ public:
using result_row_type = std::vector<bytes_opt>;
using ordering_comparator_type = compare_fn<result_row_type>;
private:
using prepared_orderings_type = std::vector<std::pair<const column_definition*, ordering>>;
private:
lw_shared_ptr<const parameters> _parameters;
std::vector<::shared_ptr<selection::raw_selector>> _select_clause;
@@ -136,20 +144,29 @@ private:
/** Returns an expression for the limit or nullopt if no limit is set */
std::optional<expr::expression> prepare_limit(database& db, prepare_context& ctx, const std::optional<expr::expression>& limit);
// Checks whether it is legal to have ORDER BY in this statement
static void verify_ordering_is_allowed(const restrictions::statement_restrictions& restrictions);
void handle_unrecognized_ordering_column(const column_identifier& column) const;
// Processes ORDER BY column orderings, converts column_identifiers to column_defintions
prepared_orderings_type prepare_orderings(const schema& schema) const;
void verify_ordering_is_valid(const prepared_orderings_type&, const schema&, const restrictions::statement_restrictions& restrictions) const;
// Checks whether this ordering reverses all results.
// We only allow leaving select results unchanged or reversing them.
bool is_ordering_reversed(const prepared_orderings_type&) const;
select_statement::ordering_comparator_type get_ordering_comparator(
const prepared_orderings_type&,
selection::selection& selection,
const restrictions::statement_restrictions& restrictions);
static void validate_distinct_selection(const schema& schema,
const selection::selection& selection,
const restrictions::statement_restrictions& restrictions);
void handle_unrecognized_ordering_column(const column_identifier& column) const;
select_statement::ordering_comparator_type get_ordering_comparator(const schema& schema,
selection::selection& selection,
const restrictions::statement_restrictions& restrictions);
bool is_reversed(const schema& schema) const;
/** If ALLOW FILTERING was not specified, this verifies that it is not needed */
void check_needs_filtering(
const restrictions::statement_restrictions& restrictions,

View File

@@ -1412,8 +1412,11 @@ std::unique_ptr<prepared_statement> select_statement::prepare(database& db, cql_
if (!_parameters->orderings().empty()) {
assert(!for_view);
verify_ordering_is_allowed(*restrictions);
ordering_comparator = get_ordering_comparator(*schema, *selection, *restrictions);
is_reversed_ = is_reversed(*schema);
prepared_orderings_type prepared_orderings = prepare_orderings(*schema);
verify_ordering_is_valid(prepared_orderings, *schema, *restrictions);
ordering_comparator = get_ordering_comparator(prepared_orderings, *selection, *restrictions);
is_reversed_ = is_ordering_reversed(prepared_orderings);
}
std::vector<sstring> warnings;
@@ -1502,6 +1505,159 @@ void select_statement::verify_ordering_is_allowed(const restrictions::statement_
}
}
void select_statement::handle_unrecognized_ordering_column(const column_identifier& column) const
{
if (contains_alias(column)) {
throw exceptions::invalid_request_exception(format("Aliases are not allowed in order by clause ('{}')", column));
}
throw exceptions::invalid_request_exception(format("Order by on unknown column {}", column));
}
select_statement::prepared_orderings_type select_statement::prepare_orderings(const schema& schema) const {
prepared_orderings_type prepared_orderings;
prepared_orderings.reserve(_parameters->orderings().size());
for (auto&& [column_id, column_ordering] : _parameters->orderings()) {
::shared_ptr<column_identifier> column = column_id->prepare_column_identifier(schema);
const column_definition* def = schema.get_column_definition(column->name());
if (def == nullptr) {
handle_unrecognized_ordering_column(*column);
}
if (!def->is_clustering_key()) {
throw exceptions::invalid_request_exception(format("Order by is currently only supported on the clustered columns of the PRIMARY KEY, got {}", *column));
}
prepared_orderings.emplace_back(def, column_ordering);
}
// Uncomment this to allow specifing ORDER BY columns in any order.
// Right now specifying ORDER BY (c2 asc, c1 asc) is illegal, it can only be ORDER BY (c1 asc, c2 asc).
//
// std::sort(prepared_orderings.begin(), prepared_orderings.end(),
// [](const std::pair<const column_definition*, ordering>& a, const std::pair<const column_definition*, ordering>& b) {
// return a.first->component_index() < b.first->component_index();
// }
// );
return prepared_orderings;
}
// Checks whether this ordering causes select results on this column to be reversed.
// A clustering column can be ordered in the descending order in the table.
// Then specifying ascending order would cause the results of this column to be reverse in comparison to a standard select.
static bool are_column_select_results_reversed(const column_definition& column, select_statement::ordering column_ordering) {
if (column_ordering == select_statement::ordering::ascending) {
if (column.type->is_reversed()) {
return true;
} else {
return false;
}
} else { // descending ordering
if (column.type->is_reversed()) {
return false;
} else {
return true;
}
}
}
bool select_statement::is_ordering_reversed(const prepared_orderings_type& orderings) const {
if (orderings.empty()) {
return false;
}
return are_column_select_results_reversed(*orderings[0].first, orderings[0].second);
}
void select_statement::verify_ordering_is_valid(const prepared_orderings_type& orderings,
const schema& schema,
const restrictions::statement_restrictions& restrictions) const {
if (orderings.empty()) {
return;
}
// Verify that columns are in the same order as in schema definition
for (size_t i = 0; i + 1 < orderings.size(); i++) {
if (orderings[i].first->component_index() >= orderings[i + 1].first->component_index()) {
throw exceptions::invalid_request_exception("Order by currently only supports the ordering of columns following their declared order in the PRIMARY KEY");
}
}
// We only allow leaving select results unchanged or reversing them. Find out if they should be reversed.
bool is_reversed = is_ordering_reversed(orderings);
// Now verify that
// 1. Each column in the specified clustering prefix either has an ordering, or an EQ restriction.
// When there is an EQ restriction ordering is not needed because only a single value is possible.
// 2. The ordering leaves the selected rows unchanged or reverses the normal result.
// For that to happen the ordering must reverse all columns or none at all.
uint32_t max_ck_index = orderings.back().first->component_index();
auto orderings_iterator = orderings.begin();
for (uint32_t ck_column_index = 0; ck_column_index <= max_ck_index; ck_column_index++) {
if (orderings_iterator->first->component_index() != ck_column_index) {
// We don't have ordering for this column. Fail with an error.
const column_definition& cur_ck_column = schema.clustering_column_at(ck_column_index);
throw exceptions::invalid_request_exception(format(
"Unsupported order by relation - column {} doesn't have an ordering.",
cur_ck_column.name_as_text()));
}
// We have ordering for this column, let's see if its reversing is right.
const column_definition* cur_ck_column = orderings_iterator->first;
bool is_cur_column_reversed = are_column_select_results_reversed(*cur_ck_column, orderings_iterator->second);
if (is_cur_column_reversed != is_reversed) {
throw exceptions::invalid_request_exception(
format("Unsupported order by relation - only reversing all columns is supported, "
"but column {} has opposite ordering", cur_ck_column->name_as_text()));
}
orderings_iterator++;
}
}
select_statement::ordering_comparator_type select_statement::get_ordering_comparator(const prepared_orderings_type& orderings,
selection::selection& selection,
const restrictions::statement_restrictions& restrictions) {
if (!restrictions.key_is_in_relation()) {
return {};
}
std::vector<std::pair<uint32_t, data_type>> sorters;
sorters.reserve(orderings.size());
// If we order post-query (see orderResults), the sorted column needs to be in the ResultSet for sorting,
// even if we don't
// ultimately ship them to the client (CASSANDRA-4911).
for (auto&& [column_def, is_descending] : orderings) {
auto index = selection.index_of(*column_def);
if (index < 0) {
index = selection.add_column_for_post_processing(*column_def);
}
sorters.emplace_back(index, column_def->type);
}
return [sorters = std::move(sorters)] (const result_row_type& r1, const result_row_type& r2) mutable {
for (auto&& e : sorters) {
auto& c1 = r1[e.first];
auto& c2 = r2[e.first];
auto type = e.second;
if (bool(c1) != bool(c2)) {
return bool(c2);
}
if (c1) {
auto result = type->compare(*c1, *c2);
if (result != 0) {
return result < 0;
}
}
}
return false;
};
}
void select_statement::validate_distinct_selection(const schema& schema,
const selection::selection& selection,
const restrictions::statement_restrictions& restrictions)
@@ -1530,107 +1686,6 @@ void select_statement::validate_distinct_selection(const schema& schema,
}
}
void select_statement::handle_unrecognized_ordering_column(const column_identifier& column) const
{
if (contains_alias(column)) {
throw exceptions::invalid_request_exception(format("Aliases are not allowed in order by clause ('{}')", column));
}
throw exceptions::invalid_request_exception(format("Order by on unknown column {}", column));
}
select_statement::ordering_comparator_type
select_statement::get_ordering_comparator(const schema& schema,
selection::selection& selection,
const restrictions::statement_restrictions& restrictions)
{
if (!restrictions.key_is_in_relation()) {
return {};
}
std::vector<std::pair<uint32_t, data_type>> sorters;
sorters.reserve(_parameters->orderings().size());
// If we order post-query (see orderResults), the sorted column needs to be in the ResultSet for sorting,
// even if we don't
// ultimately ship them to the client (CASSANDRA-4911).
for (auto&& e : _parameters->orderings()) {
auto&& raw = e.first;
::shared_ptr<column_identifier> column = raw->prepare_column_identifier(schema);
const column_definition* def = schema.get_column_definition(column->name());
if (!def) {
handle_unrecognized_ordering_column(*column);
}
auto index = selection.index_of(*def);
if (index < 0) {
index = selection.add_column_for_post_processing(*def);
}
sorters.emplace_back(index, def->type);
}
return [sorters = std::move(sorters)] (const result_row_type& r1, const result_row_type& r2) mutable {
for (auto&& e : sorters) {
auto& c1 = r1[e.first];
auto& c2 = r2[e.first];
auto type = e.second;
if (bool(c1) != bool(c2)) {
return bool(c2);
}
if (c1) {
auto result = type->compare(*c1, *c2);
if (result != 0) {
return result < 0;
}
}
}
return false;
};
}
bool select_statement::is_reversed(const schema& schema) const {
assert(_parameters->orderings().size() > 0);
parameters::orderings_type::size_type i = 0;
bool is_reversed_ = false;
bool relation_order_unsupported = false;
for (auto&& e : _parameters->orderings()) {
::shared_ptr<column_identifier> column = e.first->prepare_column_identifier(schema);
bool reversed = e.second;
auto def = schema.get_column_definition(column->name());
if (!def) {
handle_unrecognized_ordering_column(*column);
}
if (!def->is_clustering_key()) {
throw exceptions::invalid_request_exception(format("Order by is currently only supported on the clustered columns of the PRIMARY KEY, got {}", *column));
}
if (i != def->component_index()) {
throw exceptions::invalid_request_exception(
"Order by currently only support the ordering of columns following their declared order in the PRIMARY KEY");
}
bool current_reverse_status = (reversed != def->type->is_reversed());
if (i == 0) {
is_reversed_ = current_reverse_status;
}
if (is_reversed_ != current_reverse_status) {
relation_order_unsupported = true;
}
++i;
}
if (relation_order_unsupported) {
throw exceptions::invalid_request_exception("Unsupported order by relation");
}
return is_reversed_;
}
/// True iff restrictions require ALLOW FILTERING despite there being no coordinator-side filtering.
static bool needs_allow_filtering_anyway(
const restrictions::statement_restrictions& restrictions,