sstable: limit compression chunk size to 128 KB
The chunk size used in sstable compression can be set when creating a table, using the "chunk_length_in_kb" parameter. It can be any power-of-two multiple of 1KB. Very large compression chunks are not useful - they offer diminishing returns on compression ratio, and require very large memory buffers and reading a very large amount of disk data just to read a small row. In fact, small chunks are recommended - Scylla defaults to 4 KB chunks, and Cassandra lowered their default from 64 KB (in Cassandra 3) to 16 KB (in Cassandra 4). Therefore, allowing arbitrarily large chunk sizes is just asking for trouble. Today, a user can ask for a 1 GB chunk size, and crash or hang Scylla when it runs out of memory. So in this patch we add a hard limit of 128 KB for the chunk size - anything larger is refused. Fixes #9933 Signed-off-by: Nadav Har'El <nyh@scylladb.com> Closes #14267
This commit is contained in:
committed by
Botond Dénes
parent
f014ccf369
commit
8a9de08510
@@ -147,6 +147,12 @@ void compression_parameters::validate() {
|
||||
throw exceptions::configuration_exception(
|
||||
fmt::format("{}/{} must be a power of 2.", CHUNK_LENGTH_KB, CHUNK_LENGTH_KB_ERR));
|
||||
}
|
||||
// Excessive _chunk_length is pointless and can lead to allocation
|
||||
// failures (see issue #9933)
|
||||
if (chunk_length > 128 * 1024) {
|
||||
throw exceptions::configuration_exception(
|
||||
fmt::format("{}/{} must be 128 or less.", CHUNK_LENGTH_KB, CHUNK_LENGTH_KB_ERR));
|
||||
}
|
||||
}
|
||||
if (_crc_check_chance && (_crc_check_chance.value() < 0.0 || _crc_check_chance.value() > 1.0)) {
|
||||
throw exceptions::configuration_exception(sstring(CRC_CHECK_CHANCE) + " must be between 0.0 and 1.0.");
|
||||
|
||||
@@ -711,10 +711,10 @@ available:
|
||||
LZ4Compressor, SnappyCompressor, and DeflateCompressor.
|
||||
A custom compressor can be provided by specifying the full class
|
||||
name as a “string constant”:#constants.
|
||||
``chunk_length_in_kb`` 4KB On disk SSTables are compressed by block (to allow random reads). This
|
||||
``chunk_length_in_kb`` 4 On disk SSTables are compressed by block (to allow random reads). This
|
||||
defines the size (in KB) of the block. Bigger values may improve the
|
||||
compression rate, but increases the minimum size of data to be read from disk
|
||||
for a read.
|
||||
for a read. Allowed values are powers of two between 1 and 128.
|
||||
========================= =============== =============================================================================
|
||||
|
||||
.. ``crc_check_chance`` 1.0 When compression is enabled, each compressed block includes a checksum of
|
||||
@@ -991,4 +991,4 @@ For example::
|
||||
.. distributed under the License is distributed on an "AS IS" BASIS,
|
||||
.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
.. See the License for the specific language governing permissions and
|
||||
.. limitations under the License.
|
||||
.. limitations under the License.
|
||||
|
||||
@@ -77,11 +77,20 @@ def test_chunk_length_invalid(cql, test_keyspace, garbage):
|
||||
# result in unbounded allocations and potentially crashing Scylla. Therefore,
|
||||
# there ought to be some limit for the configured chunk length. Let's check it
|
||||
# by trying a ridiculously large value, which shouldn't be legal.
|
||||
# This test fails on Cassandra, which doesn't have protection against huge
|
||||
# chunk sizes, so the test is marked a "cassandra_bug".
|
||||
# Reproduces #9933.
|
||||
@pytest.mark.skip(reason="#9933")
|
||||
def test_huge_chunk_length(cql, test_keyspace):
|
||||
with new_test_table(cql, test_keyspace, "p int primary key, v int", "with compression = { '" + sstable_compression + "': 'LZ4Compressor', 'chunk_length_in_kb': 1048576 }") as table:
|
||||
# Write something and flush it, to have sstable compression actually
|
||||
# be used
|
||||
cql.execute(f'INSERT INTO {table} (p, v) VALUES (1, 2)')
|
||||
nodetool.flush(cql, table)
|
||||
def test_huge_chunk_length(cql, test_keyspace, cassandra_bug):
|
||||
with pytest.raises(ConfigurationException, match='chunk_length_in_kb'):
|
||||
with new_test_table(cql, test_keyspace, "p int primary key, v int", "with compression = { '" + sstable_compression + "': 'LZ4Compressor', 'chunk_length_in_kb': 1048576 }") as table:
|
||||
# At this point, the test already failed, as we expected the table
|
||||
# creation to have failed with ConfigurationException. But if we
|
||||
# reached here, let's really demonstrate the bug - write
|
||||
# something and flush it, to have sstable compression actually
|
||||
# be used.
|
||||
cql.execute(f'INSERT INTO {table} (p, v) VALUES (1, 2)')
|
||||
nodetool.flush(cql, table)
|
||||
# Also check the same for ALTER TABLE
|
||||
with new_test_table(cql, test_keyspace, "p int primary key, v int") as table:
|
||||
with pytest.raises(ConfigurationException, match='chunk_length_in_kb'):
|
||||
cql.execute("ALTER TABLE " + table + " with compression = { '" + sstable_compression + "': 'LZ4Compressor', 'chunk_length_in_kb': 1048576 }")
|
||||
|
||||
Reference in New Issue
Block a user