test.py: Add the possibility to run boost test from pytest

Add the possibility to run boost test from pytest. Boost facade based on code from https://github.com/pytest-dev/pytest-cpp, but enhanced and rewritten to suite better.
2024-10-30 13:28:01 +01:00
parent 4addc039e5
commit 8ef840a1c5
11 changed files with 446 additions and 6 deletions
--- a/test/boost/README.md
+++ b/test/boost/README.md
@@ -68,6 +68,43 @@ Boost tests can also be run using test.py - which is a script that provides
 a uniform way to run all tests in scylladb.git - C++ tests, Python tests,
 etc.

+## Execution with pytest
+
+To run all tests with pytest execute 
+```bash
+pytest test/boost
+```
+
+To execute all tests in one file, provide the path to the source filename as a parameter
+```bash
+pytest test/boost/aggregate_fcts_test.cc
+```
+Since it's a normal path, autocompletion works in the terminal out of the box.
+
+To execute only one test function, provide the path to the source file and function name
+```bash
+pytest --mode dev test/boost/aggregate_fcts_test.cc::test_aggregate_avg
+```
+
+To provide a specific mode, use the next parameter `--mode dev`,
+if parameter isn't provided pytest tries to use `ninja mode_list` to find out the compiled modes.
+
+Parallel execution is controlled by `pytest-xdist` and the parameter `-n auto`.
+This command starts tests with the number of workers equal to CPU cores.
+The useful command to discover the tests in the file or directory is 
+```bash
+pytest --collect-only -q --mode dev test/boost/aggregate_fcts_test.cc
+```
+That will return all test functions in the file.
+To execute only one function from the test, you can invoke the output from the previous command.
+However, suffix for mode should be skipped.
+For example,
+output shows in the terminal something like this `test/boost/aggregate_fcts_test.cc::test_aggregate_avg.dev`.
+So to execute this specific test function, please use the next command 
+```bash
+pytest --mode dev test/boost/aggregate_fcts_test.cc::test_aggregate_avg
+```
+
 # Writing tests

 Because of the large build time and build size of each separate test
--- a/test/boost/init.py
+++ b/test/boost/init.py
--- a/test/boost/conftest.py
+++ b/test/boost/conftest.py
@@ -0,0 +1,48 @@
+#
+# Copyright (C) 2024-present ScyllaDB
+#
+# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+#
+import os
+import sys
+from pathlib import Path, PosixPath
+
+
+import pytest
+from pytest import Collector
+
+from test.pylib.cpp.boost.boost_facade import BoostTestFacade, COMBINED_TESTS
+from test.pylib.cpp.boost.prepare_instance import get_env_manager
+from test.pylib.cpp.common_cpp_conftest import collect_items, get_combined_tests
+from test.pylib.util import get_modes_to_run
+
+
+def pytest_collect_file(file_path: PosixPath, parent: Collector):
+    """
+    Method triggered automatically by pytest to collect files from a directory. Boost and unit have the same logic for
+    collection, the only difference in execution, and it's covered by facade
+    """
+    # One of the files in the directory has additional extensions .inc. It's not a test and will not have a binary for
+    # execution, so it should be excluded from collecting
+    if file_path.suffix == '.cc' and '.inc' not in file_path.suffixes and file_path.stem != COMBINED_TESTS.stem:
+        return collect_items(file_path, parent, facade=BoostTestFacade(parent.config, get_combined_tests(parent.session)))
+
+
+@pytest.hookimpl(wrapper=True)
+def pytest_runtestloop(session):
+    """
+    https://docs.pytest.org/en/stable/reference/reference.html#pytest.hookspec.pytest_runtestloop
+    This hook is needed to start the Minio and S3 mock servers before tests. After starting the servers, the default
+    pytest's runtestloop takes control. Finally part is responsible for stopping servers regardless of failure in the tests.
+    """
+    if session.config.getoption('collectonly'):
+        yield
+        return
+    temp_dir = Path(session.config.rootpath, '..', session.config.getoption('tmpdir'))
+    modes = get_modes_to_run(session)
+    is_worker = False
+    if 'xdist' in sys.modules:
+        is_worker = sys.modules['xdist'].is_xdist_worker(session)
+
+    with get_env_manager(temp_dir, is_worker, modes):
+        yield
--- a/test/pylib/cpp/boost/init.py
+++ b/test/pylib/cpp/boost/init.py
--- a/test/pylib/cpp/boost/boost_facade.py
+++ b/test/pylib/cpp/boost/boost_facade.py
@@ -0,0 +1,186 @@
+#
+# Copyright (c) 2014 Bruno Oliveira
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# 
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# 
+# Copyright (C) 2025-present ScyllaDB
+#
+# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+#
+from __future__ import annotations
+
+import io
+import os
+import subprocess
+from collections.abc import Sequence
+from pathlib import Path
+from xml.etree import ElementTree
+
+from test.pylib.cpp.facade import CppTestFacade, CppTestFailure, run_process
+
+TIMEOUT_DEBUG = 60 * 5 # seconds
+TIMEOUT = 60 * 2 # seconds
+COMBINED_TESTS = Path('build', 'dev', 'test', 'boost', 'combined_tests')
+
+class BoostTestFacade(CppTestFacade):
+    """
+    Facade for BoostTests that's responsible for discovering test functions and executing them correctly.
+    """
+
+    def list_tests(
+        self,
+        executable: Path,
+        no_parallel: bool,
+    ) -> tuple[bool, list[str]]:
+        """
+        Return a boolean value indicating whether the tests combined or not and the list of tests
+        """
+        if no_parallel:
+            return False, [os.path.basename(os.path.splitext(executable)[0])]
+        else:
+            if not os.path.isfile(executable):
+                return True, self.combined_suites[executable.stem]
+            args = [executable, '--list_content']
+            try:
+                output = subprocess.check_output(
+                    args,
+                    stderr=subprocess.STDOUT,
+                    universal_newlines=True,
+                )
+            except subprocess.CalledProcessError as e:
+                output = e.output
+            # --list_content produces the list of all test cases in the file. When BOOST_DATA_TEST_CASE is used it
+            # additionally produce the lines with numbers for each case preserving the function name like this:
+            # test_singular_tree_ptr_sz*
+            #     _0*
+            #     _1*
+            #     _2*
+            # however, it's only possible to run test_singular_tree_ptr_sz that executes all test cases
+            # this line catches only test function name ignoring unrelated lines like '_0'
+            # Note: this ignores any test case starting with a '_' symbol
+            return False, [case[:-1] for case in output.splitlines() if
+                         case.endswith('*') and not case.strip().startswith('_')]
+
+    def run_test(
+        self,
+        executable: Path,
+        original_name: str,
+        test_name: str,
+        mode: str,
+        file_name: Path,
+        test_args:Sequence[str] = (),
+    ) -> tuple[list[CppTestFailure], str] | tuple[None, str]:
+        def read_file(name: Path) -> str:
+            try:
+                with io.open(name) as f:
+                    return f.read()
+            except IOError:
+                return ''
+
+        timeout = TIMEOUT_DEBUG if mode=='debug' else TIMEOUT
+        root_log_dir = self.temp_dir / mode / 'pytest'
+        log_xml = root_log_dir / f"{test_name}.log"
+        stdout_file_path = root_log_dir/ f"{test_name}_stdout.log"
+        stderr_file_path = root_log_dir / f"{test_name}_stderr.log"
+        report_xml = root_log_dir / f"{test_name}.xml"
+        args = [ str(executable),
+                 '--output_format=XML',
+                 f"--report_sink={report_xml}",
+                 f"--log_sink={log_xml}",
+                 '--catch_system_errors=no',
+                 '--color_output=false',
+                 ]
+        if original_name != Path(executable).stem:
+            if executable.stem == COMBINED_TESTS.stem:
+                args.append(f"--run_test={file_name.stem}/{original_name}")
+            else:
+                args.append(f"--run_test={original_name}")
+        # Tests are written in the way that everything after '--' passes to the test itself rather than to the test framework
+        args.append('--')
+        args.extend(test_args)
+        os.chdir(self.temp_dir.parent)
+        p, stderr, stdout = run_process(args, timeout)
+
+        with open(stdout_file_path, 'w') as fd:
+            fd.write(stdout)
+        with open(stderr_file_path, 'w') as fd:
+            fd.write(stderr)
+        log = read_file(log_xml)
+        report = read_file(report_xml)
+
+        results = self._parse_log(log=log)
+
+        if p.returncode != 0:
+            msg = (
+                'working_dir: {working_dir}\n'
+                'Internal Error: calling {executable} '
+                'for test {test_id} failed (return_code={return_code}):\n'
+                'output file:{stdout}\n'
+                'std error file:{stderr}\n'
+                'log:{log}\n'
+                'report:{report}\n'
+                'command to repeat:{command}'
+            )
+            failure = CppTestFailure(
+                file_name.name,
+                line_num=results[0].line_num,
+                contents=msg.format(
+                    working_dir=os.getcwd(),
+                    executable=executable,
+                    test_id=test_name,
+                    stdout=stdout_file_path.absolute(),
+                    stderr=stderr_file_path.absolute(),
+                    log=log,
+                    report=report,
+                    command=' '.join(p.args),
+                    return_code=p.returncode,
+                ),
+            )
+            return [failure], stdout
+
+        if results:
+            return results, stdout
+
+        return None, stdout
+
+    def _parse_log(self, log: str) -> list[CppTestFailure]:
+        """
+        Parse the 'log' section produced by BoostTest.
+
+        This is always an XML file, and from this it's possible to parse most of the
+        failures possible when running BoostTest.
+        """
+        parsed_elements = []
+
+        log_root = ElementTree.fromstring(log)
+
+        if log_root is not None:
+            parsed_elements.extend(log_root.findall('Exception'))
+            parsed_elements.extend(log_root.findall('Error'))
+            parsed_elements.extend(log_root.findall('FatalError'))
+
+        result = []
+        for elem in parsed_elements:
+            last_checkpoint = elem.find('LastCheckpoint')
+            if last_checkpoint:
+                elem = last_checkpoint
+            file_name = elem.attrib['file']
+            line_num = int(elem.attrib['line'])
+            result.append(CppTestFailure(file_name, line_num, elem.text or ''))
+        return result
--- a/test/pylib/cpp/boost/prepare_instance.py
+++ b/test/pylib/cpp/boost/prepare_instance.py
@@ -0,0 +1,162 @@
+#
+# Copyright (C) 2025-present ScyllaDB
+#
+# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+#
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+import shutil
+import time
+from contextlib import contextmanager
+from pathlib import Path
+from typing import Any, Generator
+
+from test.pylib.host_registry import HostRegistry
+from test.pylib.minio_server import MinioServer
+from test.pylib.s3_proxy import S3ProxyServer
+from test.pylib.s3_server_mock import MockS3Server
+from test.pylib.util import LogPrefixAdapter
+
+
+class PrepareChildProcessEnv:
+    """
+    Class responsible to get environment variables from the main thread through the shared file and set them for the process
+    """
+    def __init__(self, env_file: Path):
+        self.env_file = env_file
+
+    def prepare(self) -> None:
+        """
+        Read the environment variables for S3 and MinIO from the file and set them for the process.
+        """
+        timeout = 10
+        start_time = time.time()
+        sleep_for = 0.01
+        while True:
+            if os.path.exists(self.env_file):
+                with open(self.env_file, 'r') as file:
+                    for line in file.readlines():
+                        key, value = line.strip().split('=', 1)
+                        os.environ[key] = value
+                break
+
+            if time.time() - start_time > timeout:
+                raise TimeoutError(f"Timeout waiting for file {self.env_file}")
+            # Sleep needed to wait when the controller will create a file with environment variables.
+            # Without sleep checking of the file existence will be too fast,
+            # so it will finish before the file is created
+            time.sleep(sleep_for)
+            sleep_for *=sleep_for
+
+    def cleanup(self) -> None:
+        """
+        Fake method to have the same interfaces with Controller class.
+        """
+        pass
+
+    def __enter__(self):
+        self.prepare()
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.cleanup()
+
+
+class PrepareMainProcessEnv:
+    """
+    A class responsible for starting additional services needed by tests.
+
+    It starts up a Minio server and an S3 mock server.
+    The environment settings are saved to a file for later consumption by child processes.
+    Class ensures that the necessary subdirectories exist or clean it if it exists
+    """
+    def __init__(self, temp_dir: Path, modes: list[str], env_file: Path):
+        self.temp_dir = temp_dir
+        pytest_dirs = [self.temp_dir / mode / 'pytest' for mode in modes]
+        for directory in [self.temp_dir, *pytest_dirs]:
+            if not directory.exists():
+                os.makedirs(directory, exist_ok=True)
+            else:
+                shutil.rmtree(directory)
+        self.env_file = env_file
+        hosts = HostRegistry()
+        self.loop = asyncio.new_event_loop()
+        address_minio = self.loop.run_until_complete(hosts.lease_host())
+        address_s3_mock = self.loop.run_until_complete(hosts.lease_host())
+        self.address_s3_proxy = self.loop.run_until_complete(hosts.lease_host())
+        self.minio = MinioServer(self.temp_dir, address_minio, LogPrefixAdapter(logging.getLogger('minio'), {'prefix': 'minio'}))
+        self.mock_s3 = MockS3Server(address_s3_mock, 2012,
+                                    LogPrefixAdapter(logging.getLogger('s3_mock'), {'prefix': 's3_mock'}))
+        # S3 proxy initialized later because it needs to know Minis address and port that will be available only after
+        # Minio will start
+        self.proxy_s3 = None
+
+    def prepare(self) -> None:
+        """
+        Start the S3 mock server and MinIO for the tests.
+        Create a file with environment variables for connecting to them.
+        """
+
+        tasks = [
+            self.loop.create_task(self.minio.start()),
+            self.loop.create_task(self.mock_s3.start()),
+        ]
+        self.loop.run_until_complete(asyncio.gather(*tasks))
+        envs = self.minio.get_envs_settings()
+        envs.update(self.mock_s3.get_envs_settings())
+        minio_uri = "http://" + envs[self.minio.ENV_ADDRESS] + ":" + envs[self.minio.ENV_PORT]
+        self.proxy_s3 = S3ProxyServer(self.address_s3_proxy, 9002, minio_uri, 3, int(time.time()),
+                                      LogPrefixAdapter(logging.getLogger('s3_proxy'), {'prefix': 's3_proxy'}))
+        self.loop.run_until_complete(self.proxy_s3.start())
+        envs.update(self.proxy_s3.get_envs_settings())
+        with open(self.env_file, 'w') as file:
+            for key, value in envs.items():
+                file.write(f"{key}={value}\n")
+
+    def cleanup(self) -> None:
+        """
+        Stop the S3 mock server and MinIO
+        Remove the file with environment variables to not mess for consecutive runs.
+        """
+        tasks = [
+            self.loop.create_task(self.minio.stop()),
+            self.loop.create_task(self.mock_s3.stop()),
+            self.loop.create_task(self.proxy_s3.stop()),
+        ]
+        self.loop.run_until_complete(asyncio.gather(*tasks))
+        if os.path.exists(self.env_file):
+            self.env_file.unlink()
+
+    def __enter__(self):
+        try:
+            self.prepare()
+        except Exception:
+            self.cleanup()
+            raise
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.cleanup()
+
+@contextmanager
+def get_env_manager(temp_dir: Path, is_worker: bool, modes: list[str]) -> Generator[None, Any, None]:
+    """
+    xdist helps to execute test in parallel.
+    For that purpose it creates one main controller and workers.
+    Pytest itself doesn't know if it's a worker or controller, so it will execute all fixtures and methods.
+    Tests need S3 mock server and minio to start only once for the whole run, since they can share the one instance and
+    share the environment variables with workers.
+
+    So the part of starting the servers executes on non-workers' processes.
+    That means when xdist isn't used, servers start as intended in the main process.
+    Tests on workers should know the endpoints of the servers, so the controller prepares this information.
+    According classes responsible for configuration controller and workers.
+    """
+    env_file = Path(f"{temp_dir}/services_envs").absolute()
+    if is_worker:
+        with PrepareChildProcessEnv(env_file):
+            yield
+    else:
+        with PrepareMainProcessEnv(temp_dir, modes, env_file):
+            yield
--- a/test/pylib/cpp/common_cpp_conftest.py
+++ b/test/pylib/cpp/common_cpp_conftest.py
@@ -12,6 +12,7 @@ from pathlib import Path, PosixPath
 import yaml
 from pytest import Collector

+from test.pylib.cpp.boost.boost_facade import COMBINED_TESTS
 from test.pylib.cpp.facade import CppTestFacade
 from test.pylib.cpp.item import CppFile
 from test.pylib.util import get_modes_to_run
@@ -37,7 +38,7 @@ DEFAULT_ARGS = [
 ]


-def get_disabled_tests(config: dict, modes: [str]) -> dict[str, set[str]]:
+def get_disabled_tests(config: dict, modes: list[str]) -> dict[str, set[str]]:
    """
    Get the dict with disabled tests.
    Pytest spawns one process, so all modes should be handled there instead one by one as test.py does.
@@ -111,7 +112,7 @@ def collect_items(file_path: PosixPath, parent: Collector, facade: CppTestFacade
@cache
 def get_combined_tests(session):
    suites = collections.defaultdict()
-    executable = get_root_path(session) / 'combined_tests'
+    executable = get_root_path(session) / COMBINED_TESTS
    args = [executable, '--list_content']

    output = subprocess.check_output(
--- a/test/pylib/cpp/item.py
+++ b/test/pylib/cpp/item.py
@@ -32,6 +32,7 @@ import pytest
 from _pytest._code.code import TerminalRepr, ReprFileLocation
 from _pytest._io import TerminalWriter

+from test.pylib.cpp.boost.boost_facade import COMBINED_TESTS
 from test.pylib.cpp.facade import CppTestFailure, CppTestFailureList, CppTestFacade


@@ -140,7 +141,7 @@ class CppFile(pytest.File):
            executable = Path(f'{self.project_root}/build/{mode}/test/{self.path.parent.name}/{test_name}')
            combined, tests = self.facade.list_tests(executable, self.no_parallel_run)
            if combined:
-                executable = executable.parent / 'combined_tests'
+                executable = executable.parent / COMBINED_TESTS.stem
            for test_name in tests:
                if '/' in test_name:
                    test_name = test_name.replace('/', '_')
--- a/test/pylib/report_plugin.py
+++ b/test/pylib/report_plugin.py
@@ -17,7 +17,7 @@ class ReportPlugin:

    # Pytest hook to modify test name to include mode and run_id
    def pytest_configure(self, config):
-        # getting build_mode is needed for the cases when there will be no mode provided
+        # getting build_mode in two steps is needed for the cases when no mode parameter is provided
        self.build_mode = config.getoption("modes")
        if self.build_mode:
            self.build_mode = self.build_mode[0]
--- a/test/pylib/util.py
+++ b/test/pylib/util.py
@@ -270,17 +270,20 @@ def ninja(target):


@cache
-def get_configured_modes():
+def get_configured_modes(root_dir=None):
+    if root_dir:
+        os.chdir(root_dir)
    out = ninja('mode_list')
    # [1/1] List configured modes
    # debug release dev
    return re.sub(r'.* List configured modes\n(.*)\n', r'\1',
                            out, count=1, flags=re.DOTALL).split('\n')[-1].split(' ')

+
 def get_modes_to_run(session) -> list[str]:
    modes = session.config.getoption('modes')
    if not modes:
        modes = get_configured_modes(root_dir=pathlib.Path(session.config.rootpath).parent)
    if not modes:
        raise RuntimeError('No modes configured. Please run ./configure.py first')
-    return modes
+    return modes
--- a/test/pytest.ini
+++ b/test/pytest.ini
@@ -10,6 +10,7 @@ markers =
    without_scylla: run without attaching to a scylla process
    enable_tablets: create keyspace with tablets enabled or disabled
    repair: tests for repair
+    cpp: marker for c++ tests
 norecursedirs = manual perf lib
 # Ignore warnings about HTTPS requests without certificate verification
 # (see issue #15287). Pytest breaks urllib3.disable_warnings() in conftest.py,
@@ -26,6 +27,7 @@ norecursedirs = manual perf lib
 filterwarnings =
    ignore::urllib3.exceptions.InsecureRequestWarning
    ignore:record_property is incompatible with junit_family:pytest.PytestWarning
+    ignore::DeprecationWarning:importlib._bootstrap

 tmp_path_retention_count = 1
 tmp_path_retention_policy = failed