Merge 'Add code coverage support' from Eliran Sinvani

This mini-set includes code coverage support for ScyllaDB. It provides: 1. Support for building ScyllaDB with coverage support. 2. Utilities for processing coverage profiling data. 3. test.py support for generating and processing coverage profiling data into lcov trace files, which can later be used to produce HTML or textual coverage reports. Refs #16323 Closes scylladb/scylladb#16784 * github.com:scylladb/scylladb: Add code coverage documentation test.py: support code coverage code coverage: Add libraries for coverage handling test.py: support --coverage and --coverage-mode configure.py support coverage profiles on standard build modes
2024-01-19 15:27:44 +02:00
parent e62114214f 32d8dadf1a
commit b50d9bb802
10 changed files with 3250 additions and 14 deletions
--- a/configure.py
+++ b/configure.py
@@ -778,8 +778,11 @@ arg_parser.add_argument('--list-artifacts', dest='list_artifacts', action='store
 arg_parser.add_argument('--date-stamp', dest='date_stamp', type=str,
                        help='Set datestamp for SCYLLA-VERSION-GEN')
 arg_parser.add_argument('--use-cmake', action='store_true', help='Use CMake as the build system')
+arg_parser.add_argument('--coverage', action = 'store_true', help = 'Compile scylla with coverage instrumentation')
 args = arg_parser.parse_args()

+PROFILES_LIST_FILE_NAME = "coverage_sources.list"
+
 if args.list_artifacts:
    for artifact in sorted(all_artifacts):
        print(artifact)
@@ -1418,6 +1421,15 @@ tests_not_using_seastar_test_framework = set([
    'test/unit/cross_shard_barrier_test',
 ]) | pure_boost_tests

+
+COVERAGE_INST_FLAGS = ['-fprofile-instr-generate', '-fcoverage-mapping', f'-fprofile-list=./{PROFILES_LIST_FILE_NAME}']
+if args.coverage:
+    for _, mode in filter(lambda m: m[0] != "coverage", modes.items()):
+        mode['cxx_ld_flags'] += ' ' + ' '.join(COVERAGE_INST_FLAGS)
+        mode['cxx_ld_flags'] = mode['cxx_ld_flags'].strip()
+        mode['cxxflags'] += ' ' + ' '.join(COVERAGE_INST_FLAGS)
+        mode['cxxflags'] = mode['cxxflags'].strip()
+
 for t in tests_not_using_seastar_test_framework:
    if t not in scylla_tests:
        raise Exception("Test %s not found in scylla_tests" % (t))
@@ -1671,13 +1683,19 @@ def real_relpath(path, start):
 def configure_seastar(build_dir, mode, mode_config):
    seastar_build_dir = os.path.join(build_dir, mode, 'seastar')

+    seastar_cxx_ld_flags = mode_config['cxx_ld_flags']
+    # We want to "undo" coverage for seastar if we have it enabled.
+    if args.coverage:
+        for flag in COVERAGE_INST_FLAGS:
+            seastar_cxx_ld_flags = seastar_cxx_ld_flags.replace(' ' + flag, '')
+            seastar_cxx_ld_flags = seastar_cxx_ld_flags.replace(flag, '')
    seastar_cmake_args = [
        '-DCMAKE_BUILD_TYPE={}'.format(mode_config['cmake_build_type']),
        '-DCMAKE_C_COMPILER={}'.format(args.cc),
        '-DCMAKE_CXX_COMPILER={}'.format(args.cxx),
        '-DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON',
        '-DSeastar_CXX_FLAGS=SHELL:{}'.format(mode_config['lib_cflags']),
-        '-DSeastar_LD_FLAGS={}'.format(semicolon_separated(mode_config['lib_ldflags'], mode_config['cxx_ld_flags'])),
+        '-DSeastar_LD_FLAGS={}'.format(semicolon_separated(mode_config['lib_ldflags'], seastar_cxx_ld_flags)),
        '-DSeastar_CXX_DIALECT=gnu++20',
        '-DSeastar_API_LEVEL=7',
        '-DSeastar_UNUSED_RESULT_ERROR=ON',
--- a/coverage_excludes.txt
+++ b/coverage_excludes.txt
@@ -0,0 +1,12 @@
+# This file contains regex patterns that should be
+# excluded from coverage reports. The file exists for easy
+# synchronization between different runs of coverage or different
+# frameworks running tests for unified coverage report.
+# The format is simple:
+# 1. lines that start with # are comments and should be ignored
+# 2. empty lines should be ignored
+# 3. every other line contains a single regex pattern of files to exclude
+# Note: if a pattern should start with '#', simply write (#) instead.
+
+(.*/)?seastar/.*
+(.*/)?test/.*
--- a/coverage_sources.list
+++ b/coverage_sources.list
@@ -0,0 +1,8 @@
+# Those tests are testing header files so we have no choice but to instrument them
+source:test/boost/small_vector_test\.cc=allow
+source:test/boost/anchorless_list_test\.cc=allow
+
+# Don't instrument files that are part of the testing framework itself.
+source:test/*\.*=skip
+
+default:allow
--- a/docs/dev/code-coverage.md
+++ b/docs/dev/code-coverage.md
@@ -0,0 +1,116 @@
+# Code Coverage Support
+
+Scylla supports several code coverage workflows in order to analyze code coverage in testing
+and during runs. This relies on the LLVM toolchain for instrumentation and conversion of the resulting
+profiles.
+The support for code coverage is provided through the following components:
+1. `configure.py` - build Scylla with code coverage instrumentation
+2. `test.py` - unit test runs with code coverage processing and reporting.
+3. `test/pylib/coverage_utils.py` - provides library utilities and cli for common code coverage processing operations.
+4. `test/pylib/lcov_utils.py` - provides library utilities for manipulation of lcov trace files.
+5. a rest api endpoint: `/system/dump_llvm_profile` for profile dumping during runtime or in cases where graceful shutdown of Scylla is not desired or possible.
+
+## General Workflow With Coverage
+1. Build Scylla with coverage instrumentation
+2. Run Scylla with some use case (or run test)
+3. Trigger coverage profile dump
+4. Process the profile into an lcov trace file
+5. (optional) post process the lcov trace
+6. Generate a textual or an HTML report to view the coverage report
+
+## Why Is The Profile Data Converted Into Lcov Format?
+One can just use the coverage profiles produced by llvm with the native llvm tools. The method of converting the profile data into lcov format was chosen for the following reasons:
+1. Being source oriented, the Lcov format allows for merging coverage data from different binaries easily, even if they originated from different source trees (even though that is not very useful...). However, combining data from different binaries built from the same sources is useful, since you can combine unit test runs with tests that used the actual Scylla binary.
+2. Lcov is easy to parse and manipulate (For example: we can transform it into a patch coverage report)
+3. The HTML reporting is nicer (genhtml) than the llvm one.
+
+## Building Scylla With Coverage Instrumentation
+Building scylla with coverage instrumentation is done by adding the `--coverage` option to the
+`configure.py` command line. Example:
+`./configure.py --mode dev --coverage` - will build Scylla in dev mode with coverage instrumentation.
+
+NOTE: when adding the `--coverage` option, all build modes that are configured are going to be built with coverage instrumentation.
+
+NOTE: Coverage instrumentation incurs some performance penalty but it is hard to determine exactly
+how much.
+
+## Running Unit Tests With Coverage Processing And Reporting
+In order to get lcov trace files for unit test runs, simply add `--coverage` or `--coverage-mode` to the `test.py` command line.
+Examples:
+1. `./test.py --coverage` - will run all tests in all built modes and will produce coverage lcov traces for all of them.
+2. `./test.py --coverage-mode dev` - will run all tests in all built modes and will produce coverage lcov traces only for the dev mode runs.
+3. `./test.py --mode dev --mode release --coverage-mode dev` - will run all tests in the dev and release modes but will only produce lcov traces for the dev mode tests.
+
+   ### Where To Find The Lcov Traces After The `test.py` Run?
+   `test.py` will produce a hierarchy of lcov traces, all rooted in `<tmpdir>` (default: `./testlog`).
+   1. `<suite_name>.info` - will be produced for every suite under `<tmpdir>/<mode>/coverage/<suite_name>/`
+   2. `<mode>_coverage.info` - will be produced for every coverage-enabled mode under `<tmpdir>/<mode>/coverage/` and contains the aggregated coverage information from all suites.
+   3. `test_coverage.info` - will be created under `<tmpdir>` and contains the aggregated coverage information for all coverage enabled modes.
+   4. `test_coverage_report.txt` - will be produced under `<tmpdir>`; it is a textual report of the coverage information in `test_coverage.info`.
+
+## Advanced And Manual Workflows
+Once built with code coverage instrumentation, all test executables and the scylla executable are going
+to dump a profile upon graceful shutdown / clean exit. However, in order to produce a coverage report some extra processing is needed.
+For reference of how to process the profile dumps using the llvm toolchain, refer to: https://clang.llvm.org/docs/SourceBasedCodeCoverage.html
+
+HINT: It is recommended to run with LLVM_PROFILE_FILE set in the environment so the profiles will be dumped to a known location (https://clang.llvm.org/docs/SourceBasedCodeCoverage.html#running-the-instrumented-program)
+
+   ### Working with the cli
+   The cli `test/pylib/coverage_utils.py` contains some commands to help with the processing of profile dumps. **It is recommended to run the tool from the main source directory**
+
+   For extensive cli help run: `test/pylib/coverage_utils.py` or give the `-h` or `--help` to one of the subcommands.
+
+   Given a `profiles/someprofile.profraw` (or several files) the most common operations are:
+
+   NOTE: In most of the commands a list of files can be given in order to batch the operation on multiple profiles. But we concentrate on single file here for simplicity.
+
+   #### 1. Converting Raw Profiles Into Indexed Profile:
+   Run: `test/pylib/coverage_utils.py llvm-profiles merge profiles/someprofile.profraw profiles`- This will create an indexed profile in the `./profiles` directory. This file is named after the build ID of the executable that created this file. For example: `85e5e08c67bd9bd74c2caeb98aca2a45360cf25a.profdata`
+   You can use the llvm tools to produce reports from this file. However, files that originated in different binaries are hard to merge and will yield incomplete or incorrect results.
+
+
+   #### 2. Converting Indexed Profiles Into Lcov files:
+   Run: `test/pylib/coverage_utils.py llvm-profiles to-lcov --binary-search-path build --excludes-file coverage_excludes.txt profiles/85e5e08c67bd9bd74c2caeb98aca2a45360cf25a.profdata` - This will create `profiles/85e5e08c67bd9bd74c2caeb98aca2a45360cf25a.info` which can be manipulated and be used to produce a report.
+
+   #### 3. Producing A Report From Lcov trace files:
+   For textual reports:
+   1. `lcov --summary profiles/85e5e08c67bd9bd74c2caeb98aca2a45360cf25a.info` for a summary of the coverage rates:
+   ```
+   Reading tracefile profiles/85e5e08c67bd9bd74c2caeb98aca2a45360cf25a.info
+   Summary coverage rate:
+   lines......: 14.5% (11 of 76 lines)
+   functions..: 11.8% (2 of 17 functions)
+   branches...: no data found
+
+   ```
+   2. `lcov --list profiles/85e5e08c67bd9bd74c2caeb98aca2a45360cf25a.info` for a per file report:
+   ```
+   Reading tracefile profiles/85e5e08c67bd9bd74c2caeb98aca2a45360cf25a.info
+                        |Lines       |Functions  |Branches
+   Filename             |Rate     Num|Rate    Num|Rate     Num
+   ===========================================================
+   [utils/]
+   abi/eh_ia64.hh       | 100%      3| 100%     1|    -      0
+   exceptions.cc        |15.1%     53|16.7%     6|    -      0
+   exceptions.hh        | 0.0%     20| 0.0%    10|    -      0
+   ===========================================================
+                  Total:|14.5%     76|11.8%    17|    -      0
+
+   ```
+   For an HTML report:
+   `test/pylib/coverage_utils.py lcov-tools genhtml --output-dir profiles/html profiles/85e5e08c67bd9bd74c2caeb98aca2a45360cf25a.info` - This will create an HTML report rooted at `profiles/html`. In order to view it, open `profiles/html/index.html` in your browser.
+
+#### Other Advanced Operations With Lcov Files:
+Once you have coverage data, you can manipulate it in the following ways:
+1. "Set Like" operations -
+```
+union               Merges several (or single) lcov file into another trace file. If testname is given, the resulting lcov file will be tagged with this name, else if will just merge the files similarly to 'lcov -a...' command.
+                        Files can also be filtered (see 'man lcovrc')
+    diff                computes the diff between two or more coverage files (lines that are covered by first but not others)
+    intersection        computes the intersection between two or more coverage files (lines that are covered by all trace files)
+    symmetric-dff       computes the symmetric difference between two traces (line covered by either trace but not both)
+```
+2. Produce a "patch coverage" report, which transforms the report into a patch-centric report of the coverage of lines introduced by some git history fragment (e.g. `HEAD~10..HEAD`). There is also an option for a patch coverage report for a merge commit, in which case only the patches on the merge will be considered.
+
+For further information you can refer to the cli help:
+`test/pylib/coverage_utils.py help` or view the tools code: `test/pylib/coverage_utils.py` and `test/pylib/lcov_utils.py`
--- a/test.py
+++ b/test.py
@@ -40,6 +40,10 @@ from test.pylib.util import LogPrefixAdapter
 from test.pylib.scylla_cluster import ScyllaServer, ScyllaCluster, get_cluster_manager, merge_cmdline_options
 from test.pylib.minio_server import MinioServer
 from typing import Dict, List, Callable, Any, Iterable, Optional, Awaitable, Union
+import logging
+from test.pylib import coverage_utils
+import humanfriendly
+import treelib

 launch_time = time.monotonic()

@@ -145,7 +149,15 @@ class TestSuite(ABC):
                continue
            skip_in_m = set(self.cfg.get("run_in_" + a, []))
            self.disabled_tests.update(skip_in_m - run_in_m)
-
+        # environment variables that should be the base of all processes running in this suite
+        self.base_env = {}
+        if self.need_coverage():
+            # Route the coverage data from every instrumented object to the same file (profiles are merged into it with locking).
+            # As long as we don't need test-specific coverage data, this looks sufficient. The benefit of doing it
+            # this way is that storage will not be bloated with coverage files (each can weigh 10s of MBs, so for several
+            # thousands of tests it can easily reach 10s of GBs)
+            # ref: https://clang.llvm.org/docs/SourceBasedCodeCoverage.html#running-the-instrumented-program
+            self.base_env["LLVM_PROFILE_FILE"] = os.path.join(options.tmpdir,self.mode, "coverage", self.name, "%m.profraw")
    # Generate a unique ID for `--repeat`ed tests
    # We want these tests to have different XML IDs so test result
    # processors (Jenkins) don't merge results for different iterations of
@@ -274,7 +286,8 @@ class TestSuite(ABC):
                task.cancel()
            await asyncio.gather(*pending, return_exceptions=True)
            raise
-
+    def need_coverage(self):
+        return self.options.coverage and (self.mode in self.options.coverage_modes) and bool(self.cfg.get("coverage",True))

 class UnitTestSuite(TestSuite):
    """TestSuite instantiation for non-boost unit tests"""
@@ -368,10 +381,9 @@ class PythonTestSuite(TestSuite):
    def __init__(self, path, cfg: dict, options: argparse.Namespace, mode: str) -> None:
        super().__init__(path, cfg, options, mode)
        self.scylla_exe = path_to(self.mode, "scylla")
+        self.scylla_env = dict(self.base_env)
        if self.mode == "coverage":
-            self.scylla_env = coverage.env(self.scylla_exe, distinct_id=self.name)
-        else:
-            self.scylla_env = dict()
+            self.scylla_env.update(coverage.env(self.scylla_exe, distinct_id=self.name))
        self.scylla_env['SCYLLA'] = self.scylla_exe

        cluster_cfg = self.cfg.get("cluster", {"initial_size": 1})
@@ -419,7 +431,8 @@ class PythonTestSuite(TestSuite):
                seeds=create_cfg.seeds,
                cmdline_options=cmdline_options,
                config_options=config_options,
-                property_file=create_cfg.property_file)
+                property_file=create_cfg.property_file,
+                append_env=self.base_env)

            return server

@@ -506,10 +519,10 @@ class RunTestSuite(TestSuite):
    def __init__(self, path: str, cfg, options: argparse.Namespace, mode: str) -> None:
        super().__init__(path, cfg, options, mode)
        self.scylla_exe = path_to(self.mode, "scylla")
+        self.scylla_env = dict(self.base_env)
        if self.mode == "coverage":
            self.scylla_env = coverage.env(self.scylla_exe, distinct_id=self.name)
-        else:
-            self.scylla_env = dict()
+
        self.scylla_env['SCYLLA'] = self.scylla_exe

    async def add_test(self, shortname) -> None:
@@ -569,6 +582,7 @@ class Test:
        # True if the test was cancelled by a ctrl-c or timeout, so
        # shouldn't be retried, even if it is flaky
        self.is_cancelled = False
+        self.env = dict(self.suite.base_env)
        Test._reset(self)

    def reset(self) -> None:
@@ -624,9 +638,8 @@ class UnitTest(Test):
        self.path = path_to(self.mode, "test", self.name)
        self.args = shlex.split(args) + UnitTest.standard_args
        if self.mode == "coverage":
-            self.env = coverage.env(self.path)
-        else:
-            self.env = dict()
+            self.env.update(coverage.env(self.path))
+
        UnitTest._reset(self)

    def _reset(self) -> None:
@@ -1256,7 +1269,19 @@ def parse_cmd_line() -> argparse.Namespace:
                             "is only supported by python tests for now, other tests ignore it. "
                             "By default, the marker filter is not applied and all tests will be run without exception."
                             "To exclude e.g. slow tests you can write --markers 'not slow'.")
-
+    parser.add_argument('--coverage', action = 'store_true', default = False,
+                        help="When running code instrumented with coverage support"
+                             "Will route the profiles to `tmpdir`/mode/coverage/`suite` and post process them in order to generate "
+                             "lcov file per suite, lcov file per mode, and an lcov file for the entire run, "
+                             "The lcov files can eventually be used for generating coverage reports")
+    parser.add_argument("--coverage-mode",action = 'append', type = str, dest = "coverage_modes",
+                        help = "Collect and process coverage only for the modes specified. implies: --coverage, defalt: All built modes")
+    parser.add_argument("--coverage-keep-raw",action = 'store_true',
+                        help = "Do not delete llvm raw profiles when processing coverage reports.")
+    parser.add_argument("--coverage-keep-indexed",action = 'store_true',
+                        help = "Do not delete llvm indexed profiles when processing coverage reports.")
+    parser.add_argument("--coverage-keep-lcovs",action = 'store_true',
+                        help = "Do not delete intermediate lcov traces when processing coverage reports.")
    scylla_additional_options = parser.add_argument_group('Additional options for Scylla tests')
    scylla_additional_options.add_argument('--x-log2-compaction-groups', action="store", default="0", type=int,
                             help="Controls number of compaction groups to be used by Scylla tests. Value of 3 implies 8 groups.")
@@ -1295,6 +1320,19 @@ def parse_cmd_line() -> argparse.Namespace:
            print(palette.fail("Failed to read output of `ninja mode_list`: please run ./configure.py first"))
            raise

+    if not args.coverage_modes and args.coverage:
+        args.coverage_modes = list(args.modes)
+        if "coverage" in args.coverage_modes:
+            args.coverage_modes.remove("coverage")
+        if not args.coverage_modes:
+            args.coverage = False
+    elif args.coverage_modes:
+        if "coverage" in args.coverage_modes:
+            raise RuntimeError("'coverage' mode is not allowed in --coverage-mode")
+        missing_coverage_modes = set(args.coverage_modes).difference(set(args.modes))
+        if len(missing_coverage_modes) > 0:
+            raise RuntimeError(f"The following modes weren't built or ran (using the '--mode' option): {missing_coverage_modes}")
+        args.coverage = True
    def prepare_dir(dirname, pattern):
        # Ensure the dir exists
        pathlib.Path(dirname).mkdir(parents=True, exist_ok=True)
@@ -1619,10 +1657,264 @@ async def main() -> int:
    if 'coverage' in options.modes:
        coverage.generate_coverage_report(path_to("coverage", "tests"))

+    if options.coverage:
+        await process_coverage(options)
+
    # Note: failure codes must be in the ranges 0-124, 126-127,
    #       to cooperate with git bisect's expectations
    return 0 if not failed_tests else 1

+async def process_coverage(options):
+    total_processing_time = time.time()
+    logger = LogPrefixAdapter(logging.getLogger("coverage"), {'prefix' : 'coverage'})
+    modes_for_coverage = options.coverage_modes
+    # use about 75% of the machine's processing power.
+    concurrency = max(int(multiprocessing.cpu_count() * 0.75), 1)
+    logger.info(f"Processing coverage information for modes: {modes_for_coverage}, using {concurrency} cpus")
+    semaphore = asyncio.Semaphore(concurrency)
+    build_paths = [pathlib.Path(f"build/{mode}") for mode in modes_for_coverage]
+    paths_for_id_search = [bp / p for bp, p in itertools.product(build_paths, ["scylla", "test", "seastar"])]
+    logger.info("Getting binary ids for coverage conversion...")
+    files_to_ids_map = await coverage_utils.get_binary_ids_map(paths = paths_for_id_search,
+                                                               filter = coverage_utils.PROFILED_ELF_TYPES,
+                                                               semaphore = semaphore,
+                                                               logger = logger)
+    logger.debug(f"Binary ids map is: {files_to_ids_map}")
+    logger.info("Done getting binary ids for coverage conversion")
+    # get the suits that have actually been ran
+    suits_to_exclude = ["pylib_test", "nodetool"]
+    sources_to_exclude = [line for line in open("coverage_excludes.txt", 'r').read().split('\n') if line and not line.startswith('#')]
+    ran_suites = list({test.suite for test in TestSuite.all_tests() if test.suite.need_coverage()})
+
+    def suite_coverage_path(suite) -> pathlib.Path:
+        return pathlib.Path(suite.options.tmpdir) / suite.mode / 'coverage' / suite.name
+
+    def pathsize(path : pathlib.Path):
+        if path.is_file():
+            return os.path.getsize(path)
+        elif path.is_dir():
+            return sum([os.path.getsize(f) for f in path.glob("**/*") if f.is_file()])
+        else:
+            return 0
+    class Stats:
+        def __init__(self, name = "", size = 0, time = 0) -> None:
+            self.name = name
+            self.size = size
+            self.time = time
+        def __add__(self, other):
+            return Stats(self.name,
+                         size = self.size + other.size,
+                         time = self.time + other.time)
+        def __str__(self):
+            name = f"{self.name} - " if self.name else ""
+            fields = []
+            if self.size:
+                fields.append(f"size: {humanfriendly.format_size(self.size)}")
+            if self.time:
+                fields.append(f"time: {humanfriendly.format_timespan(self.time)}")
+            fields = ', '.join(fields)
+            return f"{name}{fields}"
+        @property
+        def asstring(self):
+            return str(self)
+
+    # a nested map of: mode -> suite -> unified_coverage_file
+    suits_trace_files = {}
+    stats = treelib.Tree()
+
+    RAW_PROFILE_STATS = "raw profiles"
+    INDEXED_PROFILE_STATS = "indexed profiles"
+    LCOV_CONVERSION_STATS = "lcov conversion"
+    LCOV_SUITES_MEREGE_STATS = "lcov per suite merge"
+    LCOV_MODES_MERGE_STATS = "lcov merge for mode"
+    LCOV_MERGE_ALL_STATS = "lcov merge all stats"
+    ROOT_NODE = stats.create_node(tag = time.time(),
+                                  identifier = "root",
+                                  data = Stats("Coverage Processing Stats", 0, 0))
+
+    for suite in list(ran_suites):
+        coverage_path = suite_coverage_path(suite)
+        if not coverage_path.exists():
+            logger.warning(f"Coverage dir for suite '{suite.name}' in mode '{suite.mode}' wasn't found, common reasons:\n\t"
+                "1. The suite doesn't use any instrumented binaries.\n\t"
+                "2. The binaries weren't compiled with coverage instrumentation.")
+            continue
+
+        # 1. Transform every suite raw profiles into indexed profiles
+        raw_profiles = list(coverage_path.glob("*.profraw"))
+        if len(raw_profiles) == 0:
+            logger.warning(f"Couldn't find any raw profiles for suite '{suite.name}' in mode '{suite.mode}' ({coverage_path}):\n\t"
+                "1. The binaries are killed instead of terminating which bypasses profile dump.\n\t"
+                "2. The suite tempres with the LLVM_PROFILE_FILE which causes the profile to be dumped\n\t"
+                "   to somewhere else.")
+            continue
+        mode_stats = stats.get_node(suite.mode)
+        if not mode_stats:
+            mode_stats = stats.create_node(tag = time.time(),
+                                           identifier = suite.mode,
+                                           parent = ROOT_NODE,
+                                           data = Stats(f"{suite.mode} mode processing stats", 0, 0))
+
+        raw_stats_node = stats.get_node(mode_stats.identifier + RAW_PROFILE_STATS)
+        if not raw_stats_node:
+            raw_stats_node = stats.create_node(tag = time.time(),
+                                               identifier = mode_stats.identifier + RAW_PROFILE_STATS,
+                                               parent = mode_stats,
+                                               data = Stats(RAW_PROFILE_STATS, 0, 0))
+        stat = stats.create_node(tag = time.time(),
+                                 identifier = raw_stats_node.identifier + suite.name,
+                                 parent = raw_stats_node,
+                                 data = Stats(suite.name, pathsize(coverage_path), 0))
+        raw_stats_node.data += stat.data
+        mode_stats.data.time += stat.data.time
+        mode_stats.data.size = max(mode_stats.data.size, raw_stats_node.data.size)
+
+
+        logger.info(f"{suite.name}: Converting raw profiles into indexed profiles - {stat.data}.")
+        start_time = time.time()
+        merge_result = await coverage_utils.merge_profiles(profiles = raw_profiles,
+                                            path_for_merged = coverage_path,
+                                            clear_on_success = (not options.coverage_keep_raw),
+                                            semaphore = semaphore,
+                                            logger = logger)
+        indexed_stats_node = stats.get_node(mode_stats.identifier +INDEXED_PROFILE_STATS)
+        if not indexed_stats_node:
+            indexed_stats_node = stats.create_node(tag = time.time(),
+                                                   identifier = mode_stats.identifier +INDEXED_PROFILE_STATS,
+                                                   parent = mode_stats,
+                                                   data = Stats(INDEXED_PROFILE_STATS, 0, 0))
+        stat = stats.create_node(tag = time.time(),
+                                 identifier = indexed_stats_node.identifier + suite.name,
+                                 parent = indexed_stats_node,
+                                 data = Stats(suite.name, pathsize(coverage_path), time.time() - start_time))
+        indexed_stats_node.data += stat.data
+        mode_stats.data.time += stat.data.time
+        mode_stats.data.size = max(mode_stats.data.size, indexed_stats_node.data.size)
+
+        logger.info(f"{suite.name}: Done converting raw profiles into indexed profiles - {humanfriendly.format_timespan(stat.data.time)}.")
+
+        # 2. Transform every indexed profile into an lcov trace file,
+        #    after this step, the dependency upon the build artifacts
+        #    ends and processing of the files can be done using the source
+        #    code only.
+
+        logger.info(f"{suite.name}: Converting indexed profiles into lcov trace files.")
+        start_time = time.time()
+        if len(merge_result.errors) > 0:
+            raise RuntimeError(merge_result.errors)
+        await coverage_utils.profdata_to_lcov(profiles = merge_result.generated_profiles,
+                                              excludes = sources_to_exclude,
+                                              known_file_ids = files_to_ids_map,
+                                              clear_on_success = (not options.coverage_keep_indexed),
+                                              semaphore = semaphore,
+                                              logger = logger
+                                              )
+        lcov_conversion_stats_node = stats.get_node(mode_stats.identifier + LCOV_CONVERSION_STATS)
+        if not lcov_conversion_stats_node:
+            lcov_conversion_stats_node = stats.create_node(tag = time.time(),
+                                                           identifier = mode_stats.identifier + LCOV_CONVERSION_STATS,
+                                                           parent = mode_stats,
+                                                           data = Stats(LCOV_CONVERSION_STATS, 0, 0))
+        stat = stats.create_node(tag = time.time(),
+                                 identifier = lcov_conversion_stats_node.identifier + suite.name,
+                                 parent = lcov_conversion_stats_node,
+                                 data = Stats(suite.name, pathsize(coverage_path), time.time() - start_time))
+        lcov_conversion_stats_node.data += stat.data
+        mode_stats.data.time += stat.data.time
+        mode_stats.data.size = max(mode_stats.data.size, lcov_conversion_stats_node.data.size)
+
+        logger.info(f"{suite.name}: Done converting indexed profiles into lcov trace files - {humanfriendly.format_timespan(stat.data.time)}.")
+
+        # 3. combine all tracefiles
+        logger.info(f"{suite.name} in mode {suite.mode}: Combinig lcov trace files.")
+        start_time = time.time()
+        trace_files = list(coverage_path.glob("**/*.info"))
+        target_trace_file = coverage_path / (suite.name + ".info")
+        if len(trace_files) == 0: # No coverage data, can skip
+            logger.warning(f"{suite.name} in mode  {suite.mode}: No coverage tracefiles found")
+        elif len(trace_files) == 1: # No need to merge, we can just rename the file
+            trace_files[0].rename(str(target_trace_file))
+        else:
+            await coverage_utils.lcov_combine_traces(lcovs = trace_files,
+                                                     output_lcov = target_trace_file,
+                                                     clear_on_success = (not options.coverage_keep_lcovs),
+                                                     files_per_chunk = 10,
+                                                     semaphore = semaphore,
+                                                     logger = logger)
+        lcov_merge_stats_node = stats.get_node(mode_stats.identifier + LCOV_SUITES_MEREGE_STATS)
+        if not lcov_merge_stats_node:
+            lcov_merge_stats_node = stats.create_node(tag = time.time(),
+                                                      identifier = mode_stats.identifier + LCOV_SUITES_MEREGE_STATS,
+                                                      parent = mode_stats,
+                                                      data = Stats(LCOV_SUITES_MEREGE_STATS, 0, 0))
+        stat = stats.create_node(tag = time.time(),
+                                 identifier = lcov_merge_stats_node.identifier + suite.name,
+                                 parent = lcov_merge_stats_node,
+                                 data = Stats(suite.name, pathsize(coverage_path), time.time() - start_time))
+        lcov_merge_stats_node.data += stat.data
+        mode_stats.data.time += stat.data.time
+        mode_stats.data.size = max(mode_stats.data.size, lcov_merge_stats_node.data.size)
+
+        suits_trace_files.setdefault(suite.mode, {})[suite.name] = target_trace_file
+        logger.info(f"{suite.name}: Done combinig lcov trace files - {humanfriendly.format_timespan(stat.data.time)}")
+
+    #4. combine the suite lcovs into per mode trace files
+    modes_trace_files  = {}
+    for mode, suite_traces in suits_trace_files.items():
+
+        target_trace_file = pathlib.Path(options.tmpdir) / mode / "coverage" / f"{mode}_coverage.info"
+        start_time = time.time()
+        logger.info(f"Consolidating trace files for mode {mode}.")
+        await coverage_utils.lcov_combine_traces(lcovs = suite_traces.values(),
+                                                 output_lcov = target_trace_file,
+                                                 clear_on_success = False,
+                                                 files_per_chunk = 10,
+                                                 semaphore = semaphore,
+                                                 logger = logger)
+        mode_stats = stats[mode]
+        stat = stats.create_node(tag = time.time(),
+                                 identifier = mode_stats.identifier + LCOV_MODES_MERGE_STATS,
+                                 parent = mode_stats,
+                                 data = Stats(LCOV_MODES_MERGE_STATS, None, time.time() - start_time))
+        mode_stats.data.time += stat.data.time
+        ROOT_NODE.data.size += mode_stats.data.size
+        modes_trace_files[mode] = target_trace_file
+        logger.info(f"Done consolidating trace files for mode {mode} - time: {humanfriendly.format_timespan(stat.data.time)}.")
+    #5. create one consolidated file with all trace information
+    logger.info(f"Consolidating all trace files for this run.")
+    start_time = time.time()
+    target_trace_file = pathlib.Path(options.tmpdir) / "test_coverage.info"
+    await coverage_utils.lcov_combine_traces(lcovs = modes_trace_files.values(),
+                                             output_lcov = target_trace_file,
+                                             clear_on_success = False,
+                                             files_per_chunk = 10,
+                                             semaphore = semaphore,
+                                             logger = logger)
+    stats.create_node(tag = time.time(),
+                      identifier = LCOV_MERGE_ALL_STATS,
+                      parent = ROOT_NODE,
+                      data = Stats(LCOV_MERGE_ALL_STATS, None, time.time() - start_time))
+    logger.info(f"Done consolidating all trace files for this run - time: {humanfriendly.format_timespan(time.time() - start_time)}.")
+
+    logger.info(f"Creating textual report.")
+    proc = await asyncio.create_subprocess_shell(f"lcov --summary --rc lcov_branch_coverage=1 {options.tmpdir}/test_coverage.info 2>/dev/null > {options.tmpdir}/test_coverage_report.txt")
+    await proc.wait()
+    with open(pathlib.Path(options.tmpdir) /"test_coverage_report.txt") as f:
+        summary = f.readlines()
+    proc = await asyncio.create_subprocess_shell(f"lcov --list --rc lcov_branch_coverage=1 {options.tmpdir}/test_coverage.info  2>/dev/null >> {options.tmpdir}/test_coverage_report.txt")
+    await proc.wait()
+    logger.info(f"Done creating textual report. ({options.tmpdir}/test_coverage_report.txt)")
+    total_processing_time = time.time() - total_processing_time
+    ROOT_NODE.data.time = total_processing_time
+
+
+
+
+    stats_str ="\n" + stats.show(stdout=False,
+                                 data_property="asstring")
+    summary = ["\n" + l for l in summary]
+    logger.info(stats_str)
+    logger.info("".join(summary))

 async def workaround_python26789() -> int:
    """Workaround for https://bugs.python.org/issue26789.
--- a/test/nodetool/suite.yaml
+++ b/test/nodetool/suite.yaml
@@ -1 +1,2 @@
 type: Tool
+coverage: false
--- a/test/pylib/coverage_utils.py
+++ b/test/pylib/coverage_utils.py
--- a/test/pylib/lcov_utils.py
+++ b/test/pylib/lcov_utils.py
--- a/test/pylib/scylla_cluster.py
+++ b/test/pylib/scylla_cluster.py
@@ -212,7 +212,8 @@ class ScyllaServer:
                 cluster_name: str, ip_addr: str, seeds: List[str],
                 cmdline_options: List[str],
                 config_options: Dict[str, Any],
-                 property_file: Dict[str, Any]) -> None:
+                 property_file: Dict[str, Any],
+                 append_env: Dict[str,Any]) -> None:
        # pylint: disable=too-many-arguments
        self.server_id = ServerNum(ScyllaServer.newid())
        self.exe = pathlib.Path(exe).resolve()
@@ -239,6 +240,7 @@ class ScyllaServer:
                cluster_name = self.cluster_name) \
            | config_options
        self.property_file = property_file
+        self.append_env = append_env

    def change_ip(self, ip_addr: IPAddress) -> None:
        """Change IP address of the current server. Pre: the server is
@@ -415,6 +417,7 @@ class ScyllaServer:

        env = os.environ.copy()
        env.clear()     # pass an empty env to make sure the user's SCYLLA_HOME has no impact
+        env.update(self.append_env)
        self.cmd = await asyncio.create_subprocess_exec(
            self.exe,
            *self.cmdline_options,
--- a/test/pylib_test/suite.yaml
+++ b/test/pylib_test/suite.yaml
@@ -1 +1,2 @@
 type: Run
+coverage: false