diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake
index a51bf55a77..ba3e9ac417 100644
--- a/cmake/limit_jobs.cmake
+++ b/cmake/limit_jobs.cmake
@@ -1,16 +1,36 @@
-set(LINK_MEM_PER_JOB 4096 CACHE INTERNAL "Maximum memory used by each link job in (in MiB)")
+if(NOT DEFINED Scylla_PARALLEL_LINK_JOBS)
+  if(NOT DEFINED Scylla_RAM_PER_LINK_JOB)
+    # Only choose a default when the user has not provided a value.
+    set(_default_ram_value 4096)
+    if(Scylla_ENABLE_LTO)
+      # With ThinLTO enabled the linker uses all available CPU threads, and each
+      # link job needs significantly more memory while optimizing, so budget more
+      # RAM per job; this lowers the number of parallel link jobs computed below.
+      set(_default_ram_value 16384)
+    endif()
+    set(Scylla_RAM_PER_LINK_JOB ${_default_ram_value} CACHE STRING
+      "Maximum amount of memory used by each link job (in MiB)")
+  endif()
+  cmake_host_system_information(
+    RESULT _total_mem_mb
+    QUERY AVAILABLE_PHYSICAL_MEMORY)
+  math(EXPR _link_pool_depth "${_total_mem_mb} / ${Scylla_RAM_PER_LINK_JOB}")
+  # Allow at least 2 parallel link jobs to keep build throughput up. The main
+  # executable is linked with LTO (slower link phase) while tests are linked
+  # without LTO (faster link phase), so a second slot lets the faster test links
+  # overlap with the slower LTO link for better CPU utilization.
+  if(_link_pool_depth LESS 2)
+    set(_link_pool_depth 2)
+  endif()
 
-cmake_host_system_information(
-  RESULT _total_mem
-  QUERY AVAILABLE_PHYSICAL_MEMORY)
-math(EXPR _link_pool_depth "${_total_mem} / ${LINK_MEM_PER_JOB}")
-if(_link_pool_depth EQUAL 0)
-  set(_link_pool_depth 1)
+  set(Scylla_PARALLEL_LINK_JOBS "${_link_pool_depth}" CACHE STRING
+    "Maximum number of concurrent link jobs")
 endif()
+
 set_property(
   GLOBAL
   APPEND
   PROPERTY JOB_POOLS
-    link_pool=${_link_pool_depth}
+    link_pool=${Scylla_PARALLEL_LINK_JOBS}
     submodule_pool=1)
 set(CMAKE_JOB_POOL_LINK link_pool)
diff --git a/configure.py b/configure.py
index 7362df94f5..610ce3dafc 100755
--- a/configure.py
+++ b/configure.py
@@ -1664,7 +1664,14 @@ defines = ' '.join(['-D' + d for d in defines])
 globals().update(vars(args))
 
 total_memory = os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES')
+# without LTO, budget roughly 7e9 bytes (about 7 GB) of memory per link job
 link_pool_depth = max(int(total_memory / 7e9), 1)
+if args.lto:
+    # ThinLTO parallelizes linking on its own: budget 16e9 bytes (about 16 GB)
+    # per link job, keep at least 2 jobs, and never exceed the non-LTO depth
+    depth_with_lto = max(int(total_memory / 16e9), 2)
+    if depth_with_lto < link_pool_depth:
+        link_pool_depth = depth_with_lto
 
 selected_modes = args.selected_modes or modes.keys()
 default_modes = args.selected_modes or [mode for mode, mode_cfg in modes.items() if mode_cfg["default"]]