From 7275b614aa27ad577f57a1174f62e5d249d2cad2 Mon Sep 17 00:00:00 2001 From: Takuya ASADA Date: Tue, 26 Dec 2023 06:46:04 +0900 Subject: [PATCH] scylla_util.py: wait for apt operation on other processes apt_install() / apt_uninstall() may fail if a background process, such as unattended-upgrades, is running an apt operation. To avoid this, we need to add two things: 1. For apt-get install / remove, we need to pass the option "DPkg::Lock::Timeout=300" so that apt-get waits (up to 300 seconds) for the dpkg lock. 2. For apt-get update, there is no option to wait for the cache lock. Therefore, we need to implement a retry loop that waits for apt-get update to succeed. Fixes #16537 Closes scylladb/scylladb#16561 --- dist/common/scripts/scylla_util.py | 36 +++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/dist/common/scripts/scylla_util.py b/dist/common/scripts/scylla_util.py index 0456481f14..a50f857750 100644 --- a/dist/common/scripts/scylla_util.py +++ b/dist/common/scripts/scylla_util.py @@ -12,8 +12,9 @@ import shutil import subprocess import yaml import sys +import time from pathlib import Path, PurePath -from subprocess import run, DEVNULL +from subprocess import run, DEVNULL, PIPE, CalledProcessError from datetime import datetime, timedelta import distro @@ -26,7 +27,6 @@ import traceback import traceback_with_variables import logging - def scylla_excepthook(etype, value, tb): os.makedirs('/var/tmp/scylla', mode=0o755, exist_ok=True) traceback.print_exception(etype, value, tb) @@ -311,14 +311,38 @@ def apt_is_updated(): return False return datetime.now() - datetime.fromtimestamp(cache_mtime) <= timedelta(days=1) +APT_GET_UPDATE_NUM_RETRY = 30 +APT_GET_UPDATE_RETRY_INTERVAL = 10 def apt_install(pkg): if is_offline(): pkg_error_exit(pkg) - if not apt_is_updated(): - run('apt-get update', shell=True, check=True) + + # The lock for update and install/remove are different, and + # DPkg::Lock::Timeout will only wait for install/remove lock. + # So we need to manually retry apt-get update. 
+ for i in range(APT_GET_UPDATE_NUM_RETRY): + if apt_is_updated(): + break + try: + res = run('apt-get update', shell=True, check=True, stderr=PIPE, encoding='utf-8') + break + except CalledProcessError as e: + print(e.stderr, end='') + # if error is "Could not get lock", wait a while and retry + match = re.match('^E: Could not get lock ', e.stderr, re.MULTILINE) + if match: + print('Sleep 10 seconds to wait for apt lock...') + time.sleep(APT_GET_UPDATE_RETRY_INTERVAL) + # if this is last time to retry, re-raise exception + if i == APT_GET_UPDATE_NUM_RETRY - 1: + raise + # if error is not "Could not get lock", re-raise Exception + else: + raise + apt_env = os.environ.copy() apt_env['DEBIAN_FRONTEND'] = 'noninteractive' - return run(f'apt-get install -y {pkg}', shell=True, check=True, env=apt_env) + return run(f'apt-get -o DPkg::Lock::Timeout=300 install -y {pkg}', shell=True, check=True, env=apt_env) def emerge_install(pkg): if is_offline(): @@ -361,7 +385,7 @@ def yum_uninstall(pkg): def apt_uninstall(pkg): apt_env = os.environ.copy() apt_env['DEBIAN_FRONTEND'] = 'noninteractive' - return run(f'apt-get remove -y {pkg}', shell=True, check=True, env=apt_env) + return run(f'apt-get -o DPkg::Lock::Timeout=300 remove -y {pkg}', shell=True, check=True, env=apt_env) def emerge_uninstall(pkg): return run(f'emerge --deselect {pkg}', shell=True, check=True)