chore: add azure and gcp images extensions

Closes scylladb/scylladb#16942
This commit is contained in:
David Garcia
2024-01-23 11:27:52 +00:00
committed by Botond Dénes
parent e79ea91990
commit 77822fc51d
11 changed files with 337 additions and 148 deletions

0
docs/_ext/__init__.py Normal file
View File

View File

@@ -1,41 +1,13 @@
import csv
import os
import re
import requests
from sphinx.application import Sphinx
from docutils.parsers.rst import Directive, directives
from sphinxcontrib.datatemplates.directive import DataTemplateCSV
from sphinx.util import logging
from scylladb_common_images import FileDownloader, BaseVersionsTemplateDirective
LOGGER = logging.getLogger(__name__)
class AWSFileDownloader:
    """Download files that are listed in a bucket index served over HTTP.

    The index is requested from ``base_url`` and every ``<Key>`` entry whose
    name ends with the requested extension is treated as a downloadable file.
    """

    def __init__(self, base_url, session=None):
        """Store the bucket URL and the HTTP session used for all requests."""
        self.base_url = base_url
        self.session = session if session else requests.Session()

    def get_links(self, bucket_directory, extension):
        """Return the keys under ``bucket_directory`` ending in ``.extension``."""
        listing = self.session.get(
            f"{self.base_url}/?delimiter=/&prefix={bucket_directory}"
        )
        listing.raise_for_status()
        key_pattern = re.compile(rf"<Key>([^<]*\.{extension})</Key>")
        return key_pattern.findall(listing.text)

    def download_files(self, bucket_directory, download_directory, extension="csv"):
        """Save every matching file of the bucket directory into ``download_directory``."""
        os.makedirs(download_directory, exist_ok=True)

        for key in self.get_links(bucket_directory, extension):
            source_url = f"{self.base_url}/{key}"
            print(f"Downloading {source_url}")
            payload = self.session.get(source_url)
            payload.raise_for_status()

            # Only the last path component of the key is used as local file name.
            target_path = os.path.join(download_directory, key.rsplit("/", 1)[-1])
            with open(target_path, "wb") as target:
                target.write(payload.content)

        print(f"Download complete. The {extension.upper()} files are in {download_directory}")
class CloudFormationProcessor:
FILENAME_REGEX = r"^.+_(\d+\.\d+\.\d+)_(\w+)(\.yaml)?$"
@@ -88,129 +60,21 @@ class AMIInformationDownloader:
if os.path.exists(ami_download_directory) and os.listdir(ami_download_directory):
print(f"Files already exist in {ami_download_directory}. Skipping download.")
return
downloader = AWSFileDownloader(base_url)
downloader.download_files(ami_bucket_directory, ami_download_directory)
processor = CloudFormationProcessor()
links = downloader.get_links(cloudformation_bucket_directory, "yaml")
processor.process_files(ami_download_directory, links)
class AMITemplateDirective(DataTemplateCSV):
    """CSV data-template directive that also exposes a ``version`` value to the template."""

    # Same options as the parent directive plus a pass-through ``version`` option.
    option_spec = dict(DataTemplateCSV.option_spec, version=lambda x: x)

    def _make_context(self, data, config, env):
        """Build the parent's template context and add the ``version`` option to it."""
        template_context = super()._make_context(data, config, env)
        template_context["version"] = self.options.get("version")
        return template_context

    def run(self):
        """Delegate rendering to the parent directive."""
        rendered_nodes = super().run()
        return rendered_nodes
# Directive that renders one AMITemplateDirective block per CSV file found in
# the AMI download directory, filtered by version and exclusion patterns.
class AMIVersionsTemplateDirective(Directive):
# Capture group 1 is the dotted version embedded in the CSV file name.
FILENAME_REGEX = re.compile(r"ami_ids_(\d+(?:\.\d+)?(?:\.\d+)?)(?:.*?)\.csv")
has_content = True
option_spec = {
"version": directives.unchanged,
"exclude": directives.unchanged,
"only_latest": directives.flag,
}
# Return the version captured from the file name, or None when it does not match.
def _extract_version_from_filename(self, filename):
match = self.FILENAME_REGEX.search(filename)
return match.group(1) if match else None
# An empty version matches every file; a dotted version is compared as a
# string prefix, any other version is compared with the first component only.
def _matches_version(self, filename, version):
if not version:
return True
file_version = self._extract_version_from_filename(filename)
if not file_version:
return False
if "." in version:
return file_version.startswith(version)
return file_version.split(".")[0] == version
# True when any non-empty pattern occurs as a substring of the file name.
def _excluded(self, filename, patterns):
return any(pattern in filename for pattern in patterns if pattern)
# Sort key: the version as a tuple of ints, (0,) for files without a version.
def _version_key(self, filename):
version = self._extract_version_from_filename(filename)
return tuple(map(int, version.split("."))) if version else (0,)
# Pick current_version when it starts with 'branch-', otherwise stable_version
# when that does; the 'branch-' text is removed from the returned value.
def _get_current_version(self, current_version, stable_version):
prefix = 'branch-'
version = current_version
if current_version.startswith(prefix):
version = current_version
elif not stable_version.startswith(prefix):
LOGGER.error("Invalid stable_version format in conf.py. It should start with 'branch-'")
else:
version = stable_version
# NOTE(review): the next five lines use names that are not defined in this
# method (base_url, ami_bucket_directory, ...). They look like added lines of
# AMIInformationDownloader.run that the diff view interleaved here - confirm
# against the real file before relying on this listing.
downloader = FileDownloader(base_url)
downloader.download_files(ami_bucket_directory, ami_download_directory)
processor = CloudFormationProcessor()
links = downloader.get_links(cloudformation_bucket_directory, "yaml")
processor.process_files(ami_download_directory, links)
return version.replace(prefix, '')
# Entry point of the directive: select the CSV files and render each of them.
def run(self):
app = self.state.document.settings.env.app
current_version = os.environ.get('SPHINX_MULTIVERSION_NAME', '')
stable_version = app.config.smv_latest_version
version_pattern = self._get_current_version(current_version, stable_version)
# An explicit :version: option overrides the version derived above.
version_options = self.options.get("version", "")
if version_options:
version_pattern = version_options
exclude_patterns = self.options.get("exclude", "").split(",")
download_directory = os.path.join(
app.builder.srcdir, app.config.scylladb_aws_images_ami_download_directory
)
# The nested directive receives the CSV path relative to the current document.
docname = self.state.document.settings.env.docname
current_rst_path = os.path.join(app.builder.srcdir, docname + ".rst")
current_rst_dir = os.path.dirname(current_rst_path)
relative_path_from_current_rst = os.path.relpath(
download_directory, current_rst_dir
)
# Highest version first, so files[0] below is the latest one.
files = sorted([
file for file in os.listdir(download_directory) if file.endswith('.csv') and
self._matches_version(file, version_pattern) and not self._excluded(file, exclude_patterns)
], key=self._version_key, reverse=True)
if len(files) == 0:
LOGGER.warning(
f"No files match in directory '{download_directory}' with version pattern '{version_pattern}'."
)
elif "only_latest" in self.options:
files = [files[0]]
output = []
for file in files:
data_directive = AMITemplateDirective(
name=self.name,
arguments=[os.path.join(relative_path_from_current_rst, file)],
options=self.options,
content=self.content,
lineno=self.lineno,
content_offset=self.content_offset,
block_text=self.block_text,
state=self.state,
state_machine=self.state_machine,
)
data_directive.options["template"] = "aws_image.tmpl"
data_directive.options["version"] = self._extract_version_from_filename(
file
)
output.extend(data_directive.run())
return output
class AMIVersionsTemplateDirective(BaseVersionsTemplateDirective):
    """Versioned image listing for the AWS AMI CSV files."""

    # Capture group 1 is the version embedded in the CSV file name.
    FILENAME_REGEX = re.compile(r"ami_ids_(\d+(?:\.\d+)?(?:\.\d+)?)(?:.*?)\.csv")
    TEMPLATE = 'aws_image.tmpl'

    def get_download_directory(self, app):
        """Return the AMI download directory located under the Sphinx source dir."""
        source_root = app.builder.srcdir
        relative_directory = app.config.scylladb_aws_images_ami_download_directory
        return os.path.join(source_root, relative_directory)
def setup(app: Sphinx):
app.add_config_value(

View File

@@ -0,0 +1,54 @@
import os
import re
from sphinx.application import Sphinx
from sphinx.util import logging
from scylladb_common_images import FileDownloader, BaseVersionsTemplateDirective
LOGGER = logging.getLogger(__name__)
class AzureImagesInformationDownloader:
    """Fetch the Azure image CSV files when the Sphinx builder is initialised."""

    def run(self, app, exception=None):
        """Download the CSV files unless the download directory already has content.

        Args:
            app: The Sphinx application; its config supplies the bucket URL,
                the bucket directory and the local download directory.
            exception: Unused; accepted so the method can be connected to
                events that pass an extra argument.
        """
        config = app.config
        base_url = config.scylladb_azure_images_base_url
        # ``scylladb_azure_images_bucket_directory`` is the name conf.py sets.
        # The older ``*_ami_bucket_directory`` name still wins when a project
        # sets it explicitly, so existing configurations keep working.
        bucket_directory = (
            config.scylladb_azure_images_ami_bucket_directory
            or config.scylladb_azure_images_bucket_directory
        )
        download_directory = os.path.join(
            app.builder.srcdir, config.scylladb_azure_images_download_directory
        )

        if os.path.exists(download_directory) and os.listdir(download_directory):
            print(f"Files already exist in {download_directory}. Skipping download.")
        else:
            downloader = FileDownloader(base_url)
            downloader.download_files(bucket_directory, download_directory)


class AzureImagesVersionsTemplateDirective(BaseVersionsTemplateDirective):
    """Versioned image listing for the Azure image CSV files."""

    # Capture group 1 is the version embedded in the CSV file name.
    FILENAME_REGEX = re.compile(r"azure_image_ids_(\d+(?:\.\d+)?(?:\.\d+)?)(?:.*?)\.csv")
    TEMPLATE = "azure_image.tmpl"

    def get_download_directory(self, app):
        """Return the Azure download directory located under the Sphinx source dir."""
        return os.path.join(app.builder.srcdir, app.config.scylladb_azure_images_download_directory)


def setup(app: Sphinx):
    """Register the Azure images config values, download hook and directive.

    Returns:
        The extension metadata dictionary expected by Sphinx.
    """
    app.add_config_value(
        "scylladb_azure_images_base_url",
        default="https://s3.amazonaws.com/downloads.scylladb.com",
        rebuild="html",
    )
    # Canonical option name: matches conf.py and the naming of the GCP extension.
    app.add_config_value(
        "scylladb_azure_images_bucket_directory",
        default="downloads/scylla/azure/",
        rebuild="html",
    )
    # Previously registered name, kept as an optional override for
    # backward compatibility. It has no default of its own.
    app.add_config_value(
        "scylladb_azure_images_ami_bucket_directory",
        default=None,
        rebuild="html",
    )
    app.add_config_value(
        "scylladb_azure_images_download_directory",
        default="_data/opensource/azure",
        rebuild="html",
    )

    app.connect("builder-inited", AzureImagesInformationDownloader().run)
    app.add_directive("scylladb_azure_images_template", AzureImagesVersionsTemplateDirective)

    return {
        "version": "0.1",
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }

View File

@@ -8,7 +8,8 @@ import jinja2
from sphinx import addnodes
from sphinx.application import Sphinx
from sphinx.directives import ObjectDescription
from sphinx.util import logging, status_iterator, ws_re
from sphinx.util import logging, ws_re
from sphinx.util.display import status_iterator
from sphinx.util.docfields import Field
from sphinx.util.docutils import switch_source_input, SphinxDirective
from sphinx.util.nodes import make_id, nested_parse_with_titles

View File

@@ -0,0 +1,159 @@
import os
import re
import requests
import csv
from docutils.parsers.rst import Directive, directives
from sphinxcontrib.datatemplates.directive import DataTemplateCSV
from sphinx.util import logging
class FileDownloader:
    """Download files that are listed in a bucket index served over HTTP.

    The index is requested from ``base_url`` and every ``<Key>`` entry whose
    name ends with the requested extension is treated as a downloadable file.
    """

    def __init__(self, base_url, session=None, timeout=30):
        """Create a downloader.

        Args:
            base_url: URL of the bucket, without a trailing slash.
            session: Optional ``requests``-compatible session; a new
                ``requests.Session`` is created when omitted.
            timeout: Seconds passed as ``timeout`` to every HTTP request so a
                stalled connection cannot hang the documentation build.
        """
        self.base_url = base_url
        self.session = session or requests.Session()
        self.timeout = timeout

    def get_links(self, bucket_directory, extension):
        """Return the keys under ``bucket_directory`` ending in ``.extension``.

        Args:
            bucket_directory: Prefix of the keys to list.
            extension: File extension without the leading dot; matched literally.

        Returns:
            The matching keys in the order they appear in the listing.

        Raises:
            requests.HTTPError: Propagated from ``raise_for_status`` when the
                listing request fails.
        """
        # NOTE(review): a single listing request is made; if the server splits
        # the listing into pages, later pages are not followed.
        url = f"{self.base_url}/?delimiter=/&prefix={bucket_directory}"
        response = self.session.get(url, timeout=self.timeout)
        response.raise_for_status()
        # Escape the extension so characters such as "." are not treated as
        # regular-expression syntax (e.g. "tar.gz").
        pattern = rf"<Key>([^<]*\.{re.escape(extension)})</Key>"
        return re.findall(pattern, response.text)

    def download_files(self, bucket_directory, download_directory, extension="csv"):
        """Save every matching file of the bucket directory locally.

        Args:
            bucket_directory: Prefix of the keys to download.
            download_directory: Local directory; created when missing.
            extension: File extension without the leading dot.

        Raises:
            requests.HTTPError: Propagated from ``raise_for_status`` when a
                request fails.
        """
        os.makedirs(download_directory, exist_ok=True)

        for link in self.get_links(bucket_directory, extension):
            file_url = f"{self.base_url}/{link}"
            print(f"Downloading {file_url}")
            file_response = self.session.get(file_url, timeout=self.timeout)
            file_response.raise_for_status()

            # Only the last path component of the key is used as local file name.
            local_path = os.path.join(download_directory, link.split("/")[-1])
            with open(local_path, "wb") as file:
                file.write(file_response.content)

        print(f"Download complete. The {extension.upper()} files are in {download_directory}")
class BaseTemplateDirective(DataTemplateCSV):
    """CSV data-template directive that also exposes a ``version`` value to the template."""

    # Same options as the parent directive plus a pass-through ``version`` option.
    option_spec = dict(DataTemplateCSV.option_spec, version=lambda x: x)

    def _make_context(self, data, config, env):
        """Build the parent's template context and add the ``version`` option to it."""
        template_context = super()._make_context(data, config, env)
        template_context["version"] = self.options.get("version")
        return template_context

    def run(self):
        """Delegate rendering to the parent directive."""
        rendered_nodes = super().run()
        return rendered_nodes
# Logger used by the directive below. The module imports ``logging`` but never
# bound ``LOGGER``, so the error and warning paths raised NameError.
LOGGER = logging.getLogger(__name__)


class BaseVersionsTemplateDirective(Directive):
    """Render one data-template block per downloaded CSV file, newest first.

    Subclasses provide ``FILENAME_REGEX`` (capture group 1 must be the version
    embedded in the file name), ``TEMPLATE`` and ``get_download_directory``.

    Options:
        version: Only render files for this version; overrides the version
            derived from the build environment.
        exclude: Comma-separated substrings; files whose name contains any of
            them are skipped.
        only_latest: Render only the file with the highest version.
    """

    # Directives should implement the following variables
    FILENAME_REGEX = re.compile(r".*")
    TEMPLATE = 'template.tmpl'

    has_content = True
    option_spec = {
        "version": directives.unchanged,
        "exclude": directives.unchanged,
        "only_latest": directives.flag,
    }

    def _get_version_pattern(self, app):
        """Return the ``version`` option, or the version derived from the build."""
        current_version = os.environ.get('SPHINX_MULTIVERSION_NAME', '')
        stable_version = app.config.smv_latest_version
        version_pattern = self._get_current_version(current_version, stable_version)
        return self.options.get("version", "") or version_pattern

    def _get_relative_path(self, download_directory, app, docname):
        """Return ``download_directory`` relative to the directory of the current document."""
        current_rst_path = os.path.join(app.builder.srcdir, docname + ".rst")
        return os.path.relpath(download_directory, os.path.dirname(current_rst_path))

    def _filter_files(self, download_directory, version_pattern, exclude_patterns):
        """Return the matching CSV file names sorted from highest to lowest version."""
        candidates = [
            file for file in os.listdir(download_directory)
            if file.endswith('.csv')
            and self._matches_version(file, version_pattern)
            and not self._excluded(file, exclude_patterns)
        ]
        # Newest first: ``run`` takes ``files[0]`` for the ``only_latest`` flag.
        return sorted(candidates, key=self._version_key, reverse=True)

    def _process_file(self, file, relative_path_from_current_rst):
        """Render a single CSV file through ``BaseTemplateDirective``."""
        # Work on a copy so the per-file "template"/"version" values do not
        # leak into this directive's own options.
        options = dict(self.options)
        options["template"] = self.TEMPLATE
        options["version"] = self._extract_version_from_filename(file)
        data_directive = BaseTemplateDirective(
            name=self.name,
            arguments=[os.path.join(relative_path_from_current_rst, file)],
            options=options,
            content=self.content,
            lineno=self.lineno,
            content_offset=self.content_offset,
            block_text=self.block_text,
            state=self.state,
            state_machine=self.state_machine,
        )
        return data_directive.run()

    def _get_exclude_patterns(self):
        """Return the non-empty ``exclude`` patterns with surrounding whitespace removed."""
        raw_patterns = self.options.get("exclude", "").split(",")
        return [pattern.strip() for pattern in raw_patterns if pattern.strip()]

    def _matches_version(self, filename, version):
        """Tell whether the version in ``filename`` falls under ``version``.

        An empty ``version`` matches every file. Otherwise the file version
        must be equal to ``version`` or extend it with further components.
        """
        if not version:
            return True
        file_version = self._extract_version_from_filename(filename)
        if not file_version:
            return False
        # Compare on component boundaries so "5.1" matches "5.1.3" but not "5.10.0".
        wanted = version.rstrip(".")
        return file_version == wanted or file_version.startswith(wanted + ".")

    def _excluded(self, filename, patterns):
        """Return True when any non-empty pattern is a substring of ``filename``."""
        return any(pattern in filename for pattern in patterns if pattern)

    def _version_key(self, filename):
        """Return the file version as a tuple of ints, ``(0,)`` when there is none."""
        version = self._extract_version_from_filename(filename)
        return tuple(map(int, version.split("."))) if version else (0,)

    def _extract_version_from_filename(self, filename):
        """Return capture group 1 of ``FILENAME_REGEX``, or None when it does not match."""
        match = self.FILENAME_REGEX.search(filename)
        return match.group(1) if match else None

    def _get_current_version(self, current_version, stable_version):
        """Return the version to display, without the ``branch-`` prefix.

        ``current_version`` is used when it starts with ``branch-``; otherwise
        ``stable_version`` is used when it does. When neither does, an error
        is logged and ``current_version`` is returned unchanged.
        """
        prefix = 'branch-'
        if current_version.startswith(prefix):
            version = current_version
        elif stable_version.startswith(prefix):
            version = stable_version
        else:
            LOGGER.error("Invalid stable_version format in conf.py. It should start with 'branch-'")
            version = current_version
        return version.replace(prefix, '')

    def get_download_directory(self, app):
        """Return the directory that holds the CSV files; subclasses must override."""
        # Directives should implement the following function
        raise NotImplementedError

    def run(self):
        """Render every selected CSV file and return the resulting nodes."""
        app = self.state.document.settings.env.app
        docname = self.state.document.settings.env.docname
        version_pattern = self._get_version_pattern(app)
        download_directory = self.get_download_directory(app)
        relative_path_from_current_rst = self._get_relative_path(download_directory, app, docname)

        files = self._filter_files(download_directory, version_pattern, self._get_exclude_patterns())
        if not files:
            LOGGER.warning(f"No files match in directory '{download_directory}' with version pattern '{version_pattern}'.")
            return []

        if "only_latest" in self.options:
            # ``files`` is sorted newest first, so the first entry is the latest.
            files = [files[0]]

        output = []
        for file in files:
            output.extend(self._process_file(file, relative_path_from_current_rst))
        return output

View File

@@ -0,0 +1,57 @@
import os
import re
from sphinx.application import Sphinx
from sphinx.util import logging
from scylladb_common_images import FileDownloader, BaseVersionsTemplateDirective
LOGGER = logging.getLogger(__name__)
class GCPImagesInformationDownloader:
    """Fetch the GCP image CSV files when the Sphinx builder is initialised."""

    def run(self, app, exception=None):
        """Download the CSV files unless the download directory already has content."""
        settings = app.config
        source_url = settings.scylladb_gcp_images_base_url
        remote_directory = settings.scylladb_gcp_images_bucket_directory
        local_directory = os.path.join(
            app.builder.srcdir, settings.scylladb_gcp_images_download_directory
        )

        # A populated directory is taken as "already downloaded".
        already_populated = os.path.exists(local_directory) and os.listdir(local_directory)
        if already_populated:
            print(f"Files already exist in {local_directory}. Skipping download.")
            return

        FileDownloader(source_url).download_files(remote_directory, local_directory)
class GCPImagesVersionsTemplateDirective(BaseVersionsTemplateDirective):
    """Versioned image listing for the GCP image CSV files."""

    # Capture group 1 is the version embedded in the CSV file name.
    FILENAME_REGEX = re.compile(r"gce_image_ids_(\d+(?:\.\d+)?(?:\.\d+)?)(?:.*?)\.csv")
    TEMPLATE = "gcp_image.tmpl"

    def get_download_directory(self, app):
        """Return the GCP download directory located under the Sphinx source dir."""
        source_root = app.builder.srcdir
        relative_directory = app.config.scylladb_gcp_images_download_directory
        return os.path.join(source_root, relative_directory)
def setup(app: Sphinx):
    """Register the GCP images config values, download hook and directive.

    Returns the extension metadata dictionary handed back to Sphinx.
    """
    # (option name, default value) pairs; all of them trigger an HTML rebuild.
    config_defaults = (
        ("scylladb_gcp_images_base_url", "https://s3.amazonaws.com/downloads.scylladb.com"),
        ("scylladb_gcp_images_bucket_directory", "downloads/scylla/gcp/"),
        ("scylladb_gcp_images_download_directory", "_data/opensource/gcp"),
    )
    for option_name, default_value in config_defaults:
        app.add_config_value(option_name, default=default_value, rebuild="html")

    downloader = GCPImagesInformationDownloader()
    app.connect("builder-inited", downloader.run)
    app.add_directive("scylladb_gcp_images_template", GCPImagesVersionsTemplateDirective)

    return dict(version="0.1", parallel_read_safe=True, parallel_write_safe=True)

16
docs/_templates/azure_image.tmpl vendored Normal file
View File

@@ -0,0 +1,16 @@
.. -*- mode: rst -*-
{{version}}
{{ '-' * version|length }}
.. list-table::
:header-rows: 1
* - Gallery Image Definition
- Gallery Image Version
- Public Gallery Name
{% for row in data[1:] %}
* - {{ row[0] }}
- {{ row[1] }}
- {{ row[2] }}
{% endfor %}

14
docs/_templates/gcp_image.tmpl vendored Normal file
View File

@@ -0,0 +1,14 @@
.. -*- mode: rst -*-
{{version}}
{{ '-' * version|length }}
.. list-table::
:header-rows: 1
* - Image Name
- Image ID
{% for row in data[1:] %}
* - {{ row[0] }}
- {{ row[1] }}
{% endfor %}

View File

@@ -40,6 +40,8 @@ extensions = [
"sphinxcontrib.datatemplates",
"scylladb_cc_properties",
"scylladb_aws_images",
"scylladb_azure_images",
"scylladb_gcp_images",
"scylladb_include_flag"
]
@@ -108,6 +110,16 @@ scylladb_aws_images_ami_bucket_directory = "downloads/scylla/aws/ami/"
scylladb_aws_images_ami_download_directory = "_data/opensource/aws/ami"
scylladb_aws_images_cloudformation_bucket_directory = "downloads/scylla/aws/cloudformation/"
# -- Options for scylladb_azure_images extension
scylladb_azure_images_base_url = "https://s3.amazonaws.com/downloads.scylladb.com"
scylladb_azure_images_bucket_directory = "downloads/scylla/azure/"
scylladb_azure_images_download_directory = "_data/opensource/azure"
# -- Options for scylladb_gcp_images extension
scylladb_gcp_images_base_url = "https://s3.amazonaws.com/downloads.scylladb.com"
scylladb_gcp_images_bucket_directory = "downloads/scylla/gce/"
scylladb_gcp_images_download_directory = "_data/opensource/gce"
# -- Options for HTML output
# The theme to use for pages.

View File

@@ -0,0 +1,6 @@
============
Azure Images
============
.. scylladb_azure_images_template::
:exclude: rc,dev

View File

@@ -0,0 +1,6 @@
==========
GCP Images
==========
.. scylladb_gcp_images_template::
:exclude: rc,dev