blender-projects-gitea-custom/sphinx/sphinx_to_html.py

#!/usr/bin/python3

import argparse
import html
import os
import pathlib
import re
import shutil
import subprocess
import sys
import tempfile

# The page source to preview is supplied on standard input; it is read in
# full up front and rewritten further below before being handed to Sphinx.
page_contents = sys.stdin.read()

base_url = "https://projects.blender.org"
staging_base_url = "https://projects.staging.blender.org"
uatest_base_url = "https://uatest.projects.blender.org"
local_url = "http://localhost:3000"
placeholder_url = "https://placeholder.org"


def strip_known_host(url):
    """Return *url* with a leading known Gitea host removed, if it has one.

    The production, staging, UA-test and local hosts are recognised; any
    other string is returned unchanged.
    """
    for host in (base_url, staging_base_url, uatest_base_url, local_url):
        if url.startswith(host):
            return url[len(host):]
    return url


def build_preview_urls(prefix_path):
    """Return ``(doc_url, image_url)`` for a host-less Gitea source prefix.

    The prefix is expected to look like ``/org/repo/view/ref/branch[/...]``.
    Both URLs are empty strings when the prefix is empty or has fewer than
    five components, instead of raising while unpacking the components.
    Generated URLs always point at ``base_url``.
    """
    path_tokens = prefix_path.strip('/').split('/')
    if len(path_tokens) < 5:
        return "", ""

    org, repo, view, ref, branch = path_tokens[:5]
    doc = f"{base_url}/{org}/{repo}/{view}/{ref}/{branch}"
    image = f"{base_url}/{org}/{repo}/media/{ref}/{branch}"

    # Hardcoded exception for blender-manual, that has links relative
    # to manual/ folder.
    if len(path_tokens) > 5 and path_tokens[5] == 'manual':
        doc += "/manual"
        image += "/manual"
    return doc, image


# Gitea sets this environment variable with the URL prefix for the current file.
gitea_prefix = strip_known_host(os.environ.get("GITEA_PREFIX_SRC", ""))
doc_url, image_url = build_preview_urls(gitea_prefix)

# Create a uniquely named temporary directory in the current working
# directory. A fixed directory name would be shared by every invocation, so
# two previews rendered at the same time could delete or overwrite each
# other's working files.
tmp_dir = pathlib.Path(tempfile.mkdtemp(prefix="tmp_sphinx_", dir="."))

# Render the page: copy the Sphinx project template into the temporary
# directory, rewrite the page source so it builds standalone, run
# sphinx-build and print the resulting HTML body (plus any warnings) to
# standard output. The temporary directory is always removed afterwards.
try:
    script_dir = pathlib.Path(__file__).parent.resolve()

    # Work with an absolute path so that the source file name can be stripped
    # from Sphinx messages below (sphinx-build is expected to report absolute
    # source paths -- NOTE(review): confirm for the installed Sphinx version).
    work_dir = (tmp_dir / "work").resolve()

    # copytree() creates the destination directory itself and raises
    # FileExistsError if it already exists, so work_dir must not be created
    # beforehand.
    shutil.copytree(script_dir / "template", work_dir)
    page_filepath = work_dir / "contents.rst"

    # Turn links into external links since internal links are not found and stripped.
    def path_to_label(path):
        """Derive a readable label from the last component of a document path."""
        path = path.removesuffix('/index')
        return path.split('/')[-1].replace('_', ' ').replace('-', ' ').capitalize()

    def doc_label_link(matchobj):
        """Rewrite a labelled :doc: role into an external hyperlink."""
        # Whitespace is required between the label and "<" for the target to
        # be recognised as an embedded URI; the label and target may have been
        # separated only by a newline that the pattern consumed.
        return f"`{matchobj.group(1)} <{doc_url}/{matchobj.group(2).strip('/')}.rst>`_"

    def doc_link(matchobj):
        """Rewrite an unlabelled absolute :doc: role, deriving the label from the path."""
        return f"`{path_to_label(matchobj.group(1))} <{doc_url}/{matchobj.group(1).strip('/')}.rst>`_"

    def ref_label_link(matchobj):
        """Rewrite a labelled :ref: role into a placeholder link."""
        return f"`{matchobj.group(1)} <{placeholder_url}>`_"

    def ref_link(matchobj):
        """Rewrite an unlabelled :ref: role into a placeholder link."""
        return f"`{path_to_label(matchobj.group(1))} <{placeholder_url}>`_"

    def term_link(matchobj):
        """Rewrite a :term: role into a placeholder link."""
        return f"`{matchobj.group(1)} <{placeholder_url}>`_"

    def media_url(path):
        """Return the URL to use for an image or figure path."""
        # Targets that already are absolute URLs must not be prefixed.
        if re.match(r"[A-Za-z][A-Za-z0-9+.-]*://", path):
            return path
        return f"{image_url}/{path.strip('/')}"

    def figure_link(matchobj):
        """Point a figure directive at the repository media URL."""
        return f"figure:: {media_url(matchobj.group(1))}"

    def image_link(matchobj):
        """Point an image directive at the repository media URL."""
        return f"image:: {media_url(matchobj.group(1))}"

    page_contents = re.sub(r":doc:`/(.+?)`", doc_link, page_contents)
    page_contents = re.sub(r":doc:`([\w\s\n-]+?)\n?<(.+?)>`", doc_label_link, page_contents)
    page_contents = re.sub(r":ref:`([\w\s\n-]+?)\n?<(.+?)>`", ref_label_link, page_contents)
    page_contents = re.sub(r":ref:`([\w\s-]+?)`", ref_link, page_contents)
    page_contents = re.sub(r":term:`([\w\s\n-]+?)\n?<(.+?)>`", term_link, page_contents)
    page_contents = re.sub(r":term:`([\w\s-]+?)`", term_link, page_contents)
    # Capture the whole directive argument (rest of the line) so the callback
    # sees the complete path rather than only its first character.
    page_contents = re.sub(r"figure:: (.+)", figure_link, page_contents)
    page_contents = re.sub(r"image:: (.+)", image_link, page_contents)

    # Disable include directives and raw for security. They are already disabled
    # by docutils.py, this is just to be extra careful.
    def include_directive(matchobj):
        """Replace an include-style directive with a warning admonition."""
        return f"warning:: include not available in preview: {html.escape(matchobj.group(1))}"

    def raw_directive(matchobj):
        """Replace a raw directive with a warning admonition."""
        return f"warning:: raw not available in preview: {html.escape(matchobj.group(1))}"

    # literalinclude must be handled before include, as the latter pattern
    # would otherwise match inside it.
    page_contents = re.sub(r"literalinclude::(.*)", include_directive, page_contents)
    page_contents = re.sub(r"include::(.*)", include_directive, page_contents)
    page_contents = re.sub(r"raw::(.*)", raw_directive, page_contents)
    page_contents = re.sub(r".. toctree::(.*)", ".. code-block:: none", page_contents)
    page_contents = re.sub(r":maxdepth:(.*)", "", page_contents)
    page_contents = page_contents.replace("|BLENDER_VERSION|", "BLENDER_VERSION")

    # Write UTF-8 explicitly instead of relying on the locale encoding.
    page_filepath.write_text(page_contents, encoding="utf-8")

    # Debug processed RST
    # print(html.escape(page_contents).replace('\n', '<br/>\n'))
    # sys.exit(0)

    # Run sphinx-build.
    out_dir = work_dir / "out"
    out_dir.mkdir(parents=True)
    out_filepath = out_dir / "contents.html"

    sphinx_build = str(script_dir / "venv" / "bin" / "sphinx-build")
    sphinx_cmd = [sphinx_build, "-b", "html", str(work_dir), str(out_dir)]
    result = subprocess.run(sphinx_cmd, capture_output=True)

    # Output errors.
    error = result.stderr.decode("utf-8", "ignore").strip()
    if error:
        # Drop the temporary source file path so only "line: message" remains.
        error = error.replace(str(page_filepath) + ":", "")
        error = html.escape(error)
        print("<h2>Sphinx Warnings</h2>\n")
        print(f"<pre>{error}</pre>")
        print("<p>Note the preview is not accurate and warnings may not indicate real issues.</p>")

    # Output contents of body.
    if result.returncode == 0 and out_filepath.is_file():
        contents = out_filepath.read_text(encoding="utf-8")
        body = contents.split("<body>")[1].split("</body>")[0]
        # Links that could not be resolved in the preview were pointed at the
        # placeholder URL above; turn them into an inert anchor.
        body = body.replace(f'href="{placeholder_url}', 'href="#link-not-available-in-preview"')
        # Open remaining external links in a new tab.
        body = body.replace('href="http', 'target="_blank" href="http')
        body = '<div class="restructuredtext">' + body + '</div>'
        print(body)

finally:
    # Clean up temporary directory if needed
    shutil.rmtree(tmp_dir, ignore_errors=True)