# Ultralytics š AGPL-3.0 License - https://ultralytics.com/license """ Automates building and post-processing of MkDocs documentation, especially for multilingual projects. This script streamlines generating localized documentation and updating HTML links for correct formatting. Key Features: - Automated building of MkDocs documentation: Compiles main documentation and localized versions from separate MkDocs configuration files. - Post-processing of generated HTML files: Updates HTML files to remove '.md' from internal links, ensuring correct navigation in web-based documentation. Usage: - Run from the root directory of your MkDocs project. - Ensure MkDocs is installed and configuration files (main and localized) are present. - The script builds documentation using MkDocs, then scans HTML files in 'site' to update links. - Ideal for projects with Markdown documentation served as a static website. Note: - Requires Python and MkDocs to be installed and configured. """ from __future__ import annotations import os import re import shutil import subprocess import tempfile import time from pathlib import Path import yaml from bs4 import BeautifulSoup from minijinja import Environment, load_from_path try: from plugin import postprocess_site # mkdocs-ultralytics-plugin except ImportError: postprocess_site = None from build_reference import build_reference_docs, build_reference_for from ultralytics.utils import LINUX, LOGGER, MACOS from ultralytics.utils.tqdm import TQDM os.environ["JUPYTER_PLATFORM_DIRS"] = "1" # fix DeprecationWarning: Jupyter is migrating to use standard platformdirs DOCS = Path(__file__).parent.resolve() SITE = DOCS.parent / "site" LINK_PATTERN = re.compile(r"(https?://[^\s()<>]*[^\s()<>.,:;!?\'\"])") TITLE_PATTERN = re.compile(r"
max_title_length and "-" in title_tag.text:
title_tag.string = title_tag.text.rsplit("-", 1)[0].strip()
modified = True
# Find the main content area
main_content = soup.find("main") or soup.find("div", class_="md-content")
if not main_content:
return str(soup) if modified else content
# Convert plaintext links to HTML hyperlinks
if needs_link_conversion:
for paragraph in main_content.select("p, li"):
for text_node in paragraph.find_all(string=True, recursive=False):
if text_node.parent.name not in {"a", "code"}:
new_text = LINK_PATTERN.sub(r'\1', str(text_node))
if " 0:
tail = " "
if tail:
span.insert_after(tail)
modified = True
highlight_labels(soup.select("main h1, main h2, main h3, main h4, main h5"))
highlight_labels(soup.select("nav.md-nav--secondary .md-ellipsis, nav.md-nav__list .md-ellipsis"))
if "reference" in rel_path:
for ellipsis in soup.select("nav.md-nav--secondary .md-ellipsis"):
kind = ellipsis.find(class_=lambda c: c and "doc-kind" in c.split())
text = str(kind.next_sibling).strip() if kind and kind.next_sibling else ellipsis.get_text(strip=True)
if "." not in text:
continue
ellipsis.clear()
short = text.rsplit(".", 1)[-1]
if kind:
ellipsis.append(kind)
ellipsis.append(f" {short}")
else:
ellipsis.append(short)
modified = True
if needs_kind_highlight and not modified and soup.select(".doc-kind"):
# Ensure style injection when pre-existing badges are present
modified = True
if modified:
head = soup.find("head")
if head and not soup.select("style[data-doc-kind]"):
style = soup.new_tag("style", attrs={"data-doc-kind": "true"})
style.string = (
".doc-kind{display:inline-flex;align-items:center;gap:0.25em;padding:0.21em 0.59em;border-radius:999px;"
"font-weight:700;font-size:0.81em;letter-spacing:0.06em;text-transform:uppercase;"
"line-height:1;color:var(--doc-kind-color,#f8fafc);"
"background:var(--doc-kind-bg,rgba(255,255,255,0.12));}"
f".doc-kind-class{{--doc-kind-color:{DOC_KIND_COLORS['Class']};--doc-kind-bg:rgba(3,157,252,0.22);}}"
f".doc-kind-function{{--doc-kind-color:{DOC_KIND_COLORS['Function']};--doc-kind-bg:rgba(252,152,3,0.22);}}"
f".doc-kind-method{{--doc-kind-color:{DOC_KIND_COLORS['Method']};--doc-kind-bg:rgba(239,94,255,0.22);}}"
f".doc-kind-property{{--doc-kind-color:{DOC_KIND_COLORS['Property']};--doc-kind-bg:rgba(2,232,53,0.22);}}"
)
head.append(style)
return str(soup) if modified else content
def _rewrite_md_links(content: str) -> str:
    """Rewrite `.md` references in HTML content, leaving lines that mention GitHub untouched.

    The content is processed one line at a time. A line containing "github.com" is passed through verbatim. On every
    other line each "index.md" occurrence is removed, then MD_LINK_PATTERN matches are replaced using the template
    ``\\1\\2/\\3`` (MD_LINK_PATTERN is defined elsewhere in this module).

    Args:
        content (str): HTML content to process.

    Returns:
        (str): The rewritten content, or the input unchanged when it contains no ".md" at all.
    """
    # Cheap substring test first: content with no ".md" is returned as-is without splitting
    if ".md" not in content:
        return content

    def _convert(row: str) -> str:
        """Rewrite a single line unless it mentions GitHub."""
        if "github.com" in row:
            return row
        without_index = row.replace("index.md", "")
        return MD_LINK_PATTERN.sub(r"\1\2/\3", without_index)

    return "\n".join(_convert(row) for row in content.split("\n"))
# Precompiled regex patterns for minification
# HTML comments, including multi-line ones; non-greedy so adjacent comments are matched one at a time
HTML_COMMENT = re.compile(r"<!--[\s\S]*?-->")
# Whole <pre>/<code>/<textarea>/<script> elements, from the opening tag through the matching closing tag
HTML_PRESERVE = re.compile(r"<(pre|code|textarea|script)[^>]*>[\s\S]*?</\1>", re.IGNORECASE)
# Whitespace sitting between two adjacent tags
HTML_TAG_SPACE = re.compile(r">\s+<")
# Runs of two or more whitespace characters
HTML_MULTI_SPACE = re.compile(r"\s{2,}")
# Lines that are empty or whitespace-only, together with their trailing newline
HTML_EMPTY_LINE = re.compile(r"^\s*$\n", re.MULTILINE)
# CSS block comments, including multi-line ones
CSS_COMMENT = re.compile(r"/\*[\s\S]*?\*/")
def remove_comments_and_empty_lines(content: str, file_type: str) -> str:
"""Remove comments and empty lines from a string of code, preserving newlines and URLs.
Args:
content (str): Code content to process.
file_type (str): Type of file ('html', 'css', or 'js').
Returns:
(str): Cleaned content with comments and empty lines removed.
Notes:
Typical reductions for Ultralytics Docs are:
- Total HTML reduction: 2.83% (1301.56 KB saved)
- Total CSS reduction: 1.75% (2.61 KB saved)
- Total JS reduction: 13.51% (99.31 KB saved)
"""
if file_type == "html":
content = HTML_COMMENT.sub("", content) # Remove HTML comments
# Preserve whitespace in ,
,