| |
| """ |
| Automates the building and post-processing of MkDocs documentation, particularly for projects with multilingual content. |
| It streamlines the workflow for generating localized versions of the documentation and updating HTML links to ensure |
| they are correctly formatted. |
| |
| Key Features: |
| - Automated building of MkDocs documentation: The script compiles both the main documentation and |
| any localized versions specified in separate MkDocs configuration files. |
| - Post-processing of generated HTML files: After the documentation is built, the script updates all |
| HTML files to remove the '.md' extension from internal links. This ensures that links in the built |
| HTML documentation correctly point to other HTML pages rather than Markdown files, which is crucial |
| for proper navigation within the web-based documentation. |
| |
| Usage: |
| - Run the script from the root directory of your MkDocs project. |
| - Ensure that MkDocs is installed and that all MkDocs configuration files (main and localized versions) |
| are present in the project directory. |
| - The script first builds the documentation using MkDocs, then scans the generated HTML files in the 'site' |
| directory to update the internal links. |
| - It's ideal for projects where the documentation is written in Markdown and needs to be served as a static website. |
| |
| Note: |
| - This script is built to be run in an environment where Python and MkDocs are installed and properly configured. |
| """ |
|
|
| import os |
| import re |
| import shutil |
| import subprocess |
| from pathlib import Path |
|
|
| from bs4 import BeautifulSoup |
| from tqdm import tqdm |
|
|
# Opt in to Jupyter's platformdirs-based paths.
# NOTE(review): presumably set to silence jupyter_core's migration DeprecationWarning during the build - confirm.
os.environ["JUPYTER_PLATFORM_DIRS"] = "1"

# Absolute path of the directory that contains this script.
DOCS = Path(__file__).parent.resolve()

# Build output directory ("site"), located next to the DOCS directory.
SITE = DOCS.parent.joinpath("site")
|
|
|
|
def prepare_docs_markdown(clone_repos=True):
    """Reset the build output and prepare the English Markdown sources.

    Deletes an existing SITE directory, optionally syncs the hub-sdk repository into this project, then runs
    update_markdown_files() on every ``*.md`` file found under ``DOCS / "en"``.

    Args:
        clone_repos (bool): When True, clone https://github.com/ultralytics/hub-sdk next to the docs directory (if it
            is not there yet), pull it, and copy its ``docs`` and ``hub_sdk`` folders into this project.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits with a non-zero status.
        FileNotFoundError: If ``git`` is not installed and a clone is required.
    """
    if SITE.exists():
        print(f"Removing existing {SITE}")
        shutil.rmtree(SITE)

    # Get hub-sdk repo
    if clone_repos:
        repo = "https://github.com/ultralytics/hub-sdk"
        local_dir = DOCS.parent / Path(repo).name

        # Argument lists (no shell) keep paths that contain spaces or shell metacharacters intact.
        if not local_dir.exists():
            # A failed clone is fatal: the copies below need the checkout.
            subprocess.run(["git", "clone", repo, str(local_dir)], check=True)

        # Pulling stays best-effort so an existing checkout can still be used (e.g. when offline).
        try:
            pull_ok = subprocess.run(["git", "-C", str(local_dir), "pull"], check=False).returncode == 0
        except OSError:
            pull_ok = False
        if not pull_ok:
            print(f"WARNING: 'git pull' failed in {local_dir}, continuing with the existing checkout")

        # Replace the previously copied docs and package sources with fresh copies
        shutil.rmtree(DOCS / "en/hub/sdk", ignore_errors=True)
        shutil.copytree(local_dir / "docs", DOCS / "en/hub/sdk")
        shutil.rmtree(DOCS.parent / "hub_sdk", ignore_errors=True)
        shutil.copytree(local_dir / "hub_sdk", DOCS.parent / "hub_sdk")
        print(f"Cloned/Updated {repo} in {local_dir}")

    # Add frontmatter
    for file in tqdm((DOCS / "en").rglob("*.md"), desc="Adding frontmatter"):
        update_markdown_files(file)
|
|
|
|
def update_page_title(file_path: Path, new_title: str):
    """Replace the text of every single-line ``<title>...</title>`` element in an HTML file, in place.

    Args:
        file_path (Path): HTML file to rewrite; it is read and written as UTF-8.
        new_title (str): Text placed verbatim between ``<title>`` and ``</title>``.
    """
    # Read the current content of the file
    with open(file_path, encoding="utf-8") as file:
        content = file.read()

    # A callable replacement is used so that backslashes and group references inside new_title are written
    # literally instead of being interpreted as regex replacement escapes (which can raise re.error).
    replacement = f"<title>{new_title}</title>"
    updated_content = re.sub(r"<title>.*?</title>", lambda _match: replacement, content)

    # Write the updated content back to the file
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(updated_content)
|
|
|
|
def update_html_head(script=""):
    """Insert ``script`` immediately before the last ``</head>`` tag of every HTML file under SITE.

    Files that already contain ``script``, or that have no ``</head>`` tag, are left untouched.

    Args:
        script (str): Markup to inject. An empty string makes the call a no-op.
    """
    if not script:
        return  # nothing to inject

    for html_file in tqdm(Path(SITE).rglob("*.html"), desc="Processing HTML files"):
        html_content = html_file.read_text(encoding="utf-8")

        # Skip only this file when the script is already present; returning here would leave every
        # remaining file unprocessed.
        if script in html_content:
            continue

        # Case-insensitive search on the original text: lowercasing a copy can change its length for some
        # Unicode characters, which would misalign the index used for slicing below.
        closing_heads = list(re.finditer(r"</head>", html_content, flags=re.IGNORECASE))
        if not closing_heads:
            continue
        head_end_index = closing_heads[-1].start()

        # Add the script just before the end of the head tag
        new_html_content = html_content[:head_end_index] + script + html_content[head_end_index:]
        html_file.write_text(new_html_content, encoding="utf-8")
|
|
|
|
def update_subdir_edit_links(subdir="", docs_url=""):
    """Rewrite the "Edit this page" link of every HTML file under ``SITE / subdir``.

    In each file the first ``<a class="md-content__button md-icon">`` element is looked up. If its title is
    "Edit this page", its href is replaced by ``docs_url`` followed by the part of the current href that comes
    after the last occurrence of ``subdir``. Every visited file is written back as serialized by BeautifulSoup.

    Args:
        subdir (str): Directory below SITE whose pages are processed. One leading "/" is ignored. When empty,
            the function does nothing.
        docs_url (str): Prefix of the new edit link.
    """
    # Drop a leading slash: joining an absolute path onto SITE would discard SITE entirely.
    if subdir.startswith("/"):
        subdir = subdir[1:]
    if not subdir:
        return  # no subdirectory to remap (str.split would also reject an empty separator)

    html_files = (SITE / subdir).rglob("*.html")
    for html_file in tqdm(html_files, desc="Processing subdir files"):
        with html_file.open("r", encoding="utf-8") as file:
            soup = BeautifulSoup(file, "html.parser")

        # Find the anchor tag and update its href attribute; .get() tolerates anchors lacking title or href
        a_tag = soup.find("a", {"class": "md-content__button md-icon"})
        if a_tag and a_tag.get("title") == "Edit this page" and a_tag.get("href"):
            a_tag["href"] = f"{docs_url}{a_tag['href'].split(subdir)[-1]}"

        # Write the updated HTML back to the file
        with open(html_file, "w", encoding="utf-8") as file:
            file.write(str(soup))
|
|
|
|
def update_markdown_files(md_filepath: Path):
    """Normalize an existing Markdown file in place; a path that does not exist is ignored.

    Steps applied to the file content:
        1. Strip surrounding whitespace and replace curly single quotes with straight apostrophes.
        2. Prepend placeholder frontmatter when the content does not start with ``---`` (skipped for files that
           have a ``macros`` path component).
        3. Ensure lines starting with ``=== `` are surrounded by empty lines.
        4. Ensure the file ends with a newline.

    Args:
        md_filepath (Path): Markdown file to update. It is read and written as UTF-8.
    """
    if not md_filepath.exists():
        return

    # Explicit UTF-8 so the result does not depend on the platform's locale encoding
    content = md_filepath.read_text(encoding="utf-8").strip()

    # Replace curly apostrophes (U+2018, U+2019) with straight ones
    content = content.replace("\u2018", "'").replace("\u2019", "'")

    # Add frontmatter if missing
    if not content.startswith("---\n") and "macros" not in md_filepath.parts:
        header = "---\ncomments: true\ndescription: TODO ADD DESCRIPTION\nkeywords: TODO ADD KEYWORDS\n---\n\n"
        content = header + content

    # Ensure "=== " lines are preceded and followed by empty lines
    lines = content.split("\n")
    new_lines = []
    for i, line in enumerate(lines):
        if line.strip().startswith("=== "):
            if i > 0 and new_lines[-1] != "":
                new_lines.append("")
            new_lines.append(line)
            if i < len(lines) - 1 and lines[i + 1].strip() != "":
                new_lines.append("")
        else:
            new_lines.append(line)
    content = "\n".join(new_lines)

    # Add EOF newline if missing
    if not content.endswith("\n"):
        content += "\n"

    # Save page
    md_filepath.write_text(content, encoding="utf-8")
|
|
|
|
def update_docs_html():
    """Post-process the built site: 404 title, hub/sdk edit links, plaintext links, head script and macros cleanup."""
    # Give the 404 page its own title
    update_page_title(SITE / "404.html", new_title="Ultralytics Docs - Not Found")

    # Point the edit links of the hub/sdk pages at the hub-sdk repository
    update_subdir_edit_links(
        subdir="hub/sdk/",
        docs_url="https://github.com/ultralytics/hub-sdk/tree/main/docs/",
    )

    # Convert plaintext links to HTML hyperlinks, rewriting only the pages whose content actually changes
    changed_pages = 0
    for page in tqdm(SITE.rglob("*.html"), desc="Converting plaintext links"):
        original_html = page.read_text(encoding="utf-8")
        linked_html = convert_plaintext_links_to_html(original_html)
        if linked_html == original_html:
            continue
        page.write_text(linked_html, encoding="utf-8")
        changed_pages += 1
    print(f"Modified plaintext links in {changed_pages} files.")

    # Snippet to inject into every <head>; it is empty here, so nothing is injected
    script = ""
    if script:
        update_html_head(script)

    # Delete the /macros directory from the built site
    macros_dir = SITE / "macros"
    if macros_dir.exists():
        print(f"Removing /macros directory from site: {macros_dir}")
        shutil.rmtree(macros_dir)
|
|
|
|
# Bare http(s) URL that contains at least one dot and does not end in common trailing punctuation.
_PLAINTEXT_URL = re.compile(r'(https?://[^\s()<>]+(?:\.[^\s()<>]+)+)(?<![.,:;\'"])')


def _linkify_text(text):
    """Return ``(markup, url_count)``: ``text`` HTML-escaped, with every bare URL wrapped in an ``<a>`` element."""
    from html import escape  # local import: only this helper needs it

    pieces = []
    cursor = 0
    url_count = 0
    for match in _PLAINTEXT_URL.finditer(text):
        url = match.group(1)
        pieces.append(escape(text[cursor : match.start()], quote=False))
        pieces.append(f'<a href="{escape(url, quote=True)}">{escape(url, quote=False)}</a>')
        cursor = match.end()
        url_count += 1
    pieces.append(escape(text[cursor:], quote=False))
    return "".join(pieces), url_count


def convert_plaintext_links_to_html(content):
    """Convert plaintext links to HTML hyperlinks in the main content area only.

    Only text that is a direct child of a ``<p>`` or ``<li>`` element inside ``<main>`` (or, failing that,
    ``<div class="md-content">``) is considered.

    Args:
        content (str): HTML document.

    Returns:
        (str): The re-serialized document when at least one URL was converted, otherwise ``content`` unchanged.
    """
    soup = BeautifulSoup(content, "html.parser")

    # Find the main content area (adjust this selector based on your HTML structure)
    main_content = soup.find("main") or soup.find("div", class_="md-content")
    if not main_content:
        return content

    modified = False
    for paragraph in main_content.find_all(["p", "li"]):
        for text_node in paragraph.find_all(string=True, recursive=False):
            # Defensive guard kept from the original; direct children are expected to have the paragraph as parent.
            if text_node.parent.name in {"a", "code"}:
                continue
            # The node text is already entity-decoded, so it is escaped before being parsed again. Otherwise
            # literal "<" or "&" characters in the text or in a URL would be re-interpreted as markup.
            markup, url_count = _linkify_text(str(text_node))
            if url_count:
                text_node.replace_with(BeautifulSoup(markup, "html.parser"))
                modified = True

    return str(soup) if modified else content
|
|
|
|
def remove_macros():
    """Remove the /macros directory and the sitemap entries that reference it from the built site.

    Deletes ``SITE / "macros"`` and ``SITE / "sitemap.xml.gz"`` if present, then rewrites ``SITE / "sitemap.xml"``
    without the ``<url>...</url>`` entries that contain "/macros/". A missing sitemap.xml is reported and skipped.
    """
    shutil.rmtree(SITE / "macros", ignore_errors=True)
    # NOTE(review): the compressed sitemap is presumably dropped because it would still list the macros URLs.
    (SITE / "sitemap.xml.gz").unlink(missing_ok=True)

    sitemap = SITE / "sitemap.xml"
    if not sitemap.exists():
        print(f"No sitemap found at {sitemap}, skipping '/macros/' URL removal")
        return

    lines = sitemap.read_text(encoding="utf-8").splitlines(keepends=True)

    # Entries are dropped from their <url> line through their </url> line rather than by a fixed line count,
    # so entries with more or fewer child elements cannot take a neighbouring entry's lines with them.
    kept_lines = []
    entry = None  # lines of the <url> entry currently being collected, or None when outside an entry
    removed = 0
    for line in lines:
        if entry is None:
            if "<url>" not in line:
                kept_lines.append(line)
                continue
            entry = []
        entry.append(line)
        if "</url>" in line:
            if any("/macros/" in entry_line for entry_line in entry):
                removed += 1
            else:
                kept_lines.extend(entry)
            entry = None
    if entry:
        kept_lines.extend(entry)  # unterminated entry: keep its lines untouched

    # Write the cleaned content back to the file
    sitemap.write_text("".join(kept_lines), encoding="utf-8")

    print(f"Removed {removed} URLs containing '/macros/' from {sitemap}")
|
|
|
|
def main():
    """Run the docs pipeline: prepare the Markdown sources, build with MkDocs, then post-process the site.

    Raises:
        subprocess.CalledProcessError: If ``mkdocs build`` exits with a non-zero status.
    """
    prepare_docs_markdown()

    # Build the main documentation
    print(f"Building docs from {DOCS}")
    # An argument list (no shell) keeps a project path with spaces or shell metacharacters intact.
    subprocess.run(["mkdocs", "build", "-f", str(DOCS.parent / "mkdocs.yml"), "--strict"], check=True)
    remove_macros()
    print(f"Site built at {SITE}")

    # Update docs HTML pages
    update_docs_html()

    # Show command to serve built website
    print('Docs built correctly ✅\nServe site at http://localhost:8000 with "python -m http.server --directory site"')


if __name__ == "__main__":
    main()
|
|