#!/usr/bin/env python3
"""
Render polished PNG overview figures for the Ropedia project page.

The generated assets are used only as text-free visual backgrounds. All labels,
dimensions, task names, and metrics are read from committed result files via
scripts/generate_visualizations.py so the final figures stay traceable.
"""

from __future__ import annotations

import argparse
import base64
import html
import subprocess
import tempfile
from pathlib import Path

from generate_visualizations import collect_summary, task_architecture_rows

# Directory two levels above this file; presumably the repository root
# (the module docstring places the sibling helper under scripts/) - confirm.
ROOT = Path(__file__).resolve().parents[1]
ASSETS = ROOT / "docs/assets"

# Default background images and the rendered outputs.
DEFAULT_PIPELINE_BASE = ASSETS / "pipeline_diagram_base.png"
DEFAULT_ARCHITECTURE_BASE = ASSETS / "task_architectures_base.png"
DEFAULT_PIPELINE_OUTPUT = ASSETS / "pipeline_diagram.png"
DEFAULT_ARCHITECTURE_OUTPUT = ASSETS / "task_architectures.png"

# Viewport sizes (pixels) handed to the Playwright screenshot command.
PIPELINE_WIDTH = 1800
PIPELINE_HEIGHT = 1120
ARCHITECTURE_WIDTH = 1800
ARCHITECTURE_HEIGHT = 2450

COLORS = {
    "blue": "#9bdfff",
    "teal": "#7ae5c3",
    "green": "#ccffa0",
    "amber": "#d8f4a5",
    "orange": "#b7ff91",
    "red": "#ff8f7a",
    "ink": "#f4f8ef",
    "muted": "#a5afa2",
    "line": "#2b4428",
}

# (group title, accent colour, task names) for the architecture figure.
# Colours reference COLORS so the palette is defined in exactly one place.
TASK_GROUPS = [
    ("Label + State", COLORS["blue"], ["timeline_action", "timeline_subtask", "next_action"]),
    (
        "Prediction + Reconstruction",
        COLORS["green"],
        ["hand_trajectory_forecast", "modality_reconstruction", "contact_prediction"],
    ),
    ("Grounding + Retrieval", COLORS["teal"], ["caption_grounding", "cross_modal_retrieval", "object_relevance"]),
    ("Temporal Diagnostics", COLORS["amber"], ["transition_detection", "temporal_order", "misalignment_detection"]),
]


def data_uri(path: Path) -> str:
    """Return *path* encoded as a ``data:image/png;base64,...`` URI.

    Returns an empty string when *path* is not a regular file, so callers can
    treat a missing base image as "no base layer" instead of failing.
    """
    # is_file() rather than exists(): a directory would otherwise reach
    # read_bytes() and raise instead of being skipped like a missing file.
    if not path.is_file():
        return ""
    encoded = base64.b64encode(path.read_bytes()).decode("ascii")
    return f"data:image/png;base64,{encoded}"


def esc(value: object) -> str:
    """Return ``str(value)`` HTML-escaped, including quote characters."""
    return html.escape(str(value), quote=True)


def build_base_layer(path: Path, opacity: float) -> str:
    """Return the base-layer fragment for *path*, or ``""`` when no image is available."""
    uri = data_uri(path)
    if not uri:
        return ""
    # NOTE(review): the template as it stands interpolates neither ``uri`` nor
    # ``opacity``; the markup was presumably lost - confirm against the
    # committed file before relying on this output.
    return "\n"


def stage_card(number: str, title: str, lines: list[str], color: str) -> str:
    """Return the fragment for one pipeline stage card (escaped number and title)."""
    # NOTE(review): ``detail`` and ``color`` are not referenced by the template
    # below; presumably the surrounding markup was lost - confirm.
    detail = "".join(f"\n  • {esc(line)}\n  • " for line in lines)
    return f"""
    {esc(number)}

    {esc(title)}

    """


def arrow() -> str:
    """Return the connector placed between two stage cards (currently empty)."""
    return ""


def build_pipeline_html(summary: dict, base_path: Path) -> str:
    """Build the document text for the pipeline overview figure.

    Reads ``summary["suite"]`` for ``tasks``, the optional ``neural_tasks``,
    ``num_frames``, ``window_frames``, ``stride_frames``, ``num_windows`` and
    ``feature_dim``. *base_path* is the optional background image.
    """
    suite = summary["suite"]
    task_count = len(suite["tasks"])
    neural_count = len(suite.get("neural_tasks", {}))

    # Two rows of stage cards separated by arrow() connectors.
    stage_rows = [
        [
            stage_card(
                "01",
                "Raw public sample",
                ["annotation.hdf5", "6 MP4 videos with audio", f"{suite['num_frames']:,} aligned frames"],
                COLORS["blue"],
            ),
            arrow(),
            stage_card(
                "02",
                "HOMIE loader",
                ["video, depth, pose", "mocap, IMU, language", "audio features"],
                COLORS["teal"],
            ),
            arrow(),
            stage_card(
                "03",
                "Window builder",
                [
                    f"{suite['window_frames']}-frame windows",
                    f"{suite['stride_frames']}-frame stride",
                    f"{suite['num_windows']:,} windows",
                ],
                COLORS["green"],
            ),
            arrow(),
            stage_card(
                "04",
                "Feature vector",
                [f"{suite['feature_dim']:,} dimensions", "18 named blocks incl. audio", "manifested slice indices"],
                COLORS["orange"],
            ),
        ],
        [
            stage_card(
                "05",
                "Baseline models",
                ["motion-only classifiers", "current all-feature classifiers", "neural MLP task heads"],
                COLORS["blue"],
            ),
            arrow(),
            stage_card(
                "06",
                "Ropedia Xperience-10M suite",
                [
                    f"{task_count} minimal + {neural_count} neural results",
                    "forecast, retrieval, alignment",
                    "chronological evaluation",
                ],
                COLORS["teal"],
            ),
            arrow(),
            stage_card(
                "07",
                "Published artifacts",
                ["metrics.json / csv / npz / pt", "GitHub Pages dashboard", "NN comparison charts"],
                COLORS["green"],
            ),
        ],
    ]
    rows_html = "".join(f'\n    {"".join(row)}\n    ' for row in stage_rows)

    checks = [
        "Reproduction check: rerunning scripts to an ignored scratch workspace reproduced the committed metrics exactly.",
        "Modality check: sample covers video, audio, depth, pose/SLAM, mocap, IMU, and language annotation.",
        "Feature check: current baseline manifest has synchronized video/audio/depth/pose/mocap/IMU/language blocks.",
        "Neural check: lightweight PyTorch MLP heads are reported beside the minimal task heads under neural_mlp/.",
        "Scope check: this validates one public sample episode, not cross-episode generalization.",
    ]
    checks_html = "".join(f"\n  • {esc(line)}\n  • " for line in checks)

    base_layer = build_base_layer(base_path, 0.42)
    return f"""
    {base_layer}
    verified single-episode pipeline

    From Ropedia Xperience-10M episode to reproducible artifacts

    The figure follows the actual code path and includes minimal heads plus neural MLP results. Next milestone: Qwen3-Omni fine-tuning with sensor-bridge evaluation on held-out multi-episode splits.

    {suite['num_frames']:,}frames
    {suite['num_windows']:,}windows
    {suite['feature_dim']:,}features
    {task_count}+{neural_count}min + NN tasks
    {rows_html}
    Reproducibility checks
      {checks_html}
    """


def family_label(family: str) -> str:
    """Map a model-family key to its display label; unknown keys pass through unchanged."""
    return {
        "softmax": "linear softmax",
        "ridge": "ridge regression",
        "ridge+rank": "ridge + cosine rank",
        "multilabel": "multi-label logistic",
    }.get(family, family)


def build_task_card(row: dict, color: str) -> str:
    """Return the fragment for one task card.

    *row* must provide the keys ``family``, ``task``, ``input``, ``head``,
    ``output`` and ``metric``; every value is HTML-escaped.
    """
    # NOTE(review): ``color`` is not referenced by the template below;
    # presumably the styling markup was lost - confirm.
    return f"""
    {esc(family_label(row['family']))}

    {esc(row['task'])}

    Input
    {esc(row['input'])}
    Head
    {esc(row['head'])}
    Output
    {esc(row['output'])}
    Metric{esc(row['metric'])}
    """


def build_architecture_html(summary: dict, base_path: Path) -> str:
    """Build the document text for the task-architecture figure.

    Reads ``summary["suite"]`` and the rows returned by
    ``task_architecture_rows(summary)``, indexed by their ``task`` key.

    Raises:
        KeyError: if a task named in TASK_GROUPS has no architecture row.
    """
    suite = summary["suite"]
    neural_count = len(suite.get("neural_tasks", {}))
    rows_by_task = {row["task"]: row for row in task_architecture_rows(summary)}

    # Report every missing task at once instead of a bare KeyError on the first.
    missing = [name for _, _, names in TASK_GROUPS for name in names if name not in rows_by_task]
    if missing:
        raise KeyError(f"no architecture row for task(s): {', '.join(missing)}")

    group_html = []
    for title, color, task_names in TASK_GROUPS:
        cards = "".join(build_task_card(rows_by_task[name], color) for name in task_names)
        group_html.append(
            f"""

    {esc(title)}

    {cards}
    """
        )
    groups = "".join(group_html)

    family_cards = [
        (
            "Linear softmax",
            "Minimal classifier for action, subtask, transition, contact, order, and alignment tasks.",
            COLORS["blue"],
        ),
        (
            "Ridge regression",
            "Minimal closed-form projection for forecasting, reconstruction, and retrieval spaces.",
            COLORS["green"],
        ),
        (
            "Multi-label logistic",
            "Minimal one-vs-rest sigmoid heads over the object vocabulary with top-1 fallback.",
            COLORS["orange"],
        ),
        (
            "Neural MLP",
            "Optional PyTorch nonlinear classifier/regressor over the same features, splits, and metrics.",
            COLORS["red"],
        ),
    ]
    # NOTE(review): the per-family ``color`` is unpacked but not used by the
    # template; presumably the styling markup was lost - confirm.
    families = "".join(
        f"""

    {esc(title)}

    {esc(desc)}

    """
        for title, desc, color in family_cards
    )

    base_layer = build_base_layer(base_path, 0.36)
    return f"""
    {base_layer}
    minimal + neural verified model architectures

    12 Ropedia Xperience-10M tasks, minimal and NN heads

    Each task uses the same aligned episode-window contract. The figure shows minimal heads beside neural MLP metrics; next milestone is Qwen3-Omni fine-tuning with sensor-bridge evaluation.

    {len(suite['tasks'])}+{neural_count}min + NN tasks

    Shared windows

    {suite['num_frames']:,} frames to {suite['num_windows']:,} windows over video, audio, depth, pose, mocap, inertial, and language features.

    Feature vector

    X_all is {suite['feature_dim']:,} dimensions with 18 named modality blocks.

    Reusable heads

    Minimal softmax/ridge/logistic heads plus optional PyTorch MLP heads cover the whole suite.

    Artifacts

    Metrics, predictions, model weights, neural checkpoints, manifests, and the source summary report are committed.

    {families}
    {groups}
    """


def render_html(html_text: str, output_path: Path, width: int, height: int, keep_html: Path | None = None) -> None:
    """Write *html_text* to disk and screenshot it to *output_path*.

    The screenshot is taken by running ``npx --yes playwright screenshot
    --full-page`` with a ``width,height`` viewport on the file's URI.

    Args:
        html_text: Document text to render.
        output_path: Destination image; parent directories are created.
        width: Viewport width passed to Playwright.
        height: Viewport height passed to Playwright.
        keep_html: Where to keep the intermediate HTML. When ``None`` a
            temporary file is used and removed once rendering finishes.

    Raises:
        subprocess.CalledProcessError: if the screenshot command exits non-zero.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if keep_html is None:
        # delete=False so the file survives closing and can be opened by the
        # separate screenshot process; it is removed explicitly below.
        with tempfile.NamedTemporaryFile("w", suffix=".html", encoding="utf-8", delete=False) as handle:
            handle.write(html_text)
        html_path = Path(handle.name)
    else:
        html_path = keep_html
        html_path.parent.mkdir(parents=True, exist_ok=True)
        html_path.write_text(html_text, encoding="utf-8")

    try:
        subprocess.run(
            [
                "npx",
                "--yes",
                "playwright",
                "screenshot",
                "--full-page",
                f"--viewport-size={width},{height}",
                html_path.resolve().as_uri(),
                str(output_path),
            ],
            check=True,
        )
    finally:
        # Do not leave a stray temp file behind on every run (or on failure).
        if keep_html is None:
            html_path.unlink(missing_ok=True)

    print(f"Wrote image: {output_path}")
    if keep_html is not None:
        print(f"Wrote render HTML: {html_path}")


def main() -> int:
    """Parse command-line options, render the requested figure(s), and return 0."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--pipeline-base", type=Path, default=DEFAULT_PIPELINE_BASE)
    parser.add_argument("--architecture-base", type=Path, default=DEFAULT_ARCHITECTURE_BASE)
    parser.add_argument("--pipeline-output", type=Path, default=DEFAULT_PIPELINE_OUTPUT)
    parser.add_argument("--architecture-output", type=Path, default=DEFAULT_ARCHITECTURE_OUTPUT)
    parser.add_argument("--html-dir", type=Path, help="Optional directory for the intermediate render HTML files.")
    parser.add_argument("--only", choices=["pipeline", "architecture", "both"], default="both")
    args = parser.parse_args()

    summary = collect_summary()

    if args.only in {"pipeline", "both"}:
        pipeline_html = build_pipeline_html(summary, args.pipeline_base)
        html_path = args.html_dir / "pipeline_diagram.html" if args.html_dir else None
        render_html(pipeline_html, args.pipeline_output, PIPELINE_WIDTH, PIPELINE_HEIGHT, html_path)

    if args.only in {"architecture", "both"}:
        architecture_html = build_architecture_html(summary, args.architecture_base)
        html_path = args.html_dir / "task_architectures.html" if args.html_dir else None
        render_html(architecture_html, args.architecture_output, ARCHITECTURE_WIDTH, ARCHITECTURE_HEIGHT, html_path)

    return 0


if __name__ == "__main__":
    raise SystemExit(main())