#!/usr/bin/env python3
"""
Render polished PNG overview figures for the Ropedia project page.
The generated assets are used only as text-free visual backgrounds. All
labels, dimensions, task names, and metrics are read from committed result
files via scripts/generate_visualizations.py so the final figures stay
traceable.
"""
from __future__ import annotations
import argparse
import base64
import html
import subprocess
import tempfile
from pathlib import Path
from generate_visualizations import collect_summary, task_architecture_rows
# Repository root: one directory above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]

# Every input base image and rendered output lives under docs/assets.
ASSETS = ROOT / "docs" / "assets"

# Background images layered underneath the generated figures.
DEFAULT_PIPELINE_BASE = ASSETS / "pipeline_diagram_base.png"
DEFAULT_ARCHITECTURE_BASE = ASSETS / "task_architectures_base.png"

# Final rendered figures.
DEFAULT_PIPELINE_OUTPUT = ASSETS / "pipeline_diagram.png"
DEFAULT_ARCHITECTURE_OUTPUT = ASSETS / "task_architectures.png"

# Viewport sizes (width, height) handed to the screenshot command.
PIPELINE_WIDTH, PIPELINE_HEIGHT = 1800, 1120
ARCHITECTURE_WIDTH, ARCHITECTURE_HEIGHT = 1800, 2450
# Named palette shared by the figure builders. Keys are symbolic names used
# for lookups in this file; values are CSS hex colours.
COLORS = {
    # Accent colours.
    "blue": "#9bdfff",
    "teal": "#7ae5c3",
    "green": "#ccffa0",
    "amber": "#d8f4a5",
    "orange": "#b7ff91",
    "red": "#ff8f7a",
    # Neutral tones.
    "ink": "#f4f8ef",
    "muted": "#a5afa2",
    "line": "#2b4428",
}
# Task groups as (group title, accent colour, task names) tuples, in display
# order. The hex values equal the "blue", "green", "teal" and "amber" entries
# of COLORS respectively.
TASK_GROUPS = [
    (
        "Label + State",
        "#9bdfff",
        ["timeline_action", "timeline_subtask", "next_action"],
    ),
    (
        "Prediction + Reconstruction",
        "#ccffa0",
        ["hand_trajectory_forecast", "modality_reconstruction", "contact_prediction"],
    ),
    (
        "Grounding + Retrieval",
        "#7ae5c3",
        ["caption_grounding", "cross_modal_retrieval", "object_relevance"],
    ),
    (
        "Temporal Diagnostics",
        "#d8f4a5",
        ["transition_detection", "temporal_order", "misalignment_detection"],
    ),
]
def data_uri(path: Path) -> str:
    """Return the bytes of *path* as a ``data:image/png;base64,...`` URI.

    The MIME type is always ``image/png``; the file content is not inspected.
    An empty string is returned when *path* does not exist.
    """
    if path.exists():
        payload = base64.b64encode(path.read_bytes())
        return "data:image/png;base64," + payload.decode("ascii")
    return ""
def esc(value: object) -> str:
    """Stringify *value* and HTML-escape it, including both quote characters."""
    text = str(value)
    return html.escape(text, quote=True)
def build_base_layer(path: Path, opacity: float) -> str:
    """Build the string for the background layer based on the image at *path*.

    The image is converted with ``data_uri``; when that yields an empty
    string (no usable image), an empty string is returned and no layer is
    produced.
    """
    uri = data_uri(path)
    if not uri:
        return ""
    # NOTE(review): as shown here the returned f-string is empty and split
    # across two lines, which is not a valid single-quoted literal. Neither
    # `uri` nor `opacity` is interpolated; the markup that presumably used
    # them appears to be missing -- confirm against the committed file.
    return f'
'
def stage_card(number: str, title: str, lines: list[str], color: str) -> str:
    """Build the text for one numbered stage card.

    *number* and *title* are HTML-escaped with ``esc`` before interpolation.
    """
    # NOTE(review): `detail` (the escaped, concatenated *lines*) and `color`
    # are not referenced by the template as visible here; the surrounding
    # markup appears to be missing -- confirm against the committed file.
    detail = "".join(f"{esc(line)}" for line in lines)
    return f"""
{esc(number)}
{esc(title)}
"""
def arrow() -> str:
    """Return the connector string placed between stage cards."""
    # NOTE(review): the literal below spans two lines inside single quotes,
    # which is not valid Python as shown; the original markup appears to have
    # been altered -- confirm against the committed file.
    return '->
'
def build_pipeline_html(summary: dict, base_path: Path) -> str:
    """Assemble the text of the pipeline overview figure.

    Args:
        summary: Mapping with a ``"suite"`` entry. The keys read from the
            suite here are ``tasks``, ``neural_tasks`` (optional, defaults to
            an empty mapping), ``num_frames``, ``window_frames``,
            ``stride_frames``, ``num_windows`` and ``feature_dim``.
        base_path: Background image path handed to ``build_base_layer``.

    Returns:
        The formatted template string.

    Raises:
        KeyError: If ``summary`` lacks ``"suite"`` or a required suite key.
    """
    suite = summary["suite"]
    task_count = len(suite["tasks"])
    neural_count = len(suite.get("neural_tasks", {}))
    # Two rows of cards with arrow() strings between them: stages 01-04 in
    # the first row, stages 05-07 in the second. Counts formatted with `:,`
    # get thousands separators (presumably ints -- verify against the
    # summary producer).
    stage_rows = [
        [
            stage_card(
                "01",
                "Raw public sample",
                ["annotation.hdf5", "6 MP4 videos with audio", f"{suite['num_frames']:,} aligned frames"],
                COLORS["blue"],
            ),
            arrow(),
            stage_card(
                "02",
                "HOMIE loader",
                ["video, depth, pose", "mocap, IMU, language", "audio features"],
                COLORS["teal"],
            ),
            arrow(),
            stage_card(
                "03",
                "Window builder",
                [
                    f"{suite['window_frames']}-frame windows",
                    f"{suite['stride_frames']}-frame stride",
                    f"{suite['num_windows']:,} windows",
                ],
                COLORS["green"],
            ),
            arrow(),
            stage_card(
                "04",
                "Feature vector",
                [f"{suite['feature_dim']:,} dimensions", "18 named blocks incl. audio", "manifested slice indices"],
                COLORS["orange"],
            ),
        ],
        [
            stage_card(
                "05",
                "Baseline models",
                ["motion-only classifiers", "current all-feature classifiers", "neural MLP task heads"],
                COLORS["blue"],
            ),
            arrow(),
            stage_card(
                "06",
                "Ropedia Xperience-10M suite",
                [f"{task_count} minimal + {neural_count} neural results", "forecast, retrieval, alignment", "chronological evaluation"],
                COLORS["teal"],
            ),
            arrow(),
            stage_card(
                "07",
                "Published artifacts",
                ["metrics.json / csv / npz / pt", "GitHub Pages dashboard", "NN comparison charts"],
                COLORS["green"],
            ),
        ],
    ]
    # NOTE(review): the per-row f-string is empty as shown, so `row` is never
    # interpolated and rows_html is always "". The row wrapper markup appears
    # to be missing -- confirm against the committed file.
    rows_html = "".join(f'' for row in stage_rows)
    checks = [
        "Reproduction check: rerunning scripts to an ignored scratch workspace reproduced the committed metrics exactly.",
        "Modality check: sample covers video, audio, depth, pose/SLAM, mocap, IMU, and language annotation.",
        "Feature check: current baseline manifest has synchronized video/audio/depth/pose/mocap/IMU/language blocks.",
        "Neural check: lightweight PyTorch MLP heads are reported beside the minimal task heads under neural_mlp/.",
        "Scope check: this validates one public sample episode, not cross-episode generalization.",
    ]
    # NOTE(review): checks_html is built but not referenced by the template
    # below as visible here.
    checks_html = "".join(f"{esc(line)}" for line in checks)
    # 0.42 is the opacity argument passed for the pipeline background layer.
    base_layer = build_base_layer(base_path, 0.42)
    return f"""
{base_layer}
verified single-episode pipeline
From Ropedia Xperience-10M episode to reproducible artifacts
The figure follows the actual code path and includes minimal heads plus neural MLP results. Next milestone: Qwen3-Omni fine-tuning with sensor-bridge evaluation on held-out multi-episode splits.
{suite['num_frames']:,}frames
{suite['num_windows']:,}windows
{suite['feature_dim']:,}features
{task_count}+{neural_count}min + NN tasks
{rows_html}
"""
def family_label(family: str) -> str:
    """Translate a model-family key into its display label.

    Keys without a dedicated label are returned unchanged.
    """
    display_names = {
        "softmax": "linear softmax",
        "ridge": "ridge regression",
        "ridge+rank": "ridge + cosine rank",
        "multilabel": "multi-label logistic",
    }
    if family in display_names:
        return display_names[family]
    return family
def build_task_card(row: dict, color: str) -> str:
    """Build the text for one task card from an architecture row.

    The keys read from *row* are ``family`` (mapped through
    ``family_label``), ``task``, ``input``, ``head``, ``output`` and
    ``metric``; each value is HTML-escaped with ``esc``.

    Raises:
        KeyError: If *row* lacks one of those keys.
    """
    # NOTE(review): `color` is not referenced by the template as visible
    # here; the card markup appears to be missing -- confirm against the
    # committed file.
    return f"""
{esc(family_label(row['family']))}
{esc(row['task'])}
- Input
- {esc(row['input'])}
- Head
- {esc(row['head'])}
- Output
- {esc(row['output'])}
Metric{esc(row['metric'])}
"""
def build_architecture_html(summary: dict, base_path: Path) -> str:
    """Assemble the text of the task-architecture overview figure.

    Args:
        summary: Mapping with a ``"suite"`` entry. The keys read from the
            suite here are ``tasks``, ``neural_tasks`` (optional),
            ``num_frames``, ``num_windows`` and ``feature_dim``. The whole
            mapping is also passed to ``task_architecture_rows``.
        base_path: Background image path handed to ``build_base_layer``.

    Returns:
        The formatted template string.

    Raises:
        KeyError: If a required key is missing, or if a task named in
            ``TASK_GROUPS`` has no row from ``task_architecture_rows``.
    """
    suite = summary["suite"]
    neural_count = len(suite.get("neural_tasks", {}))
    # Index the architecture rows by task name so each group can look up its
    # cards in TASK_GROUPS order.
    rows_by_task = {row["task"]: row for row in task_architecture_rows(summary)}
    group_html = []
    for title, color, task_names in TASK_GROUPS:
        cards = "".join(build_task_card(rows_by_task[name], color) for name in task_names)
        # NOTE(review): the appended template holds only a newline as shown,
        # so `title` and `cards` are never interpolated; the group markup
        # appears to be missing -- confirm against the committed file.
        group_html.append(
            f"""
"""
        )
    # (title, description, accent colour) for each model-family summary card.
    family_cards = [
        ("Linear softmax", "Minimal classifier for action, subtask, transition, contact, order, and alignment tasks.", COLORS["blue"]),
        ("Ridge regression", "Minimal closed-form projection for forecasting, reconstruction, and retrieval spaces.", COLORS["green"]),
        ("Multi-label logistic", "Minimal one-vs-rest sigmoid heads over the object vocabulary with top-1 fallback.", COLORS["orange"]),
        ("Neural MLP", "Optional PyTorch nonlinear classifier/regressor over the same features, splits, and metrics.", COLORS["red"]),
    ]
    # `color` is unpacked but not used by the per-family template as visible.
    families = "".join(
        f"""
{esc(title)}
{esc(desc)}
"""
        for title, desc, color in family_cards
    )
    # 0.36 is the opacity argument passed for the architecture background.
    base_layer = build_base_layer(base_path, 0.36)
    # NOTE(review): neither `group_html` nor `families` is referenced by the
    # template below as visible here. The heading hard-codes "12" tasks and
    # the body hard-codes "18" blocks, while the count line below is computed
    # from `suite` -- confirm these stay in sync.
    return f"""
{base_layer}
minimal + neural verified model architectures
12 Ropedia Xperience-10M tasks, minimal and NN heads
Each task uses the same aligned episode-window contract. The figure shows minimal heads beside neural MLP metrics; next milestone is Qwen3-Omni fine-tuning with sensor-bridge evaluation.
{len(suite['tasks'])}+{neural_count}min + NN tasks
Shared windows
{suite['num_frames']:,} frames to {suite['num_windows']:,} windows over video, audio, depth, pose, mocap, inertial, and language features.
Feature vector
X_all is {suite['feature_dim']:,} dimensions with 18 named modality blocks.
Reusable heads
Minimal softmax/ridge/logistic heads plus optional PyTorch MLP heads cover the whole suite.
Artifacts
Metrics, predictions, model weights, neural checkpoints, manifests, and the source summary report are committed.
"""
def render_html(html_text: str, output_path: Path, width: int, height: int, keep_html: Path | None = None) -> None:
    """Render *html_text* to an image with the Playwright screenshot CLI.

    Args:
        html_text: Complete HTML document to render.
        output_path: Destination image file; its parent directories are
            created if needed.
        width: Viewport width passed to ``--viewport-size``.
        height: Viewport height passed to ``--viewport-size``.
        keep_html: Where to persist the intermediate HTML file. When ``None``
            a temporary file is used and removed once the screenshot command
            has finished, whether it succeeded or not.

    Raises:
        subprocess.CalledProcessError: If the screenshot command exits with a
            non-zero status.
        FileNotFoundError: If the ``npx`` executable cannot be found.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if keep_html is None:
        # delete=False so the file outlives the handle and the external
        # process can open it; it is removed explicitly in the finally below.
        with tempfile.NamedTemporaryFile("w", suffix=".html", encoding="utf-8", delete=False) as handle:
            handle.write(html_text)
        html_path = Path(handle.name)
    else:
        html_path = keep_html
        html_path.parent.mkdir(parents=True, exist_ok=True)
        html_path.write_text(html_text, encoding="utf-8")

    try:
        subprocess.run(
            [
                "npx",
                "--yes",
                "playwright",
                "screenshot",
                "--full-page",
                f"--viewport-size={width},{height}",
                html_path.resolve().as_uri(),
                str(output_path),
            ],
            check=True,
        )
    finally:
        # Previously the temporary file was never removed, leaving one stray
        # HTML file in the temp directory per render.
        if keep_html is None:
            html_path.unlink(missing_ok=True)

    print(f"Wrote image: {output_path}")
    # Only report an HTML path that still exists after this call.
    if keep_html is not None:
        print(f"Wrote render HTML: {html_path}")
def main() -> int:
    """Parse the command line, render the selected figures, and return 0.

    ``--only`` chooses between the pipeline figure, the architecture figure,
    or both (the default). When ``--html-dir`` is given, the intermediate
    HTML of each rendered figure is written into that directory.
    """
    cli = argparse.ArgumentParser()
    path_options = (
        ("--pipeline-base", DEFAULT_PIPELINE_BASE),
        ("--architecture-base", DEFAULT_ARCHITECTURE_BASE),
        ("--pipeline-output", DEFAULT_PIPELINE_OUTPUT),
        ("--architecture-output", DEFAULT_ARCHITECTURE_OUTPUT),
    )
    for flag, fallback in path_options:
        cli.add_argument(flag, type=Path, default=fallback)
    cli.add_argument("--html-dir", type=Path, help="Optional directory for the intermediate render HTML files.")
    cli.add_argument("--only", choices=["pipeline", "architecture", "both"], default="both")
    args = cli.parse_args()

    summary = collect_summary()

    # One entry per figure, in render order:
    # (selector, builder, base image, output image, width, height, HTML name).
    jobs = (
        (
            "pipeline",
            build_pipeline_html,
            args.pipeline_base,
            args.pipeline_output,
            PIPELINE_WIDTH,
            PIPELINE_HEIGHT,
            "pipeline_diagram.html",
        ),
        (
            "architecture",
            build_architecture_html,
            args.architecture_base,
            args.architecture_output,
            ARCHITECTURE_WIDTH,
            ARCHITECTURE_HEIGHT,
            "task_architectures.html",
        ),
    )
    for selector, build, base_image, target, width, height, html_name in jobs:
        if args.only not in {selector, "both"}:
            continue
        document = build(summary, base_image)
        html_path = args.html_dir / html_name if args.html_dir else None
        render_html(document, target, width, height, html_path)
    return 0
# Script entry point: exit the process with the status returned by main().
if __name__ == "__main__":
    raise SystemExit(main())