Publish Ropedia Xperience-10M task baseline cards

45c1706 verified 17 minutes ago

23.8 kB

	#!/usr/bin/env python3
	"""
	Render polished PNG overview figures for the Ropedia project page.

	The generated assets are used only as text-free visual backgrounds. All
	labels, dimensions, task names, and metrics are read from committed result
	files via scripts/generate_visualizations.py so the final figures stay
	traceable.
	"""

	from __future__ import annotations

	import argparse
	import base64
	import html
	import subprocess
	import tempfile
	from pathlib import Path

	from generate_visualizations import collect_summary, task_architecture_rows


	ROOT = Path(__file__).resolve().parents[1]
	ASSETS = ROOT / "docs/assets"
	DEFAULT_PIPELINE_BASE = ASSETS / "pipeline_diagram_base.png"
	DEFAULT_ARCHITECTURE_BASE = ASSETS / "task_architectures_base.png"
	DEFAULT_PIPELINE_OUTPUT = ASSETS / "pipeline_diagram.png"
	DEFAULT_ARCHITECTURE_OUTPUT = ASSETS / "task_architectures.png"

	PIPELINE_WIDTH = 1800
	PIPELINE_HEIGHT = 1120
	ARCHITECTURE_WIDTH = 1800
	ARCHITECTURE_HEIGHT = 2450


	COLORS = {
	"blue": "#9bdfff",
	"teal": "#7ae5c3",
	"green": "#ccffa0",
	"amber": "#d8f4a5",
	"orange": "#b7ff91",
	"red": "#ff8f7a",
	"ink": "#f4f8ef",
	"muted": "#a5afa2",
	"line": "#2b4428",
	}


	TASK_GROUPS = [
	("Label + State", "#9bdfff", ["timeline_action", "timeline_subtask", "next_action"]),
	(
	"Prediction + Reconstruction",
	"#ccffa0",
	["hand_trajectory_forecast", "modality_reconstruction", "contact_prediction"],
	),
	("Grounding + Retrieval", "#7ae5c3", ["caption_grounding", "cross_modal_retrieval", "object_relevance"]),
	("Temporal Diagnostics", "#d8f4a5", ["transition_detection", "temporal_order", "misalignment_detection"]),
	]


	def data_uri(path: Path) -> str:
	if not path.exists():
	return ""
	encoded = base64.b64encode(path.read_bytes()).decode("ascii")
	return f"data:image/png;base64,{encoded}"


	def esc(value: object) -> str:
	return html.escape(str(value), quote=True)


	def build_base_layer(path: Path, opacity: float) -> str:
	uri = data_uri(path)
	if not uri:
	return ""
	return f'<div class="base-layer" style="background-image:url({uri});opacity:{opacity};"></div>'


	def stage_card(number: str, title: str, lines: list[str], color: str) -> str:
	detail = "".join(f"<li>{esc(line)}</li>" for line in lines)
	return f"""
	<article class="stage" style="--accent:{color}">
	<div class="stage-number">{esc(number)}</div>
	<h3>{esc(title)}</h3>
	<ul>{detail}</ul>
	</article>
	"""


	def arrow() -> str:
	return '<div class="flow-arrow" aria-hidden="true">-></div>'


	def build_pipeline_html(summary: dict, base_path: Path) -> str:
	suite = summary["suite"]
	task_count = len(suite["tasks"])
	neural_count = len(suite.get("neural_tasks", {}))
	stage_rows = [
	[
	stage_card(
	"01",
	"Raw public sample",
	["annotation.hdf5", "6 MP4 videos with audio", f"{suite['num_frames']:,} aligned frames"],
	COLORS["blue"],
	),
	arrow(),
	stage_card(
	"02",
	"HOMIE loader",
	["video, depth, pose", "mocap, IMU, language", "audio features"],
	COLORS["teal"],
	),
	arrow(),
	stage_card(
	"03",
	"Window builder",
	[
	f"{suite['window_frames']}-frame windows",
	f"{suite['stride_frames']}-frame stride",
	f"{suite['num_windows']:,} windows",
	],
	COLORS["green"],
	),
	arrow(),
	stage_card(
	"04",
	"Feature vector",
	[f"{suite['feature_dim']:,} dimensions", "18 named blocks incl. audio", "manifested slice indices"],
	COLORS["orange"],
	),
	],
	[
	stage_card(
	"05",
	"Baseline models",
	["motion-only classifiers", "current all-feature classifiers", "neural MLP task heads"],
	COLORS["blue"],
	),
	arrow(),
	stage_card(
	"06",
	"Ropedia Xperience-10M suite",
	[f"{task_count} minimal + {neural_count} neural results", "forecast, retrieval, alignment", "chronological evaluation"],
	COLORS["teal"],
	),
	arrow(),
	stage_card(
	"07",
	"Published artifacts",
	["metrics.json / csv / npz / pt", "GitHub Pages dashboard", "NN comparison charts"],
	COLORS["green"],
	),
	],
	]
	rows_html = "".join(f'<section class="flow-row">{"".join(row)}</section>' for row in stage_rows)
	checks = [
	"Reproduction check: rerunning scripts to an ignored scratch workspace reproduced the committed metrics exactly.",
	"Modality check: sample covers video, audio, depth, pose/SLAM, mocap, IMU, and language annotation.",
	"Feature check: current baseline manifest has synchronized video/audio/depth/pose/mocap/IMU/language blocks.",
	"Neural check: lightweight PyTorch MLP heads are reported beside the minimal task heads under neural_mlp/.",
	"Scope check: this validates one public sample episode, not cross-episode generalization.",
	]
	checks_html = "".join(f"<li>{esc(line)}</li>" for line in checks)
	base_layer = build_base_layer(base_path, 0.42)
	return f"""<!doctype html>
	<html lang="en">
	<head>
	<meta charset="utf-8">
	<style>
	* {{ box-sizing: border-box; }}
	body {{ margin: 0; background: #020502; font-family: "Inter Tight", "Space Grotesk", Arial, sans-serif; }}
	.canvas {{
	position: relative;
	width: {PIPELINE_WIDTH}px;
	height: {PIPELINE_HEIGHT}px;
	overflow: hidden;
	color: #f4f8ef;
	background:
	radial-gradient(circle at 78% 24%, rgba(167,240,120,0.18), transparent 24%),
	radial-gradient(circle, rgba(167,240,120,0.16) 1px, transparent 2px),
	#020502;
	background-size: auto, 18px 18px, auto;
	}}
	.base-layer {{
	position: absolute;
	inset: 0;
	background-size: cover;
	background-position: center;
	filter: saturate(1.08) contrast(1.05) brightness(0.48);
	}}
	.wash {{
	position: absolute;
	inset: 0;
	background: linear-gradient(180deg, rgba(2,5,2,0.76), rgba(2,5,2,0.94));
	}}
	.content {{
	position: relative;
	padding: 66px 82px;
	height: 100%;
	}}
	header {{
	display: grid;
	grid-template-columns: 1fr auto;
	gap: 44px;
	align-items: start;
	margin-bottom: 42px;
	}}
	.kicker {{
	font: 700 17px "SF Mono", Menlo, monospace;
	color: #ccffa0;
	text-transform: uppercase;
	letter-spacing: 0.09em;
	margin-bottom: 14px;
	}}
	h1 {{
	margin: 0;
	font-size: 56px;
	line-height: 0.98;
	letter-spacing: 0;
	}}
	.subtitle {{
	margin: 18px 0 0;
	max-width: 1010px;
	color: #dce8d7;
	font-size: 24px;
	line-height: 1.42;
	font-weight: 520;
	}}
	.metrics {{
	display: grid;
	grid-template-columns: repeat(2, 150px);
	gap: 12px;
	margin-top: 2px;
	}}
	.metric {{
	background: rgba(7,18,7,0.86);
	border: 1px solid rgba(167,240,120,0.26);
	border-radius: 8px;
	padding: 13px 15px 12px;
	box-shadow: 0 16px 44px rgba(0,0,0,0.42);
	}}
	.metric strong {{
	display: block;
	font: 850 24px "SF Mono", Menlo, monospace;
	color: #f4f8ef;
	line-height: 1;
	font-variant-numeric: tabular-nums;
	}}
	.metric span {{
	display: block;
	margin-top: 7px;
	color: #a5afa2;
	font-size: 14px;
	font-weight: 650;
	}}
	.flow-row {{
	display: flex;
	align-items: center;
	gap: 18px;
	margin-top: 30px;
	}}
	.flow-row:nth-of-type(2) {{
	width: 78%;
	margin-left: auto;
	margin-right: auto;
	margin-top: 38px;
	}}
	.stage {{
	min-width: 0;
	flex: 1 1 0;
	height: 182px;
	position: relative;
	background: rgba(7,18,7,0.86);
	border: 1px solid rgba(167,240,120,0.24);
	border-radius: 8px;
	padding: 24px 24px 22px 30px;
	box-shadow: 0 24px 62px rgba(0,0,0,0.40);
	backdrop-filter: blur(12px);
	}}
	.stage::before {{
	content: "";
	position: absolute;
	inset: 0 auto 0 0;
	width: 8px;
	border-radius: 8px 0 0 8px;
	background: var(--accent);
	}}
	.stage-number {{
	color: var(--accent);
	font: 850 16px "SF Mono", Menlo, monospace;
	letter-spacing: 0.04em;
	margin-bottom: 10px;
	}}
	.stage h3 {{
	margin: 0 0 13px;
	font-size: 24px;
	line-height: 1.08;
	letter-spacing: 0;
	}}
	.stage ul {{
	margin: 0;
	padding: 0;
	list-style: none;
	color: #dce8d7;
	font-size: 17px;
	line-height: 1.48;
	font-weight: 560;
	}}
	.flow-arrow {{
	width: 54px;
	flex: 0 0 54px;
	height: 54px;
	display: grid;
	place-items: center;
	border-radius: 999px;
	border: 1px solid rgba(167,240,120,0.26);
	background: rgba(7,18,7,0.78);
	color: #ccffa0;
	font: 850 22px "SF Mono", Menlo, monospace;
	box-shadow: 0 14px 34px rgba(0,0,0,0.36);
	}}
	.audit {{
	position: absolute;
	left: 82px;
	right: 82px;
	bottom: 62px;
	display: grid;
	grid-template-columns: 190px 1fr;
	gap: 26px;
	align-items: center;
	background: rgba(7,18,7,0.88);
	border: 1px solid rgba(167,240,120,0.24);
	border-radius: 8px;
	padding: 24px 28px;
	box-shadow: 0 22px 52px rgba(0,0,0,0.42);
	}}
	.audit strong {{
	color: #f4f8ef;
	font-size: 23px;
	line-height: 1.1;
	}}
	.audit ul {{
	margin: 0;
	padding: 0;
	list-style: none;
	color: #dce8d7;
	font-size: 17px;
	line-height: 1.55;
	font-weight: 560;
	}}
	</style>
	</head>
	<body>
	<main class="canvas">
	{base_layer}
	<div class="wash"></div>
	<div class="content">
	<header>
	<div>
	<div class="kicker">verified single-episode pipeline</div>
	<h1>From Ropedia Xperience-10M episode to reproducible artifacts</h1>
	<p class="subtitle">The figure follows the actual code path and includes minimal heads plus neural MLP results. Next milestone: Qwen3-Omni fine-tuning with sensor-bridge evaluation on held-out multi-episode splits.</p>
	</div>
	<div class="metrics">
	<div class="metric"><strong>{suite['num_frames']:,}</strong><span>frames</span></div>
	<div class="metric"><strong>{suite['num_windows']:,}</strong><span>windows</span></div>
	<div class="metric"><strong>{suite['feature_dim']:,}</strong><span>features</span></div>
	<div class="metric"><strong>{task_count}+{neural_count}</strong><span>min + NN tasks</span></div>
	</div>
	</header>
	{rows_html}
	<section class="audit">
	<strong>Reproducibility checks</strong>
	<ul>{checks_html}</ul>
	</section>
	</div>
	</main>
	</body>
	</html>
	"""


	def family_label(family: str) -> str:
	return {
	"softmax": "linear softmax",
	"ridge": "ridge regression",
	"ridge+rank": "ridge + cosine rank",
	"multilabel": "multi-label logistic",
	}.get(family, family)


	def build_task_card(row: dict, color: str) -> str:
	return f"""
	<article class="task-card" style="--accent:{color}">
	<div class="chip">{esc(family_label(row['family']))}</div>
	<h3>{esc(row['task'])}</h3>
	<dl>
	<dt>Input</dt><dd>{esc(row['input'])}</dd>
	<dt>Head</dt><dd>{esc(row['head'])}</dd>
	<dt>Output</dt><dd>{esc(row['output'])}</dd>
	</dl>
	<div class="metric-line"><span>Metric</span><strong>{esc(row['metric'])}</strong></div>
	</article>
	"""


	def build_architecture_html(summary: dict, base_path: Path) -> str:
	suite = summary["suite"]
	neural_count = len(suite.get("neural_tasks", {}))
	rows_by_task = {row["task"]: row for row in task_architecture_rows(summary)}
	group_html = []
	for title, color, task_names in TASK_GROUPS:
	cards = "".join(build_task_card(rows_by_task[name], color) for name in task_names)
	group_html.append(
	f"""
	<section class="task-group" style="--accent:{color}">
	<div class="group-head">
	<span></span>
	<h2>{esc(title)}</h2>
	</div>
	<div class="group-cards">{cards}</div>
	</section>
	"""
	)

	family_cards = [
	("Linear softmax", "Minimal classifier for action, subtask, transition, contact, order, and alignment tasks.", COLORS["blue"]),
	("Ridge regression", "Minimal closed-form projection for forecasting, reconstruction, and retrieval spaces.", COLORS["green"]),
	("Multi-label logistic", "Minimal one-vs-rest sigmoid heads over the object vocabulary with top-1 fallback.", COLORS["orange"]),
	("Neural MLP", "Optional PyTorch nonlinear classifier/regressor over the same features, splits, and metrics.", COLORS["red"]),
	]
	families = "".join(
	f"""
	<article class="family" style="--accent:{color}">
	<h3>{esc(title)}</h3>
	<p>{esc(desc)}</p>
	</article>
	"""
	for title, desc, color in family_cards
	)
	base_layer = build_base_layer(base_path, 0.36)
	return f"""<!doctype html>
	<html lang="en">
	<head>
	<meta charset="utf-8">
	<style>
	* {{ box-sizing: border-box; }}
	body {{ margin: 0; background: #020502; font-family: "Inter Tight", "Space Grotesk", Arial, sans-serif; }}
	.canvas {{
	position: relative;
	width: {ARCHITECTURE_WIDTH}px;
	height: {ARCHITECTURE_HEIGHT}px;
	overflow: hidden;
	color: #f4f8ef;
	background:
	radial-gradient(circle at 76% 18%, rgba(167,240,120,0.16), transparent 24%),
	radial-gradient(circle, rgba(167,240,120,0.13) 1px, transparent 2px),
	#020502;
	background-size: auto, 18px 18px, auto;
	}}
	.base-layer {{
	position: absolute;
	inset: 0;
	background-size: cover;
	background-position: center;
	filter: saturate(1.08) contrast(1.05) brightness(0.48);
	}}
	.wash {{
	position: absolute;
	inset: 0;
	background: linear-gradient(180deg, rgba(2,5,2,0.76), rgba(2,5,2,0.94));
	}}
	.content {{
	position: relative;
	height: 100%;
	padding: 58px 74px 64px;
	}}
	header {{
	display: grid;
	grid-template-columns: 1fr auto;
	gap: 42px;
	align-items: start;
	margin-bottom: 28px;
	}}
	.kicker {{
	font: 700 16px "SF Mono", Menlo, monospace;
	color: #ccffa0;
	text-transform: uppercase;
	letter-spacing: 0.09em;
	margin-bottom: 13px;
	}}
	h1 {{
	margin: 0;
	font-size: 52px;
	line-height: 1;
	letter-spacing: 0;
	}}
	.subtitle {{
	margin: 15px 0 0;
	max-width: 1060px;
	color: #dce8d7;
	font-size: 22px;
	line-height: 1.42;
	font-weight: 520;
	}}
	.summary-pill {{
	display: grid;
	place-items: center;
	min-width: 188px;
	min-height: 112px;
	border: 1px solid rgba(167,240,120,0.26);
	border-radius: 8px;
	background: rgba(7,18,7,0.86);
	box-shadow: 0 18px 44px rgba(0,0,0,0.42);
	text-align: center;
	}}
	.summary-pill strong {{
	font: 850 36px "SF Mono", Menlo, monospace;
	line-height: 1;
	}}
	.summary-pill span {{
	display: block;
	margin-top: 8px;
	color: #a5afa2;
	font-size: 15px;
	font-weight: 700;
	}}
	.shared {{
	display: grid;
	grid-template-columns: repeat(4, 1fr);
	gap: 18px;
	margin-bottom: 24px;
	}}
	.shared article {{
	min-height: 110px;
	border: 1px solid rgba(167,240,120,0.24);
	border-radius: 8px;
	background: rgba(7,18,7,0.86);
	padding: 20px 22px;
	box-shadow: 0 18px 44px rgba(0,0,0,0.36);
	}}
	.shared h2 {{
	margin: 0 0 9px;
	font-size: 22px;
	line-height: 1.08;
	}}
	.shared p {{
	margin: 0;
	color: #dce8d7;
	font-size: 16px;
	line-height: 1.38;
	font-weight: 560;
	}}
	.families {{
	display: grid;
	grid-template-columns: repeat(4, 1fr);
	gap: 18px;
	margin-bottom: 28px;
	}}
	.family {{
	min-height: 124px;
	border: 1px solid rgba(167,240,120,0.24);
	border-radius: 8px;
	background: rgba(7,18,7,0.82);
	padding: 20px 20px 18px;
	box-shadow: 0 16px 40px rgba(0,0,0,0.34);
	}}
	.family h3 {{
	margin: 0 0 10px;
	color: var(--accent);
	font-size: 21px;
	line-height: 1.08;
	}}
	.family p {{
	margin: 0;
	color: #dce8d7;
	font-size: 15px;
	line-height: 1.42;
	font-weight: 560;
	}}
	.task-groups {{
	display: grid;
	grid-template-columns: repeat(2, minmax(0, 1fr));
	gap: 24px;
	}}
	.task-group {{
	border: 1px solid rgba(167,240,120,0.22);
	border-radius: 8px;
	background: rgba(7,18,7,0.74);
	padding: 22px;
	box-shadow: 0 22px 54px rgba(0,0,0,0.42);
	backdrop-filter: blur(10px);
	}}
	.group-head {{
	display: flex;
	align-items: center;
	gap: 11px;
	margin-bottom: 18px;
	}}
	.group-head span {{
	width: 12px;
	height: 34px;
	border-radius: 999px;
	background: var(--accent);
	}}
	.group-head h2 {{
	margin: 0;
	font-size: 24px;
	line-height: 1.08;
	color: var(--accent);
	}}
	.group-cards {{
	display: grid;
	gap: 14px;
	}}
	.task-card {{
	min-height: 230px;
	position: relative;
	border: 1px solid color-mix(in srgb, var(--accent), #020502 66%);
	border-radius: 8px;
	background: rgba(7,18,7,0.92);
	padding: 18px 20px 17px;
	overflow: hidden;
	}}
	.task-card::before {{
	content: "";
	position: absolute;
	inset: 0 auto 0 0;
	width: 6px;
	background: var(--accent);
	opacity: 0.92;
	}}
	.chip {{
	display: inline-flex;
	border: 1px solid color-mix(in srgb, var(--accent), #ffffff 35%);
	border-radius: 6px;
	padding: 4px 8px;
	color: var(--accent);
	font: 850 11px "SF Mono", Menlo, monospace;
	text-transform: uppercase;
	letter-spacing: 0.03em;
	background: rgba(7,18,7,0.72);
	}}
	.task-card h3 {{
	margin: 13px 0 12px;
	color: #f4f8ef;
	font-size: 21px;
	line-height: 1.08;
	overflow-wrap: anywhere;
	}}
	dl {{
	display: grid;
	grid-template-columns: 54px 1fr;
	gap: 5px 9px;
	margin: 0;
	color: #dce8d7;
	font-size: 13px;
	line-height: 1.32;
	font-weight: 560;
	}}
	dt {{
	color: var(--accent);
	font: 850 10px "SF Mono", Menlo, monospace;
	text-transform: uppercase;
	letter-spacing: 0.04em;
	}}
	dd {{ margin: 0; }}
	.metric-line {{
	display: flex;
	justify-content: space-between;
	gap: 12px;
	align-items: center;
	margin-top: 12px;
	border-top: 1px solid rgba(167,240,120,0.16);
	padding-top: 12px;
	font-size: 13px;
	font-weight: 700;
	}}
	.metric-line span {{
	color: #a5afa2;
	font: 850 11px "SF Mono", Menlo, monospace;
	text-transform: uppercase;
	}}
	.metric-line strong {{
	color: var(--accent);
	font: 850 15px "SF Mono", Menlo, monospace;
	text-align: right;
	font-variant-numeric: tabular-nums;
	}}
	</style>
	</head>
	<body>
	<main class="canvas">
	{base_layer}
	<div class="wash"></div>
	<div class="content">
	<header>
	<div>
	<div class="kicker">minimal + neural verified model architectures</div>
	<h1>12 Ropedia Xperience-10M tasks, minimal and NN heads</h1>
	<p class="subtitle">Each task uses the same aligned episode-window contract. The figure shows minimal heads beside neural MLP metrics; next milestone is Qwen3-Omni fine-tuning with sensor-bridge evaluation.</p>
	</div>
	<div class="summary-pill"><strong>{len(suite['tasks'])}+{neural_count}</strong><span>min + NN tasks</span></div>
	</header>
	<section class="shared">
	<article><h2>Shared windows</h2><p>{suite['num_frames']:,} frames to {suite['num_windows']:,} windows over video, audio, depth, pose, mocap, inertial, and language features.</p></article>
	<article><h2>Feature vector</h2><p>X_all is {suite['feature_dim']:,} dimensions with 18 named modality blocks.</p></article>
	<article><h2>Reusable heads</h2><p>Minimal softmax/ridge/logistic heads plus optional PyTorch MLP heads cover the whole suite.</p></article>
	<article><h2>Artifacts</h2><p>Metrics, predictions, model weights, neural checkpoints, manifests, and the source summary report are committed.</p></article>
	</section>
	<section class="families">{families}</section>
	<section class="task-groups">{"".join(group_html)}</section>
	</div>
	</main>
	</body>
	</html>
	"""


	def render_html(html_text: str, output_path: Path, width: int, height: int, keep_html: Path \| None = None) -> None:
	output_path.parent.mkdir(parents=True, exist_ok=True)
	if keep_html is None:
	with tempfile.NamedTemporaryFile("w", suffix=".html", encoding="utf-8", delete=False) as handle:
	handle.write(html_text)
	html_path = Path(handle.name)
	else:
	html_path = keep_html
	html_path.parent.mkdir(parents=True, exist_ok=True)
	html_path.write_text(html_text, encoding="utf-8")
	subprocess.run(
	[
	"npx",
	"--yes",
	"playwright",
	"screenshot",
	"--full-page",
	f"--viewport-size={width},{height}",
	html_path.resolve().as_uri(),
	str(output_path),
	],
	check=True,
	)
	print(f"Wrote image: {output_path}")
	print(f"Wrote render HTML: {html_path}")


	def main() -> int:
	parser = argparse.ArgumentParser()
	parser.add_argument("--pipeline-base", type=Path, default=DEFAULT_PIPELINE_BASE)
	parser.add_argument("--architecture-base", type=Path, default=DEFAULT_ARCHITECTURE_BASE)
	parser.add_argument("--pipeline-output", type=Path, default=DEFAULT_PIPELINE_OUTPUT)
	parser.add_argument("--architecture-output", type=Path, default=DEFAULT_ARCHITECTURE_OUTPUT)
	parser.add_argument("--html-dir", type=Path, help="Optional directory for the intermediate render HTML files.")
	parser.add_argument("--only", choices=["pipeline", "architecture", "both"], default="both")
	args = parser.parse_args()

	summary = collect_summary()
	if args.only in {"pipeline", "both"}:
	pipeline_html = build_pipeline_html(summary, args.pipeline_base)
	html_path = args.html_dir / "pipeline_diagram.html" if args.html_dir else None
	render_html(pipeline_html, args.pipeline_output, PIPELINE_WIDTH, PIPELINE_HEIGHT, html_path)
	if args.only in {"architecture", "both"}:
	architecture_html = build_architecture_html(summary, args.architecture_base)
	html_path = args.html_dir / "task_architectures.html" if args.html_dir else None
	render_html(architecture_html, args.architecture_output, ARCHITECTURE_WIDTH, ARCHITECTURE_HEIGHT, html_path)
	return 0


	if __name__ == "__main__":
	raise SystemExit(main())