ropedia-xperience-10m-task-baselines / scripts /validate_task_surface.py
cy0307's picture
Publish Ropedia Xperience-10M task baseline cards
1e688c9 verified
#!/usr/bin/env python3
"""Validate the public 12-task card and walkthrough surface.
This gate is deliberately about presentation integrity, not model quality. The
repo keeps snake_case artifact ids for reproducibility, but the public website
task cards and interactive player should use research-readable names and clear
input/process/output wording.
"""
from __future__ import annotations
import json
import re
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
# Repository root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]

# Inputs read by the gate, and the report it writes (all relative to ROOT).
TASK_JSON = ROOT / "docs/data/task_walkthroughs.json"
WEBSITE = ROOT / "docs/index.html"
WALKTHROUGH_MD = ROOT / "results/episode_task_suite/task_walkthroughs/TASK_WALKTHROUGHS.md"
OUTPUT = ROOT / "docs/data/task_surface_integrity.json"

# Task key (snake_case artifact id) -> the exact display name each task record
# and each markdown heading is required to carry.
EXPECTED_TASKS = {
    "timeline_action": "Action Recognition",
    "timeline_subtask": "Procedure Step Recognition",
    "transition_detection": "Action Boundary Detection",
    "next_action": "Next-Action Prediction",
    "hand_trajectory_forecast": "Hand Trajectory Forecasting",
    "contact_prediction": "Contact State Prediction",
    "object_relevance": "Object Relevance Prediction",
    "caption_grounding": "Language Grounding",
    "cross_modal_retrieval": "Cross-Modal Retrieval",
    "modality_reconstruction": "Cross-Modal Reconstruction",
    "temporal_order": "Temporal Order Verification",
    "misalignment_detection": "Multimodal Synchronization Detection",
}

# Extension probe id -> display name that must appear in the website source;
# the raw id must not appear there as an <h3> heading.
EXPECTED_EXTENSION_NAMES = {
    "body_motion_intensity": "Body and Hand Motion Intensity",
    "multi_view_consistency_retrieval": "Multi-View Consistency Retrieval",
    "action_phase_progress": "Action Phase Progress Estimation",
    "ego_motion_forecast": "Short-Horizon Ego-Motion Forecasting",
}

# Keys every task record must contain.
REQUIRED_TASK_FIELDS = {
    "display_name",
    "research_name",
    "task_family",
    "architecture_family",
    "primary_direction",
    "card_blurb",
    "input_short",
    "process_short",
    "output_short",
    "modalities",
    "poster_modality",
    "case_study",
    "input",
    "output",
    "middle_modules",
    "metric",
    "failure_mode",
    "artifact_id",
    "plain_goal",
}

# Task fields that are scanned for leaked raw task/modality identifiers.
DISPLAY_FIELDS = {
    "display_name",
    "research_name",
    "card_blurb",
    "input_short",
    "process_short",
    "output_short",
    "plain_goal",
}

# Accepted values for a task's "task_family" field.
ALLOWED_FAMILIES = {"supervised", "forecast", "retrieval", "diagnostic"}

# Modality name -> asset path (relative to ROOT) that must exist on disk for
# any task that lists the modality.
MODALITY_ASSETS = {
    "video": "docs/assets/modalities/video.jpg",
    "audio": "docs/assets/modalities/audio.png",
    "depth": "docs/assets/modalities/depth.jpg",
    "pose_slam": "docs/assets/modalities/pose_slam.png",
    "motion_capture": "docs/assets/modalities/motion_capture.png",
    "inertial": "docs/assets/modalities/inertial.png",
    "language": "docs/assets/modalities/language.png",
}

# Matches snake_case tokens: a lowercase word followed by at least one
# "_segment" of lowercase letters/digits. Tokens without an underscore never match.
RAW_ID_PATTERN = re.compile(r"\b[a-z]+(?:_[a-z0-9]+)+\b")
def load_json(path: Path) -> dict[str, Any]:
    """Read *path* as UTF-8 text and return the decoded JSON document.

    Raises whatever ``Path.read_text`` or ``json.loads`` raise for an
    unreadable file or malformed JSON.
    """
    raw_text = path.read_text(encoding="utf-8")
    document = json.loads(raw_text)
    return document
def check(condition: bool, name: str, failures: list[dict[str, Any]], **details: Any) -> dict[str, Any]:
    """Build one check record and register it in *failures* when it did not pass.

    Args:
        condition: Outcome of the check; only its truthiness is used.
        name: Identifier stored under the ``"name"`` key.
        failures: Shared list that receives the record when *condition* is falsy.
        **details: Extra context merged into the record after name/status.

    Returns:
        The record dict (the same object that is appended to *failures*).
    """
    outcome: dict[str, Any] = {"name": name, "status": "fail"}
    if condition:
        outcome["status"] = "pass"
    # Merged last so key order stays name, status, then the detail keys.
    outcome.update(details)
    if condition:
        return outcome
    failures.append(outcome)
    return outcome
def function_body(source: str, name: str) -> str:
    """Extract the text of ``function <name>(...) {...}`` from *source*.

    The span starts at the ``function <name>(`` marker and ends at the brace
    that balances the first ``{`` found after the marker. Braces are counted
    character by character, with no awareness of string literals or comments.

    Returns:
        The matched text; ``""`` when the marker or an opening brace is not
        found; everything from the marker to the end of *source* when the
        braces never balance.
    """
    begin = source.find(f"function {name}(")
    opening = source.find("{", begin) if begin >= 0 else -1
    if opening < 0:
        return ""

    nesting = 0
    for offset, symbol in enumerate(source[opening:], start=opening):
        if symbol not in "{}":
            continue
        nesting += 1 if symbol == "{" else -1
        if nesting == 0:
            return source[begin : offset + 1]
    return source[begin:]
def _is_metric_number(value: Any) -> bool:
    """Return True for int/float values, excluding bool (a subclass of int)."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _validate_task_record(
    task_id: str, task: dict[str, Any], failures: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """Run every per-task check against one task record and return the records."""
    checks: list[dict[str, Any]] = []

    missing_fields = sorted(REQUIRED_TASK_FIELDS - set(task))
    checks.append(
        check(not missing_fields, f"{task_id}: required_fields", failures, missing=missing_fields)
    )

    expected_name = EXPECTED_TASKS.get(task_id)
    observed_name = task.get("display_name")
    checks.append(
        check(
            # An unknown task id has no expected name; without the explicit
            # None guard a record lacking display_name would pass (None == None).
            expected_name is not None and observed_name == expected_name,
            f"{task_id}: human_readable_display_name",
            failures,
            expected=expected_name,
            observed=observed_name,
        )
    )
    checks.append(
        check(
            task.get("artifact_id") == task_id,
            f"{task_id}: artifact_id_matches_key",
            failures,
            observed=task.get("artifact_id"),
        )
    )

    # Sorted so the order of checks in the written report is stable across
    # runs; iterating the set directly depends on string hash randomization.
    for field in sorted(DISPLAY_FIELDS):
        value = str(task.get(field, ""))
        raw_hits = [
            hit
            for hit in RAW_ID_PATTERN.findall(value)
            if hit in EXPECTED_TASKS or hit in MODALITY_ASSETS
        ]
        checks.append(
            check(
                not raw_hits,
                f"{task_id}: public_field_{field}_is_human_readable",
                failures,
                value=value,
                raw_hits=raw_hits,
            )
        )

    family = task.get("task_family")
    checks.append(
        check(
            # isinstance guard: an unhashable JSON value (list/object) would
            # raise TypeError on set membership instead of failing the check.
            isinstance(family, str) and family in ALLOWED_FAMILIES,
            f"{task_id}: known_task_family",
            failures,
            observed=family,
            allowed=sorted(ALLOWED_FAMILIES),
        )
    )

    modalities = task.get("modalities", [])
    # Use an empty list for membership tests when the field has the wrong type,
    # so a malformed record yields failed checks rather than a TypeError.
    modality_list = modalities if isinstance(modalities, list) else []
    checks.append(
        check(
            bool(modality_list),
            f"{task_id}: modality_list_present",
            failures,
            observed=modalities,
        )
    )
    if isinstance(modalities, list):
        known = [item for item in modalities if isinstance(item, str) and item in MODALITY_ASSETS]
        unknown = [
            item for item in modalities if not (isinstance(item, str) and item in MODALITY_ASSETS)
        ]
        missing_assets = [
            MODALITY_ASSETS[item] for item in known if not (ROOT / MODALITY_ASSETS[item]).exists()
        ]
        checks.append(
            check(not unknown, f"{task_id}: known_modalities", failures, unknown=unknown)
        )
        checks.append(
            check(
                not missing_assets,
                f"{task_id}: modality_assets_exist",
                failures,
                missing=missing_assets,
            )
        )
    checks.append(
        check(
            task.get("poster_modality") in modality_list,
            f"{task_id}: poster_modality_in_task_modalities",
            failures,
            poster_modality=task.get("poster_modality"),
            modalities=modalities,
        )
    )

    metric = task.get("metric", {})
    metric_ok = (
        isinstance(metric, dict)
        and isinstance(metric.get("name"), str)
        and isinstance(metric.get("direction"), str)
        and _is_metric_number(metric.get("minimal"))
        and _is_metric_number(metric.get("neural_mlp"))
    )
    checks.append(
        check(
            metric_ok,
            f"{task_id}: numeric_minimal_and_neural_metrics",
            failures,
            metric=metric,
        )
    )

    middle_modules = task.get("middle_modules")
    module_count = len(middle_modules) if isinstance(middle_modules, list) else 0
    checks.append(
        check(
            module_count >= 3,
            f"{task_id}: middle_modules_explain_process",
            failures,
            observed_count=module_count,
        )
    )
    return checks


def validate_tasks(payload: dict[str, Any], failures: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Validate the ``tasks`` mapping of the task walkthrough payload.

    Checks that the mapping exists, holds exactly the ids in EXPECTED_TASKS,
    and that every task record has the required fields, the expected display
    name, a matching artifact id, human-readable public fields, a known task
    family, known modalities with existing assets, a poster modality drawn
    from its own modality list, a well-formed metric, and at least three
    middle modules.

    Malformed input (non-dict payload, non-dict task record, wrongly typed or
    unhashable field values) is reported as failed checks, not raised.

    Args:
        payload: Decoded task walkthrough JSON document.
        failures: Shared list that receives every failed check record.

    Returns:
        All check records produced, in evaluation order.
    """
    checks: list[dict[str, Any]] = []
    tasks = payload.get("tasks", {}) if isinstance(payload, dict) else None
    checks.append(check(isinstance(tasks, dict), "tasks_object_present", failures))
    if not isinstance(tasks, dict):
        return checks

    expected_ids = set(EXPECTED_TASKS)
    task_ids = set(tasks)
    checks.append(
        check(
            len(tasks) == len(EXPECTED_TASKS),
            "exactly_12_tasks",
            failures,
            observed=len(tasks),
            expected=len(EXPECTED_TASKS),
        )
    )
    checks.append(
        check(
            task_ids == expected_ids,
            "expected_task_ids_present",
            failures,
            missing=sorted(expected_ids - task_ids),
            extra=sorted(task_ids - expected_ids),
        )
    )

    for task_id, task in tasks.items():
        if not isinstance(task, dict):
            checks.append(check(False, f"{task_id}: task_record_object", failures))
            continue
        checks.extend(_validate_task_record(task_id, task, failures))
    return checks
def validate_markdown(source: str, tasks: dict[str, Any], failures: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Validate the walkthrough markdown against the expected task list.

    Three kinds of checks are produced:

    * one per expected task, requiring the heading
      ``### <display name> (`<task id>`)`` to appear in *source*;
    * one requiring the number of level-3 headings to equal the number of
      expected tasks;
    * one requiring every task's ``case_study`` text to appear in *source*.

    Args:
        source: Full text of the walkthrough markdown file.
        tasks: Mapping of task id to task record from the walkthrough JSON.
        failures: Shared list that receives every failed check record.

    Returns:
        All check records produced, in evaluation order.
    """
    checks: list[dict[str, Any]] = []
    for task_id, display_name in EXPECTED_TASKS.items():
        expected_heading = f"### {display_name} (`{task_id}`)"
        checks.append(
            check(
                expected_heading in source,
                f"markdown_heading_present:{task_id}",
                failures,
                expected=expected_heading,
            )
        )

    # Count only real level-3 headings (line start, at most three spaces of
    # indent). A plain substring count would also match "#### ..." headings
    # and "### " appearing in the middle of a line.
    section_count = len(re.findall(r"^ {0,3}### ", source, flags=re.MULTILINE))
    checks.append(
        check(
            section_count == len(EXPECTED_TASKS),
            "markdown_has_12_task_sections",
            failures,
            observed=section_count,
        )
    )

    # A task is missing its case study when the record is not a dict, the
    # field is absent or blank (an empty string is trivially "in" any text),
    # or the text does not occur in the markdown.
    missing_case_studies = []
    for task_id, task in tasks.items():
        case_study = str(task.get("case_study", "")) if isinstance(task, dict) else ""
        if not case_study.strip() or case_study not in source:
            missing_case_studies.append(str(task_id))
    checks.append(
        check(
            not missing_case_studies,
            "markdown_contains_case_studies",
            failures,
            missing=sorted(missing_case_studies),
        )
    )
    return checks
def validate_website(source: str, failures: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Validate that the website source wires the task cards and player correctly.

    Looks for required element ids / class names, then inspects the text of
    the ``renderTaskCards``, ``renderSelector`` and ``renderPlayer`` JavaScript
    functions to confirm they use human-readable task fields and not
    artifact ids, and finally checks that extension probes are shown under
    their display names.

    Args:
        source: Full text of the website HTML file.
        failures: Shared list that receives every failed check record.

    Returns:
        All check records produced, in evaluation order.
    """
    results: list[dict[str, Any]] = []

    def record(passed: bool, label: str, **extra: Any) -> None:
        """Evaluate one check and append its record to the result list."""
        results.append(check(passed, label, failures, **extra))

    def has_all(haystack: str, *needles: str) -> bool:
        """Return True when every needle occurs in *haystack*."""
        return all(needle in haystack for needle in needles)

    required_markers = (
        'id="taskPlayer"',
        'id="taskGrid"',
        'id="walkthroughSelector"',
        'id="playerStoryboard"',
        'id="playerFrameChip"',
        'id="playerFrameCaption"',
        'id="playerScrub"',
        'fetch("data/task_walkthroughs.json"',
        'class="task-card"',
        'class="task-card-media"',
        'class="story-button',
        'class="flow-step',
        'id="playerPlay"',
        'id="playerPrev"',
        'id="playerNext"',
    )
    for marker in required_markers:
        record(marker in source, f"website_marker_present:{marker}", marker=marker)

    # Source text of the three renderer functions ("" when one is not found).
    card_js = function_body(source, "renderTaskCards")
    selector_js = function_body(source, "renderSelector")
    player_js = function_body(source, "renderPlayer")

    record("artifact-id" not in source, "website_no_artifact_id_css_or_markup")
    record("artifact_id" not in card_js, "task_cards_do_not_render_artifact_ids")
    record(
        has_all(card_js, "task.display_name", "task.research_name"),
        "task_cards_render_human_names",
    )
    record(
        has_all(card_js, "task.input_short", "task.process_short", "task.output_short"),
        "task_cards_render_input_process_output",
    )
    record(
        has_all(card_js, "task.poster_modality", "task-card-media"),
        "task_cards_use_representative_modality_thumbnail",
    )

    player_wired = (
        has_all(player_js, "playerPoster", "middle_modules")
        and has_all(source, "playerProgress", "renderStageFrame(task, index)")
        and has_all(source, 'id="playerPlay"', 'id="playerPrev"', 'id="playerNext"')
    )
    record(player_wired, "interactive_player_wired_to_task_metadata")
    record(
        has_all(source, "function setActiveStage", "function advancePlayer", "playerScrub"),
        "interactive_video_storyboard_controls_present",
    )

    selector_ok = "task.display_name" in selector_js and "artifact_id" not in selector_js
    record(selector_ok, "selector_uses_human_names")

    for artifact_id, display_name in EXPECTED_EXTENSION_NAMES.items():
        raw_heading = f"<h3>{artifact_id}</h3>"
        uses_human_name = raw_heading not in source and display_name in source
        record(
            uses_human_name,
            f"extension_probe_uses_human_name:{artifact_id}",
            expected=display_name,
        )
    return results
def _utc_timestamp() -> str:
    """Return the current UTC time as an ISO-8601 string with second precision."""
    return datetime.now(timezone.utc).isoformat(timespec="seconds")


def _aborted_report(checks: list[dict[str, Any]], failures: list[dict[str, Any]]) -> dict[str, Any]:
    """Build the fail report used when validation cannot proceed past its inputs."""
    return {
        "status": "fail",
        "generated_at_utc": _utc_timestamp(),
        "summary": {"task_count": 0, "failure_count": len(failures)},
        "checks": checks,
        "failures": failures,
    }


def build_report() -> dict[str, Any]:
    """Run every validation and assemble the task surface integrity report.

    Steps: confirm the three input files exist, load them, run the task,
    markdown and website validators, then summarize task family and modality
    usage counts.

    Unusable inputs (a missing file, JSON that does not parse, a top-level
    JSON value that is not an object) produce a ``"fail"`` report, not an
    exception, so the caller can still write the report and list the failure.

    Returns:
        A dict with ``status`` ("pass" or "fail"), ``generated_at_utc``,
        ``summary``, ``checks`` (every record) and ``failures`` (failed records).
    """
    failures: list[dict[str, Any]] = []
    checks: list[dict[str, Any]] = []

    inputs_present = {
        "task_walkthroughs_json": TASK_JSON.exists(),
        "website_index": WEBSITE.exists(),
        "walkthrough_markdown": WALKTHROUGH_MD.exists(),
    }
    all_inputs_present = all(inputs_present.values())
    checks.append(
        check(
            all_inputs_present,
            "required_task_surface_inputs_present",
            failures,
            inputs=inputs_present,
        )
    )
    if not all_inputs_present:
        return _aborted_report(checks, failures)

    # The two records below are added only on failure, so a passing report
    # keeps exactly the same set of checks as before.
    try:
        task_payload = load_json(TASK_JSON)
    except ValueError as error:
        # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        checks.append(check(False, "task_walkthroughs_json_parses", failures, error=str(error)))
        return _aborted_report(checks, failures)
    if not isinstance(task_payload, dict):
        checks.append(
            check(
                False,
                "task_walkthroughs_json_is_object",
                failures,
                observed_type=type(task_payload).__name__,
            )
        )
        return _aborted_report(checks, failures)

    website_source = WEBSITE.read_text(encoding="utf-8")
    markdown_source = WALKTHROUGH_MD.read_text(encoding="utf-8")

    raw_tasks = task_payload.get("tasks", {})
    tasks = raw_tasks if isinstance(raw_tasks, dict) else {}

    checks.extend(validate_tasks(task_payload, failures))
    checks.extend(validate_markdown(markdown_source, tasks, failures))
    checks.extend(validate_website(website_source, failures))

    task_families: dict[str, int] = {}
    task_modalities: dict[str, int] = {}
    for task in tasks.values():
        if not isinstance(task, dict):
            # validate_tasks already records a failure for non-dict task
            # records; skip them here so the summary does not crash.
            continue
        family = task.get("task_family")
        if isinstance(family, str):
            task_families[family] = task_families.get(family, 0) + 1
        modalities = task.get("modalities", [])
        if not isinstance(modalities, list):
            continue
        for modality in modalities:
            # Only string names are counted: other JSON values may be
            # unhashable, and mixed key types cannot be sorted below.
            if isinstance(modality, str):
                task_modalities[modality] = task_modalities.get(modality, 0) + 1

    return {
        "status": "pass" if not failures else "fail",
        "generated_at_utc": _utc_timestamp(),
        "summary": {
            "task_count": len(tasks),
            "expected_task_count": len(EXPECTED_TASKS),
            "task_family_counts": dict(sorted(task_families.items())),
            "modality_usage_counts": dict(sorted(task_modalities.items())),
            "interactive_surface": "task cards plus scrub/play/chapter walkthrough storyboard",
            "failure_count": len(failures),
        },
        "checks": checks,
        "failures": failures,
    }
def main() -> int:
    """Build the report, write it to OUTPUT, and print a short summary.

    Returns:
        0 when the report status is "pass"; otherwise 1, after printing the
        names of up to 40 failed checks and a count of any remainder.
    """
    report = build_report()
    status = report["status"]

    OUTPUT.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(report, indent=2)
    OUTPUT.write_text(f"{serialized}\n", encoding="utf-8")
    print(f"{status.upper()}: wrote {OUTPUT}")

    if status == "pass":
        return 0

    failed = report["failures"]
    hidden = len(failed) - 40
    for entry in failed[:40]:
        print(f"- {entry['name']}")
    if hidden > 0:
        print(f"- ... {hidden} more failures")
    return 1


if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())