#!/usr/bin/env python3 """Generate data.json for the G1 Moves showcase site. Reads manifest.json + scans the filesystem to determine which media files (GIF/MP4) exist for each clip at each pipeline stage. Usage: python generate_data.py # from space/ directory python generate_data.py --repo /path/to/g1-moves """ import argparse import json import re from pathlib import Path STAGES = ["capture", "retarget", "training", "policy"] # Descriptive subtitles for karate moves (from README) KARATE_SUBTITLES = { "M_Move1": "Guard Combo", "M_Move2": "Low Punch", "M_Move3": "Horse Stance", "M_Move4": "Spin Punch", "M_Move5": "Twist Punch", "M_Move6": "Spin Strike", "M_Move7": "Rapid Punch", "M_Move8": "Drop Spin", "M_Move9": "Level Change", "M_Move10": "Side Kick", "M_Move11": "Blitz", "M_Move17": "Double Strike", "M_Move18": "Front Kick", "M_Move19": "Slow Kata", "M_Move20": "Open Strike", "M_ShortMove12": "Quick Jab", "M_ShortMove13": "Snap Kick", "M_ShortMove14": "Light Punch", "M_ShortMove15": "Drop Strike", "M_ShortMove16": "Power Burst", } def format_display_name(clip_id: str) -> str: """Convert clip ID to human-readable name.""" name = re.sub(r"^[BJMV]_", "", clip_id) name = re.sub(r"([a-z\d])([A-Z])", r"\1 \2", name) name = re.sub(r"([A-Za-z])(\d)", r"\1 \2", name) name = name.replace("_", " ") return name def get_media_file(clip_id: str, stage: str) -> str: """Return the expected GIF filename for a clip at a given stage.""" if stage == "capture": return f"{clip_id}.gif" return f"{clip_id}_{stage}.gif" def scan_stage(repo: Path, category: str, clip_id: str, stage: str) -> dict | None: """Check for media files at a pipeline stage. Returns paths dict or None.""" stage_dir = repo / category / clip_id / stage result = {} gif_name = get_media_file(clip_id, stage) gif_path = stage_dir / gif_name if gif_path.exists(): result["gif"] = f"{category}/{clip_id}/{stage}/{gif_name}" # Also check for MP4 mp4_name = gif_name.replace(".gif", ".mp4") mp4_path = stage_dir / mp4_name if mp4_path.exists(): result["mp4"] = f"{category}/{clip_id}/{stage}/{mp4_name}" return result if result else None def main(): parser = argparse.ArgumentParser(description="Generate site data from manifest") parser.add_argument( "--repo", type=Path, default=Path(__file__).parent.parent, help="Path to g1-moves repository root", ) args = parser.parse_args() repo = args.repo.resolve() manifest_path = repo / "manifest.json" if not manifest_path.exists(): print(f"ERROR: manifest.json not found at {manifest_path}") return manifest = json.loads(manifest_path.read_text()) clips = [] stats = {"dance": 0, "karate": 0, "bonus": 0, "policies": 0, "total": 0} for clip_id, info in sorted(manifest["clips"].items()): cat = info["category"] stats[cat] = stats.get(cat, 0) + 1 stats["total"] += 1 stages = {} for stage in STAGES: media = scan_stage(repo, cat, clip_id, stage) if media: stages[stage] = media # Check for ONNX and PT policy files onnx_path = repo / cat / clip_id / "policy" / f"{clip_id}_policy.onnx" pt_path = repo / cat / clip_id / "policy" / f"{clip_id}_policy.pt" has_onnx = onnx_path.exists() has_pt = pt_path.exists() has_policy = "policy" in stages or has_onnx or has_pt if has_policy: stats["policies"] += 1 subtitle = KARATE_SUBTITLES.get(clip_id) display_name = format_display_name(clip_id) if subtitle: display_name = f"{display_name}: {subtitle}" clips.append( { "id": clip_id, "name": display_name, "category": cat, "performer": info.get("performer", "Unknown"), "duration": info.get("duration_s", 0), "fps": info.get("fps", 60), "frames": info.get("frames", 0), "stages": stages, "has_policy": has_policy, "has_onnx": has_onnx, } ) # Sort: categories grouped, then alphabetical cat_order = {"dance": 0, "karate": 1, "bonus": 2} clips.sort(key=lambda c: (cat_order.get(c["category"], 9), c["id"])) data = { "base_url": "https://huggingface.co/datasets/exptech/g1-moves/resolve/main", "clips": clips, "stats": stats, } output = Path(__file__).parent / "data.json" output.write_text(json.dumps(data, indent=2)) print(f"Generated {output.name}: {len(clips)} clips, {stats['policies']} policies") if __name__ == "__main__": main()