Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """Generate data.json for the G1 Moves showcase site. | |
| Reads manifest.json + scans the filesystem to determine which media | |
| files (GIF/MP4) exist for each clip at each pipeline stage. | |
| Usage: | |
| python generate_data.py # from space/ directory | |
| python generate_data.py --repo /path/to/g1-moves | |
| """ | |
| import argparse | |
| import json | |
| import re | |
| from pathlib import Path | |
# Pipeline stages, in the order main() probes them for media files.
STAGES: list[str] = ["capture", "retarget", "training", "policy"]

# Descriptive subtitles for karate moves (from README), keyed by clip ID.
KARATE_SUBTITLES: dict[str, str] = {
    # M_Move* clips
    "M_Move1": "Guard Combo",
    "M_Move2": "Low Punch",
    "M_Move3": "Horse Stance",
    "M_Move4": "Spin Punch",
    "M_Move5": "Twist Punch",
    "M_Move6": "Spin Strike",
    "M_Move7": "Rapid Punch",
    "M_Move8": "Drop Spin",
    "M_Move9": "Level Change",
    "M_Move10": "Side Kick",
    "M_Move11": "Blitz",
    "M_Move17": "Double Strike",
    "M_Move18": "Front Kick",
    "M_Move19": "Slow Kata",
    "M_Move20": "Open Strike",
    # M_ShortMove* clips
    "M_ShortMove12": "Quick Jab",
    "M_ShortMove13": "Snap Kick",
    "M_ShortMove14": "Light Punch",
    "M_ShortMove15": "Drop Strike",
    "M_ShortMove16": "Power Burst",
}
def format_display_name(clip_id: str) -> str:
    """Turn a clip ID into a human-readable display name.

    Drops a leading ``B_``, ``J_``, ``M_`` or ``V_`` prefix, inserts a space
    at every lowercase-or-digit -> uppercase boundary and at every
    letter -> digit boundary, then turns remaining underscores into spaces.
    """
    # Applied in order; each pass works on the output of the previous one.
    rewrites = (
        (r"^[BJMV]_", ""),
        (r"([a-z\d])([A-Z])", r"\1 \2"),
        (r"([A-Za-z])(\d)", r"\1 \2"),
    )
    label = clip_id
    for pattern, replacement in rewrites:
        label = re.sub(pattern, replacement, label)
    return label.replace("_", " ")
def get_media_file(clip_id: str, stage: str) -> str:
    """Return the expected GIF filename for a clip at a given stage.

    The ``capture`` stage uses the bare clip ID as the file stem; every
    other stage appends ``_<stage>`` to it.
    """
    stem = clip_id if stage == "capture" else f"{clip_id}_{stage}"
    return f"{stem}.gif"
def scan_stage(repo: Path, category: str, clip_id: str, stage: str) -> dict | None:
    """Check for media files at a pipeline stage.

    Looks in ``<repo>/<category>/<clip_id>/<stage>/`` for the stage's GIF
    (as named by ``get_media_file``) and for an MP4 with the same stem.

    Args:
        repo: Repository root directory.
        category: Category directory name of the clip.
        clip_id: Clip identifier (also its directory name).
        stage: Pipeline stage directory name.

    Returns:
        A dict mapping ``"gif"`` and/or ``"mp4"`` to the file's
        repo-relative path (forward-slash separated), or ``None`` when
        neither file exists.
    """
    stage_dir = repo / category / clip_id / stage
    rel_dir = f"{category}/{clip_id}/{stage}"

    # Strip only the trailing extension: a plain str.replace(".gif", ".mp4")
    # would also rewrite any ".gif" that happens to occur inside the stem.
    stem = get_media_file(clip_id, stage).removesuffix(".gif")

    found = {}
    for ext in ("gif", "mp4"):
        filename = f"{stem}.{ext}"
        if (stage_dir / filename).exists():
            found[ext] = f"{rel_dir}/{filename}"
    return found or None
def main():
    """Generate ``data.json`` next to this script from the repo manifest.

    Parses ``--repo`` (default: the parent of this script's directory),
    loads ``manifest.json`` from it, probes every clip's stage directories
    for media and policy files, and writes the clip list together with
    per-category counts.

    Raises:
        SystemExit: If ``manifest.json`` is missing, so the process exits
            with a non-zero status instead of silently reporting success.
    """
    parser = argparse.ArgumentParser(description="Generate site data from manifest")
    parser.add_argument(
        "--repo",
        type=Path,
        default=Path(__file__).parent.parent,
        help="Path to g1-moves repository root",
    )
    args = parser.parse_args()
    repo = args.repo.resolve()

    manifest_path = repo / "manifest.json"
    if not manifest_path.exists():
        # SystemExit with a string argument prints it to stderr and exits
        # with status 1, so shell/CI callers can detect the failure.
        raise SystemExit(f"ERROR: manifest.json not found at {manifest_path}")

    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))

    clips = []
    stats = {"dance": 0, "karate": 0, "bonus": 0, "policies": 0, "total": 0}

    for clip_id, info in sorted(manifest["clips"].items()):
        cat = info["category"]
        # .get() lets categories not pre-seeded in stats still be counted.
        stats[cat] = stats.get(cat, 0) + 1
        stats["total"] += 1

        # Record only the stages that actually have media on disk.
        stages = {}
        for stage in STAGES:
            media = scan_stage(repo, cat, clip_id, stage)
            if media:
                stages[stage] = media

        # Check for ONNX and PT policy files
        policy_dir = repo / cat / clip_id / "policy"
        has_onnx = (policy_dir / f"{clip_id}_policy.onnx").exists()
        has_pt = (policy_dir / f"{clip_id}_policy.pt").exists()
        # A clip counts as having a policy if it has policy-stage media or
        # either exported policy file.
        has_policy = "policy" in stages or has_onnx or has_pt
        if has_policy:
            stats["policies"] += 1

        display_name = format_display_name(clip_id)
        subtitle = KARATE_SUBTITLES.get(clip_id)
        if subtitle:
            display_name = f"{display_name}: {subtitle}"

        clips.append(
            {
                "id": clip_id,
                "name": display_name,
                "category": cat,
                "performer": info.get("performer", "Unknown"),
                "duration": info.get("duration_s", 0),
                "fps": info.get("fps", 60),
                "frames": info.get("frames", 0),
                "stages": stages,
                "has_policy": has_policy,
                "has_onnx": has_onnx,
            }
        )

    # Sort: categories grouped, then alphabetical; unknown categories go last.
    cat_order = {"dance": 0, "karate": 1, "bonus": 2}
    clips.sort(key=lambda c: (cat_order.get(c["category"], 9), c["id"]))

    data = {
        "base_url": "https://huggingface.co/datasets/exptech/g1-moves/resolve/main",
        "clips": clips,
        "stats": stats,
    }

    output = Path(__file__).parent / "data.json"
    output.write_text(json.dumps(data, indent=2), encoding="utf-8")
    print(f"Generated {output.name}: {len(clips)} clips, {stats['policies']} policies")
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()