#!/usr/bin/env python3
"""Generate data.json for the G1 Moves showcase site.
Reads manifest.json + scans the filesystem to determine which media
files (GIF/MP4) exist for each clip at each pipeline stage.
Usage:
python generate_data.py # from space/ directory
python generate_data.py --repo /path/to/g1-moves
"""
import argparse
import json
import re
from pathlib import Path
# Pipeline stages, in the order they are scanned for media files.
STAGES: list[str] = ["capture", "retarget", "training", "policy"]

# Descriptive subtitles for karate moves (from README), keyed by clip ID.
# Clips without an entry here are shown with their formatted name only.
KARATE_SUBTITLES: dict[str, str] = {
    # Full-length moves
    "M_Move1": "Guard Combo",
    "M_Move2": "Low Punch",
    "M_Move3": "Horse Stance",
    "M_Move4": "Spin Punch",
    "M_Move5": "Twist Punch",
    "M_Move6": "Spin Strike",
    "M_Move7": "Rapid Punch",
    "M_Move8": "Drop Spin",
    "M_Move9": "Level Change",
    "M_Move10": "Side Kick",
    "M_Move11": "Blitz",
    "M_Move17": "Double Strike",
    "M_Move18": "Front Kick",
    "M_Move19": "Slow Kata",
    "M_Move20": "Open Strike",
    # Short moves
    "M_ShortMove12": "Quick Jab",
    "M_ShortMove13": "Snap Kick",
    "M_ShortMove14": "Light Punch",
    "M_ShortMove15": "Drop Strike",
    "M_ShortMove16": "Power Burst",
}
def format_display_name(clip_id: str) -> str:
    """Turn a clip ID into a spaced, human-readable label.

    Removes a leading ``B_``/``J_``/``M_``/``V_`` prefix, inserts a space at
    lowercase-or-digit -> uppercase boundaries and at letter -> digit
    boundaries, then replaces any remaining underscores with spaces.
    """
    # Applied in order; each rewrite sees the output of the previous one.
    rewrites = (
        (r"^[BJMV]_", ""),  # drop the single-letter prefix
        (r"([a-z\d])([A-Z])", r"\1 \2"),  # "ShortMove" -> "Short Move"
        (r"([A-Za-z])(\d)", r"\1 \2"),  # "Move10" -> "Move 10"
    )
    label = clip_id
    for pattern, replacement in rewrites:
        label = re.sub(pattern, replacement, label)
    return label.replace("_", " ")
def get_media_file(clip_id: str, stage: str) -> str:
    """Build the GIF filename expected for ``clip_id`` at ``stage``.

    The ``capture`` stage uses the bare clip ID; every other stage appends
    ``_<stage>`` before the ``.gif`` extension.
    """
    stage_suffix = "" if stage == "capture" else f"_{stage}"
    return f"{clip_id}{stage_suffix}.gif"
def scan_stage(repo: Path, category: str, clip_id: str, stage: str) -> dict | None:
    """Look for a clip's GIF and MP4 under ``repo/category/clip_id/stage``.

    Returns a dict with a ``"gif"`` and/or ``"mp4"`` key holding the
    repo-relative path (forward-slash separated) of each file that exists,
    or ``None`` when neither file is present.
    """
    folder = repo / category / clip_id / stage
    gif_name = get_media_file(clip_id, stage)
    # The MP4 shares the GIF's name with the extension swapped.
    candidates = {"gif": gif_name, "mp4": gif_name.replace(".gif", ".mp4")}

    found = {
        kind: f"{category}/{clip_id}/{stage}/{filename}"
        for kind, filename in candidates.items()
        if (folder / filename).exists()
    }
    return found or None
def main():
    """Generate ``data.json`` next to this script from the repo manifest.

    Parses ``--repo`` (default: the parent of this script's directory), loads
    ``<repo>/manifest.json``, and for each clip records which pipeline stages
    have media files and whether policy files (``.onnx`` / ``.pt``) exist.
    The clip list and per-category counts are written to ``data.json`` in this
    script's directory, and a one-line summary is printed.

    Raises:
        SystemExit: If ``manifest.json`` is missing. The message goes to
            stderr and the process exits with a non-zero status, so callers
            such as CI can detect the failure.
    """
    parser = argparse.ArgumentParser(description="Generate site data from manifest")
    parser.add_argument(
        "--repo",
        type=Path,
        default=Path(__file__).parent.parent,
        help="Path to g1-moves repository root",
    )
    args = parser.parse_args()
    repo = args.repo.resolve()

    manifest_path = repo / "manifest.json"
    if not manifest_path.exists():
        # Exit non-zero instead of printing and returning with status 0.
        raise SystemExit(f"ERROR: manifest.json not found at {manifest_path}")

    # Read as UTF-8 explicitly; the platform default encoding may differ.
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))

    clips = []
    stats = {"dance": 0, "karate": 0, "bonus": 0, "policies": 0, "total": 0}

    # No pre-sort needed here: the list is fully ordered by the sort below.
    for clip_id, info in manifest["clips"].items():
        cat = info["category"]
        # .get() lets categories outside the predefined three be counted too.
        stats[cat] = stats.get(cat, 0) + 1
        stats["total"] += 1

        stages = {}
        for stage in STAGES:
            media = scan_stage(repo, cat, clip_id, stage)
            if media:
                stages[stage] = media

        # Check for ONNX and PT policy files
        policy_dir = repo / cat / clip_id / "policy"
        has_onnx = (policy_dir / f"{clip_id}_policy.onnx").exists()
        has_pt = (policy_dir / f"{clip_id}_policy.pt").exists()
        # A clip has a policy if it has policy-stage media or either file.
        has_policy = "policy" in stages or has_onnx or has_pt
        if has_policy:
            stats["policies"] += 1

        subtitle = KARATE_SUBTITLES.get(clip_id)
        display_name = format_display_name(clip_id)
        if subtitle:
            display_name = f"{display_name}: {subtitle}"

        clips.append(
            {
                "id": clip_id,
                "name": display_name,
                "category": cat,
                "performer": info.get("performer", "Unknown"),
                "duration": info.get("duration_s", 0),
                "fps": info.get("fps", 60),
                "frames": info.get("frames", 0),
                "stages": stages,
                "has_policy": has_policy,
                "has_onnx": has_onnx,
            }
        )

    # Sort: categories grouped (unknown categories last), then alphabetical
    cat_order = {"dance": 0, "karate": 1, "bonus": 2}
    clips.sort(key=lambda c: (cat_order.get(c["category"], 9), c["id"]))

    data = {
        "base_url": "https://huggingface.co/datasets/exptech/g1-moves/resolve/main",
        "clips": clips,
        "stats": stats,
    }

    output = Path(__file__).parent / "data.json"
    output.write_text(json.dumps(data, indent=2), encoding="utf-8")
    print(f"Generated {output.name}: {len(clips)} clips, {stats['policies']} policies")
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|