File size: 5,067 Bytes
2d78b0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6d1dc8
e7fed85
e6d1dc8
e7fed85
e6d1dc8
 
 
 
e7fed85
2d78b0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e7fed85
2d78b0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/env python3
"""Generate data.json for the G1 Moves showcase site.

Reads manifest.json + scans the filesystem to determine which media
files (GIF/MP4) exist for each clip at each pipeline stage.

Usage:
    python generate_data.py              # from space/ directory
    python generate_data.py --repo /path/to/g1-moves
"""

import argparse
import json
import re
from pathlib import Path

# Pipeline stage names, in the order main() scans them. Each name is also the
# per-clip subdirectory that scan_stage() looks in for media files.
STAGES = ["capture", "retarget", "training", "policy"]

# Descriptive subtitles for karate moves (from README), keyed by clip ID.
# main() appends the subtitle to the formatted clip name as
# "<name>: <subtitle>"; clip IDs without an entry here keep the plain name.
KARATE_SUBTITLES = {
    "M_Move1": "Guard Combo",
    "M_Move2": "Low Punch",
    "M_Move3": "Horse Stance",
    "M_Move4": "Spin Punch",
    "M_Move5": "Twist Punch",
    "M_Move6": "Spin Strike",
    "M_Move7": "Rapid Punch",
    "M_Move8": "Drop Spin",
    "M_Move9": "Level Change",
    "M_Move10": "Side Kick",
    "M_Move11": "Blitz",
    "M_Move17": "Double Strike",
    "M_Move18": "Front Kick",
    "M_Move19": "Slow Kata",
    "M_Move20": "Open Strike",
    "M_ShortMove12": "Quick Jab",
    "M_ShortMove13": "Snap Kick",
    "M_ShortMove14": "Light Punch",
    "M_ShortMove15": "Drop Strike",
    "M_ShortMove16": "Power Burst",
}


def format_display_name(clip_id: str) -> str:
    """Turn a clip ID into a spaced, human-readable label.

    A leading single-letter prefix (``B_``, ``J_``, ``M_`` or ``V_``) is
    dropped, a space is inserted at lower/digit-to-upper boundaries and at
    letter-to-digit boundaries, and remaining underscores become spaces.

    Example: ``"M_ShortMove12"`` becomes ``"Short Move 12"``.
    """
    # (pattern, replacement) pairs; order matters, so they are applied in turn.
    rewrites = (
        (r"^[BJMV]_", ""),                    # strip the one-letter prefix
        (r"([a-z\d])([A-Z])", r"\1 \2"),      # fooBar / 2Fast -> foo Bar / 2 Fast
        (r"([A-Za-z])(\d)", r"\1 \2"),        # Move1 -> Move 1
    )
    label = clip_id
    for pattern, replacement in rewrites:
        label = re.sub(pattern, replacement, label)
    return label.replace("_", " ")


def get_media_file(clip_id: str, stage: str) -> str:
    """Build the GIF filename expected for ``clip_id`` at ``stage``.

    The ``"capture"`` stage uses the bare clip ID (``<clip_id>.gif``); every
    other stage appends the stage name (``<clip_id>_<stage>.gif``).
    """
    stage_suffix = "" if stage == "capture" else f"_{stage}"
    return f"{clip_id}{stage_suffix}.gif"


def scan_stage(repo: Path, category: str, clip_id: str, stage: str) -> dict | None:
    """Look up which media files exist for a clip at one pipeline stage.

    Files are searched in ``repo/category/clip_id/stage``. The result maps
    ``"gif"`` and/or ``"mp4"`` to the file's repo-relative POSIX-style path
    (``category/clip_id/stage/filename``). Returns ``None`` when neither
    file is present.
    """
    gif_name = get_media_file(clip_id, stage)
    # The MP4 uses the GIF's filename with the extension text swapped.
    candidates = (
        ("gif", gif_name),
        ("mp4", gif_name.replace(".gif", ".mp4")),
    )

    folder = repo / category / clip_id / stage
    found = {
        kind: f"{category}/{clip_id}/{stage}/{filename}"
        for kind, filename in candidates
        if (folder / filename).exists()
    }
    # An empty dict is reported as None so callers can simply truth-test.
    return found or None


def main():
    """Build ``data.json`` for the showcase site from the repo manifest.

    Parses ``--repo`` (default: the parent of this script's directory), loads
    ``manifest.json`` from it, and for every clip records which stage media
    and policy files exist on disk. The result (base URL, clip list,
    per-category stats) is written to ``data.json`` next to this script.

    Raises:
        SystemExit: if ``manifest.json`` is missing from the repo root. The
            message goes to stderr and the process exits with status 1, so
            shells and CI can detect the failure.
    """
    parser = argparse.ArgumentParser(description="Generate site data from manifest")
    parser.add_argument(
        "--repo",
        type=Path,
        default=Path(__file__).parent.parent,
        help="Path to g1-moves repository root",
    )
    args = parser.parse_args()
    repo = args.repo.resolve()

    manifest_path = repo / "manifest.json"
    if not manifest_path.exists():
        # SystemExit with a string argument prints it to stderr and exits
        # with status 1, instead of "succeeding" silently with status 0.
        raise SystemExit(f"ERROR: manifest.json not found at {manifest_path}")

    # Explicit UTF-8: JSON should not depend on the platform's locale encoding.
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))

    clips = []
    stats = {"dance": 0, "karate": 0, "bonus": 0, "policies": 0, "total": 0}

    # Iterate in clip-ID order so any category not pre-seeded in ``stats`` is
    # added to it in a deterministic order.
    for clip_id, info in sorted(manifest["clips"].items()):
        cat = info["category"]
        stats[cat] = stats.get(cat, 0) + 1
        stats["total"] += 1

        # Keep only the stages that actually have media on disk.
        stages = {}
        for stage in STAGES:
            media = scan_stage(repo, cat, clip_id, stage)
            if media:
                stages[stage] = media

        # Check for ONNX and PT policy files
        policy_dir = repo / cat / clip_id / "policy"
        has_onnx = (policy_dir / f"{clip_id}_policy.onnx").exists()
        has_pt = (policy_dir / f"{clip_id}_policy.pt").exists()
        # A clip counts as having a policy if it has policy-stage media or
        # either exported model file.
        has_policy = "policy" in stages or has_onnx or has_pt
        if has_policy:
            stats["policies"] += 1

        subtitle = KARATE_SUBTITLES.get(clip_id)
        display_name = format_display_name(clip_id)
        if subtitle:
            display_name = f"{display_name}: {subtitle}"

        clips.append(
            {
                "id": clip_id,
                "name": display_name,
                "category": cat,
                "performer": info.get("performer", "Unknown"),
                "duration": info.get("duration_s", 0),
                "fps": info.get("fps", 60),
                "frames": info.get("frames", 0),
                "stages": stages,
                "has_policy": has_policy,
                "has_onnx": has_onnx,
            }
        )

    # Sort: categories grouped, then alphabetical. Unknown categories sort last.
    cat_order = {"dance": 0, "karate": 1, "bonus": 2}
    clips.sort(key=lambda c: (cat_order.get(c["category"], 9), c["id"]))

    data = {
        "base_url": "https://huggingface.co/datasets/exptech/g1-moves/resolve/main",
        "clips": clips,
        "stats": stats,
    }

    output = Path(__file__).parent / "data.json"
    output.write_text(json.dumps(data, indent=2), encoding="utf-8")
    print(f"Generated {output.name}: {len(clips)} clips, {stats['policies']} policies")


# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()