Spaces:

exptech
/

g1-moves

Running

File size: 5,067 Bytes

#!/usr/bin/env python3
"""Generate data.json for the G1 Moves showcase site.

Reads manifest.json and scans the filesystem to determine which media
files (GIF/MP4) exist for each clip at each pipeline stage.

Usage:
    python generate_data.py              # from space/ directory
    python generate_data.py --repo /path/to/g1-moves
"""

import argparse
import json
import re
from pathlib import Path

# Pipeline stages, in the order main() scans them. Each name is also the
# per-clip subdirectory that scan_stage() looks in.
STAGES = ["capture", "retarget", "training", "policy"]

# Descriptive subtitles for karate moves (from README).
# Keyed by clip ID; main() appends the subtitle to the formatted display
# name as "<name>: <subtitle>". Clip IDs without an entry get no subtitle.
KARATE_SUBTITLES = {
    "M_Move1": "Guard Combo",
    "M_Move2": "Low Punch",
    "M_Move3": "Horse Stance",
    "M_Move4": "Spin Punch",
    "M_Move5": "Twist Punch",
    "M_Move6": "Spin Strike",
    "M_Move7": "Rapid Punch",
    "M_Move8": "Drop Spin",
    "M_Move9": "Level Change",
    "M_Move10": "Side Kick",
    "M_Move11": "Blitz",
    "M_Move17": "Double Strike",
    "M_Move18": "Front Kick",
    "M_Move19": "Slow Kata",
    "M_Move20": "Open Strike",
    "M_ShortMove12": "Quick Jab",
    "M_ShortMove13": "Snap Kick",
    "M_ShortMove14": "Light Punch",
    "M_ShortMove15": "Drop Strike",
    "M_ShortMove16": "Power Burst",
}


def format_display_name(clip_id: str) -> str:
    """Turn a clip ID into a spaced, human-readable label.

    The rewrites are applied in order:

    1. Drop a leading single-letter prefix (``B_``, ``J_``, ``M_`` or ``V_``).
    2. Insert a space where a lowercase letter or digit meets an uppercase
       letter (``ShortMove`` -> ``Short Move``).
    3. Insert a space where a letter meets a digit (``Move12`` -> ``Move 12``).
    4. Turn any remaining underscores into spaces.
    """
    rewrites = (
        (r"^[BJMV]_", ""),
        (r"([a-z\d])([A-Z])", r"\1 \2"),
        (r"([A-Za-z])(\d)", r"\1 \2"),
    )
    label = clip_id
    for pattern, replacement in rewrites:
        label = re.sub(pattern, replacement, label)
    return label.replace("_", " ")


def get_media_file(clip_id: str, stage: str) -> str:
    """Build the GIF filename expected for ``clip_id`` at ``stage``.

    The "capture" stage uses the bare clip ID (``<clip_id>.gif``); every
    other stage appends the stage name (``<clip_id>_<stage>.gif``).
    """
    stage_suffix = "" if stage == "capture" else f"_{stage}"
    return f"{clip_id}{stage_suffix}.gif"


def scan_stage(repo: Path, category: str, clip_id: str, stage: str) -> dict | None:
    """Check for media files at a pipeline stage.

    Looks in ``<repo>/<category>/<clip_id>/<stage>/`` for the stage's GIF (as
    named by ``get_media_file``) and for an MP4 with the same base name.

    Args:
        repo: Repository root directory.
        category: Clip category; used as the top-level directory name.
        clip_id: Clip identifier; used as the clip directory name.
        stage: Pipeline stage; used as the stage directory name.

    Returns:
        A dict with a ``"gif"`` and/or ``"mp4"`` key, each mapping to the
        file's ``/``-separated path relative to ``repo``, or ``None`` when
        neither file exists.
    """
    stage_dir = repo / category / clip_id / stage
    gif_name = get_media_file(clip_id, stage)
    # Swap only the trailing extension. str.replace(".gif", ".mp4") would also
    # rewrite a ".gif" that happens to occur earlier in the filename.
    mp4_name = gif_name.removesuffix(".gif") + ".mp4"

    found = {}
    for kind, filename in (("gif", gif_name), ("mp4", mp4_name)):
        if (stage_dir / filename).exists():
            found[kind] = f"{category}/{clip_id}/{stage}/{filename}"

    # An empty dict is reported as None so callers can use a truthiness check.
    return found or None


def main():
    """Build ``data.json`` next to this script from the repo's manifest.

    Parses ``--repo`` (default: the parent of this script's directory), loads
    ``<repo>/manifest.json``, and for every clip listed under ``"clips"``
    records which pipeline stages have media and whether policy files exist.
    The result (base URL, clip list, per-category stats) is written as
    indented JSON to ``data.json`` in this script's directory, and a one-line
    summary is printed.

    Raises:
        SystemExit: With a non-zero status if ``manifest.json`` is missing.
    """
    parser = argparse.ArgumentParser(description="Generate site data from manifest")
    parser.add_argument(
        "--repo",
        type=Path,
        default=Path(__file__).parent.parent,
        help="Path to g1-moves repository root",
    )
    args = parser.parse_args()
    repo = args.repo.resolve()

    manifest_path = repo / "manifest.json"
    if not manifest_path.exists():
        # Exit non-zero (message goes to stderr) so shells and CI see the
        # failure instead of a silent "successful" run.
        raise SystemExit(f"ERROR: manifest.json not found at {manifest_path}")

    # Read as UTF-8 explicitly rather than relying on the platform default.
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))

    clips = []
    stats = {"dance": 0, "karate": 0, "bonus": 0, "policies": 0, "total": 0}

    for clip_id, info in sorted(manifest["clips"].items()):
        cat = info["category"]
        # .get() lets categories not pre-seeded in stats be counted too.
        stats[cat] = stats.get(cat, 0) + 1
        stats["total"] += 1

        # Keep only the stages that actually have a GIF or MP4 on disk.
        stages = {}
        for stage in STAGES:
            media = scan_stage(repo, cat, clip_id, stage)
            if media:
                stages[stage] = media

        # Check for ONNX and PT policy files
        policy_dir = repo / cat / clip_id / "policy"
        has_onnx = (policy_dir / f"{clip_id}_policy.onnx").exists()
        has_pt = (policy_dir / f"{clip_id}_policy.pt").exists()
        # A clip counts as having a policy if it has policy-stage media or
        # either exported policy file.
        has_policy = "policy" in stages or has_onnx or has_pt
        if has_policy:
            stats["policies"] += 1

        subtitle = KARATE_SUBTITLES.get(clip_id)
        display_name = format_display_name(clip_id)
        if subtitle:
            display_name = f"{display_name}: {subtitle}"

        clips.append(
            {
                "id": clip_id,
                "name": display_name,
                "category": cat,
                "performer": info.get("performer", "Unknown"),
                "duration": info.get("duration_s", 0),
                "fps": info.get("fps", 60),
                "frames": info.get("frames", 0),
                "stages": stages,
                "has_policy": has_policy,
                "has_onnx": has_onnx,
            }
        )

    # Sort: categories grouped, then alphabetical. Unknown categories sort last.
    cat_order = {"dance": 0, "karate": 1, "bonus": 2}
    clips.sort(key=lambda c: (cat_order.get(c["category"], 9), c["id"]))

    data = {
        "base_url": "https://huggingface.co/datasets/exptech/g1-moves/resolve/main",
        "clips": clips,
        "stats": stats,
    }

    output = Path(__file__).parent / "data.json"
    output.write_text(json.dumps(data, indent=2), encoding="utf-8")
    print(f"Generated {output.name}: {len(clips)} clips, {stats['policies']} policies")


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()