Spaces:

exptech
/

g1-moves

Running

App Files Files Community

g1-moves / generate_data.py

exptech

Pagination, preview-to-WASM swap, trained clips first

e6d1dc8 verified about 1 month ago

raw

history blame contribute delete

5.07 kB

	#!/usr/bin/env python3
	"""Generate data.json for the G1 Moves showcase site.

	Reads manifest.json + scans the filesystem to determine which media
	files (GIF/MP4) exist for each clip at each pipeline stage.

	Usage:
	python generate_data.py # from space/ directory
	python generate_data.py --repo /path/to/g1-moves
	"""

	import argparse
	import json
	import re
	from pathlib import Path

	# Pipeline stage directory names, in the order main() scans them.
	STAGES = [
	    "capture",
	    "retarget",
	    "training",
	    "policy",
	]

	# Subtitles for the karate clips (taken from the README), keyed by clip ID.
	# main() appends the subtitle to the generated display name.
	KARATE_SUBTITLES = {
	    # Full-length moves
	    "M_Move1": "Guard Combo",
	    "M_Move2": "Low Punch",
	    "M_Move3": "Horse Stance",
	    "M_Move4": "Spin Punch",
	    "M_Move5": "Twist Punch",
	    "M_Move6": "Spin Strike",
	    "M_Move7": "Rapid Punch",
	    "M_Move8": "Drop Spin",
	    "M_Move9": "Level Change",
	    "M_Move10": "Side Kick",
	    "M_Move11": "Blitz",
	    "M_Move17": "Double Strike",
	    "M_Move18": "Front Kick",
	    "M_Move19": "Slow Kata",
	    "M_Move20": "Open Strike",
	    # Short moves
	    "M_ShortMove12": "Quick Jab",
	    "M_ShortMove13": "Snap Kick",
	    "M_ShortMove14": "Light Punch",
	    "M_ShortMove15": "Drop Strike",
	    "M_ShortMove16": "Power Burst",
	}


	def format_display_name(clip_id: str) -> str:
	    """Turn a clip ID into a spaced, human-readable label.

	    A leading ``B_``/``J_``/``M_``/``V_`` prefix is dropped, a space is
	    inserted at lowercase-or-digit -> uppercase and letter -> digit
	    boundaries, and any remaining underscores become spaces.
	    """
	    # Applied strictly in this order; each step works on the previous result.
	    rewrites = (
	        (r"^[BJMV]_", ""),
	        (r"([a-z\d])([A-Z])", r"\1 \2"),
	        (r"([A-Za-z])(\d)", r"\1 \2"),
	    )
	    label = clip_id
	    for pattern, replacement in rewrites:
	        label = re.sub(pattern, replacement, label)
	    return label.replace("_", " ")


	def get_media_file(clip_id: str, stage: str) -> str:
	    """Build the GIF filename expected for ``clip_id`` at ``stage``.

	    The "capture" stage uses the bare clip ID; every other stage appends
	    ``_<stage>`` before the ``.gif`` extension.
	    """
	    suffix = "" if stage == "capture" else f"_{stage}"
	    return f"{clip_id}{suffix}.gif"


	def scan_stage(repo: Path, category: str, clip_id: str, stage: str) -> dict | None:
	    """Look for a clip's GIF/MP4 media at one pipeline stage.

	    Args:
	        repo: Repository root containing the category directories.
	        category: Category directory the clip lives under.
	        clip_id: Clip identifier, which is also its directory name.
	        stage: Stage sub-directory to inspect.

	    Returns:
	        A dict with a "gif" and/or "mp4" key, each mapping to the
	        repo-relative, "/"-separated path of a file that exists, or
	        ``None`` when neither file is present.
	    """
	    stage_dir = repo / category / clip_id / stage
	    rel_dir = f"{category}/{clip_id}/{stage}"

	    gif_name = get_media_file(clip_id, stage)
	    # Swap only the trailing extension: str.replace would also rewrite a
	    # ".gif" that happens to occur earlier in the clip ID.
	    mp4_name = gif_name.removesuffix(".gif") + ".mp4"

	    result = {}
	    for kind, filename in (("gif", gif_name), ("mp4", mp4_name)):
	        if (stage_dir / filename).exists():
	            result[kind] = f"{rel_dir}/{filename}"

	    return result or None


	def main():
	    """Generate data.json from manifest.json plus the media found on disk.

	    Reads ``manifest.json`` from the repository root (``--repo``, defaulting
	    to the parent of this script's directory). For every clip it records
	    which stages have media and whether policy files exist, then writes the
	    result to ``data.json`` in this script's directory.

	    Raises:
	        SystemExit: If ``manifest.json`` is missing. The message goes to
	            stderr and the process exits with a non-zero status.
	    """
	    parser = argparse.ArgumentParser(description="Generate site data from manifest")
	    parser.add_argument(
	        "--repo",
	        type=Path,
	        default=Path(__file__).parent.parent,
	        help="Path to g1-moves repository root",
	    )
	    args = parser.parse_args()
	    repo = args.repo.resolve()

	    manifest_path = repo / "manifest.json"
	    if not manifest_path.exists():
	        # Fail loudly: a plain print + return would exit with status 0 and
	        # let build scripts carry on without a data.json.
	        raise SystemExit(f"ERROR: manifest.json not found at {manifest_path}")

	    # JSON is UTF-8 by specification; do not depend on the locale default.
	    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))

	    clips = []
	    stats = {"dance": 0, "karate": 0, "bonus": 0, "policies": 0, "total": 0}

	    for clip_id, info in sorted(manifest["clips"].items()):
	        cat = info["category"]
	        # .get() keeps categories outside the pre-seeded three from raising.
	        stats[cat] = stats.get(cat, 0) + 1
	        stats["total"] += 1

	        stages = {}
	        for stage in STAGES:
	            media = scan_stage(repo, cat, clip_id, stage)
	            if media:
	                stages[stage] = media

	        # A clip counts as having a policy if it has policy-stage media
	        # or an exported ONNX / PT policy file.
	        policy_dir = repo / cat / clip_id / "policy"
	        has_onnx = (policy_dir / f"{clip_id}_policy.onnx").exists()
	        has_pt = (policy_dir / f"{clip_id}_policy.pt").exists()
	        has_policy = "policy" in stages or has_onnx or has_pt
	        if has_policy:
	            stats["policies"] += 1

	        subtitle = KARATE_SUBTITLES.get(clip_id)
	        display_name = format_display_name(clip_id)
	        if subtitle:
	            display_name = f"{display_name}: {subtitle}"

	        clips.append(
	            {
	                "id": clip_id,
	                "name": display_name,
	                "category": cat,
	                "performer": info.get("performer", "Unknown"),
	                "duration": info.get("duration_s", 0),
	                "fps": info.get("fps", 60),
	                "frames": info.get("frames", 0),
	                "stages": stages,
	                "has_policy": has_policy,
	                "has_onnx": has_onnx,
	            }
	        )

	    # Sort: known categories grouped in a fixed order (unknown ones last),
	    # then alphabetical by clip ID within each category.
	    cat_order = {"dance": 0, "karate": 1, "bonus": 2}
	    clips.sort(key=lambda c: (cat_order.get(c["category"], 9), c["id"]))

	    data = {
	        "base_url": "https://huggingface.co/datasets/exptech/g1-moves/resolve/main",
	        "clips": clips,
	        "stats": stats,
	    }

	    output = Path(__file__).parent / "data.json"
	    output.write_text(json.dumps(data, indent=2), encoding="utf-8")
	    print(f"Generated {output.name}: {len(clips)} clips, {stats['policies']} policies")


	if __name__ == "__main__":
	    # Run the generator only when executed as a script, not on import.
	    main()