Spaces:

build-small-hackathon
/

kicky-ai

Running on Zero

App Files Files Community

kicky-ai / server.py

dcrey7

Point to renamed Kicky AI model/dataset repos

cec6d52 15 days ago

Raw

History Blame Contribute Delete

7.67 kB

	"""FUT World Cup Coach — custom frontend on gradio.Server (Off-Brand quest).

	A bespoke football-stadium HTML/JS frontend (index.html) talks to Gradio's backend engine:
	@app.api() runs RF-DETR detection on ZeroGPU, renders the analysed video, and calls the
	Nemotron-VL coach on Modal (MiniCPM-V on ZeroGPU as the local fallback) — all behind
	Gradio's queue + gradio_client, hosted on a ZeroGPU Space.
	"""
	import glob
	import json
	import os
	import shutil
	import subprocess

	from gradio import Server
	from gradio.data_classes import FileData
	from fastapi.responses import HTMLResponse, FileResponse, JSONResponse
	import spaces
	from huggingface_hub import hf_hub_download

	from futheros import pipeline as P
	from futheros import coach as C
	from futheros.render import render, pose_card_img

	# Hub repo that hosts the RF-DETR segmentation checkpoint used by the detector.
	HF_MODEL = "build-small-hackathon/kicky-ai-rfdetr-seg"

	# Rendered videos, pose cards and coach key frames are written under this directory.
	RENDER_DIR = "/tmp/app_renders"
	os.makedirs(RENDER_DIR, exist_ok=True)

	# Directory containing this file; labels/, examples/ and index.html are resolved against it.
	HERE = os.path.dirname(os.path.abspath(__file__))

	# Label tables looked up in analyse() with keys of the form "5/<name>.mp4".
	# Opened via context managers so the file handles are closed deterministically
	# instead of being left to the garbage collector.
	with open(os.path.join(HERE, "labels/goal_labels.json"), encoding="utf-8") as _fh:
	    GT = json.load(_fh)
	with open(os.path.join(HERE, "labels/leg_labels.json"), encoding="utf-8") as _fh:
	    LEG = json.load(_fh)

	# Base names of the bundled example clips, in sorted path order (served by /api/examples).
	EXAMPLES = [os.path.basename(p) for p in sorted(glob.glob(os.path.join(HERE, "examples/*.mp4")))]

	app = Server()

	# The detector is constructed a single time, while the module is being imported.
	# Doing the download/construction inside a @spaces.GPU window would rebuild the model on
	# every call (the GPU fork loses module globals), costing ~20-30s per call. Built at module
	# level, the spaces lib moves it to the GPU per call, so a call only pays for inference.
	_CKPT = hf_hub_download(HF_MODEL, "checkpoint_best_ema.pth")
	try:
	    _MODEL = P.rfdetr_model(_CKPT, "small")
	except Exception as _err:
	    # Best effort: leave the slot empty so _model() can build the detector on first use.
	    print("eager detector build failed, will lazy-load:", _err, flush=True)
	    _MODEL = None


	def _model():
	    """Return the shared RF-DETR detector, building it now if the import-time build left it unset."""
	    global _MODEL
	    if _MODEL is not None:
	        return _MODEL
	    # Lazy path: the eager build at import failed (or has not happened in this process).
	    _MODEL = P.rfdetr_model(_CKPT, "small")
	    return _MODEL


	@spaces.GPU(duration=120)
	def _detect(path):
	    """Run RF-DETR detection on the video at `path` inside a ZeroGPU window.

	    Returns whatever `P.detect_rfdetr` produces for the clip and the shared detector.
	    """
	    detector = _model()
	    return P.detect_rfdetr(path, detector)


	# ---- Offline coach: MiniCPM-V-4.6 (OpenBMB, 1.3B) running on this Space's ZeroGPU (PyTorch) ----
	# Everything stays on the Space: the open VLM grades the shot on-GPU, with no external API call.
	MINICPM_REPO = "openbmb/MiniCPM-V-4.6"
	_MINICPM = None  # filled lazily with a (model, processor) pair by _minicpm()

	# Pull the offline VLM weights down while importing (on CPU). Otherwise the first offline
	# coach call would download ~2.6 GB inside a @spaces.GPU window and time out.
	try:
	    from huggingface_hub import snapshot_download

	    snapshot_download(MINICPM_REPO)
	except Exception as _err:
	    # Prefetch is only an optimisation; a failure here must not stop the app from starting.
	    print("MiniCPM prefetch skipped:", _err, flush=True)


	def _minicpm():
	    """Return the cached `(model, processor)` pair for the offline coach, loading it on first use.

	    The heavy imports live inside the function so they are only paid for when the
	    offline coach is actually requested.
	    """
	    global _MINICPM
	    if _MINICPM is not None:
	        return _MINICPM

	    import torch
	    from transformers import AutoModelForImageTextToText, AutoProcessor

	    processor = AutoProcessor.from_pretrained(MINICPM_REPO, trust_remote_code=True)
	    model = AutoModelForImageTextToText.from_pretrained(
	        MINICPM_REPO,
	        torch_dtype=torch.bfloat16,
	        trust_remote_code=True,
	    )
	    model = model.eval()  # inference only
	    _MINICPM = (model, processor)
	    return _MINICPM


	@spaces.GPU(duration=120)
	def _coach_offline(frame_paths, sys_text, user_text):
	    """Generate coaching text with the on-Space MiniCPM-V model inside a ZeroGPU window.

	    frame_paths: image files passed to the model as the visual part of the user turn.
	    sys_text:    text of the system turn.
	    user_text:   text appended after the images in the user turn.

	    Returns the decoded completion for the single conversation, prompt tokens removed.
	    """
	    import torch

	    model, processor = _minicpm()
	    model = model.to("cuda")

	    # User turn: every key frame as an image entry, followed by the text prompt.
	    user_content = []
	    for frame in frame_paths:
	        user_content.append({"type": "image", "url": os.path.abspath(frame)})
	    user_content.append({"type": "text", "text": user_text})

	    system_turn = {"role": "system", "content": [{"type": "text", "text": sys_text}]}
	    user_turn = {"role": "user", "content": user_content}

	    batch = processor.apply_chat_template(
	        [system_turn, user_turn],
	        tokenize=True,
	        add_generation_prompt=True,
	        return_dict=True,
	        return_tensors="pt",
	        downsample_mode="16x",
	        max_slice_nums=9,
	    )
	    batch = batch.to("cuda")

	    with torch.no_grad():
	        generated = model.generate(
	            **batch, downsample_mode="16x", max_new_tokens=320, do_sample=False)

	    # generate() output starts with the prompt ids; keep only what comes after them.
	    completions = []
	    for prompt_ids, output_ids in zip(batch.input_ids, generated):
	        completions.append(output_ids[len(prompt_ids):])
	    decoded = processor.batch_decode(completions, skip_special_tokens=True)
	    return decoded[0]


	# Two-phase flow so the UI can show results quickly:
	#   1. /analyse (fast)  - detect + render + pose + stats
	#   2. /coach   (slower) - the AI coach
	# /analyse stores what the coach needs in this dict under the returned `key`, so /coach
	# can reuse the analysis instead of re-running detection.
	# NOTE(review): nothing in this file removes entries, so the dict grows with every distinct
	# key for the lifetime of the process; two clips with the same name share one slot.
	_PENDING = {}


	@app.api()
	def analyse(video: FileData, hint: str = "") -> dict:
	    """Detect, analyse and render an uploaded clip, then return the video plus headline stats.

	    Pipeline: RF-DETR detection (ZeroGPU) -> `P.analyse` -> rendered video + pose card.
	    The AI coach is not run here; the data it needs is stored in `_PENDING` under the
	    returned `key` so /coach can be called afterwards without re-detecting.

	    video: uploaded file; its `path` entry is the source clip.
	    hint:  optional original file name, used to name outputs and look up labels.
	    """
	    upload_path = video["path"]

	    # Uploads land as a nameless 'blob', so copy them to a clean .mp4; `hint` (when given)
	    # preserves the example's name. An empty stem falls back to "clip".
	    stem = os.path.splitext(os.path.basename(hint or upload_path))[0]
	    name = stem if stem else "clip"
	    clip = f"/tmp/in_{name}_{os.getpid()}.mp4"
	    shutil.copyfile(upload_path, clip)

	    tracks = _detect(clip)
	    result = P.analyse(tracks, clip)

	    rendered = f"{RENDER_DIR}/{name}.mp4"
	    try:
	        render(clip, tracks, result, rendered)
	    except Exception as e:
	        # Rendering is best effort: still return something playable.
	        print("render failed, falling back to raw clip:", e, flush=True)
	        video_out = clip
	    else:
	        video_out = rendered

	    pose = pose_card_img(clip, tracks, result, f"{RENDER_DIR}/{name}_pose.png")

	    # Keep everything the coach needs so /coach can run without a second detection pass.
	    payload = C.build_payload(result)
	    frames_b64 = C.key_frames_b64(clip, result)
	    frame_paths = C.key_frame_paths(clip, result, RENDER_DIR)
	    _PENDING[name] = {
	        "payload": payload,
	        "frames_b64": frames_b64,
	        "frame_paths": frame_paths,
	    }

	    label_key = f"5/{name}.mp4"  # key format used by both label tables
	    return {
	        "key": name,
	        "video": FileData(path=video_out),
	        "pose": FileData(path=pose) if pose else None,
	        "goal": result.goal,
	        "goal_time": round(result.goal_time, 1) if result.goal_time else None,
	        "foot": result.foot,
	        "speed": round(result.speed_kmh) if result.speed_kmh else None,
	        "gt_goal": GT.get(label_key),
	        "gt_leg": LEG.get(label_key),
	    }


	@app.api()
	def coach(key: str, mode: str = "online") -> dict:
	    """Produce AI coaching for a clip that /analyse has already processed.

	    key:  the `key` value returned by /analyse.
	    mode: "offline" -> MiniCPM-V-4.6 on this Space's GPU; any other value ->
	          Nemotron-Nano-VL on Modal.

	    Returns {"coach": text or None, "coach_model": label or None}. Both are None when
	    `key` is unknown; "coach" alone is None when the selected coach could not answer.
	    """
	    pending = _PENDING.get(key)
	    if pending is None:
	        return {"coach": None, "coach_model": None}

	    if mode != "offline":
	        coach_model = "Nemotron-Nano-VL · online (Modal GPU)"
	        try:
	            coaching = C.get_coaching(pending["payload"], pending["frames_b64"])
	        except C.CoachUnavailable:
	            coaching = None
	        return {"coach": coaching, "coach_model": coach_model}

	    coach_model = "MiniCPM-V-4.6 · OpenBMB · on-Space GPU (ZeroGPU)"
	    try:
	        raw_text = _coach_offline(
	            pending["frame_paths"], C.SYS, C.user_msg(pending["payload"]))
	        coaching = C._clean(raw_text)
	    except Exception as e:
	        # Any failure of the local model is reported as "no coaching" rather than an error.
	        print("offline coach failed:", e, flush=True)
	        coaching = None
	    return {"coach": coaching, "coach_model": coach_model}


	@app.get("/api/examples")
	async def api_examples():
	    """Return the file names of the bundled example clips as a JSON array."""
	    response = JSONResponse(EXAMPLES)
	    return response


	@app.get("/examples/{name}")
	async def serve_example(name: str):
	    """Serve one bundled example clip as video/mp4, or a JSON 404 if it does not exist."""
	    # Only the final path component is used, so the lookup stays inside examples/.
	    file_name = os.path.basename(name)
	    candidate = os.path.join(HERE, "examples", file_name)
	    if os.path.isfile(candidate):
	        return FileResponse(candidate, media_type="video/mp4")
	    return JSONResponse({"error": "not found"}, status_code=404)


	@app.get("/thumb/{name}")
	async def thumb(name: str):
	    """Serve a JPEG thumbnail for a bundled example clip, generating it on first request.

	    The thumbnail is a single frame taken 1.2 s into the clip, scaled to 360 px wide,
	    and cached as /tmp/thumb_<name>.jpg.

	    Returns a JSON 404 when the example does not exist, and a JSON 500 when the
	    thumbnail could not be produced (ffmpeg missing or failed). Previously a failed
	    ffmpeg run still returned a FileResponse for a file that was never written.
	    """
	    # Only the final path component is used, so the lookup stays inside examples/.
	    base = os.path.basename(name)
	    src = os.path.join(HERE, "examples", base)
	    if not os.path.isfile(src):
	        return JSONResponse({"error": "not found"}, status_code=404)

	    out = f"/tmp/thumb_{base}.jpg"

	    def _ready():
	        # A zero-byte file is what a failed run can leave behind; treat it as missing.
	        return os.path.isfile(out) and os.path.getsize(out) > 0

	    if not _ready():
	        # NOTE(review): subprocess.run is synchronous, so this async handler blocks while
	        # ffmpeg runs. It only happens on the first request per clip.
	        try:
	            subprocess.run(["ffmpeg", "-y", "-v", "error", "-ss", "1.2", "-i", src,
	                            "-vframes", "1", "-vf", "scale=360:-1", out], check=False)
	        except OSError as e:
	            # e.g. the ffmpeg binary is not installed
	            print("thumbnail generation failed:", e, flush=True)

	    if not _ready():
	        return JSONResponse({"error": "thumbnail unavailable"}, status_code=500)
	    return FileResponse(out, media_type="image/jpeg")


	@app.get("/")
	async def home():
	    """Serve the custom frontend page (index.html) with caching disabled."""
	    index_path = os.path.join(HERE, "index.html")
	    with open(index_path, encoding="utf-8") as page:
	        html = page.read()
	    # The file is re-read on every request and sent with no-store caching headers.
	    return HTMLResponse(html, headers={"Cache-Control": "no-store, max-age=0"})


	# Start the server at import time. allowed_paths covers both places analyse() hands files
	# back from: RENDER_DIR (rendered video, pose card) and /tmp (the raw-clip fallback).
	# NOTE(review): "/tmp" already contains RENDER_DIR and is a broad allow-list - confirm that
	# exposing all of /tmp is intended.
	app.launch(show_error=True, allowed_paths=[RENDER_DIR, "/tmp"])