Spaces:
Running on Zero
Running on Zero
| """FUT World Cup Coach — custom frontend on gradio.Server (Off-Brand quest). | |
| A bespoke football-stadium HTML/JS frontend (index.html) talks to Gradio's backend engine: | |
| @app.api() runs RF-DETR detection on ZeroGPU, renders the analysed video, and calls the | |
| Nemotron-VL coach on Modal (MiniCPM-V on ZeroGPU as the local fallback) — all behind | |
| Gradio's queue + gradio_client, hosted on a ZeroGPU Space. | |
| """ | |
| import glob | |
| import json | |
| import os | |
| import shutil | |
| import subprocess | |
| from gradio import Server | |
| from gradio.data_classes import FileData | |
| from fastapi.responses import HTMLResponse, FileResponse, JSONResponse | |
| import spaces | |
| from huggingface_hub import hf_hub_download | |
| from futheros import pipeline as P | |
| from futheros import coach as C | |
| from futheros.render import render, pose_card_img | |
# Hugging Face repo that hosts the RF-DETR checkpoint.
HF_MODEL = "build-small-hackathon/kicky-ai-rfdetr-seg"

# Directory that receives rendered videos, pose images and coach key frames.
RENDER_DIR = "/tmp/app_renders"
os.makedirs(RENDER_DIR, exist_ok=True)

# Directory containing this file; labels, examples and index.html are resolved against it.
HERE = os.path.dirname(os.path.abspath(__file__))


def _load_json(rel_path):
    """Parse the JSON file at `rel_path` (relative to HERE) and return its content.

    The file is opened in a `with` block so the handle is closed deterministically,
    and decoded as UTF-8 regardless of the process locale.
    """
    with open(os.path.join(HERE, rel_path), encoding="utf-8") as fh:
        return json.load(fh)


# Label tables that analyse() queries with keys of the form "5/<clip name>.mp4".
GT = _load_json("labels/goal_labels.json")
LEG = _load_json("labels/leg_labels.json")

# File names (not full paths) of the bundled example clips, sorted by path.
EXAMPLES = [os.path.basename(p) for p in sorted(glob.glob(os.path.join(HERE, "examples/*.mp4")))]

app = Server()
# The detector is constructed a single time, while the module is being imported.
# If it were downloaded/constructed *inside* a @spaces.GPU window, every call would rebuild the
# model (the GPU fork loses module globals) -> ~20-30s per call. Built at module level, the
# spaces lib moves it to the GPU per call, so each call only pays for inference.
_CKPT = hf_hub_download(HF_MODEL, "checkpoint_best_ema.pth")

try:
    _MODEL = P.rfdetr_model(_CKPT, "small")
except Exception as _build_err:
    # Best effort: leave the slot empty so _model() retries the build on first use.
    print("eager detector build failed, will lazy-load:", _build_err, flush=True)
    _MODEL = None
def _model():
    """Return the shared RF-DETR detector, building it now if the eager build left it unset."""
    global _MODEL
    if _MODEL is not None:
        return _MODEL
    # The import-time build failed (or has not happened): construct and cache the detector.
    _MODEL = P.rfdetr_model(_CKPT, "small")
    return _MODEL
def _detect(path):
    """Run RF-DETR detection on the video at `path` with the shared detector.

    Returns whatever `P.detect_rfdetr` produces; analyse() passes that value on to
    the analysis and rendering steps.
    """
    detector = _model()
    return P.detect_rfdetr(path, detector)
# ---- Offline coach: MiniCPM-V-4.6 (OpenBMB, 1.3B) on the Space's own ZeroGPU (PyTorch) ----
# Everything runs on the Space itself: the open VLM grades the shot on-GPU, no external API.
MINICPM_REPO = "openbmb/MiniCPM-V-4.6"

# Cache slot for the (model, processor) pair; filled lazily by _minicpm().
_MINICPM = None

# Fetch the offline VLM weights during import (CPU) so that the first offline coach call does
# not have to download ~2.6 GB inside a @spaces.GPU window and time out.
try:
    from huggingface_hub import snapshot_download

    snapshot_download(repo_id=MINICPM_REPO)
except Exception as _prefetch_err:
    # Prefetching is only an optimisation; a failure here must not stop the app from starting.
    print("MiniCPM prefetch skipped:", _prefetch_err, flush=True)
def _minicpm():
    """Return the cached `(model, processor)` pair for MiniCPM-V, loading it on first use.

    The model is loaded in bfloat16 with `trust_remote_code=True` and switched to eval
    mode. torch/transformers are imported only when a load is actually needed.
    """
    global _MINICPM
    if _MINICPM is not None:
        return _MINICPM

    import torch
    from transformers import AutoModelForImageTextToText, AutoProcessor

    processor = AutoProcessor.from_pretrained(MINICPM_REPO, trust_remote_code=True)
    model = AutoModelForImageTextToText.from_pretrained(
        MINICPM_REPO,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    _MINICPM = (model.eval(), processor)
    return _MINICPM
def _coach_offline(frame_paths, sys_text, user_text):
    """Generate coaching text with the local MiniCPM-V model on CUDA.

    Args:
        frame_paths: paths of the key-frame images to show the model.
        sys_text: system prompt text.
        user_text: user prompt text, appended after the images.

    Returns:
        The decoded completion for the single conversation, with the prompt tokens
        removed and special tokens skipped.
    """
    import torch

    model, processor = _minicpm()
    model = model.to("cuda")

    # User turn: every key frame first (as an absolute path), then the text prompt.
    user_content = []
    for frame in frame_paths:
        user_content.append({"type": "image", "url": os.path.abspath(frame)})
    user_content.append({"type": "text", "text": user_text})

    system_turn = {"role": "system", "content": [{"type": "text", "text": sys_text}]}
    user_turn = {"role": "user", "content": user_content}

    inputs = processor.apply_chat_template(
        [system_turn, user_turn],
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
        downsample_mode="16x",
        max_slice_nums=9,
    ).to("cuda")

    with torch.no_grad():
        generated = model.generate(
            **inputs, downsample_mode="16x", max_new_tokens=320, do_sample=False
        )

    # Drop the leading prompt-length slice of each sequence so only new tokens are decoded.
    completions = []
    for prompt_ids, full_ids in zip(inputs.input_ids, generated):
        completions.append(full_ids[len(prompt_ids):])
    return processor.batch_decode(completions, skip_special_tokens=True)[0]
# Two-phase flow so the UI shows results instantly: /analyse (fast: detect+render+pose+stats)
# runs first, then /coach (slower: the AI coach) is called with the `key` returned by analyse(),
# so the coach reuses the stored analysis (no re-detect).
# Maps key -> {"payload", "frames_b64", "frame_paths"}, written by analyse() and read by coach().
# NOTE(review): entries are never removed, and a later analysis with the same key overwrites
# the earlier one.
_PENDING: dict = {}
def analyse(video: FileData, hint: str = "") -> dict:
    """RF-DETR detect (ZeroGPU) -> goal physics + pose + speed -> rendered video + stats.

    Returns fast; the AI coach runs separately via /coach so it never blocks the results.

    Args:
        video: uploaded clip; only its "path" entry is read.
        hint: optional original file name. Its stem becomes the result key and is used
            for the label lookups.

    Returns:
        A dict with the keys "key", "video", "pose", "goal", "goal_time", "foot",
        "speed", "gt_goal" and "gt_leg".
    """
    upload_path = video["path"]

    # Uploads land as a nameless 'blob' -> copy to a clean .mp4 (the example name survives
    # through `hint`).
    stem, _ext = os.path.splitext(os.path.basename(hint or upload_path))
    name = stem or "clip"
    clip_path = f"/tmp/in_{name}_{os.getpid()}.mp4"
    shutil.copyfile(upload_path, clip_path)

    detections = _detect(clip_path)
    result = P.analyse(detections, clip_path)

    rendered_path = f"{RENDER_DIR}/{name}.mp4"
    try:
        render(clip_path, detections, result, rendered_path)
    except Exception as e:
        # A failed render must not lose the analysis: show the unannotated clip instead.
        print("render failed, falling back to raw clip:", e, flush=True)
        video_out = clip_path
    else:
        video_out = rendered_path

    pose = pose_card_img(clip_path, detections, result, f"{RENDER_DIR}/{name}_pose.png")

    # Stash everything the coach needs so /coach can run without re-detecting.
    coach_inputs = {}
    coach_inputs["payload"] = C.build_payload(result)
    coach_inputs["frames_b64"] = C.key_frames_b64(clip_path, result)
    coach_inputs["frame_paths"] = C.key_frame_paths(clip_path, result, RENDER_DIR)
    _PENDING[name] = coach_inputs

    response = {"key": name}
    response["video"] = FileData(path=video_out)
    response["pose"] = FileData(path=pose) if pose else None
    response["goal"] = result.goal
    # Falsy goal_time / speed_kmh values are reported as None.
    response["goal_time"] = round(result.goal_time, 1) if result.goal_time else None
    response["foot"] = result.foot
    response["speed"] = round(result.speed_kmh) if result.speed_kmh else None
    label_key = f"5/{name}.mp4"
    response["gt_goal"] = GT.get(label_key)
    response["gt_leg"] = LEG.get(label_key)
    return response
def coach(key: str, mode: str = "online") -> dict:
    """Run the AI coach for an earlier /analyse result.

    mode="online" -> Nemotron-Nano-VL on Modal · mode="offline" -> MiniCPM-V-4.6 on this Space's GPU.

    Args:
        key: the "key" value returned by analyse().
        mode: "offline" selects the on-Space model; any other value selects the online coach.

    Returns:
        {"coach": <text or None>, "coach_model": <label or None>}. Both values are None
        when `key` is unknown; "coach" alone is None when the selected coach failed.
    """
    entry = _PENDING.get(key)
    if entry is None:
        return {"coach": None, "coach_model": None}

    if mode != "offline":
        label = "Nemotron-Nano-VL · online (Modal GPU)"
        try:
            advice = C.get_coaching(entry["payload"], entry["frames_b64"])
        except C.CoachUnavailable:
            advice = None
        return {"coach": advice, "coach_model": label}

    label = "MiniCPM-V-4.6 · OpenBMB · on-Space GPU (ZeroGPU)"
    try:
        raw_text = _coach_offline(entry["frame_paths"], C.SYS, C.user_msg(entry["payload"]))
        advice = C._clean(raw_text)
    except Exception as e:
        # Any failure of the local model is logged and reported as "no coaching".
        print("offline coach failed:", e, flush=True)
        advice = None
    return {"coach": advice, "coach_model": label}
async def api_examples():
    """Return the file names of the bundled example clips as a JSON array."""
    return JSONResponse(content=EXAMPLES)
async def serve_example(name: str):
    """Serve one bundled example clip as video/mp4, or a JSON 404 if it does not exist.

    Only the base name of `name` is used, so the lookup stays inside the examples directory.
    """
    clip = os.path.join(HERE, "examples", os.path.basename(name))
    if os.path.isfile(clip):
        return FileResponse(clip, media_type="video/mp4")
    return JSONResponse({"error": "not found"}, status_code=404)
async def thumb(name: str):
    """Serve a JPEG thumbnail for a bundled example clip, creating it on first request.

    The thumbnail is one frame taken 1.2 s into the clip, scaled to 360 px wide and
    cached as /tmp/thumb_<name>.jpg.

    Returns:
        The JPEG as a FileResponse; a JSON 404 when the example clip does not exist;
        a JSON 500 when no usable thumbnail could be produced (ffmpeg missing, failed,
        or wrote nothing).
    """
    import asyncio

    clip_name = os.path.basename(name)  # base name only: stay inside the examples directory
    src = os.path.join(HERE, "examples", clip_name)
    if not os.path.isfile(src):
        return JSONResponse({"error": "not found"}, status_code=404)

    out = f"/tmp/thumb_{clip_name}.jpg"
    if not os.path.exists(out):
        cmd = ["ffmpeg", "-y", "-v", "error", "-ss", "1.2", "-i", src,
               "-vframes", "1", "-vf", "scale=360:-1", out]
        try:
            # Run ffmpeg in a worker thread so the event loop keeps serving other requests.
            await asyncio.to_thread(subprocess.run, cmd, check=False)
        except OSError as e:
            # e.g. the ffmpeg binary is not installed
            print("thumbnail extraction could not start:", e, flush=True)

    # ffmpeg is run with check=False, so confirm it actually produced an image before
    # handing the path to FileResponse (which raises if the file is missing).
    if not os.path.isfile(out) or os.path.getsize(out) == 0:
        return JSONResponse({"error": "thumbnail unavailable"}, status_code=500)
    return FileResponse(out, media_type="image/jpeg")
async def home():
    """Serve the custom frontend page (index.html next to this file) with caching disabled."""
    index_path = os.path.join(HERE, "index.html")
    with open(index_path, encoding="utf-8") as page:
        markup = page.read()
    # no-store: the page is re-read from disk and re-sent on every request.
    return HTMLResponse(markup, headers={"Cache-Control": "no-store, max-age=0"})
# Start the server. analyse() returns file paths under RENDER_DIR (rendered video, pose image)
# and, when rendering fails, the raw input copy under /tmp.
# NOTE(review): presumably that fallback is why "/tmp" is allowed in addition to RENDER_DIR — confirm.
app.launch(show_error=True, allowed_paths=[RENDER_DIR, "/tmp"])