Spaces:
Running on Zero
Running on Zero
File size: 7,666 Bytes
5c65a0c cec6d52 5c65a0c 25a548e b8cfdc9 25a548e 5c65a0c b8cfdc9 5c65a0c 865898a b8cfdc9 70694ee 865898a 70694ee 865898a 5c65a0c 865898a 5c65a0c 865898a 5c65a0c 865898a 5c65a0c 865898a 5c65a0c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 | """FUT World Cup Coach — custom frontend on gradio.Server (Off-Brand quest).
A bespoke football-stadium HTML/JS frontend (index.html) talks to Gradio's backend engine:
@app.api() runs RF-DETR detection on ZeroGPU, renders the analysed video, and calls the
Nemotron-VL coach on Modal (MiniCPM-V on ZeroGPU as the local fallback) — all behind
Gradio's queue + gradio_client, hosted on a ZeroGPU Space.
"""
import glob
import json
import os
import shutil
import subprocess
from gradio import Server
from gradio.data_classes import FileData
from fastapi.responses import HTMLResponse, FileResponse, JSONResponse
import spaces
from huggingface_hub import hf_hub_download
from futheros import pipeline as P
from futheros import coach as C
from futheros.render import render, pose_card_img
# Hub repo that hosts the RF-DETR checkpoint used by the detector.
HF_MODEL = "build-small-hackathon/kicky-ai-rfdetr-seg"

# Rendered videos / pose cards are written here; created up front so writers never race on it.
RENDER_DIR = "/tmp/app_renders"
os.makedirs(RENDER_DIR, exist_ok=True)

# Directory containing this file; labels/, examples/ and index.html are resolved against it.
HERE = os.path.dirname(os.path.abspath(__file__))


def _load_json(relpath):
    """Parse the JSON file at ``relpath`` (relative to ``HERE``) and return its content.

    The file is opened in a ``with`` block so the handle is closed deterministically, and it
    is decoded as UTF-8 explicitly instead of relying on the process locale.
    """
    with open(os.path.join(HERE, relpath), encoding="utf-8") as fh:
        return json.load(fh)


# Label tables looked up by /analyse with keys of the form "5/<name>.mp4".
GT = _load_json("labels/goal_labels.json")
LEG = _load_json("labels/leg_labels.json")

# Basenames of the bundled example clips, in sorted path order.
EXAMPLES = [os.path.basename(p) for p in sorted(glob.glob(os.path.join(HERE, "examples/*.mp4")))]

app = Server()
# Construct the detector a single time, at import. If the download/construction happened
# *inside* a @spaces.GPU window, every call would rebuild the model (the GPU fork loses
# module globals) -> ~20-30s per call. Built at module level, the spaces lib moves it to
# the GPU per call, so each call only pays for inference.
_CKPT = hf_hub_download(HF_MODEL, "checkpoint_best_ema.pth")
try:
    _MODEL = P.rfdetr_model(_CKPT, "small")
except Exception as _build_err:
    # Best-effort: leave the slot empty so _model() can build the detector on first use.
    print("eager detector build failed, will lazy-load:", _build_err, flush=True)
    _MODEL = None
def _model():
    """Return the shared detector, building it from ``_CKPT`` if the eager build left it unset."""
    global _MODEL
    if _MODEL is not None:
        return _MODEL
    _MODEL = P.rfdetr_model(_CKPT, "small")
    return _MODEL
@spaces.GPU(duration=120)
def _detect(path):
    """Run ``P.detect_rfdetr`` on the clip at ``path`` with the shared detector.

    Wrapped in ``spaces.GPU(duration=120)``; returns whatever ``P.detect_rfdetr`` returns.
    """
    detector = _model()
    return P.detect_rfdetr(path, detector)
# ---- Offline coach: MiniCPM-V-4.6 (OpenBMB, 1.3B) on this Space's own ZeroGPU (PyTorch) ----
# In this mode everything stays on the Space: the open VLM grades the shot on-GPU and no
# external API is called.
MINICPM_REPO = "openbmb/MiniCPM-V-4.6"
_MINICPM = None  # filled lazily by _minicpm() with a (model, processor) tuple

# Fetch the offline VLM weights at import time (on CPU), so the first offline coach call does
# not have to download ~2.6 GB inside a @spaces.GPU window and time out.
try:
    from huggingface_hub import snapshot_download
    snapshot_download(MINICPM_REPO)
except Exception as _prefetch_err:
    # Prefetching is only an optimisation; the app still starts without it.
    print("MiniCPM prefetch skipped:", _prefetch_err, flush=True)
def _minicpm():
    """Return the cached ``(model, processor)`` pair for ``MINICPM_REPO``, loading it on first use.

    The model is loaded in bfloat16 and switched to eval mode; both objects are created with
    ``trust_remote_code=True``.
    """
    global _MINICPM
    if _MINICPM is not None:
        return _MINICPM

    # Heavy imports are deferred until the offline coach is actually requested.
    import torch
    from transformers import AutoModelForImageTextToText, AutoProcessor

    processor = AutoProcessor.from_pretrained(MINICPM_REPO, trust_remote_code=True)
    model = AutoModelForImageTextToText.from_pretrained(
        MINICPM_REPO,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    _MINICPM = (model.eval(), processor)
    return _MINICPM
@spaces.GPU(duration=120)
def _coach_offline(frame_paths, sys_text, user_text):
    """Generate coaching text from key frames with the on-Space MiniCPM-V model.

    Args:
        frame_paths: iterable of image file paths; each becomes one image entry of the user turn.
        sys_text: text of the system message.
        user_text: text appended after the images in the user message.

    Returns:
        The decoded completion for the single conversation, with the prompt tokens removed.
    """
    import torch

    model, processor = _minicpm()
    model = model.to("cuda")

    # User turn: every frame as an image entry, followed by the textual request.
    user_content = [{"type": "image", "url": os.path.abspath(fp)} for fp in frame_paths]
    user_content.append({"type": "text", "text": user_text})
    system_turn = {"role": "system", "content": [{"type": "text", "text": sys_text}]}
    user_turn = {"role": "user", "content": user_content}

    batch = processor.apply_chat_template(
        [system_turn, user_turn],
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
        downsample_mode="16x",
        max_slice_nums=9,
    ).to("cuda")

    with torch.no_grad():
        generated = model.generate(
            **batch, downsample_mode="16x", max_new_tokens=320, do_sample=False)

    # Each generated sequence starts with its prompt; slice that prefix off before decoding.
    completions = [
        out_ids[len(prompt_ids):]
        for prompt_ids, out_ids in zip(batch.input_ids, generated)
    ]
    return processor.batch_decode(completions, skip_special_tokens=True)[0]
# Two-phase flow so the UI can show results immediately: /analyse (fast: detect + render +
# pose + stats) runs first, then /coach (slower: the AI coach) is called with the returned
# `key` so the coach reuses the stored analysis instead of re-running detection.
# Maps that key -> {"payload", "frames_b64", "frame_paths"}; entries are written by analyse()
# and read by coach(), and nothing in this file removes them.
_PENDING = {}
@app.api()
def analyse(video: FileData, hint: str = "") -> dict:
    """Detect, analyse and render one clip, returning the stats without waiting for the coach.

    The clip is copied to a ``.mp4`` under ``/tmp``, run through ``_detect`` and ``P.analyse``,
    rendered (falling back to the raw copy if rendering raises), and a pose card is produced.
    Everything the coach needs is stored in ``_PENDING`` under the returned ``key`` so /coach
    can run later without re-detecting.

    Args:
        video: uploaded file; its ``"path"`` entry is the source that gets copied.
        hint: optional original file name; when given, its stem is used instead of the
            upload's own basename (uploads arrive as a nameless 'blob').

    Returns:
        dict with ``key``, ``video``, ``pose``, ``goal``, ``goal_time``, ``foot``, ``speed``,
        ``gt_goal`` and ``gt_leg``.
    """
    upload_path = video["path"]

    # Derive a clean clip name: stem of the hint (or of the upload), "clip" if that is empty.
    stem = os.path.splitext(os.path.basename(hint or upload_path))[0]
    name = stem or "clip"
    # NOTE(review): the temp path, render paths and _PENDING key below all derive from `name`
    # (plus the pid), so two requests for same-named clips share them — confirm this is intended.
    clip = f"/tmp/in_{name}_{os.getpid()}.mp4"
    shutil.copyfile(upload_path, clip)

    detections = _detect(clip)
    result = P.analyse(detections, clip)

    rendered = f"{RENDER_DIR}/{name}.mp4"
    try:
        render(clip, detections, result, rendered)
    except Exception as err:
        # Rendering is best-effort: the caller still gets a playable video (the raw copy).
        print("render failed, falling back to raw clip:", err, flush=True)
        video_out = clip
    else:
        video_out = rendered

    pose_path = pose_card_img(clip, detections, result, f"{RENDER_DIR}/{name}_pose.png")

    # Stash everything the coach needs so /coach can run without re-detecting.
    _PENDING[name] = {
        "payload": C.build_payload(result),
        "frames_b64": C.key_frames_b64(clip, result),
        "frame_paths": C.key_frame_paths(clip, result, RENDER_DIR),
    }

    label_key = f"5/{name}.mp4"
    return {
        "key": name,
        "video": FileData(path=video_out),
        "pose": FileData(path=pose_path) if pose_path else None,
        "goal": result.goal,
        "goal_time": round(result.goal_time, 1) if result.goal_time else None,
        "foot": result.foot,
        "speed": round(result.speed_kmh) if result.speed_kmh else None,
        "gt_goal": GT.get(label_key),
        "gt_leg": LEG.get(label_key),
    }
@app.api()
def coach(key: str, mode: str = "online") -> dict:
    """Produce the AI coaching text for a clip previously processed by /analyse.

    Args:
        key: the ``key`` value returned by /analyse.
        mode: ``"offline"`` runs MiniCPM-V-4.6 on this Space's GPU; any other value uses the
            online Nemotron-Nano-VL coach on Modal.

    Returns:
        ``{"coach": text_or_None, "coach_model": label_or_None}``. Both are ``None`` when the
        key is unknown; ``coach`` alone is ``None`` when the selected coach failed.
    """
    pending = _PENDING.get(key)
    if pending is None:
        return {"coach": None, "coach_model": None}

    if mode == "offline":
        coach_model = "MiniCPM-V-4.6 · OpenBMB · on-Space GPU (ZeroGPU)"
        try:
            raw_text = _coach_offline(
                pending["frame_paths"], C.SYS, C.user_msg(pending["payload"]))
            coaching = C._clean(raw_text)
        except Exception as err:
            # Any offline failure degrades to "no coaching" instead of failing the request.
            print("offline coach failed:", err, flush=True)
            coaching = None
        return {"coach": coaching, "coach_model": coach_model}

    coach_model = "Nemotron-Nano-VL · online (Modal GPU)"
    try:
        coaching = C.get_coaching(pending["payload"], pending["frames_b64"])
    except C.CoachUnavailable:
        coaching = None
    return {"coach": coaching, "coach_model": coach_model}
@app.get("/api/examples")
async def api_examples():
    """Return the basenames of the bundled example clips as a JSON array."""
    return JSONResponse(content=EXAMPLES)
@app.get("/examples/{name}")
async def serve_example(name: str):
    """Serve one bundled example clip as ``video/mp4``, or a JSON 404 if it does not exist."""
    # Only the basename is used, so the lookup stays inside the examples directory.
    clip = os.path.join(HERE, "examples", os.path.basename(name))
    if os.path.isfile(clip):
        return FileResponse(clip, media_type="video/mp4")
    return JSONResponse({"error": "not found"}, status_code=404)
@app.get("/thumb/{name}")
async def thumb(name: str):
    """Serve a JPEG thumbnail for a bundled example clip, generating it on first request.

    The thumbnail is one frame taken 1.2 s into the clip, scaled to 360 px wide, and cached as
    ``/tmp/thumb_<name>.jpg`` so later requests skip ffmpeg.

    Returns:
        The JPEG as a ``FileResponse``; a JSON 404 when the example does not exist; a JSON 500
        when no thumbnail could be produced (ffmpeg missing or the extraction failed).
    """
    # Only the basename is used, so the lookup stays inside the examples directory.
    safe_name = os.path.basename(name)
    src = os.path.join(HERE, "examples", safe_name)
    if not os.path.isfile(src):
        return JSONResponse({"error": "not found"}, status_code=404)

    out = f"/tmp/thumb_{safe_name}.jpg"
    if not os.path.exists(out):
        try:
            subprocess.run(["ffmpeg", "-y", "-v", "error", "-ss", "1.2", "-i", src,
                            "-vframes", "1", "-vf", "scale=360:-1", out], check=False)
        except OSError as e:
            # ffmpeg binary missing / not executable: report it instead of crashing the route.
            print("thumbnail generation could not start ffmpeg:", e, flush=True)

    # check=False means a failed extraction is silent; without this guard FileResponse would
    # be handed a path that does not exist and the request would die with an unhandled error.
    if not os.path.isfile(out):
        return JSONResponse({"error": "thumbnail unavailable"}, status_code=500)
    return FileResponse(out, media_type="image/jpeg")
@app.get("/")
async def home():
    """Serve the custom frontend (``index.html`` next to this file) with caching disabled."""
    page_path = os.path.join(HERE, "index.html")
    with open(page_path, encoding="utf-8") as fh:
        html = fh.read()
    # no-store so browsers always fetch the current frontend.
    return HTMLResponse(html, headers={"Cache-Control": "no-store, max-age=0"})
# Start the server. Rendered outputs live in RENDER_DIR and the raw-clip fallback lives
# directly under /tmp, so both locations are passed as allowed paths.
app.launch(
    show_error=True,
    allowed_paths=[RENDER_DIR, "/tmp"],
)
|