File size: 7,666 Bytes
5c65a0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cec6d52
5c65a0c
 
 
 
 
 
 
25a548e
 
 
b8cfdc9
25a548e
 
 
 
5c65a0c
 
 
 
 
b8cfdc9
5c65a0c
 
 
 
 
 
 
 
865898a
 
 
 
b8cfdc9
 
 
 
 
 
 
70694ee
 
865898a
 
 
 
 
 
 
 
 
 
 
 
 
70694ee
865898a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c65a0c
 
 
865898a
 
 
5c65a0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
865898a
 
 
 
5c65a0c
865898a
5c65a0c
 
 
 
 
 
 
 
 
 
865898a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c65a0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
"""FUT World Cup Coach — custom frontend on gradio.Server (Off-Brand quest).

A bespoke football-stadium HTML/JS frontend (index.html) talks to Gradio's backend engine:
@app.api() runs RF-DETR detection on ZeroGPU, renders the analysed video, and calls the
Nemotron-VL coach on Modal (MiniCPM-V on ZeroGPU as the local fallback) — all behind
Gradio's queue + gradio_client, hosted on a ZeroGPU Space.
"""
import glob
import json
import os
import shutil
import subprocess

from gradio import Server
from gradio.data_classes import FileData
from fastapi.responses import HTMLResponse, FileResponse, JSONResponse
import spaces
from huggingface_hub import hf_hub_download

from futheros import pipeline as P
from futheros import coach as C
from futheros.render import render, pose_card_img

# Hugging Face Hub repo that hosts the RF-DETR checkpoint fetched at import.
HF_MODEL = "build-small-hackathon/kicky-ai-rfdetr-seg"
# Rendered videos and pose cards are written here; created eagerly so writers never race on mkdir.
RENDER_DIR = "/tmp/app_renders"
os.makedirs(RENDER_DIR, exist_ok=True)
# Directory containing this file; labels/, examples/ and index.html are resolved against it.
HERE = os.path.dirname(os.path.abspath(__file__))
# Label tables, looked up by clip name in analyse(). Opened via `with` so the file handles
# are closed deterministically, and decoded as UTF-8 (the JSON default) rather than the locale.
with open(os.path.join(HERE, "labels/goal_labels.json"), encoding="utf-8") as _f:
    GT = json.load(_f)
with open(os.path.join(HERE, "labels/leg_labels.json"), encoding="utf-8") as _f:
    LEG = json.load(_f)
# File names (no directory part) of the bundled example clips, sorted by path.
EXAMPLES = [os.path.basename(p) for p in sorted(glob.glob(os.path.join(HERE, "examples/*.mp4")))]

app = Server()

# The detector is built a single time, at import. Downloading/constructing it *inside* a
# @spaces.GPU window would rebuild the model on every call (the GPU fork loses module
# globals) -> ~20-30s per call. Built at module level, the spaces lib moves it to GPU per
# call, so each call only pays for inference.
_CKPT = hf_hub_download(HF_MODEL, "checkpoint_best_ema.pth")
try:
    _MODEL = P.rfdetr_model(_CKPT, "small")
except Exception as _build_err:
    # Best-effort: leave the slot empty so _model() can build the detector on first use.
    print("eager detector build failed, will lazy-load:", _build_err, flush=True)
    _MODEL = None


def _model():
    """Return the shared RF-DETR detector, building it first if the eager import-time build left it unset."""
    global _MODEL
    if _MODEL is not None:
        return _MODEL
    _MODEL = P.rfdetr_model(_CKPT, "small")
    return _MODEL


@spaces.GPU(duration=120)
def _detect(path):
    """Run the RF-DETR detector over the video at *path* and return the pipeline's detection result.

    Wrapped in ``spaces.GPU`` with a 120-second duration.
    """
    detector = _model()
    return P.detect_rfdetr(path, detector)


# ---- Offline coach: MiniCPM-V-4.6 (OpenBMB, 1.3B) on the Space's own ZeroGPU (PyTorch) ----
# Everything runs on the Space: the open VLM grades the shot on-GPU, with no external API.
MINICPM_REPO = "openbmb/MiniCPM-V-4.6"
_MINICPM = None  # (model, processor) tuple, populated lazily by _minicpm()

# Fetch the offline VLM weights at import time (on CPU) so that the first offline coach call
# does not have to download ~2.6 GB inside a @spaces.GPU window and time out.
# Best-effort: any failure is logged and the app keeps starting.
try:
    from huggingface_hub import snapshot_download
    snapshot_download(MINICPM_REPO)
except Exception as _prefetch_err:
    print("MiniCPM prefetch skipped:", _prefetch_err, flush=True)


def _minicpm():
    """Return the cached ``(model, processor)`` pair for MINICPM_REPO, loading both on first call.

    The model is loaded in bfloat16 and switched to eval mode; the heavy imports are
    deferred to the first call.
    """
    global _MINICPM
    if _MINICPM is not None:
        return _MINICPM

    import torch
    from transformers import AutoModelForImageTextToText, AutoProcessor

    processor = AutoProcessor.from_pretrained(MINICPM_REPO, trust_remote_code=True)
    model = AutoModelForImageTextToText.from_pretrained(
        MINICPM_REPO,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    _MINICPM = (model.eval(), processor)
    return _MINICPM


@spaces.GPU(duration=120)
def _coach_offline(frame_paths, sys_text, user_text):
    """Generate coaching text with the local MiniCPM-V model on CUDA.

    Args:
        frame_paths: paths of the key-frame images; each becomes an image part of the user turn.
        sys_text: text of the system turn.
        user_text: text appended after the images in the user turn.

    Returns:
        The decoded completion for the single conversation, special tokens skipped.
    """
    import torch

    model, processor = _minicpm()
    model = model.to("cuda")

    image_parts = [{"type": "image", "url": os.path.abspath(frame)} for frame in frame_paths]
    text_part = {"type": "text", "text": user_text}
    system_turn = {"role": "system", "content": [{"type": "text", "text": sys_text}]}
    user_turn = {"role": "user", "content": [*image_parts, text_part]}

    inputs = processor.apply_chat_template(
        [system_turn, user_turn],
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
        downsample_mode="16x",
        max_slice_nums=9,
    ).to("cuda")

    with torch.no_grad():
        generated = model.generate(
            **inputs, downsample_mode="16x", max_new_tokens=320, do_sample=False)

    # Drop the leading len(prompt) tokens from every generated sequence before decoding.
    completions = [
        output_ids[len(prompt_ids):]
        for prompt_ids, output_ids in zip(inputs.input_ids, generated)
    ]
    return processor.batch_decode(completions, skip_special_tokens=True)[0]


# Two-phase flow so the UI can show results right away: /analyse (fast: detect + render +
# pose + stats) followed by /coach (slower: the AI coach). analyse() stashes what the coach
# needs in this dict under the `key` it returns, and coach() looks the entry up by that key,
# so the coach reuses the analysis instead of re-detecting.
_PENDING: dict = {}


# Upper bound on the number of analyses kept for /coach. Each entry holds base64-encoded
# frames, and keys derive from the caller-supplied `hint`, so the stash must not grow forever.
_PENDING_MAX = 64


@app.api()
def analyse(video: FileData, hint: str = "") -> dict:
    """RF-DETR detect (ZeroGPU) -> goal physics + pose + speed -> rendered video + stats.

    Returns fast; the AI coach runs separately via /coach so it never blocks the results.

    Args:
        video: the uploaded clip; its ``"path"`` entry is the file that gets analysed.
        hint: optional original file name. When given, its base name (extension stripped)
            names the outputs and is the key used for the label lookups.

    Returns:
        A dict with ``key`` (pass it to /coach), ``video`` and ``pose`` file references,
        the ``goal`` / ``goal_time`` / ``foot`` / ``speed`` results, and the ``gt_goal`` /
        ``gt_leg`` label entries for this clip name (``None`` when the name has no label).
    """
    src = video["path"]
    # uploads land as a nameless 'blob' -> copy to a clean .mp4 (and keep the example name via `hint`)
    name = os.path.splitext(os.path.basename(hint or src))[0] or "clip"
    path = f"/tmp/in_{name}_{os.getpid()}.mp4"
    shutil.copyfile(src, path)
    tr = _detect(path)
    res = P.analyse(tr, path)
    out = f"{RENDER_DIR}/{name}.mp4"
    try:
        render(path, tr, res, out)
        video_out = out
    except Exception as e:
        # Rendering is best-effort: the stats are still useful alongside the raw clip.
        print("render failed, falling back to raw clip:", e, flush=True)
        video_out = path
    pose = pose_card_img(path, tr, res, f"{RENDER_DIR}/{name}_pose.png")
    # stash everything the coach needs so /coach can run without re-detecting
    # (pop first so a re-analysed clip moves to the newest position)
    _PENDING.pop(name, None)
    _PENDING[name] = {"payload": C.build_payload(res),
                      "frames_b64": C.key_frames_b64(path, res),
                      "frame_paths": C.key_frame_paths(path, res, RENDER_DIR)}
    # Evict the oldest entries once the cap is exceeded. Dicts keep insertion order, and the
    # key list is snapshotted first so the dict is never mutated while being iterated.
    overflow = len(_PENDING) - _PENDING_MAX
    if overflow > 0:
        for stale in list(_PENDING)[:overflow]:
            _PENDING.pop(stale, None)
    return {
        "key": name,
        "video": FileData(path=video_out),
        "pose": FileData(path=pose) if pose else None,
        "goal": res.goal,
        # NOTE(review): falsy values (0 / 0.0) are reported as None here — confirm that is intended.
        "goal_time": round(res.goal_time, 1) if res.goal_time else None,
        "foot": res.foot,
        "speed": round(res.speed_kmh) if res.speed_kmh else None,
        "gt_goal": GT.get(f"5/{name}.mp4"), "gt_leg": LEG.get(f"5/{name}.mp4"),
    }


@app.api()
def coach(key: str, mode: str = "online") -> dict:
    """Run the AI coach for an earlier /analyse result.

    mode="offline" -> MiniCPM-V-4.6 on this Space's GPU; any other mode -> Nemotron-Nano-VL
    on Modal. Returns ``{"coach": text-or-None, "coach_model": label-or-None}``; both are
    ``None`` when *key* has no stashed analysis, and ``coach`` is ``None`` when the selected
    coach fails.
    """
    pending = _PENDING.get(key)
    if pending is None:
        return {"coach": None, "coach_model": None}

    if mode != "offline":
        coach_model = "Nemotron-Nano-VL · online (Modal GPU)"
        try:
            coaching = C.get_coaching(pending["payload"], pending["frames_b64"])
        except C.CoachUnavailable:
            coaching = None
        return {"coach": coaching, "coach_model": coach_model}

    coach_model = "MiniCPM-V-4.6 · OpenBMB · on-Space GPU (ZeroGPU)"
    try:
        raw_text = _coach_offline(
            pending["frame_paths"], C.SYS, C.user_msg(pending["payload"]))
        coaching = C._clean(raw_text)
    except Exception as e:
        # Any offline failure degrades to "no coaching" rather than failing the request.
        print("offline coach failed:", e, flush=True)
        coaching = None
    return {"coach": coaching, "coach_model": coach_model}


@app.get("/api/examples")
async def api_examples():
    """Return the file names of the bundled example clips as a JSON array."""
    return JSONResponse(content=EXAMPLES)


@app.get("/examples/{name}")
async def serve_example(name: str):
    """Serve one bundled example clip as video/mp4, or a JSON 404 if it does not exist."""
    # Only the base name is used, so the lookup cannot leave the examples directory.
    clip_path = os.path.join(HERE, "examples", os.path.basename(name))
    if os.path.isfile(clip_path):
        return FileResponse(clip_path, media_type="video/mp4")
    return JSONResponse({"error": "not found"}, status_code=404)


@app.get("/thumb/{name}")
async def thumb(name: str):
    """Serve a JPEG thumbnail for a bundled example clip, generating and caching it on first use.

    The frame is grabbed with ffmpeg at 1.2 s and scaled to 360 px wide, then cached as
    ``/tmp/thumb_<name>.jpg``.

    Returns:
        The cached JPEG; a JSON 404 when the example clip does not exist; a JSON 500 when
        no thumbnail could be produced (ffmpeg missing, or it wrote no frame).
    """
    clip = os.path.basename(name)  # base name only: the lookup stays inside examples/
    src = os.path.join(HERE, "examples", clip)
    if not os.path.isfile(src):
        return JSONResponse({"error": "not found"}, status_code=404)
    out = f"/tmp/thumb_{clip}.jpg"
    if not os.path.exists(out):
        # Render to a scratch file and rename into place, so a half-written image is never
        # mistaken for a finished cache entry. The scratch name keeps the .jpg suffix because
        # ffmpeg picks the output format from the extension.
        tmp = f"/tmp/thumb_{clip}.{os.getpid()}.part.jpg"
        try:
            subprocess.run(["ffmpeg", "-y", "-v", "error", "-ss", "1.2", "-i", src,
                            "-vframes", "1", "-vf", "scale=360:-1", tmp], check=False)
        except OSError as e:  # e.g. the ffmpeg binary is not installed
            print("thumbnail generation failed:", e, flush=True)
        if os.path.isfile(tmp) and os.path.getsize(tmp) > 0:
            os.replace(tmp, out)
        else:
            try:
                os.remove(tmp)
            except OSError:
                pass  # nothing was written; nothing to clean up
            return JSONResponse({"error": "thumbnail unavailable"}, status_code=500)
    return FileResponse(out, media_type="image/jpeg")


@app.get("/")
async def home():
    """Serve index.html from this file's directory, with caching disabled via Cache-Control."""
    page_path = os.path.join(HERE, "index.html")
    with open(page_path, encoding="utf-8") as page:
        html = page.read()
    return HTMLResponse(html, headers={"Cache-Control": "no-store, max-age=0"})


# Start the server at import time (there is no __main__ guard). allowed_paths lists the
# directories analyse() hands files back from: RENDER_DIR for the rendered video and pose
# card, and /tmp for the copied input clip used as the fallback when rendering fails.
# NOTE(review): "/tmp" already contains RENDER_DIR and is broad — confirm this exposure is intended.
app.launch(show_error=True, allowed_paths=[RENDER_DIR, "/tmp"])