fix: define REPO_NAME in hf_upload.sh (ensure_blade_space referenced it)

#13
by BladeSzaSza - opened
CLAUDE.md CHANGED
@@ -170,7 +170,7 @@ Track the running sum in `MODEL_BUDGET.md`. The two Qwen3-VL-8B models share a b
170
 
171
  The UI uses **Gradio `gr.Blocks`** with custom CSS/theme (`formscout/ui/theme.py`). Custom Svelte components for score dial, asymmetry bars, rubric drawer are planned for Phase 4. Use `gradio-svelte-expert` agent for Svelte component work.
172
 
173
- - ZeroGPU: wrap heavy inference (`Pose2DAgent.run`, `Body3DAgent.run`) in `@spaces.GPU` before deploying to Spaces.
174
  - Verify Gradio APIs against current docs before use — pin exact versions in `requirements.txt`.
175
 
176
  ## Build phases
 
170
 
171
  The UI uses **Gradio `gr.Blocks`** with custom CSS/theme (`formscout/ui/theme.py`). Custom Svelte components for score dial, asymmetry bars, rubric drawer are planned for Phase 4. Use `gradio-svelte-expert` agent for Svelte component work.
172
 
173
+ - ZeroGPU: `app.py`'s `process_video` (the Start Analysis handler) is decorated with `@spaces.GPU` (via the `gpu_task` shim, no-op off-Space) so one GPU window wraps the whole pipeline — pose, optional 3D, and the judge. **ZeroGPU aborts startup with "No @spaces.GPU function detected" unless a decorated function exists at import time**, so the decorator must stay at module level on a top-level function, not buried behind a lazy import. Window length is `config.ZEROGPU_DURATION` (default 120s, `FORMSCOUT_ZEROGPU_DURATION`).
174
  - Verify Gradio APIs against current docs before use — pin exact versions in `requirements.txt`.
175
 
176
  ## Build phases
app.py CHANGED
@@ -20,6 +20,23 @@ from formscout import config
20
  from formscout import session as session_mod
21
  from formscout.startup import ensure_checkpoints
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  ensure_checkpoints()
24
 
25
 
@@ -50,9 +67,14 @@ SCORE_DESCRIPTIONS = {
50
 
51
  # ─── Processing ──────────────────────────────────────────────────────────────
52
 
 
53
  def process_video(video_path: str, test_name: str, side: str, model_key: str,
54
  layers: list[str], session_state):
55
- """Analyse one clip and accumulate it into the screening session."""
 
 
 
 
56
  if not video_path:
57
  return (
58
  session_state, _render_empty_state(), "Upload a video to begin analysis.",
 
20
  from formscout import session as session_mod
21
  from formscout.startup import ensure_checkpoints
22
 
23
+ # ─── ZeroGPU ──────────────────────────────────────────────────────────────────
24
+ # On an HF Spaces ZeroGPU runtime the heavy analysis MUST run inside an
25
+ # @spaces.GPU function, and that function must already exist at import time:
26
+ # ZeroGPU scans for one during startup and aborts the Space with
27
+ # "No @spaces.GPU function detected during startup" if none is registered.
28
+ # We decorate process_video (the Start Analysis handler) so a single GPU window
29
+ # covers the whole pipeline — pose, optional 3D, and the Qwen3-VL judge. Off a
30
+ # ZeroGPU Space the `spaces` package is absent (or its decorator is effect-free),
31
+ # so local runs and CPU Spaces are unaffected.
32
+ try:
33
+ import spaces
34
+
35
+ gpu_task = spaces.GPU(duration=config.ZEROGPU_DURATION)
36
+ except Exception: # local dev / non-ZeroGPU β€” decorate as a no-op
37
+ def gpu_task(fn):
38
+ return fn
39
+
40
  ensure_checkpoints()
41
 
42
 
 
67
 
68
  # ─── Processing ──────────────────────────────────────────────────────────────
69
 
70
+ @gpu_task
71
  def process_video(video_path: str, test_name: str, side: str, model_key: str,
72
  layers: list[str], session_state):
73
+ """Analyse one clip and accumulate it into the screening session.
74
+
75
+ Decorated with @spaces.GPU on ZeroGPU: the whole pipeline (pose, optional 3D,
76
+ Qwen3-VL judge) runs inside one GPU window. The decorator is a no-op off-Space.
77
+ """
78
  if not video_path:
79
  return (
80
  session_state, _render_empty_state(), "Upload a video to begin analysis.",
formscout/config.py CHANGED
@@ -147,14 +147,40 @@ LLAMA_CPP_PORT_EMBED = 8081
147
  # ─── Judge backend selection ────────────────────────────────────────────────
148
  # "llama_cpp" — local llama-server (default for local dev; works perfectly)
149
  # "transformers"— in-process Qwen3-VL via transformers, GPU on HF Spaces (ZeroGPU)
150
- # "auto" — transformers on a Space (SPACE_ID set), llama_cpp locally
151
  JUDGE_BACKEND = os.environ.get("FORMSCOUT_JUDGE_BACKEND", "auto")
152
  JUDGE_HF_MODEL = os.environ.get("FORMSCOUT_JUDGE_HF_MODEL", "Qwen/Qwen3-VL-8B-Instruct")
153
  ON_HF_SPACE = bool(os.environ.get("SPACE_ID"))
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
  def resolve_judge_backend() -> str:
157
- """Resolve the effective judge backend from JUDGE_BACKEND + environment."""
 
 
 
 
 
 
158
  if JUDGE_BACKEND in ("llama_cpp", "transformers"):
159
  return JUDGE_BACKEND
160
- return "transformers" if ON_HF_SPACE else "llama_cpp"
 
147
  # ─── Judge backend selection ────────────────────────────────────────────────
148
  # "llama_cpp" — local llama-server (default for local dev; works perfectly)
149
  # "transformers"— in-process Qwen3-VL via transformers, GPU on HF Spaces (ZeroGPU)
150
+ # "auto" — transformers ONLY on a GPU/ZeroGPU Space, else llama_cpp
151
  JUDGE_BACKEND = os.environ.get("FORMSCOUT_JUDGE_BACKEND", "auto")
152
  JUDGE_HF_MODEL = os.environ.get("FORMSCOUT_JUDGE_HF_MODEL", "Qwen/Qwen3-VL-8B-Instruct")
153
  ON_HF_SPACE = bool(os.environ.get("SPACE_ID"))
154
 
155
+ # Seconds the ZeroGPU window stays allocated per analysis. One window wraps the
156
+ # whole pipeline (pose, optional 3D, Qwen3-VL judge), so size it for the slowest
157
+ # clip; raise via env for long videos. Only effective on a ZeroGPU Space.
158
+ ZEROGPU_DURATION = int(os.environ.get("FORMSCOUT_ZEROGPU_DURATION", "120"))
159
+
160
+
161
+ def has_gpu() -> bool:
162
+ """True on a ZeroGPU Space (env flag) or when CUDA is actually present.
163
+
164
+ ZeroGPU exposes no CUDA outside @spaces.GPU, so it is detected via the
165
+ SPACES_ZERO_GPU env flag; ordinary GPU Spaces report via torch.cuda.
166
+ """
167
+ if os.environ.get("SPACES_ZERO_GPU") or os.environ.get("ZERO_GPU"):
168
+ return True
169
+ try:
170
+ import torch
171
+ return bool(torch.cuda.is_available())
172
+ except Exception:
173
+ return False
174
+
175
 
176
  def resolve_judge_backend() -> str:
177
+ """Resolve the effective judge backend from JUDGE_BACKEND + environment.
178
+
179
+ `auto` only engages the heavy in-process transformers model when a GPU is
180
+ actually available — a CPU-only Space stays on llama_cpp (which is then
181
+ unreachable, so the Judge falls back to the fast rubric instead of trying to
182
+ run a 17 GB model on CPU).
183
+ """
184
  if JUDGE_BACKEND in ("llama_cpp", "transformers"):
185
  return JUDGE_BACKEND
186
+ return "transformers" if (ON_HF_SPACE and has_gpu()) else "llama_cpp"
formscout/serving/transformers_vlm.py CHANGED
@@ -39,19 +39,27 @@ except Exception: # pragma: no cover
39
  return fn
40
 
41
 
42
- @_gpu
43
- def _generate(model_id: str, prompt: str, pil_images: list, max_tokens: int,
44
- temperature: float) -> str: # pragma: no cover - needs GPU + model
45
- """Load (cached) and run Qwen3-VL; returns the raw decoded string."""
46
- import torch
47
- from transformers import AutoModelForImageTextToText, AutoProcessor
48
-
49
  if "model" not in _CACHE:
 
 
50
  _CACHE["processor"] = AutoProcessor.from_pretrained(model_id)
51
  _CACHE["model"] = AutoModelForImageTextToText.from_pretrained(
52
- model_id, torch_dtype="auto", device_map="auto",
53
  )
 
 
 
 
 
 
 
 
 
54
  processor, model = _CACHE["processor"], _CACHE["model"]
 
55
 
56
  content = [{"type": "image", "image": im} for im in pil_images]
57
  content.append({"type": "text", "text": prompt})
@@ -60,7 +68,7 @@ def _generate(model_id: str, prompt: str, pil_images: list, max_tokens: int,
60
  inputs = processor.apply_chat_template(
61
  messages, tokenize=True, add_generation_prompt=True,
62
  return_tensors="pt", return_dict=True,
63
- ).to(model.device)
64
 
65
  with torch.no_grad():
66
  out = model.generate(
@@ -90,7 +98,8 @@ class TransformersVLMClient:
90
  stop: list[str] | None = None) -> dict:
91
  try:
92
  pil_images = self._decode_images(images)
93
- text = _generate(self.model_id, prompt, pil_images, max_tokens, temperature)
 
94
  return LlamaCppClient._parse_json_reply(text)
95
  except Exception as e: # pragma: no cover - needs GPU + model
96
  logger.warning("transformers VLM failed (%s) β€” falling back to rubric", e)
 
39
  return fn
40
 
41
 
42
+ def _ensure_loaded(model_id: str): # pragma: no cover - downloads ~16 GB
43
+ """Load processor + model to CPU once (cached). Kept OUT of the GPU window so
44
+ the 17 GB download/load does not eat ZeroGPU time."""
 
 
 
 
45
  if "model" not in _CACHE:
46
+ import torch
47
+ from transformers import AutoModelForImageTextToText, AutoProcessor
48
  _CACHE["processor"] = AutoProcessor.from_pretrained(model_id)
49
  _CACHE["model"] = AutoModelForImageTextToText.from_pretrained(
50
+ model_id, torch_dtype=torch.bfloat16,
51
  )
52
+ return _CACHE["processor"], _CACHE["model"]
53
+
54
+
55
+ @_gpu
56
+ def _generate(prompt: str, pil_images: list, max_tokens: int,
57
+ temperature: float) -> str: # pragma: no cover - needs GPU + model
58
+ """Move the cached model to CUDA and run Qwen3-VL (ZeroGPU window)."""
59
+ import torch
60
+
61
  processor, model = _CACHE["processor"], _CACHE["model"]
62
+ model.to("cuda")
63
 
64
  content = [{"type": "image", "image": im} for im in pil_images]
65
  content.append({"type": "text", "text": prompt})
 
68
  inputs = processor.apply_chat_template(
69
  messages, tokenize=True, add_generation_prompt=True,
70
  return_tensors="pt", return_dict=True,
71
+ ).to("cuda")
72
 
73
  with torch.no_grad():
74
  out = model.generate(
 
98
  stop: list[str] | None = None) -> dict:
99
  try:
100
  pil_images = self._decode_images(images)
101
+ _ensure_loaded(self.model_id) # CPU load (no GPU time)
102
+ text = _generate(prompt, pil_images, max_tokens, temperature)
103
  return LlamaCppClient._parse_json_reply(text)
104
  except Exception as e: # pragma: no cover - needs GPU + model
105
  logger.warning("transformers VLM failed (%s) β€” falling back to rubric", e)
formscout/ui/theme.py CHANGED
@@ -59,15 +59,20 @@ def formscout_theme() -> gr.Theme:
59
  button_secondary_background_fill="rgba(156, 188, 173, 0.55)",
60
  button_secondary_text_color=INK,
61
  # Inputs
62
- input_background_fill="rgba(255, 255, 255, 0.85)",
63
- input_background_fill_dark="rgba(255, 255, 255, 0.85)",
 
64
  input_border_color="rgba(43, 138, 138, 0.30)",
65
  input_border_color_focus="rgba(43, 138, 138, 0.75)",
 
 
 
 
 
66
  # Text
67
  body_text_color=INK,
68
  body_text_color_dark=INK,
69
  block_title_text_color=TEAL_DEEP,
70
- block_label_text_color=INK_MUTED,
71
  # Spacing
72
  block_padding="16px",
73
  layout_gap="16px",
 
59
  button_secondary_background_fill="rgba(156, 188, 173, 0.55)",
60
  button_secondary_text_color=INK,
61
  # Inputs
62
+ input_background_fill="rgba(255, 255, 255, 0.92)",
63
+ input_background_fill_dark="rgba(255, 255, 255, 0.92)",
64
+ input_background_fill_focus="rgba(255, 255, 255, 1.0)",
65
  input_border_color="rgba(43, 138, 138, 0.30)",
66
  input_border_color_focus="rgba(43, 138, 138, 0.75)",
67
+ # Labels — pin light in both modes so no dark dropdown header appears
68
+ block_label_background_fill="rgba(188, 211, 200, 0.55)",
69
+ block_label_background_fill_dark="rgba(188, 211, 200, 0.55)",
70
+ block_label_text_color=INK,
71
+ block_label_text_color_dark=INK,
72
  # Text
73
  body_text_color=INK,
74
  body_text_color_dark=INK,
75
  block_title_text_color=TEAL_DEEP,
 
76
  # Spacing
77
  block_padding="16px",
78
  layout_gap="16px",
scripts/hf_upload.sh CHANGED
@@ -21,8 +21,11 @@ set -euo pipefail
21
 
22
  cd "$(dirname "$0")/.."
23
 
24
- MODEL_REPO="silas-therapy/small-functional-movement-screening"
25
- SPACE_REPO="spaces/silas-therapy/small-functional-movement-screening"
 
 
 
26
  MSG="${1:-$(git log -1 --pretty=%s)}"
27
  LARGE_THRESHOLD="${FORMSCOUT_HF_LARGE_THRESHOLD:-500}"
28
 
@@ -76,22 +79,62 @@ if (( N_FILES == 0 )); then
76
  exit 1
77
  fi
78
 
 
 
 
79
  upload_repo() {
80
  local repo="$1"
 
81
  if (( N_FILES > LARGE_THRESHOLD )); then
82
  echo "── $repo: $N_FILES files > $LARGE_THRESHOLD, using upload-large-folder"
83
  echo " (resumable; commits directly to main β€” no PR, no custom message)"
84
  hf upload-large-folder "$repo" . "${EXCLUDES[@]}"
 
 
 
85
  else
86
- echo "── uploading to: $repo"
87
- hf upload "$repo" . . \
88
- "${EXCLUDES[@]}" \
89
- --create-pr \
90
- --commit-message="$MSG"
91
  fi
92
  }
93
 
94
- upload_repo "$MODEL_REPO"
95
- upload_repo "$SPACE_REPO"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
- echo "βœ“ done"
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  cd "$(dirname "$0")/.."
23
 
24
+ REPO_NAME="small-functional-movement-screening"
25
+ BLADE_OWNER="${FORMSCOUT_HF_BLADE_OWNER:-BladeSzaSza}"
26
+ MODEL_REPO="silas-therapy/$REPO_NAME"
27
+ SPACE_REPO="spaces/silas-therapy/$REPO_NAME"
28
+ SPACE_BLADESZASZA_REPO="spaces/$BLADE_OWNER/$REPO_NAME"
29
  MSG="${1:-$(git log -1 --pretty=%s)}"
30
  LARGE_THRESHOLD="${FORMSCOUT_HF_LARGE_THRESHOLD:-500}"
31
 
 
79
  exit 1
80
  fi
81
 
82
+ # upload_repo <repo> [pr|direct]
83
+ # pr — open a PR (shared org repos; review before merge)
84
+ # direct — commit straight to main (repos you own; deploys immediately)
85
  upload_repo() {
86
  local repo="$1"
87
+ local mode="${2:-pr}"
88
  if (( N_FILES > LARGE_THRESHOLD )); then
89
  echo "── $repo: $N_FILES files > $LARGE_THRESHOLD, using upload-large-folder"
90
  echo " (resumable; commits directly to main β€” no PR, no custom message)"
91
  hf upload-large-folder "$repo" . "${EXCLUDES[@]}"
92
+ elif [[ "$mode" == "direct" ]]; then
93
+ echo "── uploading (direct β†’ main) to: $repo"
94
+ hf upload "$repo" . . "${EXCLUDES[@]}" --commit-message="$MSG"
95
  else
96
+ echo "── uploading (PR) to: $repo"
97
+ hf upload "$repo" . . "${EXCLUDES[@]}" --create-pr --commit-message="$MSG"
 
 
 
98
  fi
99
  }
100
 
101
+ # Ensure the personal ZeroGPU Space exists. Tries zero-a10g (needs Pro/ZeroGPU);
102
+ # falls back to cpu-basic so the upload still has a target (set ZeroGPU in
103
+ # Settings afterward). Idempotent via --exist-ok.
104
+ ensure_blade_space() {
105
+ local id="$BLADE_OWNER/$REPO_NAME"
106
+ if hf repos create "$id" --type space --space-sdk gradio --flavor zero-a10g --exist-ok 2>/dev/null; then
107
+ echo "── Space ready (ZeroGPU / zero-a10g): $id"; return 0
108
+ fi
109
+ if hf repos create "$id" --type space --space-sdk gradio --exist-ok 2>/dev/null; then
110
+ echo "── Space created cpu-basic (set ZeroGPU in Settings β†’ Hardware): $id"; return 0
111
+ fi
112
+ return 1
113
+ }
114
+
115
+ blade_help() {
116
+ cat >&2 <<EOF
117
+ ── ⚠ Could not create/deploy to $SPACE_BLADESZASZA_REPO
118
+ Your active HF token can push to silas-therapy but not create repos under
119
+ "$BLADE_OWNER". To deploy your own ZeroGPU Space:
120
+ 1) In the HF UI create a Space: $BLADE_OWNER/$REPO_NAME
121
+ SDK = Gradio, Hardware = ZeroGPU (Nvidia A10G).
122
+ 2) Re-auth with a token that can write there:
123
+ hf auth login (token with 'Write' role, or fine-grained with
124
+ write access to $BLADE_OWNER)
125
+ 3) Re-run ./scripts/hf_upload.sh
126
+ EOF
127
+ }
128
 
129
+ # Shared org repos → PRs; personal ZeroGPU Space → created + direct deploy.
130
+ upload_repo "$MODEL_REPO" pr
131
+ upload_repo "$SPACE_REPO" pr
132
+
133
+ set +e
134
+ if ensure_blade_space; then
135
+ upload_repo "$SPACE_BLADESZASZA_REPO" direct || blade_help
136
+ else
137
+ blade_help
138
+ fi
139
+ set -e
140
+ echo "βœ“ done (silas-therapy PRs created; see any notes above for the personal Space)"
tests/test_judge_backend.py CHANGED
@@ -21,9 +21,19 @@ def test_resolve_backend_default_local(monkeypatch):
21
  assert cfg.resolve_judge_backend() == "llama_cpp"
22
 
23
 
24
- def test_resolve_backend_auto_on_space(monkeypatch):
25
- cfg = _reload_config(monkeypatch, FORMSCOUT_JUDGE_BACKEND="auto", SPACE_ID="me/space")
 
26
  assert cfg.resolve_judge_backend() == "transformers"
 
 
 
 
 
 
 
 
 
27
 
28
 
29
  def test_resolve_backend_explicit(monkeypatch):
 
21
  assert cfg.resolve_judge_backend() == "llama_cpp"
22
 
23
 
24
+ def test_resolve_backend_auto_on_zero_gpu_space(monkeypatch):
25
+ cfg = _reload_config(monkeypatch, FORMSCOUT_JUDGE_BACKEND="auto",
26
+ SPACE_ID="me/space", SPACES_ZERO_GPU="true")
27
  assert cfg.resolve_judge_backend() == "transformers"
28
+ importlib.reload(config)
29
+
30
+
31
+ def test_resolve_backend_auto_on_cpu_space_stays_llama(monkeypatch):
32
+ # A CPU-only Space must NOT load the 17 GB transformers model.
33
+ cfg = _reload_config(monkeypatch, FORMSCOUT_JUDGE_BACKEND="auto",
34
+ SPACE_ID="me/space", SPACES_ZERO_GPU=None, ZERO_GPU=None)
35
+ assert cfg.resolve_judge_backend() == "llama_cpp"
36
+ importlib.reload(config)
37
 
38
 
39
  def test_resolve_backend_explicit(monkeypatch):