fix: define REPO_NAME in hf_upload.sh (ensure_blade_space referenced it)

#13

by BladeSzaSza - opened 19 days ago

base: refs/heads/main

←

from: refs/pr/13

Discussion Files changed

+145

-30

Files changed (7) hide show

CLAUDE.md +1 -1
app.py +23 -1
formscout/config.py +29 -3
formscout/serving/transformers_vlm.py +19 -10
formscout/ui/theme.py +8 -3
scripts/hf_upload.sh +53 -10
tests/test_judge_backend.py +12 -2

CLAUDE.md CHANGED Viewed

@@ -170,7 +170,7 @@ Track the running sum in `MODEL_BUDGET.md`. The two Qwen3-VL-8B models share a b
 The UI uses **Gradio `gr.Blocks`** with custom CSS/theme (`formscout/ui/theme.py`). Custom Svelte components for score dial, asymmetry bars, rubric drawer are planned for Phase 4. Use `gradio-svelte-expert` agent for Svelte component work.
-- ZeroGPU: wrap heavy inference (`Pose2DAgent.run`, `Body3DAgent.run`) in `@spaces.GPU` before deploying to Spaces.
 - Verify Gradio APIs against current docs before use — pin exact versions in `requirements.txt`.
 ## Build phases

 The UI uses **Gradio `gr.Blocks`** with custom CSS/theme (`formscout/ui/theme.py`). Custom Svelte components for score dial, asymmetry bars, rubric drawer are planned for Phase 4. Use `gradio-svelte-expert` agent for Svelte component work.
+- ZeroGPU: `app.py`'s `process_video` (the Start Analysis handler) is decorated with `@spaces.GPU` (via the `gpu_task` shim, no-op off-Space) so one GPU window wraps the whole pipeline — pose, optional 3D, and the judge. **ZeroGPU aborts startup with "No @spaces.GPU function detected" unless a decorated function exists at import time**, so the decorator must stay at module level on a top-level function, not buried behind a lazy import. Window length is `config.ZEROGPU_DURATION` (default 120s, `FORMSCOUT_ZEROGPU_DURATION`).
 - Verify Gradio APIs against current docs before use — pin exact versions in `requirements.txt`.
 ## Build phases

app.py CHANGED Viewed

@@ -20,6 +20,23 @@ from formscout import config
 from formscout import session as session_mod
 from formscout.startup import ensure_checkpoints
 ensure_checkpoints()
@@ -50,9 +67,14 @@ SCORE_DESCRIPTIONS = {
 # ─── Processing ──────────────────────────────────────────────────────────────
 def process_video(video_path: str, test_name: str, side: str, model_key: str,
                   layers: list[str], session_state):
-    """Analyse one clip and accumulate it into the screening session."""
     if not video_path:
         return (
             session_state, _render_empty_state(), "Upload a video to begin analysis.",

 from formscout import session as session_mod
 from formscout.startup import ensure_checkpoints
+# ─── ZeroGPU ──────────────────────────────────────────────────────────────────
+# On an HF Spaces ZeroGPU runtime the heavy analysis MUST run inside an
+# @spaces.GPU function, and that function must already exist at import time:
+# ZeroGPU scans for one during startup and aborts the Space with
+# "No @spaces.GPU function detected during startup" if none is registered.
+# We decorate process_video (the Start Analysis handler) so a single GPU window
+# covers the whole pipeline — pose, optional 3D, and the Qwen3-VL judge. Off a
+# ZeroGPU Space the `spaces` package is absent (or its decorator is effect-free),
+# so local runs and CPU Spaces are unaffected.
+try:
+    import spaces
+    gpu_task = spaces.GPU(duration=config.ZEROGPU_DURATION)
+except Exception:  # local dev / non-ZeroGPU — decorate as a no-op
+    def gpu_task(fn):
+        return fn
 ensure_checkpoints()
 # ─── Processing ──────────────────────────────────────────────────────────────
+@gpu_task
 def process_video(video_path: str, test_name: str, side: str, model_key: str,
                   layers: list[str], session_state):
+    """Analyse one clip and accumulate it into the screening session.
+    Decorated with @spaces.GPU on ZeroGPU: the whole pipeline (pose, optional 3D,
+    Qwen3-VL judge) runs inside one GPU window. The decorator is a no-op off-Space.
+    """
     if not video_path:
         return (
             session_state, _render_empty_state(), "Upload a video to begin analysis.",

formscout/config.py CHANGED Viewed

@@ -147,14 +147,40 @@ LLAMA_CPP_PORT_EMBED = 8081
 # ─── Judge backend selection ────────────────────────────────────────────────
 # "llama_cpp"   — local llama-server (default for local dev; works perfectly)
 # "transformers"— in-process Qwen3-VL via transformers, GPU on HF Spaces (ZeroGPU)
-# "auto"        — transformers on a Space (SPACE_ID set), llama_cpp locally
 JUDGE_BACKEND = os.environ.get("FORMSCOUT_JUDGE_BACKEND", "auto")
 JUDGE_HF_MODEL = os.environ.get("FORMSCOUT_JUDGE_HF_MODEL", "Qwen/Qwen3-VL-8B-Instruct")
 ON_HF_SPACE = bool(os.environ.get("SPACE_ID"))
 def resolve_judge_backend() -> str:
-    """Resolve the effective judge backend from JUDGE_BACKEND + environment."""
     if JUDGE_BACKEND in ("llama_cpp", "transformers"):
         return JUDGE_BACKEND
-    return "transformers" if ON_HF_SPACE else "llama_cpp"

 # ─── Judge backend selection ────────────────────────────────────────────────
 # "llama_cpp"   — local llama-server (default for local dev; works perfectly)
 # "transformers"— in-process Qwen3-VL via transformers, GPU on HF Spaces (ZeroGPU)
+# "auto"        — transformers ONLY on a GPU/ZeroGPU Space, else llama_cpp
 JUDGE_BACKEND = os.environ.get("FORMSCOUT_JUDGE_BACKEND", "auto")
 JUDGE_HF_MODEL = os.environ.get("FORMSCOUT_JUDGE_HF_MODEL", "Qwen/Qwen3-VL-8B-Instruct")
 ON_HF_SPACE = bool(os.environ.get("SPACE_ID"))
+# Seconds the ZeroGPU window stays allocated per analysis. One window wraps the
+# whole pipeline (pose, optional 3D, Qwen3-VL judge), so size it for the slowest
+# clip; raise via env for long videos. Only effective on a ZeroGPU Space.
+ZEROGPU_DURATION = int(os.environ.get("FORMSCOUT_ZEROGPU_DURATION", "120"))
+def has_gpu() -> bool:
+    """True on a ZeroGPU Space (env flag) or when CUDA is actually present.
+    ZeroGPU exposes no CUDA outside @spaces.GPU, so it is detected via the
+    SPACES_ZERO_GPU or ZERO_GPU env flags (any non-empty value counts);
+    ordinary GPU Spaces report via torch.cuda.
+    """
+    if os.environ.get("SPACES_ZERO_GPU") or os.environ.get("ZERO_GPU"):
+        return True
+    try:
+        import torch
+        return bool(torch.cuda.is_available())
+    except Exception:
+        return False
 def resolve_judge_backend() -> str:
+    """Resolve the effective judge backend from JUDGE_BACKEND + environment.
+    `auto` only engages the heavy in-process transformers model when a GPU is
+    actually available — a CPU-only Space stays on llama_cpp (which is then
+    unreachable, so the Judge falls back to the fast rubric instead of trying to
+    run a 17 GB model on CPU).
+    """
     if JUDGE_BACKEND in ("llama_cpp", "transformers"):
         return JUDGE_BACKEND
+    return "transformers" if (ON_HF_SPACE and has_gpu()) else "llama_cpp"

formscout/serving/transformers_vlm.py CHANGED Viewed

@@ -39,19 +39,27 @@ except Exception:  # pragma: no cover
         return fn
-@_gpu
-def _generate(model_id: str, prompt: str, pil_images: list, max_tokens: int,
-              temperature: float) -> str:  # pragma: no cover - needs GPU + model
-    """Load (cached) and run Qwen3-VL; returns the raw decoded string."""
-    import torch
-    from transformers import AutoModelForImageTextToText, AutoProcessor
     if "model" not in _CACHE:
         _CACHE["processor"] = AutoProcessor.from_pretrained(model_id)
         _CACHE["model"] = AutoModelForImageTextToText.from_pretrained(
-            model_id, torch_dtype="auto", device_map="auto",
         )
     processor, model = _CACHE["processor"], _CACHE["model"]
     content = [{"type": "image", "image": im} for im in pil_images]
     content.append({"type": "text", "text": prompt})
@@ -60,7 +68,7 @@ def _generate(model_id: str, prompt: str, pil_images: list, max_tokens: int,
     inputs = processor.apply_chat_template(
         messages, tokenize=True, add_generation_prompt=True,
         return_tensors="pt", return_dict=True,
-    ).to(model.device)
     with torch.no_grad():
         out = model.generate(
@@ -90,7 +98,8 @@ class TransformersVLMClient:
                  stop: list[str] | None = None) -> dict:
         try:
             pil_images = self._decode_images(images)
-            text = _generate(self.model_id, prompt, pil_images, max_tokens, temperature)
             return LlamaCppClient._parse_json_reply(text)
         except Exception as e:  # pragma: no cover - needs GPU + model
             logger.warning("transformers VLM failed (%s) — falling back to rubric", e)

         return fn
+def _ensure_loaded(model_id: str):  # pragma: no cover - downloads ~17 GB
+    """Load processor + model to CPU once (cached). Kept OUT of the GPU window so
+    the 17 GB download/load does not eat ZeroGPU time."""
     if "model" not in _CACHE:
+        import torch
+        from transformers import AutoModelForImageTextToText, AutoProcessor
         _CACHE["processor"] = AutoProcessor.from_pretrained(model_id)
         _CACHE["model"] = AutoModelForImageTextToText.from_pretrained(
+            model_id, torch_dtype=torch.bfloat16,
         )
+    return _CACHE["processor"], _CACHE["model"]
+@_gpu
+def _generate(prompt: str, pil_images: list, max_tokens: int,
+              temperature: float) -> str:  # pragma: no cover - needs GPU + model
+    """Move the cached model to CUDA and run Qwen3-VL (ZeroGPU window)."""
+    import torch
     processor, model = _CACHE["processor"], _CACHE["model"]
+    model.to("cuda")
     content = [{"type": "image", "image": im} for im in pil_images]
     content.append({"type": "text", "text": prompt})
     inputs = processor.apply_chat_template(
         messages, tokenize=True, add_generation_prompt=True,
         return_tensors="pt", return_dict=True,
+    ).to("cuda")
     with torch.no_grad():
         out = model.generate(
                  stop: list[str] | None = None) -> dict:
         try:
             pil_images = self._decode_images(images)
+            _ensure_loaded(self.model_id)  # CPU load (no GPU time)
+            text = _generate(prompt, pil_images, max_tokens, temperature)
             return LlamaCppClient._parse_json_reply(text)
         except Exception as e:  # pragma: no cover - needs GPU + model
             logger.warning("transformers VLM failed (%s) — falling back to rubric", e)

formscout/ui/theme.py CHANGED Viewed

@@ -59,15 +59,20 @@ def formscout_theme() -> gr.Theme:
         button_secondary_background_fill="rgba(156, 188, 173, 0.55)",
         button_secondary_text_color=INK,
         # Inputs
-        input_background_fill="rgba(255, 255, 255, 0.85)",
-        input_background_fill_dark="rgba(255, 255, 255, 0.85)",
         input_border_color="rgba(43, 138, 138, 0.30)",
         input_border_color_focus="rgba(43, 138, 138, 0.75)",
         # Text
         body_text_color=INK,
         body_text_color_dark=INK,
         block_title_text_color=TEAL_DEEP,
-        block_label_text_color=INK_MUTED,
         # Spacing
         block_padding="16px",
         layout_gap="16px",

         button_secondary_background_fill="rgba(156, 188, 173, 0.55)",
         button_secondary_text_color=INK,
         # Inputs
+        input_background_fill="rgba(255, 255, 255, 0.92)",
+        input_background_fill_dark="rgba(255, 255, 255, 0.92)",
+        input_background_fill_focus="rgba(255, 255, 255, 1.0)",
         input_border_color="rgba(43, 138, 138, 0.30)",
         input_border_color_focus="rgba(43, 138, 138, 0.75)",
+        # Labels — pin light in both modes so no dark dropdown header appears
+        block_label_background_fill="rgba(188, 211, 200, 0.55)",
+        block_label_background_fill_dark="rgba(188, 211, 200, 0.55)",
+        block_label_text_color=INK,
+        block_label_text_color_dark=INK,
         # Text
         body_text_color=INK,
         body_text_color_dark=INK,
         block_title_text_color=TEAL_DEEP,
         # Spacing
         block_padding="16px",
         layout_gap="16px",

scripts/hf_upload.sh CHANGED Viewed

@@ -21,8 +21,11 @@ set -euo pipefail
 cd "$(dirname "$0")/.."
-MODEL_REPO="silas-therapy/small-functional-movement-screening"
-SPACE_REPO="spaces/silas-therapy/small-functional-movement-screening"
 MSG="${1:-$(git log -1 --pretty=%s)}"
 LARGE_THRESHOLD="${FORMSCOUT_HF_LARGE_THRESHOLD:-500}"
@@ -76,22 +79,62 @@ if (( N_FILES == 0 )); then
     exit 1
 fi
 upload_repo() {
     local repo="$1"
     if (( N_FILES > LARGE_THRESHOLD )); then
         echo "── $repo: $N_FILES files > $LARGE_THRESHOLD, using upload-large-folder"
         echo "   (resumable; commits directly to main — no PR, no custom message)"
         hf upload-large-folder "$repo" . "${EXCLUDES[@]}"
     else
-        echo "── uploading to: $repo"
-        hf upload "$repo" . . \
-            "${EXCLUDES[@]}" \
-            --create-pr \
-            --commit-message="$MSG"
     fi
 }
-upload_repo "$MODEL_REPO"
-upload_repo "$SPACE_REPO"
-echo "✓ done"

 cd "$(dirname "$0")/.."
+REPO_NAME="small-functional-movement-screening"
+BLADE_OWNER="${FORMSCOUT_HF_BLADE_OWNER:-BladeSzaSza}"
+MODEL_REPO="silas-therapy/$REPO_NAME"
+SPACE_REPO="spaces/silas-therapy/$REPO_NAME"
+SPACE_BLADESZASZA_REPO="spaces/$BLADE_OWNER/$REPO_NAME"
 MSG="${1:-$(git log -1 --pretty=%s)}"
 LARGE_THRESHOLD="${FORMSCOUT_HF_LARGE_THRESHOLD:-500}"
     exit 1
 fi
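+# upload_repo <repo> [pr|direct]
+#   pr     — open a PR (shared org repos; review before merge)
+#   direct — commit straight to main (repos you own; deploys immediately)
+#   NOTE: when N_FILES exceeds LARGE_THRESHOLD the mode is ignored —
+#   upload-large-folder always commits directly to main (no PR, no message).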
 upload_repo() {
     local repo="$1"
+    local mode="${2:-pr}"
     if (( N_FILES > LARGE_THRESHOLD )); then
         echo "── $repo: $N_FILES files > $LARGE_THRESHOLD, using upload-large-folder"
         echo "   (resumable; commits directly to main — no PR, no custom message)"
         hf upload-large-folder "$repo" . "${EXCLUDES[@]}"
+    elif [[ "$mode" == "direct" ]]; then
+        echo "── uploading (direct → main) to: $repo"
+        hf upload "$repo" . . "${EXCLUDES[@]}" --commit-message="$MSG"
     else
+        echo "── uploading (PR) to: $repo"
+        hf upload "$repo" . . "${EXCLUDES[@]}" --create-pr --commit-message="$MSG"
     fi
 }
+# Ensure the personal ZeroGPU Space exists. Tries zero-a10g (needs Pro/ZeroGPU);
+# falls back to cpu-basic so the upload still has a target (set ZeroGPU in
+# Settings afterward). Idempotent via --exist-ok.
+ensure_blade_space() {
+    local id="$BLADE_OWNER/$REPO_NAME"
+    if hf repos create "$id" --type space --space-sdk gradio --flavor zero-a10g --exist-ok 2>/dev/null; then
+        echo "── Space ready (ZeroGPU / zero-a10g): $id"; return 0
+    fi
+    if hf repos create "$id" --type space --space-sdk gradio --exist-ok 2>/dev/null; then
+        echo "── Space created cpu-basic (set ZeroGPU in Settings → Hardware): $id"; return 0
+    fi
+    return 1
+}
+blade_help() {
+    cat >&2 <<EOF
+── ⚠ Could not create/deploy to $SPACE_BLADESZASZA_REPO
+   Your active HF token can push to silas-therapy but not create repos under
+   "$BLADE_OWNER". To deploy your own ZeroGPU Space:
+     1) In the HF UI create a Space:  $BLADE_OWNER/$REPO_NAME
+        SDK = Gradio,  Hardware = ZeroGPU (Nvidia A10G).
+     2) Re-auth with a token that can write there:
+          hf auth login        (token with 'Write' role, or fine-grained with
+                                 write access to $BLADE_OWNER)
+     3) Re-run ./scripts/hf_upload.sh
+EOF
+}
+# Shared org repos → PRs; personal ZeroGPU Space → created + direct deploy.
+upload_repo "$MODEL_REPO" pr
+upload_repo "$SPACE_REPO" pr
+set +e
+if ensure_blade_space; then
+    upload_repo "$SPACE_BLADESZASZA_REPO" direct || blade_help
+else
+    blade_help
+fi
+set -e
+echo "✓ done (silas-therapy PRs created; see any notes above for the personal Space)"

tests/test_judge_backend.py CHANGED Viewed

@@ -21,9 +21,19 @@ def test_resolve_backend_default_local(monkeypatch):
     assert cfg.resolve_judge_backend() == "llama_cpp"
-def test_resolve_backend_auto_on_space(monkeypatch):
-    cfg = _reload_config(monkeypatch, FORMSCOUT_JUDGE_BACKEND="auto", SPACE_ID="me/space")
     assert cfg.resolve_judge_backend() == "transformers"
 def test_resolve_backend_explicit(monkeypatch):

     assert cfg.resolve_judge_backend() == "llama_cpp"
+def test_resolve_backend_auto_on_zero_gpu_space(monkeypatch):
+    cfg = _reload_config(monkeypatch, FORMSCOUT_JUDGE_BACKEND="auto",
+                         SPACE_ID="me/space", SPACES_ZERO_GPU="true")
     assert cfg.resolve_judge_backend() == "transformers"
+    importlib.reload(config)
+def test_resolve_backend_auto_on_cpu_space_stays_llama(monkeypatch):
+    # A CPU-only Space must NOT load the 17 GB transformers model.
+    cfg = _reload_config(monkeypatch, FORMSCOUT_JUDGE_BACKEND="auto",
+                         SPACE_ID="me/space", SPACES_ZERO_GPU=None, ZERO_GPU=None)
+    assert cfg.resolve_judge_backend() == "llama_cpp"
+    importlib.reload(config)
 def test_resolve_backend_explicit(monkeypatch):