fix: define REPO_NAME in hf_upload.sh (ensure_blade_space referenced it)
#13
by BladeSzaSza - opened
- CLAUDE.md +1 -1
- app.py +23 -1
- formscout/config.py +29 -3
- formscout/serving/transformers_vlm.py +19 -10
- formscout/ui/theme.py +8 -3
- scripts/hf_upload.sh +53 -10
- tests/test_judge_backend.py +12 -2
CLAUDE.md
CHANGED
|
@@ -170,7 +170,7 @@ Track the running sum in `MODEL_BUDGET.md`. The two Qwen3-VL-8B models share a b
|
|
| 170 |
|
| 171 |
The UI uses **Gradio `gr.Blocks`** with custom CSS/theme (`formscout/ui/theme.py`). Custom Svelte components for score dial, asymmetry bars, rubric drawer are planned for Phase 4. Use `gradio-svelte-expert` agent for Svelte component work.
|
| 172 |
|
| 173 |
-
- ZeroGPU:
|
| 174 |
- Verify Gradio APIs against current docs before use — pin exact versions in `requirements.txt`.
|
| 175 |
|
| 176 |
## Build phases
|
|
|
|
| 170 |
|
| 171 |
The UI uses **Gradio `gr.Blocks`** with custom CSS/theme (`formscout/ui/theme.py`). Custom Svelte components for score dial, asymmetry bars, rubric drawer are planned for Phase 4. Use `gradio-svelte-expert` agent for Svelte component work.
|
| 172 |
|
| 173 |
+
- ZeroGPU: `app.py`'s `process_video` (the Start Analysis handler) is decorated with `@spaces.GPU` (via the `gpu_task` shim, no-op off-Space) so one GPU window wraps the whole pipeline — pose, optional 3D, and the judge. **ZeroGPU aborts startup with "No @spaces.GPU function detected" unless a decorated function exists at import time**, so the decorator must stay at module level on a top-level function, not buried behind a lazy import. Window length is `config.ZEROGPU_DURATION` (default 120s, `FORMSCOUT_ZEROGPU_DURATION`).
|
| 174 |
- Verify Gradio APIs against current docs before use — pin exact versions in `requirements.txt`.
|
| 175 |
|
| 176 |
## Build phases
|
app.py
CHANGED
|
@@ -20,6 +20,23 @@ from formscout import config
|
|
| 20 |
from formscout import session as session_mod
|
| 21 |
from formscout.startup import ensure_checkpoints
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
ensure_checkpoints()
|
| 24 |
|
| 25 |
|
|
@@ -50,9 +67,14 @@ SCORE_DESCRIPTIONS = {
|
|
| 50 |
|
| 51 |
# ─── Processing ──────────────────────────────────────────────────────────────
|
| 52 |
|
|
|
|
| 53 |
def process_video(video_path: str, test_name: str, side: str, model_key: str,
|
| 54 |
layers: list[str], session_state):
|
| 55 |
-
"""Analyse one clip and accumulate it into the screening session.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
if not video_path:
|
| 57 |
return (
|
| 58 |
session_state, _render_empty_state(), "Upload a video to begin analysis.",
|
|
|
|
| 20 |
from formscout import session as session_mod
|
| 21 |
from formscout.startup import ensure_checkpoints
|
| 22 |
|
| 23 |
+
# ─── ZeroGPU ──────────────────────────────────────────────────────────────────
|
| 24 |
+
# On an HF Spaces ZeroGPU runtime the heavy analysis MUST run inside an
|
| 25 |
+
# @spaces.GPU function, and that function must already exist at import time:
|
| 26 |
+
# ZeroGPU scans for one during startup and aborts the Space with
|
| 27 |
+
# "No @spaces.GPU function detected during startup" if none is registered.
|
| 28 |
+
# We decorate process_video (the Start Analysis handler) so a single GPU window
|
| 29 |
+
# covers the whole pipeline — pose, optional 3D, and the Qwen3-VL judge. Off a
|
| 30 |
+
# ZeroGPU Space the `spaces` package is absent (or its decorator is effect-free),
|
| 31 |
+
# so local runs and CPU Spaces are unaffected.
|
| 32 |
+
try:
|
| 33 |
+
import spaces
|
| 34 |
+
|
| 35 |
+
gpu_task = spaces.GPU(duration=config.ZEROGPU_DURATION)
|
| 36 |
+
except Exception: # local dev / non-ZeroGPU β decorate as a no-op
|
| 37 |
+
def gpu_task(fn):
|
| 38 |
+
return fn
|
| 39 |
+
|
| 40 |
ensure_checkpoints()
|
| 41 |
|
| 42 |
|
|
|
|
| 67 |
|
| 68 |
# ─── Processing ──────────────────────────────────────────────────────────────
|
| 69 |
|
| 70 |
+
@gpu_task
|
| 71 |
def process_video(video_path: str, test_name: str, side: str, model_key: str,
|
| 72 |
layers: list[str], session_state):
|
| 73 |
+
"""Analyse one clip and accumulate it into the screening session.
|
| 74 |
+
|
| 75 |
+
Decorated with @spaces.GPU on ZeroGPU: the whole pipeline (pose, optional 3D,
|
| 76 |
+
Qwen3-VL judge) runs inside one GPU window. The decorator is a no-op off-Space.
|
| 77 |
+
"""
|
| 78 |
if not video_path:
|
| 79 |
return (
|
| 80 |
session_state, _render_empty_state(), "Upload a video to begin analysis.",
|
formscout/config.py
CHANGED
|
@@ -147,14 +147,40 @@ LLAMA_CPP_PORT_EMBED = 8081
|
|
| 147 |
# ─── Judge backend selection ────────────────────────────────────────────────
|
| 148 |
# "llama_cpp" → local llama-server (default for local dev; works perfectly)
|
| 149 |
# "transformers" → in-process Qwen3-VL via transformers, GPU on HF Spaces (ZeroGPU)
|
| 150 |
-
# "auto" β transformers on a
|
| 151 |
JUDGE_BACKEND = os.environ.get("FORMSCOUT_JUDGE_BACKEND", "auto")
|
| 152 |
JUDGE_HF_MODEL = os.environ.get("FORMSCOUT_JUDGE_HF_MODEL", "Qwen/Qwen3-VL-8B-Instruct")
|
| 153 |
ON_HF_SPACE = bool(os.environ.get("SPACE_ID"))
|
| 154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
def resolve_judge_backend() -> str:
|
| 157 |
-
"""Resolve the effective judge backend from JUDGE_BACKEND + environment.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
if JUDGE_BACKEND in ("llama_cpp", "transformers"):
|
| 159 |
return JUDGE_BACKEND
|
| 160 |
-
return "transformers" if ON_HF_SPACE else "llama_cpp"
|
|
|
|
| 147 |
# ─── Judge backend selection ────────────────────────────────────────────────
|
| 148 |
# "llama_cpp" → local llama-server (default for local dev; works perfectly)
|
| 149 |
# "transformers" → in-process Qwen3-VL via transformers, GPU on HF Spaces (ZeroGPU)
|
| 150 |
+
# "auto" → transformers ONLY on a GPU/ZeroGPU Space, else llama_cpp
|
| 151 |
JUDGE_BACKEND = os.environ.get("FORMSCOUT_JUDGE_BACKEND", "auto")
|
| 152 |
JUDGE_HF_MODEL = os.environ.get("FORMSCOUT_JUDGE_HF_MODEL", "Qwen/Qwen3-VL-8B-Instruct")
|
| 153 |
ON_HF_SPACE = bool(os.environ.get("SPACE_ID"))
|
| 154 |
|
| 155 |
+
# Seconds the ZeroGPU window stays allocated per analysis. One window wraps the
|
| 156 |
+
# whole pipeline (pose, optional 3D, Qwen3-VL judge), so size it for the slowest
|
| 157 |
+
# clip; raise via env for long videos. Only effective on a ZeroGPU Space.
|
| 158 |
+
ZEROGPU_DURATION = int(os.environ.get("FORMSCOUT_ZEROGPU_DURATION", "120"))
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def has_gpu() -> bool:
|
| 162 |
+
"""True on a ZeroGPU Space (env flag) or when CUDA is actually present.
|
| 163 |
+
|
| 164 |
+
ZeroGPU exposes no CUDA outside @spaces.GPU, so it is detected via the
|
| 165 |
+
SPACES_ZERO_GPU env flag; ordinary GPU Spaces report via torch.cuda.
|
| 166 |
+
"""
|
| 167 |
+
if os.environ.get("SPACES_ZERO_GPU") or os.environ.get("ZERO_GPU"):
|
| 168 |
+
return True
|
| 169 |
+
try:
|
| 170 |
+
import torch
|
| 171 |
+
return bool(torch.cuda.is_available())
|
| 172 |
+
except Exception:
|
| 173 |
+
return False
|
| 174 |
+
|
| 175 |
|
| 176 |
def resolve_judge_backend() -> str:
|
| 177 |
+
"""Resolve the effective judge backend from JUDGE_BACKEND + environment.
|
| 178 |
+
|
| 179 |
+
`auto` only engages the heavy in-process transformers model when a GPU is
|
| 180 |
+
actually available — a CPU-only Space stays on llama_cpp (which is then
|
| 181 |
+
unreachable, so the Judge falls back to the fast rubric instead of trying to
|
| 182 |
+
run a 17 GB model on CPU).
|
| 183 |
+
"""
|
| 184 |
if JUDGE_BACKEND in ("llama_cpp", "transformers"):
|
| 185 |
return JUDGE_BACKEND
|
| 186 |
+
return "transformers" if (ON_HF_SPACE and has_gpu()) else "llama_cpp"
|
formscout/serving/transformers_vlm.py
CHANGED
|
@@ -39,19 +39,27 @@ except Exception: # pragma: no cover
|
|
| 39 |
return fn
|
| 40 |
|
| 41 |
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
"""Load (cached) and run Qwen3-VL; returns the raw decoded string."""
|
| 46 |
-
import torch
|
| 47 |
-
from transformers import AutoModelForImageTextToText, AutoProcessor
|
| 48 |
-
|
| 49 |
if "model" not in _CACHE:
|
|
|
|
|
|
|
| 50 |
_CACHE["processor"] = AutoProcessor.from_pretrained(model_id)
|
| 51 |
_CACHE["model"] = AutoModelForImageTextToText.from_pretrained(
|
| 52 |
-
model_id, torch_dtype=
|
| 53 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
processor, model = _CACHE["processor"], _CACHE["model"]
|
|
|
|
| 55 |
|
| 56 |
content = [{"type": "image", "image": im} for im in pil_images]
|
| 57 |
content.append({"type": "text", "text": prompt})
|
|
@@ -60,7 +68,7 @@ def _generate(model_id: str, prompt: str, pil_images: list, max_tokens: int,
|
|
| 60 |
inputs = processor.apply_chat_template(
|
| 61 |
messages, tokenize=True, add_generation_prompt=True,
|
| 62 |
return_tensors="pt", return_dict=True,
|
| 63 |
-
).to(
|
| 64 |
|
| 65 |
with torch.no_grad():
|
| 66 |
out = model.generate(
|
|
@@ -90,7 +98,8 @@ class TransformersVLMClient:
|
|
| 90 |
stop: list[str] | None = None) -> dict:
|
| 91 |
try:
|
| 92 |
pil_images = self._decode_images(images)
|
| 93 |
-
|
|
|
|
| 94 |
return LlamaCppClient._parse_json_reply(text)
|
| 95 |
except Exception as e: # pragma: no cover - needs GPU + model
|
| 96 |
logger.warning("transformers VLM failed (%s) β falling back to rubric", e)
|
|
|
|
| 39 |
return fn
|
| 40 |
|
| 41 |
|
| 42 |
+
def _ensure_loaded(model_id: str): # pragma: no cover - downloads ~16 GB
|
| 43 |
+
"""Load processor + model to CPU once (cached). Kept OUT of the GPU window so
|
| 44 |
+
the 17 GB download/load does not eat ZeroGPU time."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
if "model" not in _CACHE:
|
| 46 |
+
import torch
|
| 47 |
+
from transformers import AutoModelForImageTextToText, AutoProcessor
|
| 48 |
_CACHE["processor"] = AutoProcessor.from_pretrained(model_id)
|
| 49 |
_CACHE["model"] = AutoModelForImageTextToText.from_pretrained(
|
| 50 |
+
model_id, torch_dtype=torch.bfloat16,
|
| 51 |
)
|
| 52 |
+
return _CACHE["processor"], _CACHE["model"]
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
@_gpu
|
| 56 |
+
def _generate(prompt: str, pil_images: list, max_tokens: int,
|
| 57 |
+
temperature: float) -> str: # pragma: no cover - needs GPU + model
|
| 58 |
+
"""Move the cached model to CUDA and run Qwen3-VL (ZeroGPU window)."""
|
| 59 |
+
import torch
|
| 60 |
+
|
| 61 |
processor, model = _CACHE["processor"], _CACHE["model"]
|
| 62 |
+
model.to("cuda")
|
| 63 |
|
| 64 |
content = [{"type": "image", "image": im} for im in pil_images]
|
| 65 |
content.append({"type": "text", "text": prompt})
|
|
|
|
| 68 |
inputs = processor.apply_chat_template(
|
| 69 |
messages, tokenize=True, add_generation_prompt=True,
|
| 70 |
return_tensors="pt", return_dict=True,
|
| 71 |
+
).to("cuda")
|
| 72 |
|
| 73 |
with torch.no_grad():
|
| 74 |
out = model.generate(
|
|
|
|
| 98 |
stop: list[str] | None = None) -> dict:
|
| 99 |
try:
|
| 100 |
pil_images = self._decode_images(images)
|
| 101 |
+
_ensure_loaded(self.model_id) # CPU load (no GPU time)
|
| 102 |
+
text = _generate(prompt, pil_images, max_tokens, temperature)
|
| 103 |
return LlamaCppClient._parse_json_reply(text)
|
| 104 |
except Exception as e: # pragma: no cover - needs GPU + model
|
| 105 |
logger.warning("transformers VLM failed (%s) β falling back to rubric", e)
|
formscout/ui/theme.py
CHANGED
|
@@ -59,15 +59,20 @@ def formscout_theme() -> gr.Theme:
|
|
| 59 |
button_secondary_background_fill="rgba(156, 188, 173, 0.55)",
|
| 60 |
button_secondary_text_color=INK,
|
| 61 |
# Inputs
|
| 62 |
-
input_background_fill="rgba(255, 255, 255, 0.
|
| 63 |
-
input_background_fill_dark="rgba(255, 255, 255, 0.
|
|
|
|
| 64 |
input_border_color="rgba(43, 138, 138, 0.30)",
|
| 65 |
input_border_color_focus="rgba(43, 138, 138, 0.75)",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
# Text
|
| 67 |
body_text_color=INK,
|
| 68 |
body_text_color_dark=INK,
|
| 69 |
block_title_text_color=TEAL_DEEP,
|
| 70 |
-
block_label_text_color=INK_MUTED,
|
| 71 |
# Spacing
|
| 72 |
block_padding="16px",
|
| 73 |
layout_gap="16px",
|
|
|
|
| 59 |
button_secondary_background_fill="rgba(156, 188, 173, 0.55)",
|
| 60 |
button_secondary_text_color=INK,
|
| 61 |
# Inputs
|
| 62 |
+
input_background_fill="rgba(255, 255, 255, 0.92)",
|
| 63 |
+
input_background_fill_dark="rgba(255, 255, 255, 0.92)",
|
| 64 |
+
input_background_fill_focus="rgba(255, 255, 255, 1.0)",
|
| 65 |
input_border_color="rgba(43, 138, 138, 0.30)",
|
| 66 |
input_border_color_focus="rgba(43, 138, 138, 0.75)",
|
| 67 |
+
# Labels — pin light in both modes so no dark dropdown header appears
|
| 68 |
+
block_label_background_fill="rgba(188, 211, 200, 0.55)",
|
| 69 |
+
block_label_background_fill_dark="rgba(188, 211, 200, 0.55)",
|
| 70 |
+
block_label_text_color=INK,
|
| 71 |
+
block_label_text_color_dark=INK,
|
| 72 |
# Text
|
| 73 |
body_text_color=INK,
|
| 74 |
body_text_color_dark=INK,
|
| 75 |
block_title_text_color=TEAL_DEEP,
|
|
|
|
| 76 |
# Spacing
|
| 77 |
block_padding="16px",
|
| 78 |
layout_gap="16px",
|
scripts/hf_upload.sh
CHANGED
|
@@ -21,8 +21,11 @@ set -euo pipefail
|
|
| 21 |
|
| 22 |
cd "$(dirname "$0")/.."
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
| 26 |
MSG="${1:-$(git log -1 --pretty=%s)}"
|
| 27 |
LARGE_THRESHOLD="${FORMSCOUT_HF_LARGE_THRESHOLD:-500}"
|
| 28 |
|
|
@@ -76,22 +79,62 @@ if (( N_FILES == 0 )); then
|
|
| 76 |
exit 1
|
| 77 |
fi
|
| 78 |
|
|
|
|
|
|
|
|
|
|
| 79 |
upload_repo() {
|
| 80 |
local repo="$1"
|
|
|
|
| 81 |
if (( N_FILES > LARGE_THRESHOLD )); then
|
| 82 |
echo "ββ $repo: $N_FILES files > $LARGE_THRESHOLD, using upload-large-folder"
|
| 83 |
echo " (resumable; commits directly to main β no PR, no custom message)"
|
| 84 |
hf upload-large-folder "$repo" . "${EXCLUDES[@]}"
|
|
|
|
|
|
|
|
|
|
| 85 |
else
|
| 86 |
-
echo "ββ uploading to: $repo"
|
| 87 |
-
hf upload "$repo" . .
|
| 88 |
-
"${EXCLUDES[@]}" \
|
| 89 |
-
--create-pr \
|
| 90 |
-
--commit-message="$MSG"
|
| 91 |
fi
|
| 92 |
}
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
cd "$(dirname "$0")/.."
|
| 23 |
|
| 24 |
+
REPO_NAME="small-functional-movement-screening"
|
| 25 |
+
BLADE_OWNER="${FORMSCOUT_HF_BLADE_OWNER:-BladeSzaSza}"
|
| 26 |
+
MODEL_REPO="silas-therapy/$REPO_NAME"
|
| 27 |
+
SPACE_REPO="spaces/silas-therapy/$REPO_NAME"
|
| 28 |
+
SPACE_BLADESZASZA_REPO="spaces/$BLADE_OWNER/$REPO_NAME"
|
| 29 |
MSG="${1:-$(git log -1 --pretty=%s)}"
|
| 30 |
LARGE_THRESHOLD="${FORMSCOUT_HF_LARGE_THRESHOLD:-500}"
|
| 31 |
|
|
|
|
| 79 |
exit 1
|
| 80 |
fi
|
| 81 |
|
| 82 |
+
# upload_repo <repo> [pr|direct]
|
| 83 |
+
# pr → open a PR (shared org repos; review before merge)
|
| 84 |
+
# direct → commit straight to main (repos you own; deploys immediately)
|
| 85 |
upload_repo() {
|
| 86 |
local repo="$1"
|
| 87 |
+
local mode="${2:-pr}"
|
| 88 |
if (( N_FILES > LARGE_THRESHOLD )); then
|
| 89 |
echo "ββ $repo: $N_FILES files > $LARGE_THRESHOLD, using upload-large-folder"
|
| 90 |
echo " (resumable; commits directly to main β no PR, no custom message)"
|
| 91 |
hf upload-large-folder "$repo" . "${EXCLUDES[@]}"
|
| 92 |
+
elif [[ "$mode" == "direct" ]]; then
|
| 93 |
+
echo "ββ uploading (direct β main) to: $repo"
|
| 94 |
+
hf upload "$repo" . . "${EXCLUDES[@]}" --commit-message="$MSG"
|
| 95 |
else
|
| 96 |
+
echo "ββ uploading (PR) to: $repo"
|
| 97 |
+
hf upload "$repo" . . "${EXCLUDES[@]}" --create-pr --commit-message="$MSG"
|
|
|
|
|
|
|
|
|
|
| 98 |
fi
|
| 99 |
}
|
| 100 |
|
| 101 |
+
# Ensure the personal ZeroGPU Space exists. Tries zero-a10g (needs Pro/ZeroGPU);
|
| 102 |
+
# falls back to cpu-basic so the upload still has a target (set ZeroGPU in
|
| 103 |
+
# Settings afterward). Idempotent via --exist-ok.
|
| 104 |
+
ensure_blade_space() {
|
| 105 |
+
local id="$BLADE_OWNER/$REPO_NAME"
|
| 106 |
+
if hf repos create "$id" --type space --space-sdk gradio --flavor zero-a10g --exist-ok 2>/dev/null; then
|
| 107 |
+
echo "ββ Space ready (ZeroGPU / zero-a10g): $id"; return 0
|
| 108 |
+
fi
|
| 109 |
+
if hf repos create "$id" --type space --space-sdk gradio --exist-ok 2>/dev/null; then
|
| 110 |
+
echo "ββ Space created cpu-basic (set ZeroGPU in Settings β Hardware): $id"; return 0
|
| 111 |
+
fi
|
| 112 |
+
return 1
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
blade_help() {
|
| 116 |
+
cat >&2 <<EOF
|
| 117 |
+
ββ β Could not create/deploy to $SPACE_BLADESZASZA_REPO
|
| 118 |
+
Your active HF token can push to silas-therapy but not create repos under
|
| 119 |
+
"$BLADE_OWNER". To deploy your own ZeroGPU Space:
|
| 120 |
+
1) In the HF UI create a Space: $BLADE_OWNER/$REPO_NAME
|
| 121 |
+
SDK = Gradio, Hardware = ZeroGPU (Nvidia A10G).
|
| 122 |
+
2) Re-auth with a token that can write there:
|
| 123 |
+
hf auth login (token with 'Write' role, or fine-grained with
|
| 124 |
+
write access to $BLADE_OWNER)
|
| 125 |
+
3) Re-run ./scripts/hf_upload.sh
|
| 126 |
+
EOF
|
| 127 |
+
}
|
| 128 |
|
| 129 |
+
# Shared org repos → PRs; personal ZeroGPU Space → created + direct deploy.
|
| 130 |
+
upload_repo "$MODEL_REPO" pr
|
| 131 |
+
upload_repo "$SPACE_REPO" pr
|
| 132 |
+
|
| 133 |
+
set +e
|
| 134 |
+
if ensure_blade_space; then
|
| 135 |
+
upload_repo "$SPACE_BLADESZASZA_REPO" direct || blade_help
|
| 136 |
+
else
|
| 137 |
+
blade_help
|
| 138 |
+
fi
|
| 139 |
+
set -e
|
| 140 |
+
echo "β done (silas-therapy PRs created; see any notes above for the personal Space)"
|
tests/test_judge_backend.py
CHANGED
|
@@ -21,9 +21,19 @@ def test_resolve_backend_default_local(monkeypatch):
|
|
| 21 |
assert cfg.resolve_judge_backend() == "llama_cpp"
|
| 22 |
|
| 23 |
|
| 24 |
-
def
|
| 25 |
-
cfg = _reload_config(monkeypatch, FORMSCOUT_JUDGE_BACKEND="auto",
|
|
|
|
| 26 |
assert cfg.resolve_judge_backend() == "transformers"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
def test_resolve_backend_explicit(monkeypatch):
|
|
|
|
| 21 |
assert cfg.resolve_judge_backend() == "llama_cpp"
|
| 22 |
|
| 23 |
|
| 24 |
+
def test_resolve_backend_auto_on_zero_gpu_space(monkeypatch):
|
| 25 |
+
cfg = _reload_config(monkeypatch, FORMSCOUT_JUDGE_BACKEND="auto",
|
| 26 |
+
SPACE_ID="me/space", SPACES_ZERO_GPU="true")
|
| 27 |
assert cfg.resolve_judge_backend() == "transformers"
|
| 28 |
+
importlib.reload(config)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def test_resolve_backend_auto_on_cpu_space_stays_llama(monkeypatch):
|
| 32 |
+
# A CPU-only Space must NOT load the 17 GB transformers model.
|
| 33 |
+
cfg = _reload_config(monkeypatch, FORMSCOUT_JUDGE_BACKEND="auto",
|
| 34 |
+
SPACE_ID="me/space", SPACES_ZERO_GPU=None, ZERO_GPU=None)
|
| 35 |
+
assert cfg.resolve_judge_backend() == "llama_cpp"
|
| 36 |
+
importlib.reload(config)
|
| 37 |
|
| 38 |
|
| 39 |
def test_resolve_backend_explicit(monkeypatch):
|