Spaces:
Running on Zero
Running on Zero
Arjunvir Singh committed on
Commit ·
31ea79c
1
Parent(s): 4e3af73
smoke_embedding: drop eager probe (incompatible with ZeroGPU CUDA emulation)
Browse files
ZeroGPU runs the main process in CUDA-emulation mode that intercepts
operations only inside @spaces.GPU-decorated functions. Calling
SentenceTransformer(...) in the main process — as the smoke probe did
to give early load diagnostics — triggers 'Low-level CUDA init reached'
and skips the smoke before the actual benchmark runs.
The benchmark below already routes through _gpu_encode_batch (decorated,
runs in worker), so the right thing is to let any model-load failure
surface through that path as an error status.
Test updated: assert benchmark exceptions yield status=error rather
than the old skip-with-install-hint path.
- scripts/run_space_smoke.py +6 -18
- tests/test_space_smoke.py +10 -17
scripts/run_space_smoke.py
CHANGED
|
@@ -201,28 +201,16 @@ def smoke_embedding() -> SmokeResult:
|
|
| 201 |
|
| 202 |
import os
|
| 203 |
|
| 204 |
-
from zsgdp.benchmarks.embedding_retriever import EmbeddingRetriever
|
| 205 |
from zsgdp.benchmarks.parser_quality import run_parser_benchmark
|
| 206 |
|
| 207 |
override_model_id = os.environ.get("ZSGDP_SMOKE_EMBEDDING_MODEL_ID") or None
|
| 208 |
|
| 209 |
-
#
|
| 210 |
-
#
|
| 211 |
-
#
|
| 212 |
-
#
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
retriever._ensure_embedder() # type: ignore[attr-defined] # private but intentional
|
| 216 |
-
except Exception as exc:
|
| 217 |
-
return SmokeResult(
|
| 218 |
-
name="embedding",
|
| 219 |
-
status="skip",
|
| 220 |
-
elapsed_seconds=time.perf_counter() - started,
|
| 221 |
-
skip_reason=f"embedding model failed to load: {exc}",
|
| 222 |
-
install_hint="Set HF_TOKEN if the model is gated, OR set "
|
| 223 |
-
"ZSGDP_SMOKE_EMBEDDING_MODEL_ID=sentence-transformers/all-MiniLM-L6-v2 "
|
| 224 |
-
"to use a smaller compat-friendly model.",
|
| 225 |
-
)
|
| 226 |
|
| 227 |
with tempfile.TemporaryDirectory() as tmp:
|
| 228 |
tmp_path = Path(tmp)
|
|
|
|
| 201 |
|
| 202 |
import os
|
| 203 |
|
|
|
|
| 204 |
from zsgdp.benchmarks.parser_quality import run_parser_benchmark
|
| 205 |
|
| 206 |
override_model_id = os.environ.get("ZSGDP_SMOKE_EMBEDDING_MODEL_ID") or None
|
| 207 |
|
| 208 |
+
# NOTE: do not eagerly instantiate SentenceTransformer in the main
|
| 209 |
+
# process. On ZeroGPU Spaces, the main process runs in CUDA-emulation
|
| 210 |
+
# mode and any CUDA call outside a @spaces.GPU-decorated function raises
|
| 211 |
+
# "Low-level CUDA init reached". The actual benchmark below routes
|
| 212 |
+
# through _gpu_encode_batch (decorated), which is the supported path.
|
| 213 |
+
# Errors inside the benchmark surface through the `error` status branch.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
|
| 215 |
with tempfile.TemporaryDirectory() as tmp:
|
| 216 |
tmp_path = Path(tmp)
|
tests/test_space_smoke.py
CHANGED
|
@@ -138,27 +138,20 @@ class RunSmokesIntegrationTests(unittest.TestCase):
|
|
| 138 |
self.assertIn("sentence-transformers", result.skip_reason)
|
| 139 |
self.assertIn("pip install", result.install_hint)
|
| 140 |
|
| 141 |
-
def
|
| 142 |
-
#
|
| 143 |
-
#
|
| 144 |
-
#
|
| 145 |
-
#
|
| 146 |
-
# because smoke_embedding does a function-local lazy import.
|
| 147 |
-
from unittest.mock import MagicMock
|
| 148 |
-
|
| 149 |
-
retriever_mock = MagicMock()
|
| 150 |
-
retriever_mock.return_value._ensure_embedder.side_effect = RuntimeError("synthetic load failure")
|
| 151 |
-
|
| 152 |
with patch("scripts.run_space_smoke.importlib.util.find_spec") as find_spec, patch(
|
| 153 |
-
"zsgdp.benchmarks.
|
|
|
|
| 154 |
):
|
| 155 |
-
find_spec.return_value = object()
|
| 156 |
result = smoke_embedding()
|
| 157 |
|
| 158 |
-
self.assertEqual(result.status, "
|
| 159 |
-
self.assertIn("synthetic
|
| 160 |
-
self.assertIn("ZSGDP_SMOKE_EMBEDDING_MODEL_ID", result.install_hint)
|
| 161 |
-
self.assertIn("all-MiniLM-L6-v2", result.install_hint)
|
| 162 |
|
| 163 |
def test_marker_smoke_skips_when_binary_missing(self):
|
| 164 |
with patch("scripts.run_space_smoke.shutil.which", return_value=None):
|
|
|
|
| 138 |
self.assertIn("sentence-transformers", result.skip_reason)
|
| 139 |
self.assertIn("pip install", result.install_hint)
|
| 140 |
|
| 141 |
+
def test_embedding_smoke_routes_failures_through_error_status(self):
|
| 142 |
+
# We removed the eager EmbeddingRetriever probe (it triggered ZeroGPU's
|
| 143 |
+
# CUDA-init guard in the main process). Failures now surface through
|
| 144 |
+
# run_parser_benchmark — exception caught in the smoke wrapper and
|
| 145 |
+
# turned into an `error` status result.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
with patch("scripts.run_space_smoke.importlib.util.find_spec") as find_spec, patch(
|
| 147 |
+
"zsgdp.benchmarks.parser_quality.run_parser_benchmark",
|
| 148 |
+
side_effect=RuntimeError("synthetic benchmark failure"),
|
| 149 |
):
|
| 150 |
+
find_spec.return_value = object()
|
| 151 |
result = smoke_embedding()
|
| 152 |
|
| 153 |
+
self.assertEqual(result.status, "error")
|
| 154 |
+
self.assertIn("synthetic benchmark failure", result.detail.get("exception", ""))
|
|
|
|
|
|
|
| 155 |
|
| 156 |
def test_marker_smoke_skips_when_binary_missing(self):
|
| 157 |
with patch("scripts.run_space_smoke.shutil.which", return_value=None):
|