Spaces:

arjun10g
/

zeroshotGPU

Running on Zero

Arjunvir Singh committed 19 days ago

Commit

31ea79c

1 Parent(s): 4e3af73

smoke_embedding: drop eager probe (incompatible with ZeroGPU CUDA emulation)

ZeroGPU runs the main process in CUDA-emulation mode that intercepts
operations only inside @spaces.GPU-decorated functions. Calling
SentenceTransformer(...) in the main process — as the smoke probe did
to give early load diagnostics — triggers 'Low-level CUDA init reached'
and skips the smoke before the actual benchmark runs.

The benchmark below already routes through _gpu_encode_batch (decorated,
runs in worker), so the right thing is to let any model-load failure
surface through that path as an `error` status.

Test updated: assert benchmark exceptions yield status=error rather
than the old skip-with-install-hint path.

Files changed (2) hide show

scripts/run_space_smoke.py +6 -18
tests/test_space_smoke.py +10 -17

scripts/run_space_smoke.py CHANGED Viewed

@@ -201,28 +201,16 @@ def smoke_embedding() -> SmokeResult:
     import os
-    from zsgdp.benchmarks.embedding_retriever import EmbeddingRetriever
     from zsgdp.benchmarks.parser_quality import run_parser_benchmark
     override_model_id = os.environ.get("ZSGDP_SMOKE_EMBEDDING_MODEL_ID") or None
-    # Try to load the configured embedding model. If the load fails (no HF
-    # token, download error, OOM at import time), we report it as a skip
-    # with the exception text so the operator sees what to fix without the
-    # whole smoke run blowing up.
-    try:
-        retriever = EmbeddingRetriever(model_id=override_model_id) if override_model_id else EmbeddingRetriever()
-        retriever._ensure_embedder()  # type: ignore[attr-defined]  # private but intentional
-    except Exception as exc:
-        return SmokeResult(
-            name="embedding",
-            status="skip",
-            elapsed_seconds=time.perf_counter() - started,
-            skip_reason=f"embedding model failed to load: {exc}",
-            install_hint="Set HF_TOKEN if the model is gated, OR set "
-                        "ZSGDP_SMOKE_EMBEDDING_MODEL_ID=sentence-transformers/all-MiniLM-L6-v2 "
-                        "to use a smaller compat-friendly model.",
-        )
     with tempfile.TemporaryDirectory() as tmp:
         tmp_path = Path(tmp)

     import os
     from zsgdp.benchmarks.parser_quality import run_parser_benchmark
     override_model_id = os.environ.get("ZSGDP_SMOKE_EMBEDDING_MODEL_ID") or None
+    # NOTE: do not eagerly instantiate SentenceTransformer in the main
+    # process. On ZeroGPU Spaces, the main process runs in CUDA-emulation
+    # mode and any CUDA call outside a @spaces.GPU-decorated function raises
+    # "Low-level CUDA init reached". The actual benchmark below routes
+    # through _gpu_encode_batch (decorated), which is the supported path.
+    # Errors inside the benchmark surface through the `error` status branch.
     with tempfile.TemporaryDirectory() as tmp:
         tmp_path = Path(tmp)

tests/test_space_smoke.py CHANGED Viewed

@@ -138,27 +138,20 @@ class RunSmokesIntegrationTests(unittest.TestCase):
         self.assertIn("sentence-transformers", result.skip_reason)
         self.assertIn("pip install", result.install_hint)
-    def test_embedding_smoke_install_hint_mentions_model_override(self):
-        # When the model fails to load (e.g. jina-v3 transformers compat),
-        # the install_hint must point at the env-var override path so the
-        # operator can immediately switch to a compat-friendly model.
-        # Patch where EmbeddingRetriever is *defined*, not where it's imported,
-        # because smoke_embedding does a function-local lazy import.
-        from unittest.mock import MagicMock
-        retriever_mock = MagicMock()
-        retriever_mock.return_value._ensure_embedder.side_effect = RuntimeError("synthetic load failure")
         with patch("scripts.run_space_smoke.importlib.util.find_spec") as find_spec, patch(
-            "zsgdp.benchmarks.embedding_retriever.EmbeddingRetriever", retriever_mock
         ):
-            find_spec.return_value = object()  # spec found, dep present
             result = smoke_embedding()
-        self.assertEqual(result.status, "skip")
-        self.assertIn("synthetic load failure", result.skip_reason)
-        self.assertIn("ZSGDP_SMOKE_EMBEDDING_MODEL_ID", result.install_hint)
-        self.assertIn("all-MiniLM-L6-v2", result.install_hint)
     def test_marker_smoke_skips_when_binary_missing(self):
         with patch("scripts.run_space_smoke.shutil.which", return_value=None):

         self.assertIn("sentence-transformers", result.skip_reason)
         self.assertIn("pip install", result.install_hint)
+    def test_embedding_smoke_routes_failures_through_error_status(self):
+        # We removed the eager EmbeddingRetriever probe (it triggered ZeroGPU's
+        # CUDA-init guard in the main process). Failures now surface through
+        # run_parser_benchmark — exception caught in the smoke wrapper and
+        # turned into an `error` status result.
         with patch("scripts.run_space_smoke.importlib.util.find_spec") as find_spec, patch(
+            "zsgdp.benchmarks.parser_quality.run_parser_benchmark",
+            side_effect=RuntimeError("synthetic benchmark failure"),
         ):
+            find_spec.return_value = object()
             result = smoke_embedding()
+        self.assertEqual(result.status, "error")
+        self.assertIn("synthetic benchmark failure", result.detail.get("exception", ""))
     def test_marker_smoke_skips_when_binary_missing(self):
         with patch("scripts.run_space_smoke.shutil.which", return_value=None):