Arjunvir Singh committed on
Commit
31ea79c
·
1 Parent(s): 4e3af73

smoke_embedding: drop eager probe (incompatible with ZeroGPU CUDA emulation)

Browse files

ZeroGPU runs the main process in CUDA-emulation mode that intercepts
operations only inside @spaces.GPU-decorated functions. Calling
SentenceTransformer(...) in the main process — as the smoke probe did
to give early load diagnostics — triggers 'Low-level CUDA init reached'
and skips the smoke before the actual benchmark runs.

The benchmark below already routes through _gpu_encode_batch (decorated,
runs in worker), so the right thing is to let any model-load failure
surface through that path as an `error` status.

Test updated: assert benchmark exceptions yield status=error rather
than the old skip-with-install-hint path.

scripts/run_space_smoke.py CHANGED
@@ -201,28 +201,16 @@ def smoke_embedding() -> SmokeResult:
201
 
202
  import os
203
 
204
- from zsgdp.benchmarks.embedding_retriever import EmbeddingRetriever
205
  from zsgdp.benchmarks.parser_quality import run_parser_benchmark
206
 
207
  override_model_id = os.environ.get("ZSGDP_SMOKE_EMBEDDING_MODEL_ID") or None
208
 
209
- # Try to load the configured embedding model. If the load fails (no HF
210
- # token, download error, OOM at import time), we report it as a skip
211
- # with the exception text so the operator sees what to fix without the
212
- # whole smoke run blowing up.
213
- try:
214
- retriever = EmbeddingRetriever(model_id=override_model_id) if override_model_id else EmbeddingRetriever()
215
- retriever._ensure_embedder() # type: ignore[attr-defined] # private but intentional
216
- except Exception as exc:
217
- return SmokeResult(
218
- name="embedding",
219
- status="skip",
220
- elapsed_seconds=time.perf_counter() - started,
221
- skip_reason=f"embedding model failed to load: {exc}",
222
- install_hint="Set HF_TOKEN if the model is gated, OR set "
223
- "ZSGDP_SMOKE_EMBEDDING_MODEL_ID=sentence-transformers/all-MiniLM-L6-v2 "
224
- "to use a smaller compat-friendly model.",
225
- )
226
 
227
  with tempfile.TemporaryDirectory() as tmp:
228
  tmp_path = Path(tmp)
 
201
 
202
  import os
203
 
 
204
  from zsgdp.benchmarks.parser_quality import run_parser_benchmark
205
 
206
  override_model_id = os.environ.get("ZSGDP_SMOKE_EMBEDDING_MODEL_ID") or None
207
 
208
+ # NOTE: do not eagerly instantiate SentenceTransformer in the main
209
+ # process. On ZeroGPU Spaces, the main process runs in CUDA-emulation
210
+ # mode and any CUDA call outside a @spaces.GPU-decorated function raises
211
+ # "Low-level CUDA init reached". The actual benchmark below routes
212
+ # through _gpu_encode_batch (decorated), which is the supported path.
213
+ # Errors inside the benchmark surface through the `error` status branch.
 
 
 
 
 
 
 
 
 
 
 
214
 
215
  with tempfile.TemporaryDirectory() as tmp:
216
  tmp_path = Path(tmp)
tests/test_space_smoke.py CHANGED
@@ -138,27 +138,20 @@ class RunSmokesIntegrationTests(unittest.TestCase):
138
  self.assertIn("sentence-transformers", result.skip_reason)
139
  self.assertIn("pip install", result.install_hint)
140
 
141
- def test_embedding_smoke_install_hint_mentions_model_override(self):
142
- # When the model fails to load (e.g. jina-v3 transformers compat),
143
- # the install_hint must point at the env-var override path so the
144
- # operator can immediately switch to a compat-friendly model.
145
- # Patch where EmbeddingRetriever is *defined*, not where it's imported,
146
- # because smoke_embedding does a function-local lazy import.
147
- from unittest.mock import MagicMock
148
-
149
- retriever_mock = MagicMock()
150
- retriever_mock.return_value._ensure_embedder.side_effect = RuntimeError("synthetic load failure")
151
-
152
  with patch("scripts.run_space_smoke.importlib.util.find_spec") as find_spec, patch(
153
- "zsgdp.benchmarks.embedding_retriever.EmbeddingRetriever", retriever_mock
 
154
  ):
155
- find_spec.return_value = object() # spec found, dep present
156
  result = smoke_embedding()
157
 
158
- self.assertEqual(result.status, "skip")
159
- self.assertIn("synthetic load failure", result.skip_reason)
160
- self.assertIn("ZSGDP_SMOKE_EMBEDDING_MODEL_ID", result.install_hint)
161
- self.assertIn("all-MiniLM-L6-v2", result.install_hint)
162
 
163
  def test_marker_smoke_skips_when_binary_missing(self):
164
  with patch("scripts.run_space_smoke.shutil.which", return_value=None):
 
138
  self.assertIn("sentence-transformers", result.skip_reason)
139
  self.assertIn("pip install", result.install_hint)
140
 
141
+ def test_embedding_smoke_routes_failures_through_error_status(self):
142
+ # We removed the eager EmbeddingRetriever probe (it triggered ZeroGPU's
143
+ # CUDA-init guard in the main process). Failures now surface through
144
+ # run_parser_benchmark exception caught in the smoke wrapper and
145
+ # turned into an `error` status result.
 
 
 
 
 
 
146
  with patch("scripts.run_space_smoke.importlib.util.find_spec") as find_spec, patch(
147
+ "zsgdp.benchmarks.parser_quality.run_parser_benchmark",
148
+ side_effect=RuntimeError("synthetic benchmark failure"),
149
  ):
150
+ find_spec.return_value = object()
151
  result = smoke_embedding()
152
 
153
+ self.assertEqual(result.status, "error")
154
+ self.assertIn("synthetic benchmark failure", result.detail.get("exception", ""))
 
 
155
 
156
  def test_marker_smoke_skips_when_binary_missing(self):
157
  with patch("scripts.run_space_smoke.shutil.which", return_value=None):