ghitaben committed
Commit 6efd8e4 · 1 Parent(s): 244eb20

hardware: zero-gpu

Files changed (3)
  1. README.md +1 -0
  2. requirements.txt +1 -0
  3. src/loader.py +23 -10
README.md CHANGED
@@ -6,6 +6,7 @@ colorTo: indigo
 sdk: gradio
 sdk_version: "5.25.0"
 python_version: "3.12"
+hardware: zero-gpu
 pinned: true
 license: apache-2.0
 tags:
requirements.txt CHANGED
@@ -1,6 +1,7 @@
 # ── PyTorch (CUDA build for HF GPU Spaces) ────────────────────────────────
 --extra-index-url https://download.pytorch.org/whl/cu124
 torch
+spaces

 # ── ML / HuggingFace ───────────────────────────────────────────────────────
 transformers>=4.50.0
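
The new spaces dependency is the Hugging Face ZeroGPU client package; on zero-gpu hardware a GPU is attached only while a function decorated with @spaces.GPU is running. A minimal sketch of that pattern, assuming the same GPU-entry / CPU-core split that the loader.py fallback below relies on (the two function names mirror the diff, but their bodies are placeholders, not the project's implementation):

# Sketch only: typical use of the spaces package on ZeroGPU hardware.
# _inference_core / _run_inference_gpu mirror the names in src/loader.py,
# but the bodies here are placeholders rather than the real implementation.
import spaces
import torch

def _inference_core(prompt: str, **kwargs) -> str:
    # Device-agnostic path: also serves as the CPU fallback.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"[{device}] {prompt}"

@spaces.GPU  # ZeroGPU attaches a GPU only for the duration of this call
def _run_inference_gpu(prompt: str, **kwargs) -> str:
    return _inference_core(prompt, **kwargs)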
src/loader.py CHANGED
@@ -172,15 +172,20 @@ def get_text_model(
 def _is_zerogpu_error(e: Exception) -> bool:
     """Return True for errors that indicate ZeroGPU failed to allocate / init a GPU.

-    The spaces package re-wraps the original CUDA RuntimeError as
-    RuntimeError('RuntimeError'), so we check for that pattern too.
+    The spaces package raises ZeroGPUException (not RuntimeError) in newer versions,
+    and re-wraps the original CUDA RuntimeError as RuntimeError('RuntimeError') in
+    older versions, so we check for multiple patterns.
     """
     import traceback as _tb
+    # Check exception class name — spaces raises ZeroGPUException in newer versions
+    cls_name = type(e).__name__
+    if "ZeroGPU" in cls_name or "GPU" in cls_name:
+        return True
     msg = str(e)
     if "No CUDA GPUs are available" in msg or "CUDA" in msg:
         return True
-    # spaces re-wraps: RuntimeError('RuntimeError')
-    if msg == "RuntimeError":
+    # spaces re-wraps with the type name: RuntimeError("'RuntimeError'") or RuntimeError("RuntimeError")
+    if "RuntimeError" in msg:
         return True
     # Inspect traceback for ZeroGPU stack frames
     full_tb = "".join(_tb.format_exception(type(e), e, e.__traceback__))
@@ -260,10 +265,14 @@ def run_inference(
     logger.info(f"Running inference with {model_name}, max_tokens={max_new_tokens}, temp={temperature}")
     try:
         return _run_inference_gpu(prompt, model_name, max_new_tokens, temperature, **kwargs)
-    except RuntimeError as e:
+    except Exception as e:
         if _is_zerogpu_error(e):
-            logger.warning("ZeroGPU unavailable (%s) — retrying on CPU", e)
-            return _inference_core(prompt, model_name, max_new_tokens, temperature, **kwargs)
+            logger.warning("ZeroGPU unavailable (%s: %s) — retrying on CPU", type(e).__name__, e)
+            try:
+                return _inference_core(prompt, model_name, max_new_tokens, temperature, **kwargs)
+            except Exception as cpu_err:
+                logger.error(f"CPU fallback also failed for {model_name}: {cpu_err}", exc_info=True)
+                raise
         logger.error(f"Inference failed for {model_name}: {e}", exc_info=True)
         raise

@@ -285,10 +294,14 @@ def run_inference_with_image(
     logger.info(f"Running vision inference with {model_name}, max_tokens={max_new_tokens}")
     try:
         return _run_inference_with_image_gpu(prompt, image, model_name, max_new_tokens, temperature, **kwargs)
-    except RuntimeError as e:
+    except Exception as e:
         if _is_zerogpu_error(e):
-            logger.warning("ZeroGPU unavailable (%s) — retrying vision inference on CPU", e)
-            return _inference_with_image_core(prompt, image, model_name, max_new_tokens, temperature, **kwargs)
+            logger.warning("ZeroGPU unavailable (%s: %s) — retrying vision inference on CPU", type(e).__name__, e)
+            try:
+                return _inference_with_image_core(prompt, image, model_name, max_new_tokens, temperature, **kwargs)
+            except Exception as cpu_err:
+                logger.error(f"CPU vision fallback also failed for {model_name}: {cpu_err}", exc_info=True)
+                raise
         logger.error(f"Vision inference failed for {model_name}: {e}", exc_info=True)
         raise

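
For reference, a small sketch of what the broadened _is_zerogpu_error heuristic accepts after this change. It assumes src/loader.py is importable as src.loader; the ZeroGPUException class below is a local stand-in, since the real exception type lives in the spaces package:

# Illustrative probe of the detection heuristic against the error shapes the
# docstring describes; the stand-in exception class is hypothetical.
from src.loader import _is_zerogpu_error

class ZeroGPUException(RuntimeError):
    # Stand-in for the exception class raised by newer spaces versions.
    pass

samples = [
    RuntimeError("No CUDA GPUs are available"),  # raw CUDA init failure
    RuntimeError("RuntimeError"),                # older spaces re-wrapping
    ZeroGPUException("GPU task aborted"),        # matched via the class name
    ValueError("bad prompt"),                    # unrelated; should not match
]
for exc in samples:
    print(type(exc).__name__, "->", _is_zerogpu_error(exc))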
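
Callers are unchanged by this commit: run_inference keeps its signature, and a failed ZeroGPU allocation now degrades to the CPU path instead of surfacing a bare RuntimeError. A hypothetical call, with the argument order inferred from the calls shown in the diff and a placeholder model id:

# Hypothetical caller: the signature is inferred from the diff; the model id
# is a placeholder, not necessarily one the Space actually ships.
from src.loader import run_inference

text = run_inference(
    "Summarize ZeroGPU in one sentence.",  # prompt
    "org/placeholder-model",               # model_name (hypothetical)
    max_new_tokens=64,
    temperature=0.2,
)
print(text)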