hardware: zero-gpu
Browse files
- README.md +1 -0
- requirements.txt +1 -0
- src/loader.py +23 -10
README.md
CHANGED
|
@@ -6,6 +6,7 @@ colorTo: indigo
|
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: "5.25.0"
|
| 8 |
python_version: "3.12"
|
|
|
|
| 9 |
pinned: true
|
| 10 |
license: apache-2.0
|
| 11 |
tags:
|
|
|
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: "5.25.0"
|
| 8 |
python_version: "3.12"
|
| 9 |
+
hardware: zero-gpu
|
| 10 |
pinned: true
|
| 11 |
license: apache-2.0
|
| 12 |
tags:
|
requirements.txt
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
# ── PyTorch (CUDA build for HF GPU Spaces) ────────────────────────────────
|
| 2 |
--extra-index-url https://download.pytorch.org/whl/cu124
|
| 3 |
torch
|
|
|
|
| 4 |
|
| 5 |
# ── ML / HuggingFace ───────────────────────────────────────────────────────
|
| 6 |
transformers>=4.50.0
|
|
|
|
| 1 |
# ── PyTorch (CUDA build for HF GPU Spaces) ────────────────────────────────
|
| 2 |
--extra-index-url https://download.pytorch.org/whl/cu124
|
| 3 |
torch
|
| 4 |
+
spaces
|
| 5 |
|
| 6 |
# ── ML / HuggingFace ───────────────────────────────────────────────────────
|
| 7 |
transformers>=4.50.0
|
src/loader.py
CHANGED
|
@@ -172,15 +172,20 @@ def get_text_model(
|
|
| 172 |
def _is_zerogpu_error(e: Exception) -> bool:
|
| 173 |
"""Return True for errors that indicate ZeroGPU failed to allocate / init a GPU.
|
| 174 |
|
| 175 |
-
The spaces package
|
| 176 |
-
|
|
|
|
| 177 |
"""
|
| 178 |
import traceback as _tb
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
msg = str(e)
|
| 180 |
if "No CUDA GPUs are available" in msg or "CUDA" in msg:
|
| 181 |
return True
|
| 182 |
-
# spaces re-wraps: RuntimeError('RuntimeError')
|
| 183 |
-
if
|
| 184 |
return True
|
| 185 |
# Inspect traceback for ZeroGPU stack frames
|
| 186 |
full_tb = "".join(_tb.format_exception(type(e), e, e.__traceback__))
|
|
@@ -260,10 +265,14 @@ def run_inference(
|
|
| 260 |
logger.info(f"Running inference with {model_name}, max_tokens={max_new_tokens}, temp={temperature}")
|
| 261 |
try:
|
| 262 |
return _run_inference_gpu(prompt, model_name, max_new_tokens, temperature, **kwargs)
|
| 263 |
-
except
|
| 264 |
if _is_zerogpu_error(e):
|
| 265 |
-
logger.warning("ZeroGPU unavailable (%s) — retrying on CPU", e)
|
| 266 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
logger.error(f"Inference failed for {model_name}: {e}", exc_info=True)
|
| 268 |
raise
|
| 269 |
|
|
@@ -285,10 +294,14 @@ def run_inference_with_image(
|
|
| 285 |
logger.info(f"Running vision inference with {model_name}, max_tokens={max_new_tokens}")
|
| 286 |
try:
|
| 287 |
return _run_inference_with_image_gpu(prompt, image, model_name, max_new_tokens, temperature, **kwargs)
|
| 288 |
-
except
|
| 289 |
if _is_zerogpu_error(e):
|
| 290 |
-
logger.warning("ZeroGPU unavailable (%s) — retrying vision inference on CPU", e)
|
| 291 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
logger.error(f"Vision inference failed for {model_name}: {e}", exc_info=True)
|
| 293 |
raise
|
| 294 |
|
|
|
|
| 172 |
def _is_zerogpu_error(e: Exception) -> bool:
|
| 173 |
"""Return True for errors that indicate ZeroGPU failed to allocate / init a GPU.
|
| 174 |
|
| 175 |
+
The spaces package raises ZeroGPUException (not RuntimeError) in newer versions,
|
| 176 |
+
and re-wraps the original CUDA RuntimeError as RuntimeError('RuntimeError') in
|
| 177 |
+
older versions, so we check for multiple patterns.
|
| 178 |
"""
|
| 179 |
import traceback as _tb
|
| 180 |
+
# Check exception class name — spaces raises ZeroGPUException in newer versions
|
| 181 |
+
cls_name = type(e).__name__
|
| 182 |
+
if "ZeroGPU" in cls_name or "GPU" in cls_name:
|
| 183 |
+
return True
|
| 184 |
msg = str(e)
|
| 185 |
if "No CUDA GPUs are available" in msg or "CUDA" in msg:
|
| 186 |
return True
|
| 187 |
+
# spaces re-wraps with the type name: RuntimeError("'RuntimeError'") or RuntimeError("RuntimeError")
|
| 188 |
+
if "RuntimeError" in msg:
|
| 189 |
return True
|
| 190 |
# Inspect traceback for ZeroGPU stack frames
|
| 191 |
full_tb = "".join(_tb.format_exception(type(e), e, e.__traceback__))
|
|
|
|
| 265 |
logger.info(f"Running inference with {model_name}, max_tokens={max_new_tokens}, temp={temperature}")
|
| 266 |
try:
|
| 267 |
return _run_inference_gpu(prompt, model_name, max_new_tokens, temperature, **kwargs)
|
| 268 |
+
except Exception as e:
|
| 269 |
if _is_zerogpu_error(e):
|
| 270 |
+
logger.warning("ZeroGPU unavailable (%s: %s) — retrying on CPU", type(e).__name__, e)
|
| 271 |
+
try:
|
| 272 |
+
return _inference_core(prompt, model_name, max_new_tokens, temperature, **kwargs)
|
| 273 |
+
except Exception as cpu_err:
|
| 274 |
+
logger.error(f"CPU fallback also failed for {model_name}: {cpu_err}", exc_info=True)
|
| 275 |
+
raise
|
| 276 |
logger.error(f"Inference failed for {model_name}: {e}", exc_info=True)
|
| 277 |
raise
|
| 278 |
|
|
|
|
| 294 |
logger.info(f"Running vision inference with {model_name}, max_tokens={max_new_tokens}")
|
| 295 |
try:
|
| 296 |
return _run_inference_with_image_gpu(prompt, image, model_name, max_new_tokens, temperature, **kwargs)
|
| 297 |
+
except Exception as e:
|
| 298 |
if _is_zerogpu_error(e):
|
| 299 |
+
logger.warning("ZeroGPU unavailable (%s: %s) — retrying vision inference on CPU", type(e).__name__, e)
|
| 300 |
+
try:
|
| 301 |
+
return _inference_with_image_core(prompt, image, model_name, max_new_tokens, temperature, **kwargs)
|
| 302 |
+
except Exception as cpu_err:
|
| 303 |
+
logger.error(f"CPU vision fallback also failed for {model_name}: {cpu_err}", exc_info=True)
|
| 304 |
+
raise
|
| 305 |
logger.error(f"Vision inference failed for {model_name}: {e}", exc_info=True)
|
| 306 |
raise
|
| 307 |
|