Spaces:
Running on Zero
Running on Zero
| """ZeroGPU integration helper. | |
| Hugging Face Spaces' ZeroGPU tier exposes a `spaces` Python package that | |
| provides `@spaces.GPU(duration=...)` — when applied to a function, the | |
| runtime allocates an H200 slot for that call and releases it when the | |
| call returns. The package is only importable inside a ZeroGPU Space; on | |
| local dev, CI, and any non-ZeroGPU Space, the decorator must be a no-op | |
| so the same code runs unchanged. | |
| Contract (pinned): | |
| - `is_zero_gpu_available()` → True only when `import spaces` succeeds AND | |
| `spaces.GPU` is callable. Used by `gpu_runtime.json` and `gpu-status` | |
| CLI to surface the integration state. | |
| - `gpu(duration=N)` → decorator. On a ZeroGPU Space it returns | |
| `spaces.GPU(duration=N)`. Anywhere else it returns a passthrough that | |
| leaves the function unchanged. | |
| - The integration is intentionally invisible to callers: every code path | |
| decorated here runs the exact same way locally as it does on a regular | |
| non-ZeroGPU Space (CPU-basic / L4 / etc.). ZeroGPU users get the slot | |
| allocation; everyone else gets the no-op. | |
| """ | |
| from __future__ import annotations | |
| from typing import Any, Callable, TypeVar | |
# Generic "any callable" type so decorators can declare that they hand back
# the same kind of function they were given.
F = TypeVar("F", bound=Callable[..., Any])

# Probe for the Hugging Face `spaces` SDK once, at import time. Afterwards
# `_spaces` is the imported module (or None when the import failed) and
# `_SPACES_AVAILABLE` records whether a callable `GPU` attribute was found.
try:  # pragma: no cover - exercised on HF ZeroGPU Spaces only
    import spaces as _spaces  # type: ignore

    _SPACES_AVAILABLE = hasattr(_spaces, "GPU") and callable(_spaces.GPU)
except ImportError:
    # SDK not importable here: switch the integration off.
    _spaces, _SPACES_AVAILABLE = None, False  # type: ignore
def is_zero_gpu_available() -> bool:
    """Report whether the ZeroGPU integration is active in this process.

    Returns:
        The module-level `_SPACES_AVAILABLE` flag, i.e. whether the `spaces`
        SDK was importable and exposed a callable `spaces.GPU`.
    """
    zero_gpu_active: bool = _SPACES_AVAILABLE
    return zero_gpu_active
def gpu(duration: int = 60) -> Callable[[F], F]:
    """Build a decorator that requests a ZeroGPU slot for each call.

    When the `spaces` SDK is unavailable the returned decorator is an
    identity function, so decorated code runs unchanged off-Space.

    Args:
        duration: Maximum slot time in seconds (default 60). ZeroGPU
            prioritises shorter durations, so choose the smallest value that
            comfortably fits the model's typical invocation.

    Returns:
        `spaces.GPU(duration=duration)` when the SDK is available, otherwise
        a decorator that returns the function it receives untouched.
    """
    if not _SPACES_AVAILABLE:
        # Off-Space: hand back the very same function object, no wrapper.
        def _identity(func: F) -> F:
            return func

        return _identity

    return _spaces.GPU(duration=duration)  # type: ignore[no-any-return]