"""ZeroGPU integration helper. Hugging Face Spaces' ZeroGPU tier exposes a `spaces` Python package that provides `@spaces.GPU(duration=...)` — when applied to a function, the runtime allocates an H200 slot for that call and releases it when the call returns. The package is only importable inside a ZeroGPU Space; on local dev, CI, and any non-ZeroGPU Space, the decorator must be a no-op so the same code runs unchanged. Contract (pinned): - `is_zero_gpu_available()` → True only when `import spaces` succeeds AND `spaces.GPU` is callable. Used by `gpu_runtime.json` and `gpu-status` CLI to surface the integration state. - `gpu(duration=N)` → decorator. On a ZeroGPU Space it returns `spaces.GPU(duration=N)`. Anywhere else it returns a passthrough that leaves the function unchanged. - The integration is intentionally invisible to callers: every code path decorated here runs the exact same way locally as it does on a regular paid GPU Space (CPU-basic / L4 / etc.). ZeroGPU users get the slot allocation; everyone else gets the no-op. """ from __future__ import annotations from typing import Any, Callable, TypeVar F = TypeVar("F", bound=Callable[..., Any]) try: # pragma: no cover - exercised on HF ZeroGPU Spaces only import spaces as _spaces # type: ignore _SPACES_AVAILABLE = callable(getattr(_spaces, "GPU", None)) except ImportError: _spaces = None # type: ignore _SPACES_AVAILABLE = False def is_zero_gpu_available() -> bool: """True when the `spaces` SDK is importable and exposes `spaces.GPU`.""" return _SPACES_AVAILABLE def gpu(duration: int = 60) -> Callable[[F], F]: """Allocate a ZeroGPU slot for the decorated call (no-op off-Space). `duration` is the maximum slot time in seconds. ZeroGPU prioritises shorter durations, so pick the smallest value that comfortably fits your model's typical invocation. Defaults to 60s. """ if _SPACES_AVAILABLE: return _spaces.GPU(duration=duration) # type: ignore[no-any-return] def passthrough(fn: F) -> F: return fn return passthrough