File size: 2,106 Bytes
fa2127b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""ZeroGPU integration helper.

Hugging Face Spaces' ZeroGPU tier exposes a `spaces` Python package that
provides `@spaces.GPU(duration=...)` — when applied to a function, the
runtime allocates an H200 slot for that call and releases it when the
call returns. The package is only importable inside a ZeroGPU Space; on
local dev, CI, and any non-ZeroGPU Space, the decorator must be a no-op
so the same code runs unchanged.

Contract (pinned):

- `is_zero_gpu_available()` → True only when `import spaces` succeeds AND
  `spaces.GPU` is callable. Used by `gpu_runtime.json` and `gpu-status`
  CLI to surface the integration state.
- `gpu(duration=N)` → decorator. On a ZeroGPU Space it returns
  `spaces.GPU(duration=N)`. Anywhere else it returns a passthrough that
  leaves the function unchanged.
- The integration is intentionally invisible to callers: every code path
  decorated here runs the exact same way locally as it does on a regular
  non-ZeroGPU Space (CPU-basic / paid L4 / etc.). ZeroGPU users get the
  slot allocation; everyone else gets the no-op.
"""

from __future__ import annotations

from typing import Any, Callable, TypeVar

F = TypeVar("F", bound=Callable[..., Any])

# Probe for the ZeroGPU SDK once, at import time. Both module globals start
# in the "SDK absent" state and are only upgraded when the import works.
_spaces = None  # type: ignore
_SPACES_AVAILABLE = False
try:  # pragma: no cover - exercised on HF ZeroGPU Spaces only
    import spaces as _spaces  # type: ignore

    # Availability additionally requires a callable `GPU` attribute.
    _SPACES_AVAILABLE = callable(getattr(_spaces, "GPU", None))
except ImportError:
    # Drop any module reference that was bound before the failure.
    _spaces = None  # type: ignore


def is_zero_gpu_available() -> bool:
    """Report whether the ZeroGPU integration is active in this process.

    Returns the module-level `_SPACES_AVAILABLE` flag, which is True only
    if `import spaces` succeeded and `spaces.GPU` is callable.
    """
    return bool(_SPACES_AVAILABLE)


def gpu(duration: int = 60) -> Callable[[F], F]:
    """Build a decorator that requests a ZeroGPU slot for each call.

    Args:
        duration: Maximum slot time in seconds (default 60). ZeroGPU
            prioritises shorter durations, so choose the smallest value
            that comfortably covers a typical invocation of your model.

    Returns:
        `spaces.GPU(duration=duration)` when the `spaces` SDK is available;
        otherwise an identity decorator that hands the function back
        untouched, so decorated code runs unchanged off-Space.
    """
    if not _SPACES_AVAILABLE:
        # No SDK: decorating must be a no-op.
        def _identity(func: F) -> F:
            return func

        return _identity

    return _spaces.GPU(duration=duration)  # type: ignore[no-any-return]