zeroshotGPU / zsgdp /gpu /runtime.py
Arjunvir Singh
Add ZeroGPU integration
fa2127b
"""GPU runtime and Hugging Face Spaces status helpers."""
from __future__ import annotations
from dataclasses import dataclass, field
import os
from typing import Any
from zsgdp.gpu.model_server import GPUModelConfig
from zsgdp.gpu.zero_gpu import is_zero_gpu_available
from zsgdp.utils import to_plain_data
@dataclass(slots=True)
class GPURuntimeStatus:
provider: str
backend: str
space_name: str
gpu_models_target: str
running_on_huggingface_space: bool
space_id: str | None
hardware: str | None
device: str
torch_available: bool
torch_version: str | None = None
cuda_available: bool = False
cuda_device_count: int = 0
cuda_devices: list[str] = field(default_factory=list)
mps_available: bool = False
batch_pages: bool = True
max_batch_size: int = 4
max_gpu_seconds_per_doc: float = 120.0
max_vlm_calls_per_doc: int = 30
configured_models: dict[str, Any] = field(default_factory=dict)
zero_gpu_available: bool = False
notes: list[str] = field(default_factory=list)
def to_dict(self) -> dict[str, Any]:
return to_plain_data(self)
def collect_gpu_runtime_status(config: dict[str, Any]) -> GPURuntimeStatus:
    """Build a ``GPURuntimeStatus`` from configuration, environment and PyTorch.

    Args:
        config: Application configuration mapping. The ``"gpu"`` and
            ``"deployment"`` sections are read here; the whole mapping is
            also handed to ``GPUModelConfig.from_config``.

    Returns:
        A populated ``GPURuntimeStatus`` whose ``notes`` list carries
        human-readable diagnostics about the detected environment.
    """
    gpu_section = config.get("gpu", {})
    deployment_section = config.get("deployment", {})
    model_config = GPUModelConfig.from_config(config)
    torch_info = _torch_status()

    env = os.environ
    # Either variable being set is treated as "running inside a Space".
    on_space = bool(env.get("SPACE_ID") or env.get("SPACE_HOST"))
    hardware_label = env.get("SPACE_HARDWARE") or env.get("HF_SPACE_HARDWARE")
    chosen_device = _preferred_device(torch_info)
    has_zero_gpu = is_zero_gpu_available()

    # One diagnostic line per device kind that `_preferred_device` can return.
    device_messages = {
        "cpu": "No CUDA or MPS accelerator was detected by PyTorch.",
        "cuda": "CUDA accelerator detected.",
        "mps": "Apple MPS accelerator detected.",
    }

    messages: list[str] = []
    if not on_space:
        messages.append(
            "Hugging Face Spaces environment variables were not detected; this looks like a local run."
        )

    device_message = device_messages.get(chosen_device)
    if device_message is not None:
        messages.append(device_message)

    if model_config.provider == "huggingface_spaces" and not hardware_label:
        messages.append(
            "No Space hardware label was found; set hardware in the Space settings for GPU deployment."
        )

    if has_zero_gpu:
        messages.append("ZeroGPU SDK detected — H200 slots will be allocated per @spaces.GPU call.")
    elif on_space and (hardware_label or "").lower().startswith("zero"):
        # The hardware label claims ZeroGPU but the SDK check came back negative.
        messages.append(
            "Hardware reports ZeroGPU but the `spaces` SDK was not importable; install via the Space's requirements.txt."
        )

    return GPURuntimeStatus(
        provider=model_config.provider,
        backend=model_config.backend,
        space_name=model_config.space_name,
        gpu_models_target=str(
            deployment_section.get("gpu_models_target", model_config.space_name)
        ),
        running_on_huggingface_space=on_space,
        space_id=env.get("SPACE_ID"),
        hardware=hardware_label,
        device=chosen_device,
        batch_pages=bool(gpu_section.get("batch_pages", True)),
        max_batch_size=model_config.max_batch_size,
        max_gpu_seconds_per_doc=float(gpu_section.get("max_gpu_seconds_per_doc", 120)),
        max_vlm_calls_per_doc=int(gpu_section.get("max_vlm_calls_per_doc", 30)),
        configured_models=dict(gpu_section.get("models", {})),
        zero_gpu_available=has_zero_gpu,
        notes=messages,
        **torch_info,
    )
def _torch_status() -> dict[str, Any]:
try:
import torch # type: ignore
except Exception:
return {
"torch_available": False,
"torch_version": None,
"cuda_available": False,
"cuda_device_count": 0,
"cuda_devices": [],
"mps_available": False,
}
cuda_available = bool(torch.cuda.is_available())
cuda_device_count = int(torch.cuda.device_count()) if cuda_available else 0
cuda_devices = [torch.cuda.get_device_name(index) for index in range(cuda_device_count)]
mps_available = bool(getattr(torch.backends, "mps", None) and torch.backends.mps.is_available())
return {
"torch_available": True,
"torch_version": getattr(torch, "__version__", None),
"cuda_available": cuda_available,
"cuda_device_count": cuda_device_count,
"cuda_devices": cuda_devices,
"mps_available": mps_available,
}
def _preferred_device(torch_status: dict[str, Any]) -> str:
if torch_status.get("cuda_available"):
return "cuda"
if torch_status.get("mps_available"):
return "mps"
return "cpu"