"""GPU runtime and Hugging Face Spaces status helpers.""" from __future__ import annotations from dataclasses import dataclass, field import os from typing import Any from zsgdp.gpu.model_server import GPUModelConfig from zsgdp.gpu.zero_gpu import is_zero_gpu_available from zsgdp.utils import to_plain_data @dataclass(slots=True) class GPURuntimeStatus: provider: str backend: str space_name: str gpu_models_target: str running_on_huggingface_space: bool space_id: str | None hardware: str | None device: str torch_available: bool torch_version: str | None = None cuda_available: bool = False cuda_device_count: int = 0 cuda_devices: list[str] = field(default_factory=list) mps_available: bool = False batch_pages: bool = True max_batch_size: int = 4 max_gpu_seconds_per_doc: float = 120.0 max_vlm_calls_per_doc: int = 30 configured_models: dict[str, Any] = field(default_factory=dict) zero_gpu_available: bool = False notes: list[str] = field(default_factory=list) def to_dict(self) -> dict[str, Any]: return to_plain_data(self) def collect_gpu_runtime_status(config: dict[str, Any]) -> GPURuntimeStatus: gpu = config.get("gpu", {}) deployment = config.get("deployment", {}) model_config = GPUModelConfig.from_config(config) torch_status = _torch_status() running_on_space = bool(os.environ.get("SPACE_ID") or os.environ.get("SPACE_HOST")) hardware = os.environ.get("SPACE_HARDWARE") or os.environ.get("HF_SPACE_HARDWARE") device = _preferred_device(torch_status) zero_gpu = is_zero_gpu_available() notes: list[str] = [] if not running_on_space: notes.append("Hugging Face Spaces environment variables were not detected; this looks like a local run.") if device == "cpu": notes.append("No CUDA or MPS accelerator was detected by PyTorch.") elif device == "cuda": notes.append("CUDA accelerator detected.") elif device == "mps": notes.append("Apple MPS accelerator detected.") if model_config.provider == "huggingface_spaces" and not hardware: notes.append("No Space hardware label was found; set hardware in the Space settings for GPU deployment.") if zero_gpu: notes.append("ZeroGPU SDK detected — H200 slots will be allocated per @spaces.GPU call.") elif running_on_space and (hardware or "").lower().startswith("zero"): notes.append("Hardware reports ZeroGPU but the `spaces` SDK was not importable; install via the Space's requirements.txt.") return GPURuntimeStatus( provider=model_config.provider, backend=model_config.backend, space_name=model_config.space_name, gpu_models_target=str(deployment.get("gpu_models_target", model_config.space_name)), running_on_huggingface_space=running_on_space, space_id=os.environ.get("SPACE_ID"), hardware=hardware, device=device, batch_pages=bool(gpu.get("batch_pages", True)), max_batch_size=model_config.max_batch_size, max_gpu_seconds_per_doc=float(gpu.get("max_gpu_seconds_per_doc", 120)), max_vlm_calls_per_doc=int(gpu.get("max_vlm_calls_per_doc", 30)), configured_models=dict(gpu.get("models", {})), zero_gpu_available=zero_gpu, notes=notes, **torch_status, ) def _torch_status() -> dict[str, Any]: try: import torch # type: ignore except Exception: return { "torch_available": False, "torch_version": None, "cuda_available": False, "cuda_device_count": 0, "cuda_devices": [], "mps_available": False, } cuda_available = bool(torch.cuda.is_available()) cuda_device_count = int(torch.cuda.device_count()) if cuda_available else 0 cuda_devices = [torch.cuda.get_device_name(index) for index in range(cuda_device_count)] mps_available = bool(getattr(torch.backends, "mps", None) and torch.backends.mps.is_available()) return { "torch_available": True, "torch_version": getattr(torch, "__version__", None), "cuda_available": cuda_available, "cuda_device_count": cuda_device_count, "cuda_devices": cuda_devices, "mps_available": mps_available, } def _preferred_device(torch_status: dict[str, Any]) -> str: if torch_status.get("cuda_available"): return "cuda" if torch_status.get("mps_available"): return "mps" return "cpu"