# Spaces: arjun10g / zeroshotGPU
# Running on Zero
# File size: 4,542 Bytes

"""GPU runtime and Hugging Face Spaces status helpers."""

from __future__ import annotations

from dataclasses import dataclass, field
import os
from typing import Any

from zsgdp.gpu.model_server import GPUModelConfig
from zsgdp.gpu.zero_gpu import is_zero_gpu_available
from zsgdp.utils import to_plain_data


@dataclass(slots=True)
class GPURuntimeStatus:
    """Snapshot of the GPU runtime and Hugging Face Spaces environment.

    Instances are assembled by ``collect_gpu_runtime_status`` in this module
    from the pipeline config, process environment variables, and a PyTorch
    probe. The first nine fields are required; the rest have defaults.
    """

    # --- Values taken from the parsed GPUModelConfig / deployment config ---
    provider: str
    backend: str
    space_name: str
    # config["deployment"]["gpu_models_target"], falling back to space_name.
    gpu_models_target: str

    # --- Hugging Face Spaces environment ---
    # True when SPACE_ID or SPACE_HOST is set in the environment.
    running_on_huggingface_space: bool
    # Raw SPACE_ID environment variable, or None when unset.
    space_id: str | None
    # SPACE_HARDWARE or HF_SPACE_HARDWARE, or None when neither is set.
    hardware: str | None

    # --- PyTorch probe results ---
    # One of "cuda", "mps", or "cpu" (see _preferred_device).
    device: str
    torch_available: bool
    torch_version: str | None = None
    cuda_available: bool = False
    cuda_device_count: int = 0
    # Device names as reported by torch.cuda.get_device_name, one per device.
    cuda_devices: list[str] = field(default_factory=list)
    mps_available: bool = False

    # --- Limits and model settings read from config["gpu"] ---
    batch_pages: bool = True
    max_batch_size: int = 4
    max_gpu_seconds_per_doc: float = 120.0
    max_vlm_calls_per_doc: int = 30
    configured_models: dict[str, Any] = field(default_factory=dict)

    # Result of is_zero_gpu_available() at collection time.
    zero_gpu_available: bool = False
    # Human-readable diagnostics about the detected environment.
    notes: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return this status as converted by ``to_plain_data``.

        NOTE(review): presumably a plain, serializable dict of the fields;
        confirm against ``zsgdp.utils.to_plain_data``.
        """
        return to_plain_data(self)


def collect_gpu_runtime_status(config: dict[str, Any]) -> GPURuntimeStatus:
    """Build a ``GPURuntimeStatus`` for the current process.

    Combines three sources:

    * ``config`` -- the ``"gpu"`` and ``"deployment"`` sections plus the
      ``GPUModelConfig`` parsed from the whole mapping;
    * environment variables -- ``SPACE_ID``, ``SPACE_HOST``,
      ``SPACE_HARDWARE`` and ``HF_SPACE_HARDWARE``;
    * runtime probes -- ``_torch_status()`` and ``is_zero_gpu_available()``.

    Args:
        config: Pipeline configuration mapping. Missing ``"gpu"`` or
            ``"deployment"`` sections are treated as empty.

    Returns:
        The populated status, including human-readable ``notes`` describing
        what was (or was not) detected.
    """
    gpu_section = config.get("gpu", {})
    deployment_section = config.get("deployment", {})
    model_config = GPUModelConfig.from_config(config)
    torch_info = _torch_status()

    env = os.environ
    on_space = bool(env.get("SPACE_ID") or env.get("SPACE_HOST"))
    hardware_label = env.get("SPACE_HARDWARE") or env.get("HF_SPACE_HARDWARE")
    chosen_device = _preferred_device(torch_info)
    has_zero_gpu = is_zero_gpu_available()

    # One diagnostic line per known device kind; unknown kinds add nothing.
    device_messages = {
        "cpu": "No CUDA or MPS accelerator was detected by PyTorch.",
        "cuda": "CUDA accelerator detected.",
        "mps": "Apple MPS accelerator detected.",
    }

    messages: list[str] = []
    if not on_space:
        messages.append(
            "Hugging Face Spaces environment variables were not detected; this looks like a local run."
        )

    device_message = device_messages.get(chosen_device)
    if device_message is not None:
        messages.append(device_message)

    if model_config.provider == "huggingface_spaces" and not hardware_label:
        messages.append(
            "No Space hardware label was found; set hardware in the Space settings for GPU deployment."
        )

    if has_zero_gpu:
        messages.append("ZeroGPU SDK detected — H200 slots will be allocated per @spaces.GPU call.")
    elif on_space and (hardware_label or "").lower().startswith("zero"):
        # The Space advertises ZeroGPU hardware, yet the SDK probe failed.
        messages.append(
            "Hardware reports ZeroGPU but the `spaces` SDK was not importable; install via the Space's requirements.txt."
        )

    models_target = deployment_section.get("gpu_models_target", model_config.space_name)
    return GPURuntimeStatus(
        provider=model_config.provider,
        backend=model_config.backend,
        space_name=model_config.space_name,
        gpu_models_target=str(models_target),
        running_on_huggingface_space=on_space,
        space_id=env.get("SPACE_ID"),
        hardware=hardware_label,
        device=chosen_device,
        batch_pages=bool(gpu_section.get("batch_pages", True)),
        max_batch_size=model_config.max_batch_size,
        max_gpu_seconds_per_doc=float(gpu_section.get("max_gpu_seconds_per_doc", 120)),
        max_vlm_calls_per_doc=int(gpu_section.get("max_vlm_calls_per_doc", 30)),
        configured_models=dict(gpu_section.get("models", {})),
        zero_gpu_available=has_zero_gpu,
        notes=messages,
        # Supplies the torch_* / cuda_* / mps_* fields.
        **torch_info,
    )


def _torch_status() -> dict[str, Any]:
    try:
        import torch  # type: ignore
    except Exception:
        return {
            "torch_available": False,
            "torch_version": None,
            "cuda_available": False,
            "cuda_device_count": 0,
            "cuda_devices": [],
            "mps_available": False,
        }

    cuda_available = bool(torch.cuda.is_available())
    cuda_device_count = int(torch.cuda.device_count()) if cuda_available else 0
    cuda_devices = [torch.cuda.get_device_name(index) for index in range(cuda_device_count)]
    mps_available = bool(getattr(torch.backends, "mps", None) and torch.backends.mps.is_available())
    return {
        "torch_available": True,
        "torch_version": getattr(torch, "__version__", None),
        "cuda_available": cuda_available,
        "cuda_device_count": cuda_device_count,
        "cuda_devices": cuda_devices,
        "mps_available": mps_available,
    }


def _preferred_device(torch_status: dict[str, Any]) -> str:
    if torch_status.get("cuda_available"):
        return "cuda"
    if torch_status.get("mps_available"):
        return "mps"
    return "cpu"