Spaces:
Running on Zero
Running on Zero
| """GPU runtime and Hugging Face Spaces status helpers.""" | |
| from __future__ import annotations | |
| from dataclasses import dataclass, field | |
| import os | |
| from typing import Any | |
| from zsgdp.gpu.model_server import GPUModelConfig | |
| from zsgdp.gpu.zero_gpu import is_zero_gpu_available | |
| from zsgdp.utils import to_plain_data | |
| class GPURuntimeStatus: | |
| provider: str | |
| backend: str | |
| space_name: str | |
| gpu_models_target: str | |
| running_on_huggingface_space: bool | |
| space_id: str | None | |
| hardware: str | None | |
| device: str | |
| torch_available: bool | |
| torch_version: str | None = None | |
| cuda_available: bool = False | |
| cuda_device_count: int = 0 | |
| cuda_devices: list[str] = field(default_factory=list) | |
| mps_available: bool = False | |
| batch_pages: bool = True | |
| max_batch_size: int = 4 | |
| max_gpu_seconds_per_doc: float = 120.0 | |
| max_vlm_calls_per_doc: int = 30 | |
| configured_models: dict[str, Any] = field(default_factory=dict) | |
| zero_gpu_available: bool = False | |
| notes: list[str] = field(default_factory=list) | |
| def to_dict(self) -> dict[str, Any]: | |
| return to_plain_data(self) | |
def collect_gpu_runtime_status(config: dict[str, Any]) -> GPURuntimeStatus:
    """Assemble a ``GPURuntimeStatus`` from config, environment, and PyTorch.

    Args:
        config: Application configuration. The optional ``"gpu"`` and
            ``"deployment"`` mappings are read here; the whole dict is also
            handed to ``GPUModelConfig.from_config``.

    Returns:
        A populated ``GPURuntimeStatus`` whose ``notes`` list carries
        human-readable diagnostics about the detected environment.
    """
    gpu_section = config.get("gpu", {})
    deployment_section = config.get("deployment", {})
    models = GPUModelConfig.from_config(config)
    torch_info = _torch_status()

    env = os.environ
    # Either variable being set is treated as "running inside a Space".
    on_space = bool(env.get("SPACE_ID") or env.get("SPACE_HOST"))
    hardware_label = env.get("SPACE_HARDWARE") or env.get("HF_SPACE_HARDWARE")
    chosen_device = _preferred_device(torch_info)
    has_zero_gpu = is_zero_gpu_available()

    messages: list[str] = []
    if not on_space:
        messages.append(
            "Hugging Face Spaces environment variables were not detected; this looks like a local run."
        )

    # One diagnostic line per known device kind.
    device_message = {
        "cpu": "No CUDA or MPS accelerator was detected by PyTorch.",
        "cuda": "CUDA accelerator detected.",
        "mps": "Apple MPS accelerator detected.",
    }.get(chosen_device)
    if device_message is not None:
        messages.append(device_message)

    if models.provider == "huggingface_spaces" and not hardware_label:
        messages.append(
            "No Space hardware label was found; set hardware in the Space settings for GPU deployment."
        )

    if has_zero_gpu:
        messages.append("ZeroGPU SDK detected — H200 slots will be allocated per @spaces.GPU call.")
    elif on_space and (hardware_label or "").lower().startswith("zero"):
        # Hardware label says ZeroGPU but the SDK check came back negative.
        messages.append(
            "Hardware reports ZeroGPU but the `spaces` SDK was not importable; install via the Space's requirements.txt."
        )

    return GPURuntimeStatus(
        provider=models.provider,
        backend=models.backend,
        space_name=models.space_name,
        gpu_models_target=str(deployment_section.get("gpu_models_target", models.space_name)),
        running_on_huggingface_space=on_space,
        space_id=env.get("SPACE_ID"),
        hardware=hardware_label,
        device=chosen_device,
        batch_pages=bool(gpu_section.get("batch_pages", True)),
        max_batch_size=models.max_batch_size,
        max_gpu_seconds_per_doc=float(gpu_section.get("max_gpu_seconds_per_doc", 120)),
        max_vlm_calls_per_doc=int(gpu_section.get("max_vlm_calls_per_doc", 30)),
        configured_models=dict(gpu_section.get("models", {})),
        zero_gpu_available=has_zero_gpu,
        notes=messages,
        **torch_info,
    )
| def _torch_status() -> dict[str, Any]: | |
| try: | |
| import torch # type: ignore | |
| except Exception: | |
| return { | |
| "torch_available": False, | |
| "torch_version": None, | |
| "cuda_available": False, | |
| "cuda_device_count": 0, | |
| "cuda_devices": [], | |
| "mps_available": False, | |
| } | |
| cuda_available = bool(torch.cuda.is_available()) | |
| cuda_device_count = int(torch.cuda.device_count()) if cuda_available else 0 | |
| cuda_devices = [torch.cuda.get_device_name(index) for index in range(cuda_device_count)] | |
| mps_available = bool(getattr(torch.backends, "mps", None) and torch.backends.mps.is_available()) | |
| return { | |
| "torch_available": True, | |
| "torch_version": getattr(torch, "__version__", None), | |
| "cuda_available": cuda_available, | |
| "cuda_device_count": cuda_device_count, | |
| "cuda_devices": cuda_devices, | |
| "mps_available": mps_available, | |
| } | |
| def _preferred_device(torch_status: dict[str, Any]) -> str: | |
| if torch_status.get("cuda_available"): | |
| return "cuda" | |
| if torch_status.get("mps_available"): | |
| return "mps" | |
| return "cpu" | |