"""GPU device management for inspection endpoints.

Provides round-robin device selection across available GPUs, matching the
multi-GPU pattern used in inference.py.
"""

import logging
import threading

import torch

logger = logging.getLogger(__name__)

# Monotonically increasing count of GPU assignments handed out so far; the
# device index is derived from it modulo the current GPU count.
_gpu_counter = 0
# Guards the read-modify-write of ``_gpu_counter`` in ``next_device``.
_gpu_lock = threading.Lock()


def get_num_gpus() -> int:
    """Return the number of available CUDA GPUs.

    Returns:
        ``torch.cuda.device_count()`` when ``torch.cuda.is_available()`` is
        true, otherwise ``0``.
    """
    return torch.cuda.device_count() if torch.cuda.is_available() else 0


def next_device() -> str:
    """Return the next GPU device string via round-robin, or 'cpu' if no GPUs.

    Each call that finds at least one GPU returns ``"cuda:<i>"`` where ``<i>``
    is the module-level counter modulo the GPU count, then advances the
    counter. The counter is read and incremented while holding ``_gpu_lock``.

    Returns:
        ``"cpu"`` when ``get_num_gpus()`` reports zero devices, otherwise a
        ``"cuda:<index>"`` string.
    """
    global _gpu_counter

    # The GPU count is queried before taking the lock; only the counter
    # update itself is serialized.
    num_gpus = get_num_gpus()
    if num_gpus == 0:
        return "cpu"

    with _gpu_lock:
        device = f"cuda:{_gpu_counter % num_gpus}"
        _gpu_counter += 1
    return device