"""GPU device management for inspection endpoints.

Provides round-robin device selection across available GPUs,
matching the multi-GPU pattern used in inference.py.
"""

import threading
import torch
import logging

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Running count of device assignments handed out by next_device(); the GPU
# index is this value modulo the current GPU count.
_gpu_counter = 0
# Guards the read-and-increment of _gpu_counter inside next_device().
_gpu_lock = threading.Lock()


def get_num_gpus() -> int:
    """Count the CUDA GPUs torch reports, or 0 when CUDA is unavailable."""
    if not torch.cuda.is_available():
        return 0
    return torch.cuda.device_count()


def next_device() -> str:
    """Pick the device string for the next caller.

    Returns ``"cpu"`` when ``get_num_gpus()`` reports zero GPUs. Otherwise
    returns ``"cuda:<i>"``, where ``<i>`` is the shared module counter modulo
    the GPU count; the counter is read and advanced while holding
    ``_gpu_lock`` so successive calls cycle through the GPU indices.
    """
    global _gpu_counter
    gpu_count = get_num_gpus()
    if gpu_count != 0:
        # Read the current slot and bump the counter as one locked step.
        with _gpu_lock:
            slot = _gpu_counter % gpu_count
            _gpu_counter += 1
        return f"cuda:{slot}"
    return "cpu"