File size: 778 Bytes
88a545a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
"""GPU device management for inspection endpoints.

Provides round-robin device selection across available GPUs,
matching the multi-GPU pattern used in inference.py.
"""

import threading
import torch
import logging

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Running count of GPU assignments handed out by next_device(); it is reduced
# modulo the current GPU count there to pick the device index.
_gpu_counter = 0
# Guards the read-and-increment of _gpu_counter so concurrent callers each
# observe a distinct counter value.
_gpu_lock = threading.Lock()


def get_num_gpus() -> int:
    """Count the CUDA devices torch can see.

    Returns:
        ``torch.cuda.device_count()`` when CUDA is available, otherwise 0.
    """
    if not torch.cuda.is_available():
        return 0
    return torch.cuda.device_count()


def next_device() -> str:
    """Pick the device for the next request, cycling through the GPUs in order.

    Returns:
        ``"cuda:<index>"`` where the index is the shared counter modulo the
        current GPU count, or ``"cpu"`` when no GPUs are reported. The shared
        counter is only advanced when a GPU is returned.
    """
    global _gpu_counter
    gpu_total = get_num_gpus()
    if gpu_total:
        # Read and bump the counter under the lock so two concurrent callers
        # never consume the same counter value.
        with _gpu_lock:
            slot = _gpu_counter % gpu_total
            _gpu_counter += 1
        return f"cuda:{slot}"
    return "cpu"