| """ |
| Backend detection and resolution utilities. |
| |
| Provides consistent backend selection and CUDA availability checking |
| across all applications. |
| |
| Availability checks use importlib.find_spec() for instant package detection |
| without importing heavy libraries. Actual imports happen lazily when the |
| backend is first used. |
| """ |
|
|
| import importlib.util |
| from typing import Tuple, Optional |
| from shared.utils.logging_config import get_logger |
|
|
| logger = get_logger(__name__) |
|
|
| |
|
|
| |
| |
|
|
| HAS_CUML_PACKAGE: bool = importlib.util.find_spec("cuml") is not None |
| HAS_CUPY_PACKAGE: bool = importlib.util.find_spec("cupy") is not None |
| HAS_TORCH_PACKAGE: bool = importlib.util.find_spec("torch") is not None |
|
|
| |
|
|
| |
| _cuda_check_cache: Optional[Tuple[bool, str]] = None |
|
|
|
|
| def check_cuda_available() -> Tuple[bool, str]: |
| """ |
| Check if CUDA is available for GPU-accelerated backends. |
| |
| Returns: |
| Tuple of (is_available, device_info_string) |
| """ |
| global _cuda_check_cache |
|
|
| if _cuda_check_cache is not None: |
| return _cuda_check_cache |
|
|
| |
| if HAS_TORCH_PACKAGE: |
| try: |
| import torch |
| if torch.cuda.is_available(): |
| device_name = torch.cuda.get_device_name(0) |
| _cuda_check_cache = (True, device_name) |
| logger.info(f"CUDA available via PyTorch: {device_name}") |
| return _cuda_check_cache |
| except ImportError: |
| pass |
|
|
| |
| if HAS_CUPY_PACKAGE: |
| try: |
| import cupy as cp |
| if cp.cuda.is_available(): |
| device = cp.cuda.Device(0) |
| device_info = f"GPU {device.id}" |
| _cuda_check_cache = (True, device_info) |
| logger.info(f"CUDA available via CuPy: {device_info}") |
| return _cuda_check_cache |
| except ImportError: |
| pass |
|
|
| _cuda_check_cache = (False, "CPU only") |
| logger.info("CUDA not available, using CPU") |
| return _cuda_check_cache |
|
|
|
|
| def check_cuml_available() -> bool: |
| """Check if cuML is available (actual import, for runtime use).""" |
| if not HAS_CUML_PACKAGE: |
| return False |
| try: |
| import cuml |
| return True |
| except ImportError: |
| return False |
|
|
|
|
| def resolve_backend(backend: str, operation: str = "general") -> str: |
| """ |
| Resolve 'auto' backend to actual backend based on available hardware. |
| |
| Args: |
| backend: Requested backend ("auto", "sklearn", "cuml") |
| operation: Operation type for logging ("clustering", "reduction", "general") |
| |
| Returns: |
| Resolved backend name. CPU paths always go through sklearn. |
| """ |
| if backend != "auto": |
| logger.debug(f"Using explicitly requested backend: {backend}") |
| return backend |
|
|
| cuda_available, device_info = check_cuda_available() |
| |
| if cuda_available and check_cuml_available(): |
| resolved = "cuml" |
| logger.info(f"Auto-resolved {operation} backend to cuML (GPU: {device_info})") |
| else: |
| resolved = "sklearn" |
| logger.info(f"Auto-resolved {operation} backend to sklearn (CPU)") |
|
|
| return resolved |
|
|
|
|
| def get_backend_info() -> dict: |
| """ |
| Get comprehensive backend availability information. |
| |
| Returns: |
| Dictionary with backend availability status |
| """ |
| cuda_available, device_info = check_cuda_available() |
|
|
| return { |
| "cuda_available": cuda_available, |
| "device_info": device_info, |
| "cuml_available": check_cuml_available(), |
| } |
|
|
|
|
| def is_gpu_error(error: Exception) -> bool: |
| """ |
| Check if an exception is a GPU-related error. |
| |
| Args: |
| error: Exception to check |
| |
| Returns: |
| True if error is GPU-related |
| """ |
| error_msg = str(error).lower() |
| gpu_indicators = [ |
| "out of memory", |
| "oom", |
| "cuda", |
| "gpu", |
| "nvrtc", |
| "libnvrtc", |
| "no kernel image", |
| "cudaerror", |
| ] |
| return any(indicator in error_msg for indicator in gpu_indicators) |
|
|
|
|
| def is_oom_error(error: Exception) -> bool: |
| """Check if an exception is an out-of-memory error.""" |
| error_msg = str(error).lower() |
| oom_indicators = [ |
| "out of memory", |
| "cudaerroroutofmemory", |
| "oom", |
| "memory allocation failed", |
| "cudamalloc failed", |
| "failed to allocate", |
| ] |
| return any(indicator in error_msg for indicator in oom_indicators) |
|
|
|
|
| def is_cuda_arch_error(error: Exception) -> bool: |
| """Check if an exception is a CUDA architecture incompatibility error.""" |
| error_msg = str(error).lower() |
| arch_indicators = [ |
| "no kernel image", |
| "cudaerrornokernel", |
| "unsupported gpu", |
| "compute capability", |
| ] |
| return any(indicator in error_msg for indicator in arch_indicators) |
|
|