| | """ |
| | Device and Hardware Management Module |
| | ==================================== |
| | |
| | Handles device detection, CUDA compatibility, memory management, |
| | and threading configuration for BackgroundFX Pro. |
| | |
| | Fixes: |
| | - CUDA multiprocessor_count compatibility error |
| | - OpenMP threading issues (OMP_NUM_THREADS) |
| | - GPU memory optimization |
| | - Automatic device selection |
| | |
| | Author: BackgroundFX Pro Team |
| | License: MIT |
| | """ |
| |
|
| | import os |
| | import logging |
| | import warnings |
| | from typing import Dict, Optional, List, Tuple |
| | import platform |
| | import psutil |
| |
|
| | |
# Cap thread counts for OpenMP / MKL / NumExpr before the numeric libraries
# initialize; setdefault keeps any values the user already exported.
_DEFAULT_THREADS = '4'
for _env_var in ('OMP_NUM_THREADS', 'MKL_NUM_THREADS', 'NUMEXPR_NUM_THREADS'):
    os.environ.setdefault(_env_var, _DEFAULT_THREADS)
| |
|
| | try: |
| | import torch |
| | TORCH_AVAILABLE = True |
| | except ImportError: |
| | TORCH_AVAILABLE = False |
| | warnings.warn("PyTorch not available - using CPU-only processing") |
| |
|
| | try: |
| | import cv2 |
| | OPENCV_AVAILABLE = True |
| | except ImportError: |
| | OPENCV_AVAILABLE = False |
| | warnings.warn("OpenCV not available") |
| |
|
| | logger = logging.getLogger(__name__) |
| |
|
class DeviceManager:
    """Manages device detection, selection and optimization.

    Responsibilities:
      * cap thread counts for OpenMP/MKL/NumExpr/PyTorch/OpenCV
      * detect CUDA GPUs, with compatibility handling for the SM-count
        property whose name changed across PyTorch versions
      * select the optimal processing device and expose memory/diagnostics

    Call :meth:`initialize` once before use; it never raises and falls
    back to ``'cpu'`` on any failure.
    """

    # Thread count applied to every numeric backend.
    DEFAULT_NUM_THREADS = 4
    # GPUs with less memory (GB) than this are skipped in favor of CPU.
    MIN_GPU_MEMORY_GB = 2.0

    def __init__(self):
        self.device: Optional[str] = None   # selected device ('cuda:N' / 'cpu')
        self.device_info: Dict = {}         # per-GPU properties keyed by 'cuda:N'
        self.cuda_available = False
        self.gpu_count = 0
        self.memory_info: Dict = {}
        self.threading_configured = False

    def initialize(self) -> bool:
        """Initialize device manager and configure optimal settings.

        Returns:
            True on success; False after falling back to CPU on failure.
        """
        try:
            logger.info("🔧 Initializing Device Manager...")

            self._configure_threading()
            self._detect_devices()

            if self.cuda_available:
                self._configure_cuda()

            self.device = self._select_optimal_device()
            self._log_system_info()

            logger.info(f"✅ Device Manager initialized - Using: {self.device}")
            return True

        except Exception as e:
            logger.error(f"❌ Device Manager initialization failed: {e}")
            self.device = 'cpu'
            return False

    def _configure_threading(self):
        """Configure threading for optimal performance.

        Uses ``os.environ.setdefault`` (instead of the previous
        check-then-set, which was dead code after the module-level
        defaults) so user-exported values always win.
        """
        try:
            for var in ('OMP_NUM_THREADS', 'MKL_NUM_THREADS', 'NUMEXPR_NUM_THREADS'):
                os.environ.setdefault(var, str(self.DEFAULT_NUM_THREADS))

            if TORCH_AVAILABLE:
                try:
                    torch.set_num_threads(self.DEFAULT_NUM_THREADS)
                    # Raises RuntimeError if interop parallelism already
                    # started; keep it non-fatal so OpenCV is still configured.
                    torch.set_num_interop_threads(self.DEFAULT_NUM_THREADS)
                except RuntimeError as e:
                    logger.warning(f"⚠️ Torch thread configuration skipped: {e}")

            if OPENCV_AVAILABLE:
                cv2.setNumThreads(self.DEFAULT_NUM_THREADS)

            self.threading_configured = True
            logger.info(f"✅ Threading configured: OMP={os.environ.get('OMP_NUM_THREADS')}")

        except Exception as e:
            logger.warning(f"⚠️ Threading configuration warning: {e}")

    def _detect_devices(self):
        """Detect available computing devices and record their properties."""
        try:
            if not TORCH_AVAILABLE:
                self.cuda_available = False
                self.gpu_count = 0
                return

            self.cuda_available = torch.cuda.is_available()
            self.gpu_count = torch.cuda.device_count() if self.cuda_available else 0

            if self.cuda_available:
                logger.info(f"✅ CUDA available: {self.gpu_count} GPU(s)")
                for i in range(self.gpu_count):
                    try:
                        props = self._get_cuda_properties_safe(i)
                        self.device_info[f'cuda:{i}'] = props
                        logger.info(f"  GPU {i}: {props['name']} ({props['memory_gb']:.1f} GB)")
                    except Exception as e:
                        logger.warning(f"  GPU {i}: Properties unavailable ({e})")
            else:
                logger.info("ℹ️ CUDA not available - using CPU")

        except Exception as e:
            logger.error(f"❌ Device detection failed: {e}")
            self.cuda_available = False
            self.gpu_count = 0

    def _get_cuda_properties_safe(self, device_id: int) -> Dict:
        """Safely get CUDA device properties with compatibility handling.

        PyTorch exposed the SM count as ``multi_processor_count`` in recent
        versions and ``multiprocessor_count`` in some older ones; probe both
        before falling back to a compute-capability heuristic.
        """
        try:
            if not TORCH_AVAILABLE or not torch.cuda.is_available():
                return {}

            props = torch.cuda.get_device_properties(device_id)

            sm_count = getattr(
                props, 'multi_processor_count',
                getattr(props, 'multiprocessor_count', None)
            )
            if sm_count is None:
                try:
                    major, minor = torch.cuda.get_device_capability(device_id)
                    # Rough heuristic only - real SM counts vary per model.
                    sm_count = major * 8 if major >= 6 else major * 4
                except Exception:  # narrowed from a bare except
                    sm_count = 'Unknown'

            return {
                'name': props.name,
                'memory_gb': props.total_memory / (1024 ** 3),
                'memory_bytes': props.total_memory,
                'multiprocessor_count': sm_count,
                'major': props.major,
                'minor': props.minor,
                'compute_capability': f"{props.major}.{props.minor}",
            }

        except Exception as e:
            logger.error(f"❌ Error getting CUDA properties for device {device_id}: {e}")
            # Placeholder so callers can still index the common keys.
            return {
                'name': 'Unknown GPU',
                'memory_gb': 0.0,
                'memory_bytes': 0,
                'multiprocessor_count': 'Unknown',
                'error': str(e),
            }

    def _configure_cuda(self):
        """Configure CUDA for optimal performance (cuDNN autotune, cache clear)."""
        try:
            if not self.cuda_available or not TORCH_AVAILABLE:
                return

            # Autotune conv algorithms for fixed-size inputs; trades
            # determinism for throughput.
            torch.backends.cudnn.benchmark = True
            torch.backends.cudnn.deterministic = False

            torch.cuda.empty_cache()

            try:
                # Availability probe only; the import is not otherwise used.
                from torch.cuda.amp import autocast  # noqa: F401
                logger.info("✅ Mixed precision available")
            except ImportError:
                logger.info("ℹ️ Mixed precision not available")

            logger.info("✅ CUDA optimization configured")

        except Exception as e:
            logger.warning(f"⚠️ CUDA configuration warning: {e}")

    def _select_optimal_device(self) -> str:
        """Select the CUDA device with the most memory, or 'cpu' if none qualifies."""
        try:
            if not TORCH_AVAILABLE:
                return 'cpu'

            if not self.cuda_available or self.gpu_count == 0:
                return 'cpu'

            best_device = 'cuda:0'
            best_memory = 0.0

            for device_name, props in self.device_info.items():
                if device_name.startswith('cuda:'):
                    memory = props.get('memory_gb', 0)
                    if memory > best_memory:
                        best_memory = memory
                        best_device = device_name

            if best_memory < self.MIN_GPU_MEMORY_GB:
                logger.warning(f"⚠️ GPU memory ({best_memory:.1f}GB) may be insufficient, using CPU")
                return 'cpu'

            return best_device

        except Exception as e:
            logger.error(f"❌ Device selection failed: {e}")
            return 'cpu'

    def _log_system_info(self):
        """Log OS, CPU, RAM, Python and library-version information."""
        try:
            logger.info(f"📊 System: {platform.system()} {platform.release()}")
            logger.info(f"💾 CPU: {platform.processor()}")
            logger.info(f"🧠 RAM: {psutil.virtual_memory().total / (1024**3):.1f} GB")
            logger.info(f"🐍 Python: {platform.python_version()}")

            if TORCH_AVAILABLE:
                logger.info(f"🔥 PyTorch: {torch.__version__}")
                if torch.cuda.is_available():
                    logger.info(f"⚡ CUDA: {torch.version.cuda}")

            if OPENCV_AVAILABLE:
                logger.info(f"📷 OpenCV: {cv2.__version__}")

        except Exception as e:
            logger.warning(f"⚠️ System info logging failed: {e}")

    def get_device(self) -> str:
        """Return the selected device string, defaulting to 'cpu'."""
        return self.device or 'cpu'

    def get_device_info(self) -> Dict:
        """Return a summary dict of detection results and configuration state."""
        return {
            'device': self.device,
            'cuda_available': self.cuda_available,
            'gpu_count': self.gpu_count,
            'device_info': self.device_info,
            'threading_configured': self.threading_configured
        }

    def get_memory_usage(self) -> Dict:
        """Return current system (and per-GPU, when available) memory usage in GB."""
        # Single snapshot so total/used/percent are mutually consistent
        # (previously three separate psutil calls could disagree).
        vm = psutil.virtual_memory()
        memory_info = {
            'system_memory_gb': vm.total / (1024**3),
            'system_memory_used_gb': vm.used / (1024**3),
            'system_memory_percent': vm.percent
        }

        if self.cuda_available and TORCH_AVAILABLE:
            try:
                for i in range(self.gpu_count):
                    allocated = torch.cuda.memory_allocated(i) / (1024**3)
                    reserved = torch.cuda.memory_reserved(i) / (1024**3)
                    total = self.device_info.get(f'cuda:{i}', {}).get('memory_gb', 0)

                    memory_info[f'gpu_{i}_allocated_gb'] = allocated
                    memory_info[f'gpu_{i}_reserved_gb'] = reserved
                    memory_info[f'gpu_{i}_total_gb'] = total
                    # max(total, 1) guards against division by zero when the
                    # GPU's total memory could not be queried.
                    memory_info[f'gpu_{i}_percent'] = (allocated / max(total, 1)) * 100

            except Exception as e:
                logger.warning(f"⚠️ GPU memory info failed: {e}")

        return memory_info

    def optimize_for_model(self, model_name: str) -> Dict:
        """Return device settings tuned for a specific model.

        Args:
            model_name: Model identifier ('sam2' and 'matanyone' are known).

        Returns:
            Dict with 'device', 'mixed_precision', 'gradient_checkpointing'
            and 'batch_size' keys; conservative defaults for unknown models.
        """
        optimizations = {
            'device': self.device,
            'mixed_precision': False,
            'gradient_checkpointing': False,
            'batch_size': 1
        }

        try:
            if model_name.lower() == 'sam2':
                if self.cuda_available and self._get_gpu_memory_gb() >= 8:
                    optimizations.update({
                        'mixed_precision': True,
                        'batch_size': 2
                    })

            elif model_name.lower() == 'matanyone':
                if self.cuda_available and self._get_gpu_memory_gb() >= 6:
                    optimizations.update({
                        'mixed_precision': True
                    })

            logger.info(f"⚙️ Optimizations for {model_name}: {optimizations}")

        except Exception as e:
            logger.warning(f"⚠️ Model optimization failed: {e}")

        return optimizations

    def _get_gpu_memory_gb(self) -> float:
        """Return memory (GB) of the selected GPU, or 0.0 when unknown."""
        if not self.cuda_available or not self.device_info:
            return 0.0

        # Fall back to the first GPU when the selected device has no entry.
        device_key = self.device if self.device in self.device_info else 'cuda:0'
        return self.device_info.get(device_key, {}).get('memory_gb', 0.0)

    def cleanup(self):
        """Release GPU resources (clears the CUDA allocator cache)."""
        try:
            if self.cuda_available and TORCH_AVAILABLE:
                torch.cuda.empty_cache()
                logger.info("✅ GPU cache cleared")
        except Exception as e:
            logger.warning(f"⚠️ Cleanup warning: {e}")
| |
|
| | |
# Lazily-created process-wide singleton; access via get_device_manager().
_device_manager: Optional[DeviceManager] = None
| |
|
def get_device_manager() -> DeviceManager:
    """Return the process-wide DeviceManager, creating and initializing it
    on first use (lazy singleton)."""
    global _device_manager
    if _device_manager is None:
        manager = DeviceManager()
        manager.initialize()
        _device_manager = manager
    return _device_manager
| |
|
def get_optimal_device() -> str:
    """Return the device string chosen by the global device manager."""
    manager = get_device_manager()
    return manager.get_device()
| |
|
def fix_cuda_compatibility():
    """Trigger device detection (which includes CUDA property-name
    compatibility handling) and return the resulting device info dict."""
    try:
        manager = get_device_manager()
        logger.info("✅ CUDA compatibility checked and fixed")
        return manager.get_device_info()
    except Exception as e:
        logger.error(f"❌ CUDA compatibility fix failed: {e}")
        return {'device': 'cpu', 'error': str(e)}
| |
|
def setup_optimal_threading():
    """Ensure thread limits are configured on the global device manager.

    Returns True on success, False if configuration raised."""
    try:
        manager = get_device_manager()
        if not manager.threading_configured:
            manager._configure_threading()
        else:
            logger.info("✅ Threading already configured optimally")
        return True
    except Exception as e:
        logger.error(f"❌ Threading setup failed: {e}")
        return False
| |
|
def get_system_diagnostics() -> Dict:
    """Collect device info, memory usage and a readiness flag in one dict."""
    manager = get_device_manager()
    diagnostics = {
        'device_info': manager.get_device_info(),
        'memory_usage': manager.get_memory_usage(),
        'system_ready': manager.device is not None,
    }
    return diagnostics
| |
|
| | |
# Eagerly build the singleton at import time so the first caller doesn't
# pay the detection cost. Reuses get_device_manager() instead of duplicating
# the construction/initialization sequence inline.
try:
    get_device_manager()
    logger.info("✅ Device manager initialized on import")
except Exception as e:
    logger.warning(f"⚠️ Device manager initialization warning: {e}")