| | """System resource detection and management for adaptive processing.""" |
| |
|
| | import os |
| | import psutil |
| | import torch |
| | import logging |
| | from typing import Optional, Dict, Any |
| |
|
class SystemResources:
    """Detect and manage system resources for adaptive processing.

    This class provides a unified interface to system resource detection,
    handling CPU, RAM, and GPU capabilities. It calculates appropriate
    thresholds and settings based on the detected hardware configuration.

    It implements extreme memory conservation strategies to prevent OOM crashes
    even on large datasets or limited hardware.

    All values are detected once, at construction time; the instance is a
    snapshot and is not refreshed afterwards.
    """

    def __init__(self):
        """Probe CPU, RAM and CUDA, then derive and log the adaptive settings."""
        # --- CPU -----------------------------------------------------------
        # os.cpu_count() may return None when the count is undetermined.
        self.cpu_cores = os.cpu_count() or 1
        self.cpu_threads = self.cpu_cores

        # psutil may return None (or fail) for the physical count; fall back
        # to the logical count in either case.
        try:
            self.cpu_physical_cores = psutil.cpu_count(logical=False) or self.cpu_cores
        except Exception:
            self.cpu_physical_cores = self.cpu_cores

        # --- RAM -----------------------------------------------------------
        # Take a single snapshot so total and available come from one reading.
        memory = psutil.virtual_memory()
        bytes_per_gb = 1024 ** 3
        self.total_ram_gb = memory.total / bytes_per_gb
        self.available_ram_gb = memory.available / bytes_per_gb

        # --- GPU -----------------------------------------------------------
        self.has_cuda = torch.cuda.is_available()
        self.cuda_device = None
        self.cuda_mem_gb = 0

        if self.has_cuda:
            try:
                torch.cuda.empty_cache()
                self.cuda_device = torch.cuda.get_device_name(0)
                self.cuda_mem_gb = torch.cuda.get_device_properties(0).total_memory / bytes_per_gb
            except Exception as e:
                logging.warning(f"Error detecting CUDA properties: {e}")
                # Treat the GPU as unusable and drop any partially detected
                # values so the CUDA fields stay mutually consistent.
                self.has_cuda = False
                self.cuda_device = None
                self.cuda_mem_gb = 0

        self._calculate_thresholds()
        self._log_resources()

    def _calculate_thresholds(self):
        """Calculate adaptive thresholds based on detected system resources.

        Reads ``total_ram_gb``, ``cpu_cores`` and ``cpu_physical_cores`` and
        sets ``emergency_reserve_gb``, ``ram_usage_warning``,
        ``ram_usage_critical``, ``max_files_multiplier``, ``use_disk_offload``,
        ``max_text_chunk_size``, ``max_workers``, ``batch_size`` and
        ``training_chunk_size``.
        """
        total_ram = self.total_ram_gb

        # Keep back 20% of RAM, but never less than 2 GB.
        self.emergency_reserve_gb = max(2.0, total_ram * 0.2)

        # RAM-tiered settings: (warning fraction, critical fraction,
        # max-files multiplier, training chunk size).
        if total_ram < 8:
            warning_frac, critical_frac, files_mult, train_chunk = 0.45, 0.60, 0.03, 3
        elif total_ram < 16:
            warning_frac, critical_frac, files_mult, train_chunk = 0.55, 0.70, 0.05, 5
        else:
            warning_frac, critical_frac, files_mult, train_chunk = 0.60, 0.75, 0.1, 10

        self.ram_usage_warning = total_ram * warning_frac
        self.ram_usage_critical = total_ram * critical_frac
        self.max_files_multiplier = files_mult
        self.training_chunk_size = train_chunk

        # Disk offload is enabled for every RAM tier.
        self.use_disk_offload = True

        # One million units per GB of RAM, capped at ten million.
        self.max_text_chunk_size = min(10_000_000, int(total_ram * 1_000_000))

        # At most four workers, at least one, bounded by physical cores.
        self.max_workers = max(1, min(self.cpu_physical_cores, 4))

        # NOTE(review): for 5-7 logical cores this yields 2-3, which is smaller
        # than the 4 used for 3-4 cores -- confirm the step down is intended.
        if self.cpu_cores <= 2:
            self.batch_size = 2
        elif self.cpu_cores <= 4:
            self.batch_size = 4
        else:
            self.batch_size = min(5, self.cpu_cores // 2)

    def _log_resources(self):
        """Log detected system resources and calculated thresholds."""
        logging.info("===== System Resources =====")
        logging.info(f"CPU: {self.cpu_cores} cores ({self.cpu_physical_cores} physical)")
        logging.info(f"RAM: {self.total_ram_gb:.1f} GB total, {self.available_ram_gb:.1f} GB available")

        if self.has_cuda:
            logging.info(f"GPU: {self.cuda_device} with {self.cuda_mem_gb:.1f} GB memory")
        else:
            logging.info("GPU: Not available")

        logging.info("===== Adaptive Settings =====")
        logging.info(f"RAM Warning Threshold: {self.ram_usage_warning:.1f} GB")
        logging.info(f"RAM Critical Threshold: {self.ram_usage_critical:.1f} GB")
        logging.info(f"Max Workers: {self.max_workers}")
        logging.info(f"Batch Size: {self.batch_size}")
        logging.info(f"Training Chunk Size: {self.training_chunk_size}")
        logging.info(f"Max Files Multiplier: {self.max_files_multiplier:.2f}")
| |
|