""" GPU Configuration Module Centralized GPU memory detection and adaptive configuration management Debug Mode: Set environment variable MAX_CUDA_VRAM to simulate different GPU memory sizes. Example: MAX_CUDA_VRAM=8 python acestep # Simulates 8GB GPU For MPS testing, use MAX_MPS_VRAM to simulate MPS memory. Example: MAX_MPS_VRAM=16 python acestep # Simulates 16GB MPS This is useful for testing GPU tier configurations on high-end hardware. """ import os import sys from dataclasses import dataclass from typing import Optional, List, Dict, Tuple from loguru import logger # Environment variable for debugging/testing different GPU memory configurations DEBUG_MAX_CUDA_VRAM_ENV = "MAX_CUDA_VRAM" DEBUG_MAX_MPS_VRAM_ENV = "MAX_MPS_VRAM" # Tolerance for 16GB detection: reported VRAM like 15.5GB is effectively 16GB hardware # Real-world 16GB GPUs often report 15.7-15.9GB due to system/driver reservations VRAM_16GB_TOLERANCE_GB = 0.5 VRAM_16GB_MIN_GB = 16.0 - VRAM_16GB_TOLERANCE_GB # treat as 16GB class if >= this # PyTorch installation URLs for diagnostics PYTORCH_CUDA_INSTALL_URL = "https://download.pytorch.org/whl/cu121" PYTORCH_ROCM_INSTALL_URL = "https://download.pytorch.org/whl/rocm6.0" @dataclass class GPUConfig: """GPU configuration based on available memory""" tier: str # "tier1", "tier2", etc. or "unlimited" gpu_memory_gb: float # Duration limits (in seconds) max_duration_with_lm: int # When LM is initialized max_duration_without_lm: int # When LM is not initialized # Batch size limits max_batch_size_with_lm: int max_batch_size_without_lm: int # LM configuration init_lm_default: bool # Whether to initialize LM by default available_lm_models: List[str] # Available LM models for this tier # LM memory allocation (GB) for each model size lm_memory_gb: Dict[str, float] # e.g., {"0.6B": 3, "1.7B": 8, "4B": 12} # GPU tier configurations GPU_TIER_CONFIGS = { "tier1": { # <= 4GB "max_duration_with_lm": 180, # 3 minutes "max_duration_without_lm": 180, # 3 minutes "max_batch_size_with_lm": 1, "max_batch_size_without_lm": 1, "init_lm_default": False, "available_lm_models": [], "lm_memory_gb": {}, }, "tier2": { # 4-6GB "max_duration_with_lm": 360, # 6 minutes "max_duration_without_lm": 360, # 6 minutes "max_batch_size_with_lm": 1, "max_batch_size_without_lm": 1, "init_lm_default": False, "available_lm_models": [], "lm_memory_gb": {}, }, "tier3": { # 6-8GB "max_duration_with_lm": 240, # 4 minutes with LM "max_duration_without_lm": 360, # 6 minutes without LM "max_batch_size_with_lm": 1, "max_batch_size_without_lm": 2, "init_lm_default": False, # Don't init by default due to limited memory "available_lm_models": ["acestep-5Hz-lm-0.6B"], "lm_memory_gb": {"0.6B": 3}, }, "tier4": { # 8-12GB "max_duration_with_lm": 240, # 4 minutes with LM "max_duration_without_lm": 360, # 6 minutes without LM "max_batch_size_with_lm": 2, "max_batch_size_without_lm": 4, "init_lm_default": False, # Don't init by default "available_lm_models": ["acestep-5Hz-lm-0.6B"], "lm_memory_gb": {"0.6B": 3}, }, "tier5": { # 12-16GB "max_duration_with_lm": 240, # 4 minutes with LM "max_duration_without_lm": 360, # 6 minutes without LM "max_batch_size_with_lm": 2, "max_batch_size_without_lm": 4, "init_lm_default": True, "available_lm_models": ["acestep-5Hz-lm-0.6B", "acestep-5Hz-lm-1.7B"], "lm_memory_gb": {"0.6B": 3, "1.7B": 8}, }, "tier6": { # 16-24GB "max_duration_with_lm": 480, # 8 minutes "max_duration_without_lm": 480, # 8 minutes "max_batch_size_with_lm": 4, "max_batch_size_without_lm": 8, "init_lm_default": True, "available_lm_models": 
["acestep-5Hz-lm-0.6B", "acestep-5Hz-lm-1.7B", "acestep-5Hz-lm-4B"], "lm_memory_gb": {"0.6B": 3, "1.7B": 8, "4B": 12}, }, "unlimited": { # >= 24GB "max_duration_with_lm": 600, # 10 minutes (max supported) "max_duration_without_lm": 600, # 10 minutes "max_batch_size_with_lm": 8, "max_batch_size_without_lm": 8, "init_lm_default": True, "available_lm_models": ["acestep-5Hz-lm-0.6B", "acestep-5Hz-lm-1.7B", "acestep-5Hz-lm-4B"], "lm_memory_gb": {"0.6B": 3, "1.7B": 8, "4B": 12}, }, } def get_gpu_memory_gb() -> float: """ Get GPU memory in GB. Returns 0 if no GPU is available. Debug Mode: Set environment variable MAX_CUDA_VRAM to override the detected GPU memory. Example: MAX_CUDA_VRAM=8 python acestep # Simulates 8GB GPU For MPS testing, set MAX_MPS_VRAM to override MPS memory detection. Example: MAX_MPS_VRAM=16 python acestep # Simulates 16GB MPS This allows testing different GPU tier configurations on high-end hardware. """ # Check for debug override first debug_vram = os.environ.get(DEBUG_MAX_CUDA_VRAM_ENV) if debug_vram is not None: try: simulated_gb = float(debug_vram) logger.warning(f"⚠️ DEBUG MODE: Simulating GPU memory as {simulated_gb:.1f}GB (set via {DEBUG_MAX_CUDA_VRAM_ENV} environment variable)") return simulated_gb except ValueError: logger.warning(f"Invalid {DEBUG_MAX_CUDA_VRAM_ENV} value: {debug_vram}, ignoring") debug_mps_vram = os.environ.get(DEBUG_MAX_MPS_VRAM_ENV) if debug_mps_vram is not None: try: simulated_gb = float(debug_mps_vram) logger.warning(f"⚠️ DEBUG MODE: Simulating MPS memory as {simulated_gb:.1f}GB (set via {DEBUG_MAX_MPS_VRAM_ENV} environment variable)") return simulated_gb except ValueError: logger.warning(f"Invalid {DEBUG_MAX_MPS_VRAM_ENV} value: {debug_mps_vram}, ignoring") try: import torch if torch.cuda.is_available(): # Get total memory of the first GPU in GB total_memory = torch.cuda.get_device_properties(0).total_memory memory_gb = total_memory / (1024**3) # Convert bytes to GB device_name = torch.cuda.get_device_name(0) is_rocm = hasattr(torch.version, 'hip') and torch.version.hip is not None if is_rocm: logger.info(f"ROCm GPU detected: {device_name} ({memory_gb:.1f} GB, HIP {torch.version.hip})") else: logger.info(f"CUDA GPU detected: {device_name} ({memory_gb:.1f} GB)") return memory_gb elif hasattr(torch, 'xpu') and torch.xpu.is_available(): # Get total memory of the first XPU in GB total_memory = torch.xpu.get_device_properties(0).total_memory memory_gb = total_memory / (1024**3) # Convert bytes to GB return memory_gb elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): mps_module = getattr(torch, "mps", None) try: if mps_module is not None and hasattr(mps_module, "recommended_max_memory"): total_memory = mps_module.recommended_max_memory() memory_gb = total_memory / (1024**3) # Convert bytes to GB return memory_gb if mps_module is not None and hasattr(mps_module, "get_device_properties"): props = mps_module.get_device_properties(0) total_memory = getattr(props, "total_memory", None) if total_memory: memory_gb = total_memory / (1024**3) return memory_gb except Exception as e: logger.warning(f"Failed to detect MPS memory: {e}") # Fallback: estimate from system unified memory (Apple Silicon shares CPU/GPU RAM) try: import subprocess result = subprocess.run( ["sysctl", "-n", "hw.memsize"], capture_output=True, text=True, timeout=5 ) total_system_bytes = int(result.stdout.strip()) # MPS can use up to ~75% of unified memory for GPU workloads memory_gb = (total_system_bytes / (1024**3)) * 0.75 return memory_gb except Exception: 
logger.warning(f"MPS available but total memory not exposed. Set {DEBUG_MAX_MPS_VRAM_ENV} to enable tiering.") # Conservative fallback for M1/M2 return 8.0 else: # No GPU detected - provide diagnostic information _log_gpu_diagnostic_info(torch) return 0 except Exception as e: logger.warning(f"Failed to detect GPU memory: {e}") return 0 def _log_gpu_diagnostic_info(torch_module): """ Log diagnostic information when GPU is not detected to help users troubleshoot. Args: torch_module: The torch module to inspect for build information """ logger.warning("=" * 80) logger.warning("⚠️ GPU NOT DETECTED - DIAGNOSTIC INFORMATION") logger.warning("=" * 80) # Check PyTorch build type is_rocm_build = hasattr(torch_module.version, 'hip') and torch_module.version.hip is not None is_cuda_build = hasattr(torch_module.version, 'cuda') and torch_module.version.cuda is not None if is_rocm_build: logger.warning("✓ PyTorch ROCm build detected") logger.warning(f" HIP version: {torch_module.version.hip}") logger.warning("") logger.warning("❌ torch.cuda.is_available() returned False") logger.warning("") logger.warning("Common causes for AMD/ROCm GPUs:") logger.warning(" 1. ROCm drivers not installed or not properly configured") logger.warning(" 2. GPU not supported by installed ROCm version") logger.warning(" 3. Missing or incorrect HSA_OVERRIDE_GFX_VERSION environment variable") logger.warning(" 4. ROCm runtime libraries not in system path") logger.warning("") # Check for common environment variables hsa_override = os.environ.get('HSA_OVERRIDE_GFX_VERSION') if hsa_override: logger.warning(f" HSA_OVERRIDE_GFX_VERSION is set to: {hsa_override}") else: logger.warning(" ⚠️ HSA_OVERRIDE_GFX_VERSION is not set") logger.warning(" For RDNA3 GPUs (RX 7000 series, RX 9000 series):") logger.warning(" - RX 7900 XT/XTX, RX 9070 XT: set HSA_OVERRIDE_GFX_VERSION=11.0.0") logger.warning(" - RX 7800 XT, RX 7700 XT: set HSA_OVERRIDE_GFX_VERSION=11.0.1") logger.warning(" - RX 7600: set HSA_OVERRIDE_GFX_VERSION=11.0.2") logger.warning("") logger.warning("Troubleshooting steps:") logger.warning(" 1. Verify ROCm installation:") logger.warning(" rocm-smi # Should list your GPU") logger.warning(" 2. Check PyTorch ROCm build:") logger.warning(" python -c \"import torch; print(f'ROCm: {torch.version.hip}')\"") logger.warning(" 3. Set HSA_OVERRIDE_GFX_VERSION for your GPU (see above)") logger.warning(" 4. On Windows: Use start_gradio_ui_rocm.bat which sets required env vars") logger.warning(" 5. See docs/en/ACE-Step1.5-Rocm-Manual-Linux.md for Linux setup") logger.warning(" 6. See requirements-rocm.txt for Windows ROCm setup instructions") elif is_cuda_build: logger.warning("✓ PyTorch CUDA build detected") logger.warning(f" CUDA version: {torch_module.version.cuda}") logger.warning("") logger.warning("❌ torch.cuda.is_available() returned False") logger.warning("") logger.warning("Common causes for NVIDIA GPUs:") logger.warning(" 1. NVIDIA drivers not installed") logger.warning(" 2. CUDA runtime not installed or version mismatch") logger.warning(" 3. GPU not supported by installed CUDA version") logger.warning("") logger.warning("Troubleshooting steps:") logger.warning(" 1. Verify NVIDIA driver installation:") logger.warning(" nvidia-smi # Should list your GPU") logger.warning(" 2. Check CUDA version compatibility") logger.warning(" 3. 


def get_gpu_tier(gpu_memory_gb: float) -> str:
    """
    Determine GPU tier based on available memory.

    Args:
        gpu_memory_gb: GPU memory in GB

    Returns:
        Tier string: "tier1", "tier2", "tier3", "tier4", "tier5", "tier6", or "unlimited"
    """
    if gpu_memory_gb <= 0:
        # CPU mode - use tier1 limits
        return "tier1"
    elif gpu_memory_gb <= 4:
        return "tier1"
    elif gpu_memory_gb <= 6:
        return "tier2"
    elif gpu_memory_gb <= 8:
        return "tier3"
    elif gpu_memory_gb <= 12:
        return "tier4"
    elif gpu_memory_gb < VRAM_16GB_MIN_GB:
        return "tier5"
    elif gpu_memory_gb <= 24:
        if gpu_memory_gb < 16.0:
            logger.info(f"Detected {gpu_memory_gb:.2f}GB VRAM - treating as 16GB class GPU")
        return "tier6"
    else:
        return "unlimited"


def get_gpu_config(gpu_memory_gb: Optional[float] = None) -> GPUConfig:
    """
    Get GPU configuration based on detected or provided GPU memory.

    Args:
        gpu_memory_gb: GPU memory in GB. If None, will be auto-detected.

    Returns:
        GPUConfig object with all configuration parameters
    """
    if gpu_memory_gb is None:
        gpu_memory_gb = get_gpu_memory_gb()

    tier = get_gpu_tier(gpu_memory_gb)
    config = GPU_TIER_CONFIGS[tier]

    return GPUConfig(
        tier=tier,
        gpu_memory_gb=gpu_memory_gb,
        max_duration_with_lm=config["max_duration_with_lm"],
        max_duration_without_lm=config["max_duration_without_lm"],
        max_batch_size_with_lm=config["max_batch_size_with_lm"],
        max_batch_size_without_lm=config["max_batch_size_without_lm"],
        init_lm_default=config["init_lm_default"],
        available_lm_models=config["available_lm_models"],
        lm_memory_gb=config["lm_memory_gb"],
    )


def get_lm_model_size(model_path: str) -> str:
    """
    Extract LM model size from model path.

    Args:
        model_path: Model path string (e.g., "acestep-5Hz-lm-0.6B")

    Returns:
        Model size string: "0.6B", "1.7B", or "4B"
    """
    if "0.6B" in model_path:
        return "0.6B"
    elif "1.7B" in model_path:
        return "1.7B"
    elif "4B" in model_path:
        return "4B"
    else:
        # Default to smallest model assumption
        return "0.6B"


def get_lm_gpu_memory_ratio(model_path: str, total_gpu_memory_gb: float) -> Tuple[float, float]:
    """
    Calculate GPU memory utilization ratio for LM model.

    Args:
        model_path: LM model path (e.g., "acestep-5Hz-lm-0.6B")
        total_gpu_memory_gb: Total GPU memory in GB

    Returns:
        Tuple of (gpu_memory_utilization_ratio, target_memory_gb)
    """
    model_size = get_lm_model_size(model_path)

    # Target memory allocation for each model size
    target_memory = {
        "0.6B": 3.0,
        "1.7B": 8.0,
        "4B": 12.0,
    }
    target_gb = target_memory.get(model_size, 3.0)

    # For large GPUs (>=24GB), don't restrict memory too much
    if total_gpu_memory_gb >= 24:
        # Use a reasonable ratio that allows the model to run efficiently
        ratio = min(0.9, max(0.2, target_gb / total_gpu_memory_gb))
    else:
        # For smaller GPUs, strictly limit memory usage
        ratio = min(0.9, max(0.1, target_gb / total_gpu_memory_gb))

    return ratio, target_gb
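
# Worked examples (illustrative values, derived from the thresholds and targets above):
#   - A card reporting 15.7GB is >= VRAM_16GB_MIN_GB (15.5), so get_gpu_tier classes
#     it as "tier6" rather than "tier5".
#   - get_lm_gpu_memory_ratio("acestep-5Hz-lm-1.7B", 12.0) targets 8GB, giving a
#     utilization ratio of min(0.9, max(0.1, 8 / 12)) ≈ 0.67 on a 12GB GPU.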


def check_duration_limit(
    duration: float, gpu_config: GPUConfig, lm_initialized: bool
) -> Tuple[bool, str]:
    """
    Check if requested duration is within limits for current GPU configuration.

    Args:
        duration: Requested duration in seconds
        gpu_config: Current GPU configuration
        lm_initialized: Whether LM is initialized

    Returns:
        Tuple of (is_valid, warning_message)
    """
    max_duration = (
        gpu_config.max_duration_with_lm if lm_initialized else gpu_config.max_duration_without_lm
    )

    if duration > max_duration:
        warning_msg = (
            f"⚠️ Requested duration ({duration:.0f}s) exceeds the limit for your GPU "
            f"({gpu_config.gpu_memory_gb:.1f}GB). Maximum allowed: {max_duration}s "
            f"({'with' if lm_initialized else 'without'} LM). "
            f"Duration will be clamped to {max_duration}s."
        )
        return False, warning_msg

    return True, ""


def check_batch_size_limit(
    batch_size: int, gpu_config: GPUConfig, lm_initialized: bool
) -> Tuple[bool, str]:
    """
    Check if requested batch size is within limits for current GPU configuration.

    Args:
        batch_size: Requested batch size
        gpu_config: Current GPU configuration
        lm_initialized: Whether LM is initialized

    Returns:
        Tuple of (is_valid, warning_message)
    """
    max_batch_size = (
        gpu_config.max_batch_size_with_lm if lm_initialized else gpu_config.max_batch_size_without_lm
    )

    if batch_size > max_batch_size:
        warning_msg = (
            f"⚠️ Requested batch size ({batch_size}) exceeds the limit for your GPU "
            f"({gpu_config.gpu_memory_gb:.1f}GB). Maximum allowed: {max_batch_size} "
            f"({'with' if lm_initialized else 'without'} LM). "
            f"Batch size will be clamped to {max_batch_size}."
        )
        return False, warning_msg

    return True, ""


def is_lm_model_supported(model_path: str, gpu_config: GPUConfig) -> Tuple[bool, str]:
    """
    Check if the specified LM model is supported for current GPU configuration.

    Args:
        model_path: LM model path
        gpu_config: Current GPU configuration

    Returns:
        Tuple of (is_supported, warning_message)
    """
    if not gpu_config.available_lm_models:
        return False, (
            f"⚠️ Your GPU ({gpu_config.gpu_memory_gb:.1f}GB) does not have enough memory "
            f"to run any LM model. Please disable LM initialization."
        )

    model_size = get_lm_model_size(model_path)

    # Check if model size is in available models
    for available_model in gpu_config.available_lm_models:
        if model_size in available_model:
            return True, ""

    return False, (
        f"⚠️ LM model {model_path} ({model_size}) is not supported for your GPU "
        f"({gpu_config.gpu_memory_gb:.1f}GB). Available models: {', '.join(gpu_config.available_lm_models)}"
    )


def get_recommended_lm_model(gpu_config: GPUConfig) -> Optional[str]:
    """
    Get recommended LM model for current GPU configuration.

    Args:
        gpu_config: Current GPU configuration

    Returns:
        Recommended LM model path, or None if LM is not supported
    """
    if not gpu_config.available_lm_models:
        return None

    # Return the largest available model (last in the list)
    return gpu_config.available_lm_models[-1]


def print_gpu_config_info(gpu_config: GPUConfig):
    """Print GPU configuration information for debugging."""
    logger.info("GPU Configuration:")
    logger.info(f"  - GPU Memory: {gpu_config.gpu_memory_gb:.1f} GB")
    logger.info(f"  - Tier: {gpu_config.tier}")
    logger.info(f"  - Max Duration (with LM): {gpu_config.max_duration_with_lm}s ({gpu_config.max_duration_with_lm // 60} min)")
    logger.info(f"  - Max Duration (without LM): {gpu_config.max_duration_without_lm}s ({gpu_config.max_duration_without_lm // 60} min)")
    logger.info(f"  - Max Batch Size (with LM): {gpu_config.max_batch_size_with_lm}")
    logger.info(f"  - Max Batch Size (without LM): {gpu_config.max_batch_size_without_lm}")
    logger.info(f"  - Init LM by Default: {gpu_config.init_lm_default}")
    logger.info(f"  - Available LM Models: {gpu_config.available_lm_models or 'None'}")


# Global GPU config instance (initialized lazily)
_global_gpu_config: Optional[GPUConfig] = None


def get_global_gpu_config() -> GPUConfig:
    """Get the global GPU configuration, initializing if necessary."""
    global _global_gpu_config
    if _global_gpu_config is None:
        _global_gpu_config = get_gpu_config()
    return _global_gpu_config


def set_global_gpu_config(config: GPUConfig):
    """Set the global GPU configuration."""
    global _global_gpu_config
    _global_gpu_config = config
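

# --- Usage sketch ------------------------------------------------------------
# A minimal, illustrative way to exercise this module from the command line.
# The __main__ guard below is an addition for demonstration purposes; the
# 600-second request is an arbitrary example value, not part of the original API.
if __name__ == "__main__":
    config = get_global_gpu_config()
    print_gpu_config_info(config)

    # Ask for a 10-minute render and see whether the detected tier allows it.
    ok, warning = check_duration_limit(600, config, lm_initialized=config.init_lm_default)
    if not ok:
        logger.warning(warning)

    recommended = get_recommended_lm_model(config)
    logger.info(f"Recommended LM model: {recommended or 'None'}")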