# (Hugging Face Spaces page residue removed: "Spaces: Running on Zero" badge text)
| """ | |
| GPU Configuration Module | |
| Centralized GPU memory detection and adaptive configuration management | |
| Debug Mode: | |
| Set environment variable MAX_CUDA_VRAM to simulate different GPU memory sizes. | |
| Example: MAX_CUDA_VRAM=8 python acestep # Simulates 8GB GPU | |
| For MPS testing, use MAX_MPS_VRAM to simulate MPS memory. | |
| Example: MAX_MPS_VRAM=16 python acestep # Simulates 16GB MPS | |
| This is useful for testing GPU tier configurations on high-end hardware. | |
| """ | |
| import os | |
| import sys | |
| from dataclasses import dataclass | |
| from typing import Optional, List, Dict, Tuple | |
| from loguru import logger | |
# Environment variable names for debugging/testing different GPU memory configurations.
# When set, hardware detection is bypassed and the given value (in GB) is used instead.
DEBUG_MAX_CUDA_VRAM_ENV = "MAX_CUDA_VRAM"
DEBUG_MAX_MPS_VRAM_ENV = "MAX_MPS_VRAM"

# Tolerance for 16GB detection: reported VRAM like 15.5GB is effectively 16GB hardware.
# Real-world 16GB GPUs often report 15.7-15.9GB due to system/driver reservations.
VRAM_16GB_TOLERANCE_GB = 0.5
VRAM_16GB_MIN_GB = 16.0 - VRAM_16GB_TOLERANCE_GB  # treat as 16GB class if >= this

# PyTorch wheel-index URLs surfaced in the diagnostic messages below.
PYTORCH_CUDA_INSTALL_URL = "https://download.pytorch.org/whl/cu121"
PYTORCH_ROCM_INSTALL_URL = "https://download.pytorch.org/whl/rocm6.0"
@dataclass
class GPUConfig:
    """GPU configuration derived from available GPU memory.

    Bug fix: the class previously held only bare annotations with no
    ``@dataclass`` decorator, so it had no generated ``__init__`` — yet
    ``get_gpu_config`` constructs it with keyword arguments (and
    ``dataclass`` was imported but unused). The decorator restores the
    intended keyword constructor without changing the field interface.
    """
    tier: str  # "tier1".."tier6" or "unlimited"
    gpu_memory_gb: float  # detected (or simulated) GPU memory in GB
    # Duration limits (in seconds)
    max_duration_with_lm: int  # when the LM is initialized
    max_duration_without_lm: int  # when the LM is not initialized
    # Batch size limits
    max_batch_size_with_lm: int
    max_batch_size_without_lm: int
    # LM configuration
    init_lm_default: bool  # whether to initialize the LM by default
    available_lm_models: List[str]  # LM model names usable on this tier
    # LM memory allocation (GB) per model size, e.g. {"0.6B": 3, "1.7B": 8, "4B": 12}
    lm_memory_gb: Dict[str, float]
| # GPU tier configurations | |
| GPU_TIER_CONFIGS = { | |
| "tier1": { # <= 4GB | |
| "max_duration_with_lm": 180, # 3 minutes | |
| "max_duration_without_lm": 180, # 3 minutes | |
| "max_batch_size_with_lm": 1, | |
| "max_batch_size_without_lm": 1, | |
| "init_lm_default": False, | |
| "available_lm_models": [], | |
| "lm_memory_gb": {}, | |
| }, | |
| "tier2": { # 4-6GB | |
| "max_duration_with_lm": 360, # 6 minutes | |
| "max_duration_without_lm": 360, # 6 minutes | |
| "max_batch_size_with_lm": 1, | |
| "max_batch_size_without_lm": 1, | |
| "init_lm_default": False, | |
| "available_lm_models": [], | |
| "lm_memory_gb": {}, | |
| }, | |
| "tier3": { # 6-8GB | |
| "max_duration_with_lm": 240, # 4 minutes with LM | |
| "max_duration_without_lm": 360, # 6 minutes without LM | |
| "max_batch_size_with_lm": 1, | |
| "max_batch_size_without_lm": 2, | |
| "init_lm_default": False, # Don't init by default due to limited memory | |
| "available_lm_models": ["acestep-5Hz-lm-0.6B"], | |
| "lm_memory_gb": {"0.6B": 3}, | |
| }, | |
| "tier4": { # 8-12GB | |
| "max_duration_with_lm": 240, # 4 minutes with LM | |
| "max_duration_without_lm": 360, # 6 minutes without LM | |
| "max_batch_size_with_lm": 2, | |
| "max_batch_size_without_lm": 4, | |
| "init_lm_default": False, # Don't init by default | |
| "available_lm_models": ["acestep-5Hz-lm-0.6B"], | |
| "lm_memory_gb": {"0.6B": 3}, | |
| }, | |
| "tier5": { # 12-16GB | |
| "max_duration_with_lm": 240, # 4 minutes with LM | |
| "max_duration_without_lm": 360, # 6 minutes without LM | |
| "max_batch_size_with_lm": 2, | |
| "max_batch_size_without_lm": 4, | |
| "init_lm_default": True, | |
| "available_lm_models": ["acestep-5Hz-lm-0.6B", "acestep-5Hz-lm-1.7B"], | |
| "lm_memory_gb": {"0.6B": 3, "1.7B": 8}, | |
| }, | |
| "tier6": { # 16-24GB | |
| "max_duration_with_lm": 480, # 8 minutes | |
| "max_duration_without_lm": 480, # 8 minutes | |
| "max_batch_size_with_lm": 4, | |
| "max_batch_size_without_lm": 8, | |
| "init_lm_default": True, | |
| "available_lm_models": ["acestep-5Hz-lm-0.6B", "acestep-5Hz-lm-1.7B", "acestep-5Hz-lm-4B"], | |
| "lm_memory_gb": {"0.6B": 3, "1.7B": 8, "4B": 12}, | |
| }, | |
| "unlimited": { # >= 24GB | |
| "max_duration_with_lm": 600, # 10 minutes (max supported) | |
| "max_duration_without_lm": 600, # 10 minutes | |
| "max_batch_size_with_lm": 8, | |
| "max_batch_size_without_lm": 8, | |
| "init_lm_default": True, | |
| "available_lm_models": ["acestep-5Hz-lm-0.6B", "acestep-5Hz-lm-1.7B", "acestep-5Hz-lm-4B"], | |
| "lm_memory_gb": {"0.6B": 3, "1.7B": 8, "4B": 12}, | |
| }, | |
| } | |
def get_gpu_memory_gb() -> float:
    """
    Get GPU memory in GB. Returns 0 if no GPU is available.

    Detection order: debug overrides (MAX_CUDA_VRAM, then MAX_MPS_VRAM),
    then CUDA/ROCm, then Intel XPU, then Apple MPS (with a unified-memory
    fallback via sysctl), otherwise 0 with diagnostic logging.

    Debug Mode:
        Set environment variable MAX_CUDA_VRAM to override the detected GPU memory.
        Example: MAX_CUDA_VRAM=8 python acestep  # Simulates 8GB GPU
        For MPS testing, set MAX_MPS_VRAM to override MPS memory detection.
        Example: MAX_MPS_VRAM=16 python acestep  # Simulates 16GB MPS
        This allows testing different GPU tier configurations on high-end hardware.
    """
    # Check for debug override first
    debug_vram = os.environ.get(DEBUG_MAX_CUDA_VRAM_ENV)
    if debug_vram is not None:
        try:
            simulated_gb = float(debug_vram)
            logger.warning(f"⚠️ DEBUG MODE: Simulating GPU memory as {simulated_gb:.1f}GB (set via {DEBUG_MAX_CUDA_VRAM_ENV} environment variable)")
            return simulated_gb
        except ValueError:
            # Unparseable override: fall through to real detection.
            logger.warning(f"Invalid {DEBUG_MAX_CUDA_VRAM_ENV} value: {debug_vram}, ignoring")
    # NOTE(review): this MPS override also takes effect on non-MPS systems — confirm intended.
    debug_mps_vram = os.environ.get(DEBUG_MAX_MPS_VRAM_ENV)
    if debug_mps_vram is not None:
        try:
            simulated_gb = float(debug_mps_vram)
            logger.warning(f"⚠️ DEBUG MODE: Simulating MPS memory as {simulated_gb:.1f}GB (set via {DEBUG_MAX_MPS_VRAM_ENV} environment variable)")
            return simulated_gb
        except ValueError:
            logger.warning(f"Invalid {DEBUG_MAX_MPS_VRAM_ENV} value: {debug_mps_vram}, ignoring")
    try:
        # Imported lazily so this module stays importable without torch installed.
        import torch
        if torch.cuda.is_available():
            # Get total memory of the first GPU in GB
            total_memory = torch.cuda.get_device_properties(0).total_memory
            memory_gb = total_memory / (1024**3)  # Convert bytes to GB
            device_name = torch.cuda.get_device_name(0)
            # ROCm builds report through the torch.cuda API; torch.version.hip distinguishes them.
            is_rocm = hasattr(torch.version, 'hip') and torch.version.hip is not None
            if is_rocm:
                logger.info(f"ROCm GPU detected: {device_name} ({memory_gb:.1f} GB, HIP {torch.version.hip})")
            else:
                logger.info(f"CUDA GPU detected: {device_name} ({memory_gb:.1f} GB)")
            return memory_gb
        elif hasattr(torch, 'xpu') and torch.xpu.is_available():
            # Get total memory of the first XPU in GB
            total_memory = torch.xpu.get_device_properties(0).total_memory
            memory_gb = total_memory / (1024**3)  # Convert bytes to GB
            return memory_gb
        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            # torch.mps may be absent on older torch versions, hence getattr.
            mps_module = getattr(torch, "mps", None)
            try:
                # Preferred: PyTorch's own recommended working-set size for MPS.
                if mps_module is not None and hasattr(mps_module, "recommended_max_memory"):
                    total_memory = mps_module.recommended_max_memory()
                    memory_gb = total_memory / (1024**3)  # Convert bytes to GB
                    return memory_gb
                if mps_module is not None and hasattr(mps_module, "get_device_properties"):
                    props = mps_module.get_device_properties(0)
                    total_memory = getattr(props, "total_memory", None)
                    if total_memory:
                        memory_gb = total_memory / (1024**3)
                        return memory_gb
            except Exception as e:
                logger.warning(f"Failed to detect MPS memory: {e}")
            # Fallback: estimate from system unified memory (Apple Silicon shares CPU/GPU RAM)
            try:
                import subprocess
                result = subprocess.run(
                    ["sysctl", "-n", "hw.memsize"],
                    capture_output=True, text=True, timeout=5
                )
                # int() raises (caught below) if sysctl failed or produced no output.
                total_system_bytes = int(result.stdout.strip())
                # MPS can use up to ~75% of unified memory for GPU workloads
                memory_gb = (total_system_bytes / (1024**3)) * 0.75
                return memory_gb
            except Exception:
                logger.warning(f"MPS available but total memory not exposed. Set {DEBUG_MAX_MPS_VRAM_ENV} to enable tiering.")
                # Conservative fallback for M1/M2
                return 8.0
        else:
            # No GPU detected - provide diagnostic information
            _log_gpu_diagnostic_info(torch)
            return 0
    except Exception as e:
        # Covers torch import failure and any unexpected backend-query error.
        logger.warning(f"Failed to detect GPU memory: {e}")
        return 0
def _log_gpu_diagnostic_info(torch_module):
    """
    Log diagnostic information when GPU is not detected to help users troubleshoot.

    Inspects the PyTorch build flavor (ROCm vs CUDA vs CPU-only) and emits
    targeted troubleshooting steps via logger.warning. Pure logging; returns None.

    Args:
        torch_module: The torch module to inspect for build information
    """
    logger.warning("=" * 80)
    logger.warning("⚠️ GPU NOT DETECTED - DIAGNOSTIC INFORMATION")
    logger.warning("=" * 80)
    # Check PyTorch build type: ROCm builds expose torch.version.hip,
    # CUDA builds expose torch.version.cuda; neither means CPU-only.
    is_rocm_build = hasattr(torch_module.version, 'hip') and torch_module.version.hip is not None
    is_cuda_build = hasattr(torch_module.version, 'cuda') and torch_module.version.cuda is not None
    if is_rocm_build:
        # ROCm build installed but no usable AMD GPU was found at runtime.
        logger.warning("✓ PyTorch ROCm build detected")
        logger.warning(f"  HIP version: {torch_module.version.hip}")
        logger.warning("")
        logger.warning("❌ torch.cuda.is_available() returned False")
        logger.warning("")
        logger.warning("Common causes for AMD/ROCm GPUs:")
        logger.warning("  1. ROCm drivers not installed or not properly configured")
        logger.warning("  2. GPU not supported by installed ROCm version")
        logger.warning("  3. Missing or incorrect HSA_OVERRIDE_GFX_VERSION environment variable")
        logger.warning("  4. ROCm runtime libraries not in system path")
        logger.warning("")
        # Check for common environment variables
        hsa_override = os.environ.get('HSA_OVERRIDE_GFX_VERSION')
        if hsa_override:
            logger.warning(f"  HSA_OVERRIDE_GFX_VERSION is set to: {hsa_override}")
        else:
            logger.warning("  ⚠️ HSA_OVERRIDE_GFX_VERSION is not set")
            logger.warning("  For RDNA3 GPUs (RX 7000 series, RX 9000 series):")
            logger.warning("  - RX 7900 XT/XTX, RX 9070 XT: set HSA_OVERRIDE_GFX_VERSION=11.0.0")
            logger.warning("  - RX 7800 XT, RX 7700 XT: set HSA_OVERRIDE_GFX_VERSION=11.0.1")
            logger.warning("  - RX 7600: set HSA_OVERRIDE_GFX_VERSION=11.0.2")
        logger.warning("")
        logger.warning("Troubleshooting steps:")
        logger.warning("  1. Verify ROCm installation:")
        logger.warning("     rocm-smi  # Should list your GPU")
        logger.warning("  2. Check PyTorch ROCm build:")
        logger.warning("     python -c \"import torch; print(f'ROCm: {torch.version.hip}')\"")
        logger.warning("  3. Set HSA_OVERRIDE_GFX_VERSION for your GPU (see above)")
        logger.warning("  4. On Windows: Use start_gradio_ui_rocm.bat which sets required env vars")
        logger.warning("  5. See docs/en/ACE-Step1.5-Rocm-Manual-Linux.md for Linux setup")
        logger.warning("  6. See requirements-rocm.txt for Windows ROCm setup instructions")
    elif is_cuda_build:
        # CUDA build installed but no usable NVIDIA GPU was found at runtime.
        logger.warning("✓ PyTorch CUDA build detected")
        logger.warning(f"  CUDA version: {torch_module.version.cuda}")
        logger.warning("")
        logger.warning("❌ torch.cuda.is_available() returned False")
        logger.warning("")
        logger.warning("Common causes for NVIDIA GPUs:")
        logger.warning("  1. NVIDIA drivers not installed")
        logger.warning("  2. CUDA runtime not installed or version mismatch")
        logger.warning("  3. GPU not supported by installed CUDA version")
        logger.warning("")
        logger.warning("Troubleshooting steps:")
        logger.warning("  1. Verify NVIDIA driver installation:")
        logger.warning("     nvidia-smi  # Should list your GPU")
        logger.warning("  2. Check CUDA version compatibility")
        logger.warning("  3. Reinstall PyTorch with CUDA support:")
        logger.warning(f"     pip install torch --index-url {PYTORCH_CUDA_INSTALL_URL}")
    else:
        # CPU-only wheel: point the user at the accelerated install indexes.
        logger.warning("⚠️ PyTorch build type: CPU-only")
        logger.warning("")
        logger.warning("You have installed a CPU-only version of PyTorch!")
        logger.warning("")
        logger.warning("For NVIDIA GPUs:")
        logger.warning(f"  pip install torch --index-url {PYTORCH_CUDA_INSTALL_URL}")
        logger.warning("")
        logger.warning("For AMD GPUs with ROCm:")
        logger.warning("  Windows: See requirements-rocm.txt for detailed instructions")
        logger.warning(f"  Linux: pip install torch --index-url {PYTORCH_ROCM_INSTALL_URL}")
        logger.warning("")
        logger.warning("For more information, see README.md section 'AMD / ROCm GPUs'")
    logger.warning("=" * 80)
def get_gpu_tier(gpu_memory_gb: float) -> str:
    """
    Map an amount of GPU memory to a tier name.

    Args:
        gpu_memory_gb: GPU memory in GB

    Returns:
        One of "tier1".."tier6" or "unlimited"
    """
    # CPU mode (no reported GPU memory) shares tier1 limits.
    if gpu_memory_gb <= 0:
        return "tier1"
    # Lower tiers: first threshold the value does not exceed wins.
    for upper_bound, tier_name in ((4, "tier1"), (6, "tier2"), (8, "tier3"), (12, "tier4")):
        if gpu_memory_gb <= upper_bound:
            return tier_name
    if gpu_memory_gb < VRAM_16GB_MIN_GB:
        return "tier5"
    if gpu_memory_gb > 24:
        return "unlimited"
    # 16GB-class card (tolerance allows slightly under-reported 16GB hardware).
    if gpu_memory_gb < 16.0:
        logger.info(f"Detected {gpu_memory_gb:.2f}GB VRAM — treating as 16GB class GPU")
    return "tier6"
def get_gpu_config(gpu_memory_gb: Optional[float] = None) -> GPUConfig:
    """
    Build a GPUConfig from detected or explicitly provided GPU memory.

    Args:
        gpu_memory_gb: GPU memory in GB. If None, it is auto-detected.

    Returns:
        GPUConfig populated from the matching GPU_TIER_CONFIGS entry.
    """
    memory = get_gpu_memory_gb() if gpu_memory_gb is None else gpu_memory_gb
    tier = get_gpu_tier(memory)
    # Tier table keys match the remaining GPUConfig field names exactly.
    return GPUConfig(tier=tier, gpu_memory_gb=memory, **GPU_TIER_CONFIGS[tier])
def get_lm_model_size(model_path: str) -> str:
    """
    Extract the LM model size token from a model path.

    Args:
        model_path: Model path string (e.g., "acestep-5Hz-lm-0.6B")

    Returns:
        "0.6B", "1.7B", or "4B"; unknown paths default to "0.6B"
        (smallest-model assumption).
    """
    for size_token in ("0.6B", "1.7B", "4B"):
        if size_token in model_path:
            return size_token
    # No recognizable size marker: assume the smallest model.
    return "0.6B"
def get_lm_gpu_memory_ratio(model_path: str, total_gpu_memory_gb: float) -> Tuple[float, float]:
    """
    Calculate the GPU memory utilization ratio for an LM model.

    Args:
        model_path: LM model path (e.g., "acestep-5Hz-lm-0.6B")
        total_gpu_memory_gb: Total GPU memory in GB

    Returns:
        Tuple of (gpu_memory_utilization_ratio, target_memory_gb)

    NOTE(review): assumes total_gpu_memory_gb > 0; a zero value would raise
    ZeroDivisionError — confirm callers only invoke this when a GPU exists.
    """
    # Target memory allocation (GB) per model size; unknown sizes use the smallest.
    targets = {"0.6B": 3.0, "1.7B": 8.0, "4B": 12.0}
    target_gb = targets.get(get_lm_model_size(model_path), 3.0)
    # Large GPUs (>=24GB) get a higher floor so the model is not starved;
    # smaller GPUs are limited more strictly. The cap is 0.9 in both cases.
    lower_bound = 0.2 if total_gpu_memory_gb >= 24 else 0.1
    ratio = min(0.9, max(lower_bound, target_gb / total_gpu_memory_gb))
    return ratio, target_gb
def check_duration_limit(
    duration: float,
    gpu_config: GPUConfig,
    lm_initialized: bool
) -> Tuple[bool, str]:
    """
    Check whether a requested duration fits the current GPU configuration.

    Args:
        duration: Requested duration in seconds
        gpu_config: Current GPU configuration
        lm_initialized: Whether LM is initialized

    Returns:
        Tuple of (is_valid, warning_message); the message is empty when valid.
    """
    if lm_initialized:
        max_duration = gpu_config.max_duration_with_lm
    else:
        max_duration = gpu_config.max_duration_without_lm
    if duration <= max_duration:
        return True, ""
    mode = "with" if lm_initialized else "without"
    warning_msg = (
        f"⚠️ Requested duration ({duration:.0f}s) exceeds the limit for your GPU "
        f"({gpu_config.gpu_memory_gb:.1f}GB). Maximum allowed: {max_duration}s "
        f"({mode} LM). "
        f"Duration will be clamped to {max_duration}s."
    )
    return False, warning_msg
def check_batch_size_limit(
    batch_size: int,
    gpu_config: GPUConfig,
    lm_initialized: bool
) -> Tuple[bool, str]:
    """
    Check whether a requested batch size fits the current GPU configuration.

    Args:
        batch_size: Requested batch size
        gpu_config: Current GPU configuration
        lm_initialized: Whether LM is initialized

    Returns:
        Tuple of (is_valid, warning_message); the message is empty when valid.
    """
    if lm_initialized:
        max_batch_size = gpu_config.max_batch_size_with_lm
    else:
        max_batch_size = gpu_config.max_batch_size_without_lm
    if batch_size <= max_batch_size:
        return True, ""
    mode = "with" if lm_initialized else "without"
    warning_msg = (
        f"⚠️ Requested batch size ({batch_size}) exceeds the limit for your GPU "
        f"({gpu_config.gpu_memory_gb:.1f}GB). Maximum allowed: {max_batch_size} "
        f"({mode} LM). "
        f"Batch size will be clamped to {max_batch_size}."
    )
    return False, warning_msg
def is_lm_model_supported(model_path: str, gpu_config: GPUConfig) -> Tuple[bool, str]:
    """
    Check whether the specified LM model can run on the current GPU configuration.

    Args:
        model_path: LM model path
        gpu_config: Current GPU configuration

    Returns:
        Tuple of (is_supported, warning_message); the message is empty when supported.
    """
    # Tiers with no LM budget expose an empty model list.
    if not gpu_config.available_lm_models:
        return False, (
            f"⚠️ Your GPU ({gpu_config.gpu_memory_gb:.1f}GB) does not have enough memory "
            f"to run any LM model. Please disable LM initialization."
        )
    model_size = get_lm_model_size(model_path)
    # Supported when the size token appears in any available model name.
    if any(model_size in candidate for candidate in gpu_config.available_lm_models):
        return True, ""
    return False, (
        f"⚠️ LM model {model_path} ({model_size}) is not supported for your GPU "
        f"({gpu_config.gpu_memory_gb:.1f}GB). Available models: {', '.join(gpu_config.available_lm_models)}"
    )
def get_recommended_lm_model(gpu_config: GPUConfig) -> Optional[str]:
    """
    Get the recommended LM model for the current GPU configuration.

    Args:
        gpu_config: Current GPU configuration

    Returns:
        The largest available LM model path (last list entry), or None when
        no LM model is supported on this tier.
    """
    models = gpu_config.available_lm_models
    # Lists are ordered smallest-to-largest, so the last entry is the best fit.
    return models[-1] if models else None
def print_gpu_config_info(gpu_config: GPUConfig):
    """Log the resolved GPU configuration, one field per line (debug aid)."""
    summary_lines = (
        "GPU Configuration:",
        f"  - GPU Memory: {gpu_config.gpu_memory_gb:.1f} GB",
        f"  - Tier: {gpu_config.tier}",
        f"  - Max Duration (with LM): {gpu_config.max_duration_with_lm}s ({gpu_config.max_duration_with_lm // 60} min)",
        f"  - Max Duration (without LM): {gpu_config.max_duration_without_lm}s ({gpu_config.max_duration_without_lm // 60} min)",
        f"  - Max Batch Size (with LM): {gpu_config.max_batch_size_with_lm}",
        f"  - Max Batch Size (without LM): {gpu_config.max_batch_size_without_lm}",
        f"  - Init LM by Default: {gpu_config.init_lm_default}",
        f"  - Available LM Models: {gpu_config.available_lm_models or 'None'}",
    )
    for line in summary_lines:
        logger.info(line)
# Global GPU config instance, initialized lazily by get_global_gpu_config()
# and overridable via set_global_gpu_config().
_global_gpu_config: Optional[GPUConfig] = None
def get_global_gpu_config() -> GPUConfig:
    """Return the process-wide GPUConfig, creating it on first use."""
    global _global_gpu_config
    config = _global_gpu_config
    if config is None:
        # First access: detect GPU memory and cache the resulting config.
        config = get_gpu_config()
        _global_gpu_config = config
    return config
def set_global_gpu_config(config: GPUConfig):
    """Install *config* as the process-wide GPU configuration, replacing any cached one."""
    global _global_gpu_config
    _global_gpu_config = config