| """ | |
| Memory management utilities for Pixagram AI Pixel Art Generator | |
| Provides efficient GPU memory management and model offloading | |
| """ | |
| import torch | |
| import gc | |
| import psutil | |
| import os | |
| class MemoryManager: | |
| """Manages GPU and CPU memory efficiently for model offloading""" | |
| def __init__(self, device='cuda', dtype=torch.float16, verbose=True): | |
| self.device = device | |
| self.dtype = dtype | |
| self.verbose = verbose | |
| self.models_on_gpu = set() | |
| def offload_to_cpu(self, model, model_name="model"): | |
| """Move model to CPU and free GPU memory""" | |
| if model is None: | |
| return model | |
| try: | |
| model = model.to("cpu") | |
| self.models_on_gpu.discard(model_name) | |
| if torch.cuda.is_available(): | |
| torch.cuda.empty_cache() | |
| torch.cuda.synchronize() | |
| if self.verbose: | |
| print(f"[MEMORY] Offloaded {model_name} to CPU") | |
| self.print_memory_status() | |
| return model | |
| except Exception as e: | |
| print(f"[MEMORY] Error offloading {model_name}: {e}") | |
| return model | |
| def load_to_gpu(self, model, model_name="model"): | |
| """Move model to GPU temporarily""" | |
| if model is None: | |
| return model | |
| try: | |
| model = model.to(self.device) | |
| self.models_on_gpu.add(model_name) | |
| if self.verbose: | |
| print(f"[MEMORY] Loaded {model_name} to GPU") | |
| self.print_memory_status() | |
| return model | |
| except Exception as e: | |
| print(f"[MEMORY] Error loading {model_name} to GPU: {e}") | |
| return model | |
| def cleanup_memory(self, aggressive=True): | |
| """Perform memory cleanup""" | |
| if torch.cuda.is_available(): | |
| torch.cuda.empty_cache() | |
| torch.cuda.synchronize() | |
| if aggressive: | |
| # Multiple GC passes for thorough cleanup | |
| for _ in range(3): | |
| gc.collect() | |
| else: | |
| gc.collect() | |
| if self.verbose: | |
| self.print_memory_status() | |
| def print_memory_status(self): | |
| """Print current memory usage""" | |
| if torch.cuda.is_available(): | |
| allocated_gb = torch.cuda.memory_allocated() / 1024**3 | |
| reserved_gb = torch.cuda.memory_reserved() / 1024**3 | |
| print(f" GPU: {allocated_gb:.2f}GB allocated, {reserved_gb:.2f}GB reserved") | |
| # CPU memory status | |
| process = psutil.Process(os.getpid()) | |
| cpu_mb = process.memory_info().rss / 1024**2 | |
| print(f" CPU: {cpu_mb:.0f}MB used") | |
| def get_available_gpu_memory(self): | |
| """Get available GPU memory in GB""" | |
| if not torch.cuda.is_available(): | |
| return 0 | |
| return (torch.cuda.get_device_properties(0).total_memory - | |
| torch.cuda.memory_reserved()) / 1024**3 | |
| def can_fit_on_gpu(self, estimated_gb): | |
| """Check if model of estimated size can fit on GPU""" | |
| available = self.get_available_gpu_memory() | |
| # Leave 1GB buffer for safety | |
| return available > (estimated_gb + 1.0) | |
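
# Usage sketch (illustrative, not part of the original module): decide where a
# model lives based on its estimated size, keeping it on CPU when VRAM is tight.
# `controlnet` below is a hypothetical model object; estimate_model_size is
# defined further down in this file.
#
#     mm = MemoryManager()
#     if mm.can_fit_on_gpu(estimate_model_size(controlnet)):
#         controlnet = mm.load_to_gpu(controlnet, "controlnet")
#     else:
#         controlnet = mm.offload_to_cpu(controlnet, "controlnet")
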
class ModelOffloader:
    """Context manager for temporary GPU loading"""

    def __init__(self, model, memory_manager, model_name="model"):
        self.model = model
        self.memory_manager = memory_manager
        self.model_name = model_name
        self.was_on_gpu = False

    def __enter__(self):
        """Move model to GPU"""
        if self.model is not None and hasattr(self.model, 'device'):
            self.was_on_gpu = (self.model.device.type == 'cuda')
        if not self.was_on_gpu:
            self.model = self.memory_manager.load_to_gpu(self.model, self.model_name)
        return self.model

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Move model back to CPU if it wasn't on GPU before"""
        if self.model is not None and not self.was_on_gpu:
            self.model = self.memory_manager.offload_to_cpu(self.model, self.model_name)
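
# Illustrative usage (a sketch, not from the original file): ModelOffloader brackets
# a single GPU-heavy step so the wrapped model only occupies VRAM while it is needed.
# `vae`, `image`, and the encode() call are hypothetical stand-ins.
#
#     mm = MemoryManager(device='cuda')
#     with ModelOffloader(vae, mm, model_name="vae") as vae_gpu:
#         latents = vae_gpu.encode(image)  # runs with the model on the GPU
#     # on exit, the model is moved back to CPU unless it was already on the GPU
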
def optimize_for_zero_gpu(pipe):
    """
    Optimize a pipeline for Hugging Face Spaces Zero GPU.
    This ensures models stay on CPU until the @spaces.GPU decorator activates a GPU.
    """
    if hasattr(pipe, 'enable_model_cpu_offload'):
        pipe.enable_model_cpu_offload()
        print("[MEMORY] Enabled model CPU offloading for Zero GPU")
    if hasattr(pipe, 'enable_vae_slicing'):
        pipe.enable_vae_slicing()
        print("[MEMORY] Enabled VAE slicing for memory efficiency")
    if hasattr(pipe, 'enable_vae_tiling'):
        pipe.enable_vae_tiling()
        print("[MEMORY] Enabled VAE tiling for memory efficiency")
    return pipe
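
# Typical wiring in a Zero GPU Space (a hedged sketch; `spaces`, `pipe`, and the
# generate() function are assumptions, not part of this module): the pipeline is
# built on CPU at startup, optimized once, and only the inference function is
# decorated with @spaces.GPU so a GPU is attached just for that call.
#
#     import spaces
#     pipe = optimize_for_zero_gpu(pipe)
#
#     @spaces.GPU
#     def generate(prompt):
#         return pipe(prompt).images[0]
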
def estimate_model_size(model):
    """Estimate model size in GB"""
    if model is None:
        return 0
    total_params = 0
    for param in model.parameters():
        total_params += param.numel()
    # Assuming float16 (2 bytes per param)
    size_gb = (total_params * 2) / 1024**3
    return size_gb
| print("[OK] Memory management utilities loaded") |