""" Memory management utilities for Pixagram AI Pixel Art Generator Provides efficient GPU memory management and model offloading """ import torch import gc import psutil import os class MemoryManager: """Manages GPU and CPU memory efficiently for model offloading""" def __init__(self, device='cuda', dtype=torch.float16, verbose=True): self.device = device self.dtype = dtype self.verbose = verbose self.models_on_gpu = set() def offload_to_cpu(self, model, model_name="model"): """Move model to CPU and free GPU memory""" if model is None: return model try: model = model.to("cpu") self.models_on_gpu.discard(model_name) if torch.cuda.is_available(): torch.cuda.empty_cache() torch.cuda.synchronize() if self.verbose: print(f"[MEMORY] Offloaded {model_name} to CPU") self.print_memory_status() return model except Exception as e: print(f"[MEMORY] Error offloading {model_name}: {e}") return model def load_to_gpu(self, model, model_name="model"): """Move model to GPU temporarily""" if model is None: return model try: model = model.to(self.device) self.models_on_gpu.add(model_name) if self.verbose: print(f"[MEMORY] Loaded {model_name} to GPU") self.print_memory_status() return model except Exception as e: print(f"[MEMORY] Error loading {model_name} to GPU: {e}") return model def cleanup_memory(self, aggressive=True): """Perform memory cleanup""" if torch.cuda.is_available(): torch.cuda.empty_cache() torch.cuda.synchronize() if aggressive: # Multiple GC passes for thorough cleanup for _ in range(3): gc.collect() else: gc.collect() if self.verbose: self.print_memory_status() def print_memory_status(self): """Print current memory usage""" if torch.cuda.is_available(): allocated_gb = torch.cuda.memory_allocated() / 1024**3 reserved_gb = torch.cuda.memory_reserved() / 1024**3 print(f" GPU: {allocated_gb:.2f}GB allocated, {reserved_gb:.2f}GB reserved") # CPU memory status process = psutil.Process(os.getpid()) cpu_mb = process.memory_info().rss / 1024**2 print(f" CPU: {cpu_mb:.0f}MB used") def get_available_gpu_memory(self): """Get available GPU memory in GB""" if not torch.cuda.is_available(): return 0 return (torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved()) / 1024**3 def can_fit_on_gpu(self, estimated_gb): """Check if model of estimated size can fit on GPU""" available = self.get_available_gpu_memory() # Leave 1GB buffer for safety return available > (estimated_gb + 1.0) class ModelOffloader: """Context manager for temporary GPU loading""" def __init__(self, model, memory_manager, model_name="model"): self.model = model self.memory_manager = memory_manager self.model_name = model_name self.was_on_gpu = False def __enter__(self): """Move model to GPU""" if self.model is not None and hasattr(self.model, 'device'): self.was_on_gpu = (self.model.device.type == 'cuda') if not self.was_on_gpu: self.model = self.memory_manager.load_to_gpu(self.model, self.model_name) return self.model def __exit__(self, exc_type, exc_val, exc_tb): """Move model back to CPU if it wasn't on GPU before""" if self.model is not None and not self.was_on_gpu: self.model = self.memory_manager.offload_to_cpu(self.model, self.model_name) def optimize_for_zero_gpu(pipe): """ Optimize pipeline for Hugging Face Spaces Zero GPU This ensures models stay on CPU until @spaces.GPU decorator activates """ if hasattr(pipe, 'enable_model_cpu_offload'): pipe.enable_model_cpu_offload() print("[MEMORY] Enabled model CPU offloading for Zero GPU") if hasattr(pipe, 'enable_vae_slicing'): pipe.enable_vae_slicing() print("[MEMORY] Enabled VAE slicing for memory efficiency") if hasattr(pipe, 'enable_vae_tiling'): pipe.enable_vae_tiling() print("[MEMORY] Enabled VAE tiling for memory efficiency") return pipe def estimate_model_size(model): """Estimate model size in GB""" if model is None: return 0 total_params = 0 for param in model.parameters(): total_params += param.numel() # Assuming float16 (2 bytes per param) size_gb = (total_params * 2) / 1024**3 return size_gb print("[OK] Memory management utilities loaded")