Create memory_utils
memory_utils  ADDED  +160 -0
@@ -0,0 +1,160 @@
"""
Memory management utilities for Pixagram AI Pixel Art Generator
Provides efficient GPU memory management and model offloading
"""
import torch
import gc
import psutil
import os


class MemoryManager:
    """Manages GPU and CPU memory efficiently for model offloading"""

    def __init__(self, device='cuda', dtype=torch.float16, verbose=True):
        self.device = device
        self.dtype = dtype
        self.verbose = verbose
        self.models_on_gpu = set()

    def offload_to_cpu(self, model, model_name="model"):
        """Move model to CPU and free GPU memory"""
        if model is None:
            return model

        try:
            model = model.to("cpu")
            self.models_on_gpu.discard(model_name)

            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.synchronize()

            if self.verbose:
                print(f"[MEMORY] Offloaded {model_name} to CPU")
                self.print_memory_status()

            return model
        except Exception as e:
            print(f"[MEMORY] Error offloading {model_name}: {e}")
            return model

    def load_to_gpu(self, model, model_name="model"):
        """Move model to GPU temporarily"""
        if model is None:
            return model

        try:
            model = model.to(self.device)
            self.models_on_gpu.add(model_name)

            if self.verbose:
                print(f"[MEMORY] Loaded {model_name} to GPU")
                self.print_memory_status()

            return model
        except Exception as e:
            print(f"[MEMORY] Error loading {model_name} to GPU: {e}")
            return model

    def cleanup_memory(self, aggressive=True):
        """Perform memory cleanup"""
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

        if aggressive:
            # Multiple GC passes for thorough cleanup
            for _ in range(3):
                gc.collect()
        else:
            gc.collect()

        if self.verbose:
            self.print_memory_status()

    def print_memory_status(self):
        """Print current memory usage"""
        if torch.cuda.is_available():
            allocated_gb = torch.cuda.memory_allocated() / 1024**3
            reserved_gb = torch.cuda.memory_reserved() / 1024**3
            print(f"  GPU: {allocated_gb:.2f}GB allocated, {reserved_gb:.2f}GB reserved")

        # CPU memory status
        process = psutil.Process(os.getpid())
        cpu_mb = process.memory_info().rss / 1024**2
        print(f"  CPU: {cpu_mb:.0f}MB used")

    def get_available_gpu_memory(self):
        """Get available GPU memory in GB"""
        if not torch.cuda.is_available():
            return 0

        return (torch.cuda.get_device_properties(0).total_memory -
                torch.cuda.memory_reserved()) / 1024**3

    def can_fit_on_gpu(self, estimated_gb):
        """Check if model of estimated size can fit on GPU"""
        available = self.get_available_gpu_memory()
        # Leave 1GB buffer for safety
        return available > (estimated_gb + 1.0)


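# Illustrative sketch, not part of the original module: one way MemoryManager could be
# used to shuttle a model between CPU and GPU around a single inference call. The
# Linear layer below is a hypothetical stand-in, not a Pixagram component.
def _example_memory_manager_usage():
    manager = MemoryManager(device='cuda', dtype=torch.float16, verbose=True)
    model = torch.nn.Linear(512, 512)                     # stand-in for a real model
    model = manager.load_to_gpu(model, "example_model")   # move onto the GPU (if available)
    with torch.no_grad():
        x = torch.randn(1, 512, device=next(model.parameters()).device)
        _ = model(x)                                      # run while the model is resident
    model = manager.offload_to_cpu(model, "example_model")
    manager.cleanup_memory(aggressive=True)               # release cached GPU blocks + gc

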
class ModelOffloader:
    """Context manager for temporary GPU loading"""

    def __init__(self, model, memory_manager, model_name="model"):
        self.model = model
        self.memory_manager = memory_manager
        self.model_name = model_name
        self.was_on_gpu = False

    def __enter__(self):
        """Move model to GPU"""
        if self.model is not None and hasattr(self.model, 'device'):
            self.was_on_gpu = (self.model.device.type == 'cuda')
            if not self.was_on_gpu:
                self.model = self.memory_manager.load_to_gpu(self.model, self.model_name)
        return self.model

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Move model back to CPU if it wasn't on GPU before"""
        if self.model is not None and not self.was_on_gpu:
            self.model = self.memory_manager.offload_to_cpu(self.model, self.model_name)


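# Illustrative sketch, not part of the original module: ModelOffloader as a context
# manager, so a model only occupies GPU memory for the duration of one call. Note that
# __enter__ only relocates objects that expose a `.device` attribute (diffusers models
# do; a plain torch.nn.Module is returned unchanged). `controlnet` and `latents` are
# hypothetical placeholder arguments, not Pixagram components.
def _example_model_offloader_usage(controlnet, latents):
    manager = MemoryManager(verbose=False)
    with ModelOffloader(controlnet, manager, "controlnet") as gpu_model:
        result = gpu_model(latents)   # runs while the model is (temporarily) on the GPU
    # on exit the model is offloaded back to CPU unless it was already on the GPU
    return result

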
def optimize_for_zero_gpu(pipe):
    """
    Optimize pipeline for Hugging Face Spaces Zero GPU
    This ensures models stay on CPU until @spaces.GPU decorator activates
    """
    if hasattr(pipe, 'enable_model_cpu_offload'):
        pipe.enable_model_cpu_offload()
        print("[MEMORY] Enabled model CPU offloading for Zero GPU")

    if hasattr(pipe, 'enable_vae_slicing'):
        pipe.enable_vae_slicing()
        print("[MEMORY] Enabled VAE slicing for memory efficiency")

    if hasattr(pipe, 'enable_vae_tiling'):
        pipe.enable_vae_tiling()
        print("[MEMORY] Enabled VAE tiling for memory efficiency")

    return pipe


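# Illustrative sketch, not part of the original module: wiring optimize_for_zero_gpu
# into a Zero GPU Space. The pipeline class, checkpoint id, and the `spaces` decorator
# usage below are assumptions made for this example, not Pixagram's actual setup.
def _example_zero_gpu_setup():
    import spaces                                     # Hugging Face Spaces GPU helper
    from diffusers import StableDiffusionXLPipeline

    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
    )
    pipe = optimize_for_zero_gpu(pipe)                # weights stay on CPU until needed

    @spaces.GPU
    def generate(prompt):
        # the decorator attaches a GPU only for the duration of this call
        return pipe(prompt).images[0]

    return generate

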
def estimate_model_size(model):
    """Estimate model size in GB"""
    if model is None:
        return 0

    total_params = 0
    for param in model.parameters():
        total_params += param.numel()

    # Assuming float16 (2 bytes per param)
    size_gb = (total_params * 2) / 1024**3
    return size_gb


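# Illustrative sketch, not part of the original module: combining estimate_model_size
# with MemoryManager.can_fit_on_gpu to decide whether a model can stay resident on the
# GPU or should be kept on CPU and loaded on demand.
def _example_placement_decision(model):
    manager = MemoryManager(verbose=False)
    size_gb = estimate_model_size(model)              # assumes float16 weights
    if manager.can_fit_on_gpu(size_gb):
        return manager.load_to_gpu(model, "model")    # fits: keep it resident on the GPU
    return manager.offload_to_cpu(model, "model")     # too large: keep it on the CPU

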
print("[OK] Memory management utilities loaded")