"""
Memory management utilities for Pixagram AI Pixel Art Generator
Provides efficient GPU memory management and model offloading
"""
import torch
import gc
import psutil
import os


class MemoryManager:
    """Move models between CPU and GPU and report/reclaim memory.

    Attributes:
        device: Target passed to ``model.to(...)`` when loading (default 'cuda').
        dtype: Stored as given; not used by any method of this class.
        verbose: When true, progress and memory status lines are printed.
        models_on_gpu: Names of models that ``load_to_gpu`` has placed on
            ``device`` and that have not since been offloaded.
    """

    def __init__(self, device='cuda', dtype=torch.float16, verbose=True):
        self.device = device
        self.dtype = dtype
        self.verbose = verbose
        self.models_on_gpu = set()

    def offload_to_cpu(self, model, model_name="model"):
        """Move ``model`` to the CPU and release cached GPU memory.

        Args:
            model: Object exposing ``.to(device)``; ``None`` is returned as-is.
            model_name: Label used for tracking and log messages.

        Returns:
            The object returned by ``model.to("cpu")``, or the (possibly
            unmoved) model if any step raised. Failures are reported on
            stdout rather than propagated (best-effort semantics).
        """
        if model is None:
            return model

        try:
            model = model.to("cpu")
            self.models_on_gpu.discard(model_name)

            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.synchronize()

            if self.verbose:
                print(f"[MEMORY] Offloaded {model_name} to CPU")
                self.print_memory_status()

            return model
        except Exception as e:
            print(f"[MEMORY] Error offloading {model_name}: {e}")
            return model

    def load_to_gpu(self, model, model_name="model"):
        """Move ``model`` to ``self.device`` and record it as GPU-resident.

        Args:
            model: Object exposing ``.to(device)``; ``None`` is returned as-is.
            model_name: Label used for tracking and log messages.

        Returns:
            The object returned by ``model.to(self.device)``, or the original
            model if the move raised. Failures are reported on stdout rather
            than propagated (best-effort semantics).
        """
        if model is None:
            return model

        try:
            model = model.to(self.device)
            self.models_on_gpu.add(model_name)

            if self.verbose:
                print(f"[MEMORY] Loaded {model_name} to GPU")
                self.print_memory_status()

            return model
        except Exception as e:
            print(f"[MEMORY] Error loading {model_name} to GPU: {e}")
            return model

    def cleanup_memory(self, aggressive=True):
        """Run garbage collection, then release cached CUDA memory.

        Args:
            aggressive: When true, run three GC passes instead of one.
        """
        # Collect first: blocks held by not-yet-collected garbage are still
        # occupied, and empty_cache() can only release unoccupied blocks.
        passes = 3 if aggressive else 1
        for _ in range(passes):
            gc.collect()

        if torch.cuda.is_available():
            torch.cuda.synchronize()
            torch.cuda.empty_cache()

        if self.verbose:
            self.print_memory_status()

    def print_memory_status(self):
        """Print GPU allocated/reserved memory (if CUDA) and process RSS."""
        if torch.cuda.is_available():
            allocated_gb = torch.cuda.memory_allocated() / 1024**3
            reserved_gb = torch.cuda.memory_reserved() / 1024**3
            print(f"  GPU: {allocated_gb:.2f}GB allocated, {reserved_gb:.2f}GB reserved")

        # CPU memory status: resident set size of the current process
        process = psutil.Process(os.getpid())
        cpu_mb = process.memory_info().rss / 1024**2
        print(f"  CPU: {cpu_mb:.0f}MB used")

    def _cuda_index(self):
        """Return the CUDA index named by ``self.device``, else the current one.

        Only call this when CUDA is available.
        """
        try:
            target = torch.device(self.device)
        except (TypeError, RuntimeError):
            target = None
        if target is not None and target.type == 'cuda' and target.index is not None:
            return target.index
        return torch.cuda.current_device()

    def get_available_gpu_memory(self):
        """Return total minus reserved memory, in GB, for the managed GPU.

        Both figures are read from the same device: the index named by
        ``self.device`` if it has one (e.g. 'cuda:1'), otherwise the current
        CUDA device. Returns 0 when CUDA is unavailable.
        """
        if not torch.cuda.is_available():
            return 0

        index = self._cuda_index()
        total_bytes = torch.cuda.get_device_properties(index).total_memory
        reserved_bytes = torch.cuda.memory_reserved(index)
        return (total_bytes - reserved_bytes) / 1024**3

    def can_fit_on_gpu(self, estimated_gb):
        """Return True if ``estimated_gb`` plus a 1GB margin is available."""
        available = self.get_available_gpu_memory()
        # Leave 1GB buffer for safety
        return available > (estimated_gb + 1.0)


class ModelOffloader:
    """Context manager that places a model on the GPU for the ``with`` body.

    On entry the model is handed to ``memory_manager.load_to_gpu`` unless it
    is already on a CUDA device; on exit it is handed to
    ``memory_manager.offload_to_cpu`` unless it was already on CUDA at entry.

    Args:
        model: Model to manage; ``None`` makes the context a no-op.
        memory_manager: Object providing ``load_to_gpu(model, name)`` and
            ``offload_to_cpu(model, name)``, each returning the moved model.
        model_name: Label forwarded to the memory manager.
    """

    def __init__(self, model, memory_manager, model_name="model"):
        self.model = model
        self.memory_manager = memory_manager
        self.model_name = model_name
        self.was_on_gpu = False

    @staticmethod
    def _is_on_cuda(model):
        """Report whether ``model`` currently sits on a CUDA device.

        Uses ``model.device`` when present; otherwise falls back to the device
        of the first parameter, since plain ``torch.nn.Module`` objects have
        no ``device`` attribute. Unknown placement counts as "not on CUDA".
        """
        device = getattr(model, 'device', None)
        if device is None and hasattr(model, 'parameters'):
            first_param = next(iter(model.parameters()), None)
            device = getattr(first_param, 'device', None)
        return getattr(device, 'type', None) == 'cuda'

    def __enter__(self):
        """Load the model to the GPU if needed and return it."""
        if self.model is not None:
            self.was_on_gpu = self._is_on_cuda(self.model)
            if not self.was_on_gpu:
                # Models without a .device attribute are loaded here too, so
                # entry stays symmetric with the offload performed on exit.
                self.model = self.memory_manager.load_to_gpu(self.model, self.model_name)
        return self.model

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Move model back to CPU if it wasn't on GPU before.

        Returns None, so exceptions raised in the ``with`` body propagate.
        """
        if self.model is not None and not self.was_on_gpu:
            self.model = self.memory_manager.offload_to_cpu(self.model, self.model_name)


def optimize_for_zero_gpu(pipe):
    """
    Switch on the memory-saving features a pipeline offers, for Zero GPU use.

    Each of ``enable_model_cpu_offload``, ``enable_vae_slicing`` and
    ``enable_vae_tiling`` is called, in that order, if ``pipe`` has an
    attribute of that name; a confirmation line is printed after each call.
    The same ``pipe`` object is returned.
    """
    # (method to call when present, message printed once it has been called)
    switches = (
        ('enable_model_cpu_offload',
         "[MEMORY] Enabled model CPU offloading for Zero GPU"),
        ('enable_vae_slicing',
         "[MEMORY] Enabled VAE slicing for memory efficiency"),
        ('enable_vae_tiling',
         "[MEMORY] Enabled VAE tiling for memory efficiency"),
    )

    for method_name, notice in switches:
        if not hasattr(pipe, method_name):
            continue
        getattr(pipe, method_name)()
        print(notice)

    return pipe


def estimate_model_size(model, bytes_per_param=2):
    """Estimate the size of a model's parameters in GB.

    Args:
        model: Object exposing ``parameters()`` that yields tensors; ``None``
            yields a size of 0.
        bytes_per_param: Bytes assumed per parameter. Defaults to 2 (float16).
            Pass ``None`` to use each parameter's actual ``element_size()``.

    Returns:
        Estimated parameter size in GB (bytes / 1024**3). Only parameters are
        counted; buffers are not included.
    """
    if model is None:
        return 0

    if bytes_per_param is None:
        # Measure with the real dtype width of every parameter.
        total_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
    else:
        total_bytes = sum(p.numel() for p in model.parameters()) * bytes_per_param

    return total_bytes / 1024**3


# Import-time banner: this runs whenever the module is imported or executed.
print("[OK] Memory management utilities loaded")