"""
Model loading and initialization for Pixagram AI Pixel Art Generator
UPDATED VERSION with proper InstantID pipeline support
"""
import torch
import time
from diffusers import (
    ControlNetModel,
    AutoencoderKL,
    LCMScheduler
)
from insightface.app import FaceAnalysis
from controlnet_aux import ZoeDetector
from huggingface_hub import hf_hub_download
from compel import Compel, ReturnedEmbeddingsType

# Use InstantID pipeline
from pipeline_stable_diffusion_xl_instantid_img2img import (
    StableDiffusionXLInstantIDImg2ImgPipeline
)

from config import (
    device, dtype, MODEL_REPO, MODEL_FILES, HUGGINGFACE_TOKEN,
    FACE_DETECTION_CONFIG, CLIP_SKIP, DOWNLOAD_CONFIG
)


def download_model_with_retry(repo_id, filename, max_retries=None):
    """Download model with retry logic and proper token handling."""
    if max_retries is None:
        max_retries = DOWNLOAD_CONFIG['max_retries']
    
    for attempt in range(max_retries):
        try:
            print(f"  Attempting to download {filename} (attempt {attempt + 1}/{max_retries})...")
            
            kwargs = {"repo_type": "model"}
            if HUGGINGFACE_TOKEN:
                kwargs["token"] = HUGGINGFACE_TOKEN
            
            path = hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                **kwargs
            )
            print(f"  [OK] Downloaded: {filename}")
            return path
            
        except Exception as e:
            print(f"  [WARNING] Download attempt {attempt + 1} failed: {e}")
            
            if attempt < max_retries - 1:
                print(f"  Retrying in {DOWNLOAD_CONFIG['retry_delay']} seconds...")
                time.sleep(DOWNLOAD_CONFIG['retry_delay'])
            else:
                print(f"  [ERROR] Failed to download {filename} after {max_retries} attempts")
                raise
    
    return None


def load_face_analysis():
    """Load face analysis model on CPU to save GPU memory."""
    print("Loading face analysis model on CPU...")
    try:
        # Force CPU execution for face analysis to save GPU memory
        face_app = FaceAnalysis(
            name=FACE_DETECTION_CONFIG['model_name'],
            root='./models/insightface',
            providers=['CPUExecutionProvider']  # CPU only for face detection
        )
        face_app.prepare(
            ctx_id=-1,  # -1 for CPU
            det_size=FACE_DETECTION_CONFIG['det_size']
        )
        print("  [OK] Face analysis model loaded on CPU (GPU memory saved)")
        return face_app, True
    except Exception as e:
        print(f"  [WARNING] Face detection not available: {e}")
        return None, False


def load_depth_detector():
    """Load Zoe Depth detector with optimized memory management."""
    print("Loading Zoe Depth detector...")
    try:
        zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
        # Start on CPU to save memory during initialization
        zoe_depth = zoe_depth.to("cpu")
        print("  [OK] Zoe Depth loaded (on CPU, will move to GPU when needed)")
        return zoe_depth, True
    except Exception as e:
        print(f"  [WARNING] Zoe Depth not available: {e}")
        return None, False


def load_controlnets():
    """
    Load ControlNets for InstantID pipeline.
    Returns both ControlNets (InstantID first, then Depth).
    """
    print("Loading InstantID ControlNet...")
    controlnet_instantid = ControlNetModel.from_pretrained(
        "InstantX/InstantID",
        subfolder="ControlNetModel",
        torch_dtype=dtype
    ).to(device)
    print("  [OK] InstantID ControlNet loaded")
    
    print("Loading Zoe Depth ControlNet...")
    controlnet_depth = ControlNetModel.from_pretrained(
        "diffusers/controlnet-zoe-depth-sdxl-1.0",
        torch_dtype=dtype
    ).to(device)
    print("  [OK] Zoe Depth ControlNet loaded")
    
    return controlnet_instantid, controlnet_depth


def load_sdxl_pipeline(controlnets):
    """
    Load SDXL pipeline with InstantID support.
    controlnets MUST be a list: [identitynet, depthnet]
    """
    print("Loading SDXL checkpoint with InstantID pipeline...")
    try:
        model_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['checkpoint'])
        
        # Use InstantID-enabled pipeline
        pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_single_file(
            model_path,
            controlnet=controlnets,
            torch_dtype=dtype,
            use_safetensors=True
        ).to(device)
        
        # Load IP-Adapter weights for InstantID
        print("Loading IP-Adapter for InstantID...")
        ip_adapter_path = download_model_with_retry(
            "InstantX/InstantID",
            "ip-adapter.bin"
        )
        pipe.load_ip_adapter_instantid(ip_adapter_path)
        pipe.set_ip_adapter_scale(0.8)  # Default scale
        
        print("  [OK] InstantID pipeline loaded successfully")
        return pipe, True
        
    except Exception as e:
        print(f"  [ERROR] Could not load InstantID pipeline: {e}")
        import traceback
        traceback.print_exc()
        
        # Fallback to standard pipeline
        print("  Falling back to standard SDXL pipeline (no InstantID)")
        from diffusers import StableDiffusionXLControlNetImg2ImgPipeline
        pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
            "stabilityai/stable-diffusion-xl-base-1.0",
            controlnet=controlnets,
            torch_dtype=dtype,
            use_safetensors=True
        ).to(device)
        return pipe, False


def load_lora(pipe):
    """Load LORA from HuggingFace Hub."""
    print("Loading LORA (retroart) from HuggingFace Hub...")
    try:
        lora_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora'])
        pipe.load_lora_weights(lora_path, adapter_name="retroart")
        print(f"  [OK] LORA loaded successfully")
        return True
    except Exception as e:
        print(f"  [WARNING] Could not load LORA: {e}")
        return False


def setup_compel(pipe):
    """Setup Compel for better SDXL prompt handling."""
    print("Setting up Compel for enhanced prompt processing...")
    try:
        compel = Compel(
            tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
            text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
            returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
            requires_pooled=[False, True]
        )
        print("  [OK] Compel loaded successfully")
        return compel, True
    except Exception as e:
        print(f"  [WARNING] Compel not available: {e}")
        return None, False


def setup_scheduler(pipe):
    """Setup LCM scheduler."""
    print("Setting up LCM scheduler...")
    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
    print("  [OK] LCM scheduler configured")


def optimize_pipeline(pipe):
    """Apply optimizations to pipeline."""
    if device == "cuda":
        try:
            pipe.enable_xformers_memory_efficient_attention()
            print("  [OK] xformers enabled")
        except Exception as e:
            print(f"  [INFO] xformers not available: {e}")


def load_caption_model():
    """
    Load caption model with proper error handling.
    Tries multiple models in order of quality.
    Models start on CPU and move to GPU only when needed.
    """
    print("Loading caption model...")
    
    # Try GIT-Large first
    try:
        from transformers import AutoProcessor, AutoModelForCausalLM
        
        print("  Attempting GIT-Large (recommended)...")
        caption_processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
        caption_model = AutoModelForCausalLM.from_pretrained(
            "microsoft/git-large-coco",
            torch_dtype=dtype  # Use dtype from config
        ).to("cpu")  # Start on CPU to save GPU memory
        print("  [OK] GIT-Large model loaded (on CPU, will move to GPU when needed)")
        return caption_processor, caption_model, True, 'git'
    except Exception as e1:
        print(f"  [INFO] GIT-Large not available: {e1}")
        
        # Try BLIP base as fallback
        try:
            from transformers import BlipProcessor, BlipForConditionalGeneration
            
            print("  Attempting BLIP base (fallback)...")
            caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
            caption_model = BlipForConditionalGeneration.from_pretrained(
                "Salesforce/blip-image-captioning-base",
                torch_dtype=dtype  # Use dtype from config
            ).to("cpu")  # Start on CPU to save GPU memory
            print("  [OK] BLIP base model loaded (on CPU, will move to GPU when needed)")
            return caption_processor, caption_model, True, 'blip'
        except Exception as e2:
            print(f"  [WARNING] Caption models not available: {e2}")
            return None, None, False, 'none'


def set_clip_skip(pipe):
    """Set CLIP skip value."""
    if hasattr(pipe, 'text_encoder'):
        print(f"  [OK] CLIP skip set to {CLIP_SKIP}")


print("[OK] Model loading functions ready")