"""
Model Manager - Handles loading and caching of YOLO and VLM models
"""

import os
from typing import Optional, Tuple

import torch
from transformers import (
    Qwen2_5_VLForConditionalGeneration,
    AutoProcessor,
    BitsAndBytesConfig
)
from ultralytics import YOLO

from config import (
    YOLO_MODEL_PATH,
    VLM_MODEL_ID,
    QUANTIZATION_CONFIG,
    YOLO_CONFIDENCE_THRESHOLD
)


class ModelManager:
    """Singleton that loads and holds the YOLO detector and the Qwen2.5-VL model.

    Every call to ``ModelManager()`` returns the same object. The models are
    not loaded on construction; call :meth:`load_models` once before using
    :meth:`detect_sign_stamp` or the ``vlm_model`` / ``processor`` attributes.
    """

    # The single shared instance, created on first construction.
    _instance = None
    # Guards __init__ so repeated ModelManager() calls do not reset loaded models.
    _initialized = False

    def __new__(cls):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        """Initialise the model slots to ``None`` the first time only."""
        if not ModelManager._initialized:
            self.yolo_model = None
            self.vlm_model = None
            self.processor = None
            ModelManager._initialized = True

    def load_models(self, force_reload: bool = False):
        """Load the YOLO and VLM models into memory and warm up the VLM.

        The call is idempotent: if all models are already loaded it returns
        immediately, so accidental repeated calls do not reload the weights.

        Args:
            force_reload: When True, reload the models even if they are
                already loaded.

        Raises:
            FileNotFoundError: If the YOLO weights file does not exist.
        """
        if self.is_loaded() and not force_reload:
            print("✅ Models already loaded; skipping reload")
            return

        print("🚀 Starting model loading...")

        # Load YOLO model
        self.yolo_model = self._load_yolo_model()

        # Load VLM model
        self.vlm_model, self.processor = self._load_vlm_model()

        # Warm up so the first real request does not pay the start-up cost
        self._warmup_models()

        print("✅ All models loaded successfully!")

    def _load_yolo_model(self) -> "YOLO":
        """Load the trained YOLO model used for signature and stamp detection.

        Returns:
            The YOLO model constructed from ``YOLO_MODEL_PATH``.

        Raises:
            FileNotFoundError: If ``YOLO_MODEL_PATH`` does not exist.
        """
        if not os.path.exists(YOLO_MODEL_PATH):
            raise FileNotFoundError(
                f"YOLO model not found at {YOLO_MODEL_PATH}. "
                "Please ensure best.pt is in utils/models/"
            )

        yolo_model = YOLO(str(YOLO_MODEL_PATH))
        print(f"✅ YOLO model loaded from {YOLO_MODEL_PATH}")
        return yolo_model

    def _load_vlm_model(self) -> Tuple:
        """Load the Qwen2.5-VL model and its processor with bitsandbytes quantization.

        Quantization settings are read from ``QUANTIZATION_CONFIG``; the
        compute dtype is stored there as a string and resolved to the
        matching ``torch`` attribute.

        Returns:
            A ``(model, processor)`` tuple; the model is put in eval mode.
        """
        print(f"📥 Loading VLM model: {VLM_MODEL_ID}")
        print("   (This will download ~4GB on first run)")

        # Configure 4-bit quantization
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=QUANTIZATION_CONFIG["load_in_4bit"],
            bnb_4bit_quant_type=QUANTIZATION_CONFIG["bnb_4bit_quant_type"],
            bnb_4bit_compute_dtype=getattr(
                torch, QUANTIZATION_CONFIG["bnb_4bit_compute_dtype"]
            ),
            bnb_4bit_use_double_quant=QUANTIZATION_CONFIG["bnb_4bit_use_double_quant"],
        )

        # NOTE(review): trust_remote_code=True allows code shipped with the
        # model repository to run locally. The model class used below is
        # imported from transformers itself, so the flag may be unnecessary —
        # confirm before removing it.
        processor = AutoProcessor.from_pretrained(
            VLM_MODEL_ID,
            trust_remote_code=True,
        )

        # Load model with quantization; device placement is left to
        # device_map="auto".
        model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            VLM_MODEL_ID,
            quantization_config=bnb_config,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            trust_remote_code=True,
        )

        model.eval()
        print("✅ Qwen2.5-VL model loaded successfully")

        return model, processor

    def _warmup_models(self):
        """Run one tiny VLM generation so start-up cost is paid at load time.

        Only the VLM is exercised here; the YOLO model is not run. Any
        failure is reported and swallowed because warm-up is best-effort and
        must not prevent the application from starting.
        """
        print("🔥 Warming up models (initializing CUDA context)...")
        import time
        from PIL import Image
        import numpy as np

        warmup_start = time.perf_counter()

        # Create a small all-white dummy image
        dummy_image = Image.fromarray(np.ones((100, 100, 3), dtype=np.uint8) * 255)

        try:
            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "image", "image": dummy_image},
                        {"type": "text", "text": "warm up"},
                    ],
                }
            ]

            # Imported inside the try so a missing package only skips warm-up.
            from qwen_vl_utils import process_vision_info

            text = self.processor.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
            )
            image_inputs, video_inputs = process_vision_info(messages)
            inputs = self.processor(
                text=[text],
                images=image_inputs,
                videos=video_inputs,
                padding=True,
                return_tensors="pt",
            )
            # Follow the model to wherever device_map="auto" placed it instead
            # of assuming "cuda", which fails when the model is elsewhere.
            inputs = inputs.to(self.vlm_model.device)

            # Run a quick inference
            with torch.no_grad():
                _ = self.vlm_model.generate(**inputs, max_new_tokens=5)

            # Drop the warm-up tensors and release cached GPU memory
            del inputs
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            warmup_time = time.perf_counter() - warmup_start
            print(f"✅ Models warmed up in {warmup_time:.2f}s (CUDA context initialized)")

        except Exception as e:
            # Deliberately broad: warm-up is an optimisation, not a requirement.
            print(f"⚠️ Warmup failed (non-critical): {e}")

    def detect_sign_stamp(
        self, image_path: str, conf_threshold: Optional[float] = None
    ) -> Tuple[dict, dict, float, float]:
        """Detect the most confident signature and stamp in an image using YOLO.

        Args:
            image_path: Image source passed straight to the YOLO model.
            conf_threshold: Detections must have a confidence strictly greater
                than this value to count. Defaults to
                ``YOLO_CONFIDENCE_THRESHOLD`` when None.

        Returns:
            tuple: ``(signature_info, stamp_info, signature_conf, stamp_conf)``.
            Each ``*_info`` is ``{"present": bool, "bbox": [x1, y1, x2, y2] | None}``
            with integer-truncated coordinates; each ``*_conf`` is the
            confidence of the chosen box, or 0.0 when none was found.

        Raises:
            RuntimeError: If the YOLO model has not been loaded.
        """
        if self.yolo_model is None:
            raise RuntimeError("YOLO model not loaded. Call load_models() first.")

        threshold = (
            YOLO_CONFIDENCE_THRESHOLD if conf_threshold is None else conf_threshold
        )

        # The model returns one result per input; a single image was passed.
        results = self.yolo_model(image_path, verbose=False)[0]

        signature_info = {"present": False, "bbox": None}
        stamp_info = {"present": False, "bbox": None}
        signature_conf = 0.0
        stamp_conf = 0.0

        if results.boxes is None:
            return signature_info, stamp_info, signature_conf, stamp_conf

        for box in results.boxes:
            conf = float(box.conf[0])
            if conf <= threshold:
                continue

            cls_id = int(box.cls[0])
            bbox = [int(coord) for coord in box.xyxy[0].cpu().numpy().tolist()]

            # Class 0: signature, Class 1: stamp. Keep only the most
            # confident box of each class.
            if cls_id == 0 and conf > signature_conf:
                signature_info = {"present": True, "bbox": bbox}
                signature_conf = conf
            elif cls_id == 1 and conf > stamp_conf:
                stamp_info = {"present": True, "bbox": bbox}
                stamp_conf = conf

        return signature_info, stamp_info, signature_conf, stamp_conf

    def is_loaded(self) -> bool:
        """Return True when the YOLO model, VLM model and processor are all set."""
        return all(
            component is not None
            for component in (self.yolo_model, self.vlm_model, self.processor)
        )


# Global model manager instance, created at import time. Constructing it only
# sets the model slots to None; no models are loaded until load_models() is
# called on it.
model_manager = ModelManager()