import torch
from PIL import Image

from config import Config
from utils import resize_image_to_1mp, get_caption, draw_kps


class Generator:
    def __init__(self, model_handler):
        self.mh = model_handler

    def prepare_control_images(self, image, width, height):
        """
        Generates conditioning maps, ensuring they are resized to the
        exact target dimensions (width, height).
        """
        print(f"Generating control maps for {width}x{height}...")

        # Generate depth map
        depth_map_raw = self.mh.leres_detector(image)

        # Generate lineart map
        lineart_map_raw = self.mh.lineart_anime_detector(image)

        # Manually resize maps to match the exact output resolution
        depth_map = depth_map_raw.resize((width, height), Image.LANCZOS)
        lineart_map = lineart_map_raw.resize((width, height), Image.LANCZOS)

        return depth_map, lineart_map

    def predict(
        self,
        input_image,
        user_prompt="",
        negative_prompt="",
        guidance_scale=1.5,
        num_inference_steps=6,
        img2img_strength=0.3,
        face_strength=0.3,
        depth_strength=0.3,
        lineart_strength=0.3,
        seed=-1,
    ):
        # 1. Pre-process inputs
        print("Processing Input...")
        processed_image = resize_image_to_1mp(input_image)
        target_width, target_height = processed_image.size

        # 2. Get face info (replaces get_face_embedding)
        face_info = self.mh.get_face_info(processed_image)

        # 3. Generate the prompt: caption the image if the user gave none
        if not user_prompt.strip():
            try:
                generated_caption = get_caption(processed_image)
                final_prompt = f"{Config.STYLE_TRIGGER}, {generated_caption}"
            except Exception as e:
                print(f"Captioning failed: {e}, using default prompt.")
                final_prompt = f"{Config.STYLE_TRIGGER}, a beautiful pixel art image"
        else:
            final_prompt = f"{Config.STYLE_TRIGGER}, {user_prompt}"

        print(f"Prompt: {final_prompt}")
        print(f"Negative Prompt: {negative_prompt}")

        # 4. Generate the other (structural) control maps
        print("Generating Control Maps (Depth, LineArt)...")
        depth_map, lineart_map = self.prepare_control_images(
            processed_image, target_width, target_height
        )

        # 5. Face vs. no-face logic (now includes keypoints).
        # ControlNet order: [InstantID KPS, Depth, LineArt]
        if face_info is not None:
            print("Face detected: Applying InstantID with keypoints.")

            # Use face_info['embedding'] (raw) instead of normed_embedding:
            # the raw embedding has the higher magnitude (~20-30) that the
            # adapter requires.
            face_emb = torch.tensor(
                face_info['embedding'],
                dtype=Config.DTYPE,
                device=Config.DEVICE,
            ).unsqueeze(0)

            # Create the keypoint image
            face_kps = draw_kps(processed_image, face_info['kps'])

            # Set strengths
            controlnet_conditioning_scale = [
                face_strength, depth_strength, lineart_strength
            ]

            # IP-Adapter scale lowered from 0.8 to 0.7 so the LoRA style
            # (pixel art) can override realistic skin textures while still
            # keeping identity.
            self.mh.pipeline.set_ip_adapter_scale(0.7)
        else:
            print("No face detected: Disabling InstantID.")

            # Dummy embedding
            face_emb = torch.zeros(
                (1, 512), dtype=Config.DTYPE, device=Config.DEVICE
            )

            # Dummy keypoint image (black)
            face_kps = Image.new('RGB', (target_width, target_height), (0, 0, 0))

            # Zero out the face ControlNet and the IP-Adapter
            controlnet_conditioning_scale = [0.0, depth_strength, lineart_strength]
            self.mh.pipeline.set_ip_adapter_scale(0.0)

        # Control-guidance-end strategy: cap the face ControlNet's duration.
        # Even at strength 1.0 it stops at 0.6 (60%) of the steps, leaving
        # the final 40% purely to the pixel art LoRA to "pixelize" the face
        # without the ControlNet pulling it back toward a photo.
        face_end_step = min(0.6, face_strength)
        control_guidance_end = [
            face_end_step,     # InstantID: stop early for style
            depth_strength,    # Depth: keep structure longer
            lineart_strength,  # LineArt: keep outlines longer
        ]

        # Seed/generator logic: -1 or None means "pick a random seed"
        if seed == -1 or seed is None:
            seed = torch.Generator().seed()
        generator = torch.Generator(device=Config.DEVICE).manual_seed(int(seed))
        print(f"Using seed: {seed}")

        # 6. Run inference
        print("Running pipeline...")
        result = self.mh.pipeline(
            prompt=final_prompt,
            negative_prompt=negative_prompt,
            image=processed_image,  # Base img2img image
            control_image=[face_kps, depth_map, lineart_map],
            image_embeds=face_emb,  # Face identity embedding
            generator=generator,
            # Parameters from the UI
            strength=img2img_strength,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
            control_guidance_end=control_guidance_end,
            clip_skip=Config.CLIP_SKIP,
        ).images[0]

        return result
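
# --- Usage sketch (illustrative, not part of the original module) ---
# Assumes a ModelHandler that wires up the pipeline, the LeReS/lineart
# detectors, and get_face_info(); the module name "model_handler" and the
# file paths below are assumptions for illustration only.
#
#   from PIL import Image
#   from model_handler import ModelHandler
#
#   gen = Generator(ModelHandler())
#   out = gen.predict(
#       Image.open("input.png"),
#       user_prompt="a portrait of a knight",
#       seed=42,
#   )
#   out.save("output.png")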