import gradio as gr
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from PIL import Image
import cv2
import math
import warnings

from diffusers import DDPMScheduler
import mediapipe as mp

warnings.filterwarnings('ignore')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


class OptimizedMediaPipeExtractor:
    """Extracts an 18-dimensional facial attribute vector via MediaPipe Face Mesh."""

    def __init__(self):
        self.mp_face_mesh = mp.solutions.face_mesh
        self.face_mesh = self.mp_face_mesh.FaceMesh(
            static_image_mode=True,
            max_num_faces=1,
            refine_landmarks=True,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.5
        )
        self.landmark_indices = {
            'face_outline': [10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
                             397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,
                             172, 58, 132, 93, 234, 127, 162, 21, 54, 103, 67, 109],
            'left_eye': [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246],
            'right_eye': [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398],
            'left_eyebrow': [46, 53, 52, 51, 48, 115, 131, 134, 102, 49, 220, 305],
            'right_eyebrow': [276, 283, 282, 295, 285, 336, 296, 334, 293, 300, 276, 353],
            'nose': [1, 2, 5, 4, 6, 19, 94, 168, 8, 9, 10, 151, 195, 197, 196, 3],
            'lips': [61, 84, 17, 314, 405, 320, 307, 375, 321, 308, 324, 318],
            'chin': [175, 199, 428, 262, 18],
            'forehead': [9, 10, 151, 337, 299, 333, 298, 301]
        }

    def extract_features(self, image_path_or_array):
        try:
            features = self._extract_robust_features(image_path_or_array)
            return self._normalize_features(features)
        except Exception as e:
            print(f"Error in feature extraction: {e}")
            return self._get_default_features()

    def _extract_robust_features(self, image_path_or_array):
        if isinstance(image_path_or_array, str):
            image = cv2.imread(image_path_or_array)
            if image is None:
                raise ValueError(f"Failed to load image: {image_path_or_array}")
        else:
            image = image_path_or_array.copy()

        if image is None or image.size == 0:
            raise ValueError("Invalid image array provided")
        if len(image.shape) != 3 or image.shape[2] != 3:
            raise ValueError("Image must be a 3-channel color image")

        # Arrays are assumed to be in BGR (cv2.imread) order. PIL-sourced RGB
        # arrays get their channels swapped here, but the landmark geometry
        # used below is unaffected by channel order.
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        h, w = rgb_image.shape[:2]
        if h < 50 or w < 50:
            raise ValueError("Image too small for feature extraction")

        results = self.face_mesh.process(rgb_image)
        if not results.multi_face_landmarks:
            raise ValueError("No face detected in image")

        landmarks = results.multi_face_landmarks[0]
        face_landmarks = np.array([[lm.x * w, lm.y * h, lm.z] for lm in landmarks.landmark])
        if len(face_landmarks) == 0:
            raise ValueError("No valid landmarks detected")

        features = torch.zeros(18, dtype=torch.float32)

        # Feature 0: tilt of the eye line, normalized by face width.
        try:
            left_corner = face_landmarks[33] if len(face_landmarks) > 33 else face_landmarks[0]
            right_corner = face_landmarks[263] if len(face_landmarks) > 263 else face_landmarks[-1]
            face_width = np.max(face_landmarks[:, 0]) - np.min(face_landmarks[:, 0])
            eye_angle = abs(left_corner[1] - right_corner[1]) / (face_width + 1e-8)
            features[0] = min(max(int(eye_angle * 20), 0), 2)
        except Exception:
            features[0] = 1

        # Feature 1: eye openness.
        try:
            eye_openness = self._calculate_eye_openness(face_landmarks)
            features[1] = 1 if eye_openness > 10 else 0
        except Exception:
            features[1] = 1

        # Feature 2: eyelid prominence.
        try:
            eyelid_prominence = self._calculate_eyelid_prominence(face_landmarks)
            features[2] = 1 if eyelid_prominence > 5 else 0
        except Exception:
            features[2] = 1

        # Feature 3: chin length, normalized by face height.
        try:
            face_height = np.max(face_landmarks[:, 1]) - np.min(face_landmarks[:, 1])
            nose_tip = face_landmarks[2] if len(face_landmarks) > 2 else face_landmarks[0]
            chin_bottom = face_landmarks[18] if len(face_landmarks) > 18 else face_landmarks[-1]
            chin_length = np.linalg.norm(nose_tip[:2] - chin_bottom[:2])
            chin_length_normalized = chin_length / (face_height + 1e-8)
            features[3] = min(max(int(chin_length_normalized * 6), 0), 2)
        except Exception:
            features[3] = 1

        # Features 4-17: a mix of measured attributes and fixed defaults.
        for i in range(4, 18):
            try:
                if i == 4:
                    features[i] = 1 if self._calculate_eyebrow_thickness(rgb_image, face_landmarks) > 0.3 else 0
                elif i == 5:
                    features[i] = 7
                elif i == 6:
                    features[i] = 2
                elif i == 7:
                    face_width = np.max(face_landmarks[:, 0]) - np.min(face_landmarks[:, 0])
                    face_height = np.max(face_landmarks[:, 1]) - np.min(face_landmarks[:, 1])
                    aspect_ratio = face_width / (face_height + 1e-8)
                    features[i] = min(max(int(aspect_ratio * 3.5), 0), 6)
                elif i == 8:
                    features[i] = 0
                elif i == 9:
                    features[i] = 55
                elif i == 10:
                    features[i] = 2
                elif i == 11:
                    features[i] = 5
                elif i == 12:
                    features[i] = 4
                elif i in (13, 14):
                    features[i] = 0
                else:  # i in (15, 16, 17)
                    features[i] = 1
            except Exception:
                features[i] = 1

        return features

    def _normalize_features(self, features):
        normalized = torch.zeros_like(features, dtype=torch.float32)
        # Per-attribute maxima used to scale each raw feature into [0, 1].
        max_values = [2, 1, 1, 2, 1, 13, 3, 6, 14, 110, 4, 10, 9, 11, 6, 2, 2, 2]
        for i, max_val in enumerate(max_values):
            if max_val > 0:
                normalized[i] = torch.clamp(features[i] / max_val, 0.0, 1.0)
            else:
                normalized[i] = 0.0
        return normalized

    def _get_default_features(self):
        default_values = torch.tensor(
            [3, 55, 7, 7, 6, 1, 1, 1, 1, 1, 2, 5, 5, 3, 1, 1, 2, 1],
            dtype=torch.float32
        )
        return self._normalize_features(default_values)

    def _calculate_eye_openness(self, landmarks):
        try:
            left_top = landmarks[159][1] if len(landmarks) > 159 else 0
            left_bottom = landmarks[145][1] if len(landmarks) > 145 else 0
            left_openness = abs(left_top - left_bottom)
            right_top = landmarks[386][1] if len(landmarks) > 386 else 0
            right_bottom = landmarks[374][1] if len(landmarks) > 374 else 0
            right_openness = abs(right_top - right_bottom)
            return (left_openness + right_openness) / 2
        except Exception:
            return 10.0

    def _calculate_eyelid_prominence(self, landmarks):
        try:
            left_eyelid = landmarks[159][1] - landmarks[158][1] if len(landmarks) > 159 else 5
            right_eyelid = landmarks[386][1] - landmarks[385][1] if len(landmarks) > 386 else 5
            return abs(left_eyelid + right_eyelid) / 2
        except Exception:
            return 5.0

    def _calculate_eyebrow_thickness(self, image, landmarks):
        try:
            left_brow_points = [landmarks[i] for i in self.landmark_indices['left_eyebrow'] if i < len(landmarks)]
            if len(left_brow_points) < 2:
                return 0.5
            y_coords = [p[1] for p in left_brow_points]
            thickness = (max(y_coords) - min(y_coords)) / image.shape[0]
            return min(thickness * 10, 1.0)
        except Exception:
            return 0.5
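
# Illustrative standalone use of the extractor (the file path below is a
# placeholder, not part of the original app):
#
#   extractor = OptimizedMediaPipeExtractor()
#   feats = extractor.extract_features("portrait.jpg")
#   print(feats.shape)  # torch.Size([18]); each value is normalized to [0, 1]
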

class OptimizedConditionedUNet(nn.Module):
    """A lightweight U-Net conditioned on timestep and facial attribute embeddings."""

    def __init__(self, in_channels=3, out_channels=3, attr_dim=18, base_channels=56):
        super().__init__()
        self.time_embed_dim = 224

        # Sinusoidal timestep embedding projected to 448 dims.
        self.time_embed = nn.Sequential(
            nn.Linear(self.time_embed_dim, 448),
            nn.SiLU(),
            nn.Linear(448, 448)
        )

        # Attribute vector projected to the same 448-dim space as the time embedding.
        self.attr_embed = nn.Sequential(
            nn.Linear(attr_dim, 112),
            nn.ReLU(),
            nn.Dropout(0.05),
            nn.Linear(112, 224),
            nn.ReLU(),
            nn.Linear(224, 448)
        )

        self.conv_in = nn.Conv2d(in_channels, base_channels, 3, padding=1)

        self.down_blocks = nn.ModuleList([
            self._make_down_block(base_channels, base_channels * 2),
            self._make_down_block(base_channels * 2, base_channels * 4),
            self._make_down_block(base_channels * 4, base_channels * 8),
            self._make_down_block(base_channels * 8, base_channels * 8)
        ])

        # The conditioning embedding is broadcast spatially and concatenated
        # at the bottleneck, hence the extra 448 input channels.
        self.mid_block = self._make_conv_block(base_channels * 8 + 448, base_channels * 8)
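
# Quick shape sanity check (illustrative; run manually if desired):
#
#   net = OptimizedConditionedUNet(base_channels=64)
#   out = net(torch.randn(1, 3, 256, 256),   # noisy image batch
#             torch.tensor([10]),            # timestep per sample
#             torch.rand(1, 18))             # normalized attribute vector
#   print(out.shape)  # torch.Size([1, 3, 256, 256])
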
        self.up_blocks = nn.ModuleList([
            nn.Sequential(
                nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
                self._make_conv_block(base_channels * 8 + base_channels * 8, base_channels * 8)
            ),
            nn.Sequential(
                nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
                self._make_conv_block(base_channels * 8 + base_channels * 4, base_channels * 4)
            ),
            nn.Sequential(
                nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
                self._make_conv_block(base_channels * 4 + base_channels * 2, base_channels * 2)
            ),
            nn.Sequential(
                nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
                self._make_conv_block(base_channels * 2 + base_channels, base_channels)
            )
        ])

        self.conv_out = nn.Sequential(
            nn.GroupNorm(8, base_channels),
            nn.SiLU(),
            nn.Conv2d(base_channels, out_channels, 3, padding=1)
        )

    def _make_conv_block(self, in_ch, out_ch):
        return nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.GroupNorm(min(32, max(1, out_ch // 4)), out_ch),
            nn.SiLU(),
            nn.Conv2d(out_ch, out_ch, 3, padding=1),
            nn.GroupNorm(min(32, max(1, out_ch // 4)), out_ch),
            nn.SiLU()
        )

    def _make_down_block(self, in_ch, out_ch):
        return nn.Sequential(
            nn.MaxPool2d(2),
            self._make_conv_block(in_ch, out_ch)
        )

    def get_time_embedding(self, timesteps):
        # Standard sinusoidal timestep embedding, as in DDPM.
        half_dim = self.time_embed_dim // 2
        embeddings = math.log(10000) / (half_dim - 1)
        embeddings = torch.exp(torch.arange(half_dim, device=timesteps.device) * -embeddings)
        embeddings = timesteps[:, None] * embeddings[None, :]
        embeddings = torch.cat([torch.sin(embeddings), torch.cos(embeddings)], dim=1)
        return self.time_embed(embeddings)

    def forward(self, x, timesteps, attributes):
        t_emb = self.get_time_embedding(timesteps)
        attr_emb = self.attr_embed(attributes)
        combined_emb = t_emb + attr_emb

        x = self.conv_in(x)
        skip_connections = [x]
        for down_block in self.down_blocks:
            x = down_block(x)
            skip_connections.append(x)

        # Broadcast the combined embedding over the bottleneck's spatial grid.
        attr_spatial = combined_emb.unsqueeze(-1).unsqueeze(-1)
        attr_spatial = attr_spatial.expand(-1, -1, x.shape[2], x.shape[3])
        x = torch.cat([x, attr_spatial], dim=1)
        x = self.mid_block(x)

        # Drop the deepest feature map (already consumed by the mid block),
        # then reverse so skips pair with the decoder stages.
        skip_connections = skip_connections[:-1]
        skip_connections = skip_connections[::-1]

        for up_block, skip in zip(self.up_blocks, skip_connections):
            x = up_block[0](x)  # upsample
            if x.shape[2:] != skip.shape[2:]:
                x = F.interpolate(x, size=skip.shape[2:], mode='bilinear', align_corners=False)
            x = torch.cat([x, skip], dim=1)
            x = up_block[1](x)  # conv block

        return self.conv_out(x)


class CartoonifyProcessor:
    def __init__(self, model_path="wizcodes12/image_to_cartoonify"):
        self.device = device
        self.model = None
        self.noise_scheduler = None
        self.mp_extractor = OptimizedMediaPipeExtractor()
        self.load_model(model_path)

    def load_model(self, model_path):
        try:
            from huggingface_hub import hf_hub_download
            model_file = hf_hub_download(
                repo_id=model_path,
                filename="image_to_cartoonify.pt",
                repo_type="model"
            )
            checkpoint = torch.load(model_file, map_location=self.device)

            self.model = OptimizedConditionedUNet(
                in_channels=3,
                out_channels=3,
                attr_dim=18,
                base_channels=64
            ).to(self.device)
            self.model.load_state_dict(checkpoint['model_state_dict'])
            self.model.eval()

            self.noise_scheduler = DDPMScheduler(
                num_train_timesteps=1000,
                beta_start=0.00085,
                beta_end=0.012,
                beta_schedule="scaled_linear",
                prediction_type="epsilon"
            )
            print("Model loaded successfully!")
        except Exception as e:
            print(f"Error loading model: {e}")
            raise

    def process_image(self, image):
        if image is None:
            return None
        try:
            if isinstance(image, np.ndarray):
                image = Image.fromarray(image)
            # Force RGB so RGBA or grayscale uploads don't break feature extraction.
            image = image.convert('RGB').resize((256, 256))
            image_np = np.array(image)
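
# Illustrative standalone use of the processor (paths are placeholders):
#
#   proc = CartoonifyProcessor()  # downloads the checkpoint from the Hub
#   cartoon = proc.process_image(Image.open("portrait.jpg"))
#   if cartoon is not None:
#       cartoon.save("cartoon.png")
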
            features = self.mp_extractor.extract_features(image_np)
            features = features.unsqueeze(0).to(self.device)

            with torch.no_grad():
                # Start from pure Gaussian noise and iteratively denoise,
                # conditioned on the extracted facial attributes.
                generated_image = torch.randn(1, 3, 256, 256).to(self.device)
                num_inference_steps = 25 if self.device.type == 'cuda' else 15
                self.noise_scheduler.set_timesteps(num_inference_steps)

                for t in self.noise_scheduler.timesteps:
                    timesteps = torch.full((1,), t, device=self.device).long()
                    noise_pred = self.model(generated_image, timesteps, features)
                    generated_image = self.noise_scheduler.step(noise_pred, t, generated_image).prev_sample

            # Map from [-1, 1] to [0, 255] uint8.
            generated_image = (generated_image / 2 + 0.5).clamp(0, 1)
            generated_image = generated_image.cpu().squeeze(0).permute(1, 2, 0).numpy()
            generated_image = (generated_image * 255).astype(np.uint8)
            return Image.fromarray(generated_image)
        except Exception as e:
            print(f"Error processing image: {e}")
            return None


processor = CartoonifyProcessor()


def create_interface():
    with gr.Blocks(title="Image to Cartoonify - wizcodes12", theme=gr.themes.Soft()) as demo:
        gr.HTML("""
            <div style="text-align: center;">
                <h1>🎨 Image to Cartoonify</h1>
                <h3>by wizcodes12</h3>
                <p>Transform your photos into stunning cartoon avatars with AI</p>
            </div>
""") with gr.Row(): with gr.Column(): gr.Markdown("### 📸 Upload Your Photo") input_image = gr.Image( label="Upload Image", type="pil", height=300 ) process_btn = gr.Button( "🎨 Convert to Cartoon", variant="primary", size="lg" ) gr.Markdown("### 📋 Instructions") gr.Markdown(""" 1. Upload a clear photo with a visible face 2. Click 'Convert to Cartoon' button 3. Wait for AI processing (15-30 seconds) 4. Download your cartoon avatar """) with gr.Column(): gr.Markdown("### 🎭 Cartoon Result") output_image = gr.Image( label="Cartoon Avatar", type="pil", height=400 ) status = gr.Textbox( label="Status", value="Ready to process your image!", interactive=False ) gr.HTML("""

🚀 Powered by: Advanced AI Neural Networks

🎯 Features: Face Detection • Feature Extraction • Cartoon Style Transfer

👨‍💻 Developer: wizcodes12

""") def process_image_wrapper(image): if image is None: return None, "Please upload an image first!" try: status_msg = "Processing image... Please wait." result = processor.process_image(image) if result is not None: return result, "Cartoon generated successfully! 🎉" else: return None, "Error: Could not process image. Please try another image." except Exception as e: return None, f"Error: {str(e)}" process_btn.click( fn=process_image_wrapper, inputs=[input_image], outputs=[output_image, status] ) input_image.change( fn=lambda x: "Image uploaded! Click 'Convert to Cartoon' to process." if x is not None else "Please upload an image.", inputs=[input_image], outputs=[status] ) return demo if __name__ == "__main__": print("🚀 Starting Image to Cartoonify by wizcodes12...") print(f"Device: {device}") demo = create_interface() demo.launch( server_name="0.0.0.0", server_port=7860, share=True )