import cv2
import numpy as np
import math
import torch
import torchvision.transforms as transforms
from PIL import Image

try:
    from facenet_pytorch import MTCNN
except ImportError:
    MTCNN = None

class GeometricAligner:
    """
    Module 1: Preprocessing & Facial Landmark Alignment Pipeline

    1. Extracts 5 facial landmarks using MTCNN (facenet_pytorch).
    2. Computes the eye-line angle and rotates the image about the landmark
       centroid so that the eyes end up horizontal.
    3. Crops a square window centred on the face box, enlarged by a padding
       margin on every side. Parts of the window that fall outside the image
       are filled with black instead of squeezing the window, so the face is
       never stretched.
    4. Resizes to OUTPUT_SIZE x OUTPUT_SIZE using bi-cubic interpolation and
       optionally normalizes with the ImageNet mean/std.
    """

    # Side length (pixels) of the square output image.
    OUTPUT_SIZE = 512
    # Fraction of the face box's longer side added as margin on each side.
    PAD_RATIO = 0.10

    def __init__(self, device='cpu'):
        """
        Build the MTCNN detector and the normalization transform.

        Args:
            device: device passed through to MTCNN (default 'cpu').

        Raises:
            ImportError: if facenet_pytorch could not be imported.
        """
        if MTCNN is None:
            raise ImportError("facenet_pytorch is required. Install via 'pip install facenet-pytorch'")

        # Initialize MTCNN for face and landmark detection
        self.device = device
        self.mtcnn = MTCNN(keep_all=False, device=self.device, margin=0, post_process=False)

        self.normalize = transforms.Compose([
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    @staticmethod
    def _match_box(boxes, point):
        """
        Return the box that contains `point` and whose centre is nearest to it.

        Args:
            boxes: sequence of [x1, y1, x2, y2] boxes, or None.
            point: (x, y) position expected to lie inside the wanted box.

        Returns:
            The selected box as a float array of length 4, or None when
            `boxes` is None/empty or no box contains `point`.
        """
        if boxes is None or len(boxes) == 0:
            return None

        arr = np.asarray(boxes, dtype=np.float64).reshape(-1, 4)
        px, py = float(point[0]), float(point[1])

        inside = (
            (arr[:, 0] <= px) & (px <= arr[:, 2])
            & (arr[:, 1] <= py) & (py <= arr[:, 3])
        )
        if not inside.any():
            return None

        centre_x = (arr[:, 0] + arr[:, 2]) / 2.0
        centre_y = (arr[:, 1] + arr[:, 3]) / 2.0
        dist = np.hypot(centre_x - px, centre_y - py)
        # Boxes that do not contain the point must never win the argmin.
        dist[~inside] = np.inf
        return arr[int(np.argmin(dist))]

    @staticmethod
    def _square_window(bbox, pad_ratio=0.10):
        """
        Compute the square crop window for a face box.

        The window is centred on the box and its side equals the longer box
        side enlarged by `pad_ratio` on each side. It is NOT clamped to any
        image, so coordinates may be negative or exceed the image size.

        Args:
            bbox: [x1, y1, x2, y2] face box.
            pad_ratio: margin added on each side, as a fraction of the side.

        Returns:
            (x1, y1, side) as ints, or None for a box with non-positive
            width or height.
        """
        bx1, by1, bx2, by2 = (float(v) for v in bbox)
        box_w = bx2 - bx1
        box_h = by2 - by1
        if box_w <= 0 or box_h <= 0:
            return None

        side = max(1, int(round(max(box_w, box_h) * (1.0 + 2.0 * pad_ratio))))
        centre_x = bx1 + box_w / 2.0
        centre_y = by1 + box_h / 2.0
        x1 = int(round(centre_x - side / 2.0))
        y1 = int(round(centre_y - side / 2.0))
        return x1, y1, side

    @staticmethod
    def _crop_with_padding(image, x1, y1, side):
        """
        Cut a `side` x `side` window whose top-left corner is (x1, y1).

        Pixels of the window that lie outside `image` are left at zero, which
        keeps the window square near the image borders.

        Args:
            image: array of shape (H, W) or (H, W, C).
            x1, y1: top-left corner of the window (may be negative).
            side: window side length in pixels.

        Returns:
            A new array of shape (side, side[, C]) with the image's dtype, or
            None when the window does not overlap the image at all.
        """
        height, width = image.shape[:2]
        src_x1, src_y1 = max(x1, 0), max(y1, 0)
        src_x2, src_y2 = min(x1 + side, width), min(y1 + side, height)
        if src_x2 <= src_x1 or src_y2 <= src_y1:
            return None

        canvas = np.zeros((side, side) + tuple(image.shape[2:]), dtype=image.dtype)
        canvas[src_y1 - y1:src_y2 - y1, src_x1 - x1:src_x2 - x1] = image[src_y1:src_y2, src_x1:src_x2]
        return canvas

    def align_and_crop(self, image_bgr: np.ndarray, return_tensor=True):
        """
        Executes the alignment and cropping pipeline on a BGR numpy image (e.g. from cv2.imread).

        Args:
            image_bgr: 3-channel BGR image array. The /255 scaling below
                suggests 8-bit data is expected - TODO confirm with callers.
            return_tensor: selects the output format (see Returns).

        Returns:
            If return_tensor=True, a normalized torch Tensor of shape
            [3, OUTPUT_SIZE, OUTPUT_SIZE].
            If return_tensor=False, an RGB numpy array of shape
            [OUTPUT_SIZE, OUTPUT_SIZE, 3].
            None if no face is detected, or if the face box is degenerate or
            lies completely outside the image.
        """
        # MTCNN works best with RGB PIL Images or numpy arrays
        image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        boxes, _, landmarks = self.mtcnn.detect(Image.fromarray(image_rgb), landmarks=True)
        if boxes is None or len(boxes) == 0 or landmarks is None:
            return None

        # detect() hands back arrays and may contain more than one face; the
        # ordering is decided by MTCNN's own settings, so index 0 is taken as
        # the primary face.
        bbox = boxes[0]
        pts = np.asarray(landmarks[0], dtype=np.float64)  # Shape: (5, 2)

        # 1. 5 primary facial landmarks
        # MTCNN landmarks: [left_eye, right_eye, nose, left_mouth, right_mouth]
        left_eye, right_eye = pts[0], pts[1]

        # 2. Angle of the eye line relative to the horizontal axis
        angle = float(np.degrees(np.arctan2(right_eye[1] - left_eye[1],
                                            right_eye[0] - left_eye[0])))

        # Rotate about the landmark centroid. The centre is kept as float:
        # truncating it to int would shift the rotation by up to one pixel.
        centroid = pts.mean(axis=0)
        pivot = (float(centroid[0]), float(centroid[1]))

        M = cv2.getRotationMatrix2D(pivot, angle, 1.0)
        h, w = image_bgr.shape[:2]
        rotated_img = cv2.warpAffine(image_bgr, M, (w, h), flags=cv2.INTER_CUBIC)

        # Re-detect on the rotated image to get a tight upright box. The pivot
        # does not move under the rotation, so the matching face is the box
        # that still contains it; this avoids locking onto another face.
        rotated_rgb = cv2.cvtColor(rotated_img, cv2.COLOR_BGR2RGB)
        r_boxes, _, _ = self.mtcnn.detect(Image.fromarray(rotated_rgb), landmarks=True)
        rotated_bbox = self._match_box(r_boxes, pivot)

        if rotated_bbox is None:
            # Fallback: rotate the original box corners and take their
            # axis-aligned bounding rectangle.
            corners = np.array([
                [bbox[0], bbox[1], 1.0],
                [bbox[2], bbox[1], 1.0],
                [bbox[2], bbox[3], 1.0],
                [bbox[0], bbox[3], 1.0],
            ], dtype=np.float64)
            moved = corners @ M.T
            rotated_bbox = [
                moved[:, 0].min(), moved[:, 1].min(),
                moved[:, 0].max(), moved[:, 1].max(),
            ]

        # 3. Square crop window with the outer padding margin
        window = self._square_window(rotated_bbox, self.PAD_RATIO)
        if window is None:
            return None
        win_x, win_y, win_side = window

        crop_img = self._crop_with_padding(rotated_img, win_x, win_y, win_side)
        if crop_img is None:
            return None

        # 4. Bi-cubic resizing to the output size
        out_size = self.OUTPUT_SIZE
        resized_img = cv2.resize(crop_img, (out_size, out_size), interpolation=cv2.INTER_CUBIC)

        # Convert BGR to RGB
        resized_img_rgb = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)

        if not return_tensor:
            return resized_img_rgb

        # HWC -> CHW, scale to [0, 1], then apply the ImageNet mean/std
        tensor_img = torch.from_numpy(resized_img_rgb).permute(2, 0, 1).float() / 255.0
        return self.normalize(tensor_img)