import torch
import torchvision
import numpy as np
import argparse
import copy
import cv2
import os
from contextlib import nullcontext
from huggingface_hub import hf_hub_download

from facenet_pytorch import MTCNN
from models import MobileGenerator, MobileNetV3MultiTask


class Face:
    """Face geometry derived from five 2-D facial landmarks.

    The landmarks are consumed in the order (eye, eye, nose, mouth corner,
    mouth corner). From them the instance derives:

    * ``bbox``  -- a square box ``(x, y, w, h)`` with integer coordinates,
    * ``theta`` -- the angle in radians of the eye-to-eye vector.

    ``age`` / ``gender`` only exist after :meth:`set_attributes` was called;
    they survive later calls to :meth:`update`.
    """

    def __init__(self, keypoint: list[tuple[int, int]]):
        """Build the face from ``keypoint`` (five ``(x, y)`` points)."""
        self._fit(keypoint)

    def _fit(self, keypoint) -> None:
        """Store ``keypoint`` and recompute ``bbox`` and ``theta`` from it."""
        self.keypoint = keypoint

        # np.asarray leaves NumPy rows untouched and lets plain tuples/lists
        # (which the annotation promises) take part in the vector arithmetic
        # below. The nose point is unpacked only to enforce the 5-point layout.
        e0, e1, _nose, m0, m1 = (np.asarray(p) for p in keypoint)

        eye_vec = e1 - e0
        # Vector from the mouth midpoint to the eye midpoint.
        mouth_to_eyes = 0.5 * (e0 + e1) - 0.5 * (m0 + m1)
        # Box centre: the eye midpoint shifted by 10% of that vector
        # towards the mouth.
        c = 0.5 * (e0 + e1) - 0.1 * mouth_to_eyes
        cx, cy = int(c[0]), int(c[1])

        # Side length: the larger of the two landmark-based estimates.
        s = int(max(4.0 * np.linalg.norm(eye_vec),
                    3.6 * np.linalg.norm(mouth_to_eyes)))

        # bbox: (x, y, w, h)
        self.bbox = (cx - s // 2, cy - s // 2, s, s)
        self.theta = np.arctan2(eye_vec[1], eye_vec[0])

    def get_center(self):
        """Return the integer ``(x, y)`` centre of ``bbox``."""
        x, y, w, h = self.bbox
        return x + w // 2, y + h // 2

    def get_size(self):
        """Return the side length of the square ``bbox``."""
        return self.bbox[2]

    def set_attributes(self, age: int, gender: str):
        """Attach ``age`` and ``gender`` to this face; values are stored as given."""
        self.age = age
        self.gender = gender

    def update(self, keypoint: list[tuple[int, int]]):
        """Replace the landmarks and recompute the geometry in place.

        Attributes set through :meth:`set_attributes` are left untouched.
        """
        self._fit(keypoint)

    def calc_iou(self, other) -> float:
        """Return the intersection-over-union of this ``bbox`` and ``other.bbox``.

        Returns ``0.0`` when the union area is zero.
        """
        ax, ay, aw, ah = self.bbox
        bx, by, bw, bh = other.bbox

        x1 = max(ax, bx)
        y1 = max(ay, by)
        x2 = min(ax + aw, bx + bw)
        y2 = min(ay + ah, by + bh)

        # Clamp at zero so disjoint boxes give an empty intersection.
        inter_area = max(0, x2 - x1) * max(0, y2 - y1)
        union_area = aw * ah + bw * bh - inter_area

        if union_area == 0:
            return 0.0
        return inter_area / union_area


class FaceSet:
    """Ordered collection of :class:`Face` objects tracked across updates.

    ``faces`` and ``nonused_counter`` are parallel lists: the counter at
    index ``k`` is the number of consecutive :meth:`update` calls in which
    ``faces[k]`` was not matched.
    """

    # Loaded once, when the class body is executed. The repository id and
    # the access token are read from the environment.
    latent_ids = np.load(
        hf_hub_download(
            repo_id=os.getenv("HF_GEN_REPO_ID"),
            filename="latent_ids.npz",
            token=os.getenv("HF_HUB_TOKEN")
        )
    )

    def __init__(self):
        self.faces = []
        self.nonused_counter = []

    def append(self, face: Face):
        """Add ``face`` with a fresh non-use counter of zero."""
        self.faces.append(face)
        self.nonused_counter.append(0)

    def set_attributes(self, i: int, age: int, gender: str):
        """Attach ``age``/``gender`` to face ``i`` and pick its latent id.

        Both arguments are indexed with ``[0]`` to build the lookup key
        ``"{age[0]}_{gender[0]}_jp"``. When ``age[0]`` is 80 and
        ``gender[0]`` is ``"M"``, ``age[0]`` is overwritten in place with 70
        before the lookup; the face stores the same ``age`` object, so it
        sees the rewritten value as well.
        """
        target = self.faces[i]
        target.set_attributes(age, gender)
        if age[0] == 80 and gender[0] == "M":
            age[0] = 70
        key = f"{age[0]}_{gender[0]}_jp"
        target.latent_id = self.latent_ids[key]

    def __len__(self) -> int:
        return len(self.faces)

    def __getitem__(self, idx: int) -> Face:
        return self.faces[idx]

    def __iter__(self):
        return iter(self.faces)

    def update(self, other, reset_nonused_threshold: int):
        """Merge the faces of ``other`` into this set.

        Each face of ``other`` is compared against every face currently
        held. If the best IoU exceeds 0.3, that held face takes over the new
        landmarks and its counter is reset; otherwise the new face is
        appended. Faces that were neither matched nor appended get their
        counter incremented. Finally the set is reordered by descending
        counter and entries whose counter reached
        ``reset_nonused_threshold`` are dropped.
        """
        touched = set()

        for candidate in other:
            best_iou, best_idx = 0, -1
            for idx, known in enumerate(self.faces):
                overlap = candidate.calc_iou(known)
                if overlap > best_iou:
                    best_iou, best_idx = overlap, idx

            if best_iou > 0.3:
                self.faces[best_idx].update(candidate.keypoint)
                self.nonused_counter[best_idx] = 0
                touched.add(best_idx)
            else:
                self.append(candidate)
                touched.add(len(self.faces) - 1)

        # Age every face that was not seen in this round.
        self.nonused_counter = [
            count if idx in touched else count + 1
            for idx, count in enumerate(self.nonused_counter)
        ]

        # Reorder by descending counter, then discard stale entries.
        order = np.argsort(self.nonused_counter)[::-1]
        ranked = [(self.faces[k], self.nonused_counter[k]) for k in order]
        survivors = [
            (face, count) for face, count in ranked
            if count < reset_nonused_threshold
        ]
        self.faces = [face for face, _ in survivors]
        self.nonused_counter = [count for _, count in survivors]


class FaceCropper:
    """Detects faces, cuts out aligned crops and pastes edited crops back."""

    def __init__(self):
        self.size = 256
        self.crop_size = 224
        self.detector = MTCNN(select_largest=False, keep_all=True, device="cuda" if torch.cuda.is_available() else "cpu")

        # Blend mask for pasting a crop back: opaque interior with an
        # 8-pixel transparent border, softened by a Gaussian blur so the
        # seam fades out instead of showing a hard edge.
        mask = np.zeros((self.crop_size, self.crop_size), dtype=np.uint8)
        mask[8:-8, 8:-8] = 255
        mask = cv2.GaussianBlur(mask, (31, 31), 0)
        self.mask = mask

    def _crop_transform(self, cx, cy, theta, s) -> np.ndarray:
        """Return the 2x3 affine matrix mapping image space to crop space.

        The matrix rotates by ``theta`` (radians) about ``(cx, cy)``, scales
        by ``self.size / s * 1.14`` and then translates ``(cx, cy)`` to the
        centre of the ``crop_size`` x ``crop_size`` output.
        """
        M = cv2.getRotationMatrix2D((cx, cy), np.degrees(theta), self.size / s * 1.14)
        M[0, 2] += self.crop_size // 2 - cx
        M[1, 2] += self.crop_size // 2 - cy
        return M

    def detect_keypoints(self, image: np.ndarray) -> FaceSet:
        """Run the detector on ``image`` and return one ``Face`` per detection.

        Returns an empty ``FaceSet`` when the detector reports no landmarks.
        """
        _, _, points = self.detector.detect(image, landmarks=True)

        faces_list = FaceSet()
        if points is None:
            return faces_list

        for landmarks in points:
            left_eye, right_eye, nose, left_mouth, right_mouth = landmarks[:5]
            faces_list.append(Face(keypoint=[left_eye, right_eye, nose, left_mouth, right_mouth]))

        return faces_list

    def crop_and_resize(self, image: np.ndarray, face: Face) -> np.ndarray:
        """Return the aligned ``crop_size`` x ``crop_size`` crop of ``face``."""
        cx, cy = face.get_center()
        M = self._crop_transform(cx, cy, face.theta, face.get_size())
        return cv2.warpAffine(image, M, (self.crop_size, self.crop_size), flags=cv2.INTER_LINEAR)

    def invert_image(self, image: np.ndarray, cropped: np.ndarray, face: Face) -> np.ndarray:
        """Paste ``cropped`` back into a copy of ``image`` at ``face``'s pose.

        Only the window ``[cx - s, cx + s] x [cy - s, cy + s]`` (clamped to
        the image) is warped and blended. If that window is empty,
        ``image`` itself is returned unchanged.
        """
        cx, cy = face.get_center()
        s = face.get_size()

        x0 = max(0, int(np.floor(cx - s)))
        y0 = max(0, int(np.floor(cy - s)))
        x1 = min(image.shape[1], int(np.ceil(cx + s)))
        y1 = min(image.shape[0], int(np.ceil(cy + s)))

        if x0 >= x1 or y0 >= y1:
            return image

        window = image[y0:y1, x0:x1]
        window_size = (x1 - x0, y1 - y0)

        # Same transform as crop_and_resize, expressed in window coordinates,
        # then inverted to go from crop space back to the window.
        M = self._crop_transform(cx - x0, cy - y0, face.theta, s)
        M_inv = cv2.invertAffineTransform(M)

        inverted = cv2.warpAffine(cropped, M_inv, window_size, flags=cv2.INTER_LINEAR)

        mask = cv2.warpAffine(self.mask, M_inv, window_size).astype(np.float32) / 255.0
        if window.ndim == 3:
            # Add a channel axis so the mask broadcasts over colour channels;
            # 2-D (single-channel) images blend with the 2-D mask directly.
            mask = mask[:, :, None]

        blended = window.astype(np.float32) * (1 - mask) + inverted.astype(np.float32) * mask
        result = image.copy()
        result[y0:y1, x0:x1] = blended.astype(np.uint8)
        return result


class FaceSwapper:
    """Wraps the generator and the age/gender classifier used for swapping."""

    def __init__(self, model_path: str, classifier_checkpoint: str):
        """Load both networks and put them in eval mode on the chosen device.

        Args:
            model_path: file holding the generator state dict.
            classifier_checkpoint: file holding a dict whose
                ``"model_state_dict"`` entry is the classifier state dict.
        """
        self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

        # NOTE(security): weights_only=False lets torch.load unpickle
        # arbitrary objects. Only pass checkpoint files from a trusted source.
        self.generator = MobileGenerator(input_nc=3, output_nc=3, latent_dim=512, n_blocks=6)
        self.generator.load_state_dict(torch.load(model_path, map_location=torch.device("cpu"), weights_only=False))
        self.generator.to(self.device).eval()

        self.classifier = MobileNetV3MultiTask(model_name="mobilenetv3_small_100", num_age_classes=10, num_gender_classes=2)
        self.classifier.load_state_dict(torch.load(classifier_checkpoint, map_location=torch.device("cpu"), weights_only=False)["model_state_dict"])
        self.classifier.to(self.device).eval()

        # Per-channel normalisation constants, kept on the CPU and shaped to
        # broadcast over (N, 3, H, W) batches.
        self.mean = torch.tensor([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
        self.std = torch.tensor([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)

    def _autocast(self):
        """Return a float16 autocast context on CUDA, a no-op context otherwise."""
        if self.device.type == "cuda":
            return torch.autocast("cuda", torch.float16)
        return nullcontext()

    def np2tensor(self, imgs: np.ndarray) -> torch.Tensor:
        """Convert one ``(H, W, 3)`` image or a list of them to a normalised batch.

        Pixel values are divided by 255, the layout becomes ``(N, 3, H, W)``
        and ``self.mean`` / ``self.std`` are applied. The result is a float32
        CPU tensor.
        """
        if not isinstance(imgs, list):
            imgs = [imgs]

        batch = np.stack(imgs, axis=0).astype(np.float32) / 255
        batch = torch.from_numpy(batch).permute(0, 3, 1, 2)
        return (batch - self.mean) / self.std

    def tensor2np(self, imgs: torch.Tensor) -> np.ndarray:
        """Undo :meth:`np2tensor`: return a ``(N, H, W, 3)`` uint8 array.

        Accepts tensors on any device. Values are clipped to ``[0, 1]`` and
        rounded to the nearest 8-bit level.
        """
        # self.mean / self.std live on the CPU, so bring the batch there first.
        imgs = imgs.detach().cpu()
        imgs = imgs * self.std + self.mean
        imgs = imgs.permute(0, 2, 3, 1).numpy()
        imgs = np.clip(imgs, 0, 1)
        # Round rather than truncate: truncation can leave a pixel one level
        # low whenever float error puts the value just below an integer.
        return np.rint(imgs * 255).astype(np.uint8)

    def classify(self, img: np.ndarray) -> list[tuple[list, list]]:
        """Predict age bucket and gender for one image or a list of images.

        Returns:
            One ``([age, age_prob], [gender, gender_prob])`` tuple per image,
            where ``age`` is the winning age-class index times 10,
            ``gender`` is ``"F"`` for class 0 and ``"M"`` otherwise, and the
            ``*_prob`` values are the softmax probabilities of the winners.
        """
        with torch.no_grad(), self._autocast():
            img_tensor = self.np2tensor(img).to(self.device)
            age_out, gender_out = self.classifier(img_tensor)
            age_probs = torch.softmax(age_out, dim=1)
            gender_probs = torch.softmax(gender_out, dim=1)

            attributes = []
            for age_row, gender_row in zip(age_probs, gender_probs):
                age = age_row.argmax().item() * 10
                age_prob = age_row.max().item()
                gender = "F" if gender_row.argmax().item() == 0 else "M"
                gender_prob = gender_row.max().item()
                attributes.append(([age, age_prob], [gender, gender_prob]))
            return attributes

    def swap(self, img_att: np.ndarray, latent_ids: list[np.ndarray]) -> np.ndarray:
        """Run the generator on ``img_att`` conditioned on ``latent_ids``.

        ``latent_ids`` are stacked row-wise into one array before being
        passed to the generator. Returns the generated images as a
        ``(N, H, W, 3)`` uint8 array.
        """
        with torch.no_grad(), self._autocast():
            img_att = self.np2tensor(img_att).to(self.device)
            latent_ids = torch.from_numpy(np.vstack(latent_ids)).to(self.device)

            output = self.generator(img_att, latent_ids)
            return self.tensor2np(output.to("cpu"))