import PIL.Image
from PIL import ImageDraw, ImageFont
import gradio as gr
import numpy as np
from celeb_indicies import *

import PIL
import os
import platform
import time
from huggingface_hub import hf_hub_download
import huggingface_hub
from diffusers import DiffusionPipeline
import torch
from torch import nn
import torchvision

# By default, dlib will be compiled locally when installed via pip, which takes so long it 
# Causes huggingface to time out during the build process.
# To avoid this, check if we are running on a Linux system, and if so load a binary of dlib compiled for x86_64 Linux
if(platform.system() == 'Linux'):    
    os.system("pip install ./dlib-19.24.99-cp310-cp310-linux_x86_64.whl")
    os.system("pip install face-recognition")

start_time = time.time()
import face_detection
print(f"took {(time.time() - start_time) / 60} minutes to load face_recognition")


# Change these values to switch the model you are using and the name of the weights file in this model
model_repo_id = "CSSE416-final-project/faceRecogModel"
weight_file_id = "modelWeights101.bin"


# 1. Load the model from Hugging Face Hub
def load_model(repo_id):
    # Download the model weights from the repo
    weights_path = hf_hub_download(repo_id=model_repo_id, filename=weight_file_id)

    # Initialize the ResNet-18 architecture
    model = torchvision.models.resnet18(pretrained=False)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 100)  # Adjust for your number of classes

    # Load the model weights
    state_dict = torch.load(weights_path, map_location=torch.device("cpu"))
    model.load_state_dict(state_dict)
    model.eval()  # Set the model to evaluation mode
    return model


# 2. Load model
model = load_model(model_repo_id)


# 3. Define how to transform image
transforms = torchvision.transforms.Compose(
[
    torchvision.transforms.Resize((224, 224)),
    torchvision.transforms.ToTensor()
])


# 4. Preprocess and display the image
def process_image_str(groupImageFilePath: str):
    groupImage = PIL.Image.open(groupImageFilePath)    
    locations, images = face_detection.getCroppedImages(groupImage)
    groupImage_d = ImageDraw.ImageDraw(groupImage)
    font = ImageFont.truetype("Arial Bold.ttf", 30)
    
    labels = "| | Name | Certainty | \n | -------- | ------- | ------- |\n"
    n = 1
    
    for image, location in zip(images, locations):
        # Process the image
        intputTensor = transforms(image).unsqueeze(0)
        
        # Do AI stuff here and format output
        with torch.no_grad():
            outputs_t = model(intputTensor)
            cert, pred_t = torch.max(torch.softmax(outputs_t, dim=1), dim=1)
            groupImage_d.rectangle(location, outline=(0, 255, 0), width=2)
            groupImage_d.text((location[0] + 4, location[1] + 2), f"{n}", fill=(0, 255, 0), font=font)
            labels += f"| {n} | {celeb_list[pred_t.item()]} | {int(cert.item() * 100)}% | \n"
        
        n += 1
    
    return [gr.Image(groupImage), gr.Markdown(labels)]
    # return gr.Image(images[0])


# 5. Create the Gradio interface
interface = gr.Interface(
    fn=process_image_str,  # Function to process the image
    inputs=gr.Image(type='filepath', label="Input Image"),  # Upload input
    outputs=[gr.Image(label="Output"), gr.Markdown(label="Output Legend")],  # Display output
    allow_flagging='never',
    title="Celebrity Face Detector",
    description="Upload a picture of a celebrity or group of celebrities to identify them (ex. Jeff Bezos)"
)

# 6. Launch the app
if __name__ == "__main__":
    interface.launch()