"""Face-recognition attendance script.

Pipeline: YOLOv5-face detects faces, an InsightFace iresnet backbone embeds
them, and cosine similarity against a precomputed feature bank names each
face.  First-time recognitions (score >= 0.35) are appended to a CSV log.
If the ``test_image`` folder contains images they are processed offline;
otherwise frames come from the default camera, are annotated, recorded to
disk, and shown live.
"""

# --- standard library ---
import csv
import datetime
import os
import sys
import threading
import time

# --- third party ---
import cv2
import numpy as np
import tkinter as tk
from tkinter import ttk

import torch
from torchvision import transforms
from PIL import Image, ImageTk

# yolov5_face is vendored next to this script.
sys.path.insert(0, "yolov5_face")

from models.experimental import attempt_load
from utils.datasets import letterbox
from utils.general import check_img_size, non_max_suppression_face, scale_coords
from insightface.insight_face import iresnet100
# Lighter alternative: from insightface.insight_face import iresnet18

# NOTE(review): the original imported `thread` from concurrent.futures
# (unused) and `null` from sqlalchemy just to initialise `name`; both were
# removed -- plain None is what was intended.

# ------------------------------------------------------------------- Tk UI
root = tk.Tk()
root.title("Face Recognition System")

frame = ttk.Frame(root, padding="3 3 12 12")
frame.grid(column=0, row=0, sticky=(tk.W, tk.E, tk.N, tk.S))

video_label = ttk.Label(frame)
video_label.grid(column=0, row=0, sticky=(tk.W, tk.E, tk.N, tk.S))

# Shared camera handle, opened in main().  The original referenced a `cap`
# that was local to main(), which raised NameError inside update_video().
cap = None


def update_video():
    """Paint one camera frame into the Tk label and reschedule itself."""
    if cap is not None:
        ret, bgr = cap.read()
        if ret:  # skip dropped frames instead of crashing on a None frame
            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            img = ImageTk.PhotoImage(image=Image.fromarray(rgb))
            video_label.imgtk = img  # keep a reference or Tk drops the image
            video_label.configure(image=img)
    video_label.after(10, update_video)


def run_face_recognition():
    """Recognition worker stub; intended to run in a separate thread."""
    # Your face recognition code here.
    pass


# ------------------------------------------------------------ model setup
device = torch.device("cpu")

# Face detector (YOLOv5m-face; yolov5s-face.pt is the faster option).
model = attempt_load("yolov5_face/yolov5m-face.pt", map_location=device)

# Recognition backbone (iresnet100; resnet18_backbone.pth + iresnet18 is
# the lighter alternative kept commented out in the original).
weight = torch.load("insightface/resnet100_backbone.pth", map_location=device)
model_emb = iresnet100()
model_emb.load_state_dict(weight)
model_emb.to(device)
model_emb.eval()

detected_faces = []

# Face-crop preprocessing for the embedding network: 112x112, scaled to
# [-1, 1] via mean/std of 0.5.
face_preprocess = transforms.Compose([
    transforms.ToTensor(),            # HWC uint8 -> CHW float in [0, 1]
    transforms.Resize((112, 112)),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

isThread = True
score = 0
name = None
csv_filename = "recognized_faces.csv"
recognized_names = []  # names already written to the CSV during this run


def resize_image(img0, img_size):
    """Letterbox *img0* (BGR) to the detector's input size.

    Returns a CHW float tensor in [0, 1] on *device*.
    """
    h0, w0 = img0.shape[:2]  # original hw
    r = img_size / max(h0, w0)
    if r != 1:  # shrink with INTER_AREA, enlarge with INTER_LINEAR
        interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
        img0 = cv2.resize(img0, (int(w0 * r), int(h0 * r)), interpolation=interp)

    imgsz = check_img_size(img_size, s=model.stride.max())  # stride-aligned
    img = letterbox(img0, new_shape=imgsz)[0]

    # BGR -> RGB, HWC -> CHW
    img = img[:, :, ::-1].transpose(2, 0, 1).copy()
    img = torch.from_numpy(img).to(device)
    img = img.float()   # uint8 -> fp32
    img /= 255.0        # 0-255 -> 0.0-1.0
    return img


def scale_coords_landmarks(img1_shape, coords, img0_shape, ratio_pad=None):
    """Rescale 5-point landmarks (x1,y1 .. x5,y5) from the letterboxed
    shape *img1_shape* back to the original image shape *img0_shape*."""
    if ratio_pad is None:  # derive gain and padding from the two shapes
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        pad = ((img1_shape[1] - img0_shape[1] * gain) / 2,
               (img1_shape[0] - img0_shape[0] * gain) / 2)
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2, 4, 6, 8]] -= pad[0]  # x padding
    coords[:, [1, 3, 5, 7, 9]] -= pad[1]  # y padding
    coords[:, :10] /= gain

    # Clamp every point inside the original image (x columns are even,
    # y columns are odd) -- replaces ten hand-written clamp_ lines.
    for j in range(10):
        bound = img0_shape[1] if j % 2 == 0 else img0_shape[0]
        coords[:, j].clamp_(0, bound)
    return coords


def get_face(input_image):
    """Detect faces in *input_image* (BGR).

    Returns ``(bboxs, landmarks)`` as int32 arrays in original-image
    coordinates.
    """
    # Detector parameters.
    size_convert = 128
    conf_thres = 0.4
    iou_thres = 0.5

    img = resize_image(input_image.copy(), size_convert)

    # Forward pass through yolov5-face.
    with torch.no_grad():
        pred = model(img[None, :])[0]

    # Apply NMS, then map boxes/landmarks back to the original image.
    det = non_max_suppression_face(pred, conf_thres, iou_thres)[0]
    bboxs = np.int32(
        scale_coords(img.shape[1:], det[:, :4], input_image.shape)
        .round().cpu().numpy())
    landmarks = np.int32(
        scale_coords_landmarks(img.shape[1:], det[:, 5:15], input_image.shape)
        .round().cpu().numpy())
    return bboxs, landmarks


def get_feature(face_image, training=True):
    """Embed a BGR face crop; returns the L2-normalised embedding.

    With ``training=True`` the batch dimension is stripped from the model
    output ([0]); with ``training=False`` it is kept -- preserved as-is
    from the original.
    """
    face_image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
    face_tensor = face_preprocess(face_image).to(device)

    with torch.no_grad():
        if training:
            emb_img_face = model_emb(face_tensor[None, :])[0].cpu().numpy()
        else:
            emb_img_face = model_emb(face_tensor[None, :]).cpu().numpy()

    return emb_img_face / np.linalg.norm(emb_img_face)


def read_features(root_fearure_path="static/feature/face_features.npz"):
    """Load the precomputed (names, embeddings) feature bank.

    NOTE(review): the parameter name keeps the original typo ("fearure")
    so keyword callers elsewhere stay compatible.
    """
    data = np.load(root_fearure_path, allow_pickle=True)
    return data["arr1"], data["arr2"]


def recognition(face_image, index):
    """Name *face_image* against the feature bank.

    Appends each person to the CSV log once (score >= 0.35) and returns
    ``(score, caption)``, where caption is ``"UNKNOWN"`` below threshold.
    """
    global recognized_names  # shared de-duplication list

    query_emb = get_feature(face_image, training=False)
    images_names, images_embs = read_features()

    # Cosine similarity (embeddings are unit-norm), best match wins.
    scores = (query_emb @ images_embs.T)[0]
    id_min = np.argmax(scores)
    match_score = scores[id_min]
    match_name = images_names[id_min]

    caption = "UNKNOWN" if match_score < 0.35 else match_name

    if match_score >= 0.35 and caption not in recognized_names:
        recognized_names.append(caption)
        now = datetime.datetime.now()
        date_str = now.strftime("%Y-%m-%d")
        # Renamed from `time`: the original shadowed the time module here.
        time_str = now.strftime("%H:%M:%S")
        with open(csv_filename, 'a', newline='') as file:
            csv.writer(file).writerow([caption, date_str, time_str])

    print(f"Face {index}: Score: {match_score:.2f}, Name: {caption}")
    return match_score, caption


def create_csv_file(filename):
    """Create the attendance CSV with its header row."""
    with open(filename, 'w', newline='') as file:
        csv.writer(file).writerow(["Name", "Date", "Time"])


# Create the CSV file if it doesn't exist.
if not os.path.exists(csv_filename):
    create_csv_file(csv_filename)


def recognize_from_images(image_folder):
    """Run detection + recognition over every image file in *image_folder*."""
    if not os.path.exists(image_folder):
        print(f"Image folder '{image_folder}' doesn't exist.")
        return

    for image_name in os.listdir(image_folder):
        if image_name.endswith(("png", 'jpg', 'jpeg')):
            input_image = cv2.imread(os.path.join(image_folder, image_name))
            bboxs, _ = get_face(input_image)
            for i, (x1, y1, x2, y2) in enumerate(bboxs):
                recognition(input_image[y1:y2, x1:x2], i)


def main():
    """Entry point: offline folder mode if test images exist, else camera."""
    global cap

    test_image_folder = "test_image"
    if os.path.exists(test_image_folder) and any(
        image_name.endswith(("png", 'jpg', 'jpeg'))
        for image_name in os.listdir(test_image_folder)
    ):
        recognize_from_images(test_image_folder)
        return

    cap = cv2.VideoCapture(0)

    start = time.time_ns()
    frame_count = 0
    fps = -1

    # Background worker (currently a stub); daemon so it never blocks exit.
    threading.Thread(target=run_face_recognition, daemon=True).start()

    # Recorder for the annotated stream.
    size = (int(cap.get(3)), int(cap.get(4)))  # frame width, height
    video = cv2.VideoWriter('./static/results/face-recognition2.mp4',
                            cv2.VideoWriter_fourcc(*'mp4v'), 6, size)

    # NOTE(review): the original called cap.release() right after
    # root.mainloop() and then kept reading frames from the released
    # camera.  The Tk preview loop and this OpenCV loop cannot both own
    # the camera, so the functional OpenCV loop is kept; the Tk preview
    # (update_video + root.mainloop) remains available but is not started.
    clors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255)]

    while True:
        ret, img = cap.read()
        if not ret:  # camera unplugged / stream ended
            break

        bboxs, landmarks = get_face(img)
        h, w, _ = img.shape
        # NOTE(review): the original `tl = 1 or round(...) + 1` always
        # short-circuited to 1; compute the scaled thickness as intended.
        tl = round(0.002 * (h + w) / 2) + 1  # line/font thickness

        for i, (x1, y1, x2, y2) in enumerate(bboxs):
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 146, 230), 2)

            # Five facial landmarks, one colour each.
            for p in range(5):
                px = int(landmarks[i][2 * p])
                py = int(landmarks[i][2 * p + 1])
                cv2.circle(img, (px, py), tl + 1, clors[p], -1)

            face_image = img[y1:y2, x1:x2]
            # NOTE(review): the original drew captions from the never-filled
            # `detected_faces` list; use recognition()'s return value instead.
            face_score, label = recognition(face_image, i)
            if face_score < 0.25 or label == "UNKNOWN":
                caption = "UN_KNOWN"
            else:
                caption = f"{label.split('_')[0].upper()}:{face_score:.2f}"

            t_size = cv2.getTextSize(caption, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            cv2.rectangle(img, (x1, y1), (x1 + t_size[0], y1 + t_size[1]),
                          (0, 146, 230), -1)
            cv2.putText(img, caption, (x1, y1 + t_size[1]),
                        cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)

        # Rolling FPS measurement over 30-frame windows.
        frame_count += 1
        if frame_count >= 30:
            end = time.time_ns()
            fps = 1e9 * frame_count / (end - start)
            frame_count = 0
            start = time.time_ns()

        if fps > 0:
            cv2.putText(img, "FPS: %.2f" % fps, (10, 25),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        video.write(img)
        cv2.imshow("Face Recognition", img)
        # Press Q on keyboard to exit.
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break

    video.release()
    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()