"""Face-recognition attendance script.

Pipeline: YOLOv5-face detects faces, an InsightFace iresnet backbone embeds
them, and cosine similarity against a precomputed feature bank names each
face.  First-time recognitions (score >= 0.35) are appended to a CSV log.
If the ``test_image`` folder contains images they are processed offline;
otherwise frames come from the default camera, are annotated, recorded to
disk, and shown live.
"""

# --- standard library ---
import csv
import datetime
import os
import sys
import threading
import time

# --- third party ---
import cv2
import numpy as np
import tkinter as tk
from tkinter import ttk

import torch
from torchvision import transforms
from PIL import Image, ImageTk

# yolov5_face is vendored next to this script.
sys.path.insert(0, "yolov5_face")

from models.experimental import attempt_load
from utils.datasets import letterbox
from utils.general import check_img_size, non_max_suppression_face, scale_coords
from insightface.insight_face import iresnet100
# Lighter alternative: from insightface.insight_face import iresnet18

# NOTE(review): the original imported `thread` from concurrent.futures
# (unused) and `null` from sqlalchemy just to initialise `name`; both were
# removed -- plain None is what was intended.

# ------------------------------------------------------------------- Tk UI
root = tk.Tk()
root.title("Face Recognition System")

frame = ttk.Frame(root, padding="3 3 12 12")
frame.grid(column=0, row=0, sticky=(tk.W, tk.E, tk.N, tk.S))

video_label = ttk.Label(frame)
video_label.grid(column=0, row=0, sticky=(tk.W, tk.E, tk.N, tk.S))

# Shared camera handle, opened in main().  The original referenced a `cap`
# that was local to main(), which raised NameError inside update_video().
cap = None


def update_video():
    """Paint one camera frame into the Tk label and reschedule itself."""
    if cap is not None:
        ret, bgr = cap.read()
        if ret:  # skip dropped frames instead of crashing on a None frame
            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            img = ImageTk.PhotoImage(image=Image.fromarray(rgb))
            video_label.imgtk = img  # keep a reference or Tk drops the image
            video_label.configure(image=img)
    video_label.after(10, update_video)


def run_face_recognition():
    """Recognition worker stub; intended to run in a separate thread."""
    # Your face recognition code here.
    pass


# ------------------------------------------------------------ model setup
device = torch.device("cpu")

# Face detector (YOLOv5m-face; yolov5s-face.pt is the faster option).
model = attempt_load("yolov5_face/yolov5m-face.pt", map_location=device)

# Recognition backbone (iresnet100; resnet18_backbone.pth + iresnet18 is
# the lighter alternative kept commented out in the original).
weight = torch.load("insightface/resnet100_backbone.pth", map_location=device)
model_emb = iresnet100()
model_emb.load_state_dict(weight)
model_emb.to(device)
model_emb.eval()

detected_faces = []

# Face-crop preprocessing for the embedding network: 112x112, scaled to
# [-1, 1] via mean/std of 0.5.
face_preprocess = transforms.Compose([
    transforms.ToTensor(),            # HWC uint8 -> CHW float in [0, 1]
    transforms.Resize((112, 112)),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

isThread = True
score = 0
name = None
csv_filename = "recognized_faces.csv"
recognized_names = []  # names already written to the CSV during this run


def resize_image(img0, img_size):
    """Letterbox *img0* (BGR) to the detector's input size.

    Returns a CHW float tensor in [0, 1] on *device*.
    """
    h0, w0 = img0.shape[:2]  # original hw
    r = img_size / max(h0, w0)
    if r != 1:  # shrink with INTER_AREA, enlarge with INTER_LINEAR
        interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
        img0 = cv2.resize(img0, (int(w0 * r), int(h0 * r)), interpolation=interp)

    imgsz = check_img_size(img_size, s=model.stride.max())  # stride-aligned
    img = letterbox(img0, new_shape=imgsz)[0]

    # BGR -> RGB, HWC -> CHW
    img = img[:, :, ::-1].transpose(2, 0, 1).copy()
    img = torch.from_numpy(img).to(device)
    img = img.float()   # uint8 -> fp32
    img /= 255.0        # 0-255 -> 0.0-1.0
    return img


def scale_coords_landmarks(img1_shape, coords, img0_shape, ratio_pad=None):
    """Rescale 5-point landmarks (x1,y1 .. x5,y5) from the letterboxed
    shape *img1_shape* back to the original image shape *img0_shape*."""
    if ratio_pad is None:  # derive gain and padding from the two shapes
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        pad = ((img1_shape[1] - img0_shape[1] * gain) / 2,
               (img1_shape[0] - img0_shape[0] * gain) / 2)
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2, 4, 6, 8]] -= pad[0]  # x padding
    coords[:, [1, 3, 5, 7, 9]] -= pad[1]  # y padding
    coords[:, :10] /= gain

    # Clamp every point inside the original image (x columns are even,
    # y columns are odd) -- replaces ten hand-written clamp_ lines.
    for j in range(10):
        bound = img0_shape[1] if j % 2 == 0 else img0_shape[0]
        coords[:, j].clamp_(0, bound)
    return coords


def get_face(input_image):
    """Detect faces in *input_image* (BGR).

    Returns ``(bboxs, landmarks)`` as int32 arrays in original-image
    coordinates.
    """
    # Detector parameters.
    size_convert = 128
    conf_thres = 0.4
    iou_thres = 0.5

    img = resize_image(input_image.copy(), size_convert)

    # Forward pass through yolov5-face.
    with torch.no_grad():
        pred = model(img[None, :])[0]

    # Apply NMS, then map boxes/landmarks back to the original image.
    det = non_max_suppression_face(pred, conf_thres, iou_thres)[0]
    bboxs = np.int32(
        scale_coords(img.shape[1:], det[:, :4], input_image.shape)
        .round().cpu().numpy())
    landmarks = np.int32(
        scale_coords_landmarks(img.shape[1:], det[:, 5:15], input_image.shape)
        .round().cpu().numpy())
    return bboxs, landmarks


def get_feature(face_image, training=True):
    """Embed a BGR face crop; returns the L2-normalised embedding.

    With ``training=True`` the batch dimension is stripped from the model
    output ([0]); with ``training=False`` it is kept -- preserved as-is
    from the original.
    """
    face_image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
    face_tensor = face_preprocess(face_image).to(device)

    with torch.no_grad():
        if training:
            emb_img_face = model_emb(face_tensor[None, :])[0].cpu().numpy()
        else:
            emb_img_face = model_emb(face_tensor[None, :]).cpu().numpy()

    return emb_img_face / np.linalg.norm(emb_img_face)


def read_features(root_fearure_path="static/feature/face_features.npz"):
    """Load the precomputed (names, embeddings) feature bank.

    NOTE(review): the parameter name keeps the original typo ("fearure")
    so keyword callers elsewhere stay compatible.
    """
    data = np.load(root_fearure_path, allow_pickle=True)
    return data["arr1"], data["arr2"]


def recognition(face_image, index):
    """Name *face_image* against the feature bank.

    Appends each person to the CSV log once (score >= 0.35) and returns
    ``(score, caption)``, where caption is ``"UNKNOWN"`` below threshold.
    """
    global recognized_names  # shared de-duplication list

    query_emb = get_feature(face_image, training=False)
    images_names, images_embs = read_features()

    # Cosine similarity (embeddings are unit-norm), best match wins.
    scores = (query_emb @ images_embs.T)[0]
    id_min = np.argmax(scores)
    match_score = scores[id_min]
    match_name = images_names[id_min]

    caption = "UNKNOWN" if match_score < 0.35 else match_name

    if match_score >= 0.35 and caption not in recognized_names:
        recognized_names.append(caption)
        now = datetime.datetime.now()
        date_str = now.strftime("%Y-%m-%d")
        # Renamed from `time`: the original shadowed the time module here.
        time_str = now.strftime("%H:%M:%S")
        with open(csv_filename, 'a', newline='') as file:
            csv.writer(file).writerow([caption, date_str, time_str])

    print(f"Face {index}: Score: {match_score:.2f}, Name: {caption}")
    return match_score, caption


def create_csv_file(filename):
    """Create the attendance CSV with its header row."""
    with open(filename, 'w', newline='') as file:
        csv.writer(file).writerow(["Name", "Date", "Time"])


# Create the CSV file if it doesn't exist.
if not os.path.exists(csv_filename):
    create_csv_file(csv_filename)


def recognize_from_images(image_folder):
    """Run detection + recognition over every image file in *image_folder*."""
    if not os.path.exists(image_folder):
        print(f"Image folder '{image_folder}' doesn't exist.")
        return

    for image_name in os.listdir(image_folder):
        if image_name.endswith(("png", 'jpg', 'jpeg')):
            input_image = cv2.imread(os.path.join(image_folder, image_name))
            bboxs, _ = get_face(input_image)
            for i, (x1, y1, x2, y2) in enumerate(bboxs):
                recognition(input_image[y1:y2, x1:x2], i)


def main():
    """Entry point: offline folder mode if test images exist, else camera."""
    global cap

    test_image_folder = "test_image"
    if os.path.exists(test_image_folder) and any(
        image_name.endswith(("png", 'jpg', 'jpeg'))
        for image_name in os.listdir(test_image_folder)
    ):
        recognize_from_images(test_image_folder)
        return

    cap = cv2.VideoCapture(0)

    start = time.time_ns()
    frame_count = 0
    fps = -1

    # Background worker (currently a stub); daemon so it never blocks exit.
    threading.Thread(target=run_face_recognition, daemon=True).start()

    # Recorder for the annotated stream.
    size = (int(cap.get(3)), int(cap.get(4)))  # frame width, height
    video = cv2.VideoWriter('./static/results/face-recognition2.mp4',
                            cv2.VideoWriter_fourcc(*'mp4v'), 6, size)

    # NOTE(review): the original called cap.release() right after
    # root.mainloop() and then kept reading frames from the released
    # camera.  The Tk preview loop and this OpenCV loop cannot both own
    # the camera, so the functional OpenCV loop is kept; the Tk preview
    # (update_video + root.mainloop) remains available but is not started.
    clors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255)]

    while True:
        ret, img = cap.read()
        if not ret:  # camera unplugged / stream ended
            break

        bboxs, landmarks = get_face(img)
        h, w, _ = img.shape
        # NOTE(review): the original `tl = 1 or round(...) + 1` always
        # short-circuited to 1; compute the scaled thickness as intended.
        tl = round(0.002 * (h + w) / 2) + 1  # line/font thickness

        for i, (x1, y1, x2, y2) in enumerate(bboxs):
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 146, 230), 2)

            # Five facial landmarks, one colour each.
            for p in range(5):
                px = int(landmarks[i][2 * p])
                py = int(landmarks[i][2 * p + 1])
                cv2.circle(img, (px, py), tl + 1, clors[p], -1)

            face_image = img[y1:y2, x1:x2]
            # NOTE(review): the original drew captions from the never-filled
            # `detected_faces` list; use recognition()'s return value instead.
            face_score, label = recognition(face_image, i)
            if face_score < 0.25 or label == "UNKNOWN":
                caption = "UN_KNOWN"
            else:
                caption = f"{label.split('_')[0].upper()}:{face_score:.2f}"

            t_size = cv2.getTextSize(caption, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            cv2.rectangle(img, (x1, y1), (x1 + t_size[0], y1 + t_size[1]),
                          (0, 146, 230), -1)
            cv2.putText(img, caption, (x1, y1 + t_size[1]),
                        cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)

        # Rolling FPS measurement over 30-frame windows.
        frame_count += 1
        if frame_count >= 30:
            end = time.time_ns()
            fps = 1e9 * frame_count / (end - start)
            frame_count = 0
            start = time.time_ns()

        if fps > 0:
            cv2.putText(img, "FPS: %.2f" % fps, (10, 25),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        video.write(img)
        cv2.imshow("Face Recognition", img)
        # Press Q on keyboard to exit.
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break

    video.release()
    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()