# test / recognize.py
# Surya152002's picture
# Upload 12 files
# 40f2021
#pytorch
import tkinter as tk
from tkinter import ttk
from PIL import Image, ImageTk
import cv2
import threading
# Your existing imports and code...
# Build the main application window.
root = tk.Tk()
root.title("Face Recognition System")

# Both widgets below stick to all four sides of their grid cell.
_STICK_ALL_SIDES = (tk.W, tk.E, tk.N, tk.S)

# Container frame that hosts the video stream.
frame = ttk.Frame(root, padding="3 3 12 12")
frame.grid(row=0, column=0, sticky=_STICK_ALL_SIDES)

# Label whose image is replaced with each new video frame.
video_label = ttk.Label(frame)
video_label.grid(row=0, column=0, sticky=_STICK_ALL_SIDES)
def update_video():
    """Show the next camera frame in ``video_label`` and reschedule itself.

    Reads one frame from the module-level capture object ``cap``, converts it
    from BGR to RGB, wraps it in a Tk-compatible image and assigns it to
    ``video_label``. The function then re-arms itself through
    ``video_label.after`` so it runs again 10 ms later.

    A failed read (camera busy, unplugged, stream ended) is skipped instead of
    being passed to ``cv2.cvtColor``, which would raise and stop the preview.
    """
    grabbed, bgr_frame = cap.read()
    if grabbed and bgr_frame is not None:
        rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
        photo = ImageTk.PhotoImage(image=Image.fromarray(rgb_frame))
        # Keep a reference on the widget so the image object stays alive for
        # as long as the label displays it.
        video_label.imgtk = photo
        video_label.configure(image=photo)
    # Re-arm regardless of the read result so a transient failure does not
    # permanently freeze the preview.
    video_label.after(10, update_video)
def run_face_recognition():
    """Placeholder for the background face-recognition worker.

    Intended to be run in a separate thread. It currently performs no work
    and returns ``None``; the explicit body keeps the definition valid
    Python (a function consisting solely of comments does not parse).
    """
    # Your face recognition code here
    # This function should be run in a separate thread
    return None
from concurrent.futures import thread
from sqlalchemy import null
import torch
from torchvision import transforms
import time
from threading import Thread
#other lib
import sys
import numpy as np
import os
import cv2
import csv
import datetime
sys.path.insert(0, "yolov5_face")
from models.experimental import attempt_load
from utils.datasets import letterbox
from utils.general import check_img_size, non_max_suppression_face, scale_coords
# Check device: all models and tensors in this file are placed on the CPU.
device = torch.device("cpu")

# Face detection model (loaded from the yolov5_face checkpoint directory).
## Case 1:
# model = attempt_load("yolov5_face/yolov5s-face.pt", map_location=device)
## Case 2:
model = attempt_load("yolov5_face/yolov5m-face.pt", map_location=device)

# Face embedding (recognition) model.
## Case 1:
from insightface.insight_face import iresnet100
weight = torch.load("insightface/resnet100_backbone.pth", map_location = device)
model_emb = iresnet100()
## Case 2:
#from insightface.insight_face import iresnet18
#weight = torch.load("insightface/resnet18_backbone.pth", map_location = device)
#model_emb = iresnet18()
model_emb.load_state_dict(weight)
model_emb.to(device)
# Inference only: switch the embedding network to evaluation mode.
model_emb.eval()

detected_faces = []

# Preprocessing applied to every face crop before it is embedded.
face_preprocess = transforms.Compose([
    transforms.ToTensor(),  # HWC uint8 image -> CHW float tensor in [0, 1]
    transforms.Resize((112, 112)),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Module-level state. NOTE(review): `isThread`, `score` and `name` are not
# read by any code visible in this file - confirm before relying on them.
isThread = True
score = 0
# "No name yet" is represented by None (main() tests `name is None`); the
# previous value was SQLAlchemy's `null` callable, which is never None.
name = None
csv_filename = "recognized_faces.csv"
# Names that have already been written to the CSV log during this run.
recognized_names = []
# Resize image
def resize_image(img0, img_size):
    """Turn an image array into a normalised tensor for the face detector.

    The image is rescaled so its longer side equals ``img_size`` (area
    interpolation when shrinking, linear when enlarging), letterboxed to a
    size validated by ``check_img_size`` against the detector's maximum
    stride, channel-reversed, moved to channel-first layout and scaled to
    the range 0.0 - 1.0.

    Args:
        img0: image array whose first two dimensions are height and width.
        img_size: target length of the longer image side.

    Returns:
        A float tensor on ``device`` with values in [0, 1].
    """
    height, width = img0.shape[:2]
    ratio = img_size / max(height, width)

    # Nothing to do when the longer side already matches img_size.
    if ratio < 1:
        img0 = cv2.resize(
            img0, (int(width * ratio), int(height * ratio)), interpolation=cv2.INTER_AREA
        )
    elif ratio > 1:
        img0 = cv2.resize(
            img0, (int(width * ratio), int(height * ratio)), interpolation=cv2.INTER_LINEAR
        )

    checked_size = check_img_size(img_size, s=model.stride.max())
    boxed = letterbox(img0, new_shape=checked_size)[0]

    # Reverse the channel axis (BGR to RGB) and go from HWC to CHW; the copy
    # makes the array contiguous before it is handed to torch.
    chw = boxed[:, :, ::-1].transpose(2, 0, 1).copy()

    tensor = torch.from_numpy(chw).to(device).float()
    tensor /= 255.0  # 0 - 255 to 0.0 - 1.0
    return tensor
def scale_coords_landmarks(img1_shape, coords, img0_shape, ratio_pad=None):
    """Map landmark coordinates from ``img1_shape`` space to ``img0_shape`` space.

    ``coords`` is modified in place and also returned. Its first ten columns
    are treated as interleaved x/y values (x1, y1, ..., x5, y5).

    Args:
        img1_shape: (height, width) of the image the coordinates refer to.
        coords: 2-D tensor with at least ten columns.
        img0_shape: shape of the target image; index 0 is height, 1 is width.
        ratio_pad: optional ``((gain, ...), (pad_x, pad_y))``; when omitted the
            gain and padding are derived from the two shapes.

    Returns:
        The same ``coords`` tensor, rescaled and clamped to the target image.
    """
    if ratio_pad is not None:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]
    else:
        # gain = old / new, using the tighter of the two axes
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        pad_x = (img1_shape[1] - img0_shape[1] * gain) / 2
        pad_y = (img1_shape[0] - img0_shape[0] * gain) / 2
        pad = pad_x, pad_y

    x_columns = [0, 2, 4, 6, 8]
    y_columns = [1, 3, 5, 7, 9]
    coords[:, x_columns] -= pad[0]  # remove horizontal padding
    coords[:, y_columns] -= pad[1]  # remove vertical padding
    coords[:, :10] /= gain

    # Clamp every coordinate to the target image: even columns are x values
    # (bounded by the width), odd columns are y values (bounded by the height).
    max_y, max_x = img0_shape[0], img0_shape[1]
    for column in range(10):
        upper = max_x if column % 2 == 0 else max_y
        coords[:, column].clamp_(0, upper)
    return coords
def get_face(input_image):
    """Detect faces in ``input_image`` and return their boxes and landmarks.

    The image is resized for the detector, run through ``model`` without
    gradient tracking, filtered with ``non_max_suppression_face`` and the
    surviving detections are scaled back to the coordinates of
    ``input_image``.

    Args:
        input_image: image array; a copy is resized, the original is only
            used for its shape.

    Returns:
        ``(bboxs, landmarks)`` as int32 NumPy arrays. ``bboxs`` comes from
        detection columns 0-3 and ``landmarks`` from columns 5-14 (ten values
        per face).
    """
    # Parameters
    size_convert = 128
    conf_thres = 0.4
    iou_thres = 0.5

    net_input = resize_image(input_image.copy(), size_convert)

    # Forward pass through the detector with a leading batch dimension.
    with torch.no_grad():
        raw_pred = model(net_input[None, :])[0]

    # Non-maximum suppression; keep the result for the single image.
    det = non_max_suppression_face(raw_pred, conf_thres, iou_thres)[0]

    net_hw = net_input.shape[1:]
    scaled_boxes = scale_coords(net_hw, det[:, :4], input_image.shape).round()
    scaled_points = scale_coords_landmarks(net_hw, det[:, 5:15], input_image.shape).round()

    bboxs = np.int32(scaled_boxes.cpu().numpy())
    landmarks = np.int32(scaled_points.cpu().numpy())
    return bboxs, landmarks
def get_feature(face_image, training=True):
    """Compute the L2-normalised embedding of a face crop.

    Args:
        face_image: BGR face crop as an image array.
        training: when true, only the first element of the model output is
            kept; when false, the whole model output is kept.

    Returns:
        NumPy array holding the embedding divided by its own norm.
    """
    # The crop arrives in BGR order; the preprocessing pipeline gets RGB.
    rgb_face = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
    batch = face_preprocess(rgb_face).to(device)[None, :]

    with torch.no_grad():
        output = model_emb(batch)
        if training:
            output = output[0]
        embedding = output.cpu().numpy()

    return embedding / np.linalg.norm(embedding)
def read_features(root_fearure_path = "static/feature/face_features.npz"):
    """Load the stored face names and embeddings from an ``.npz`` archive.

    Args:
        root_fearure_path: path to the archive; it must contain the entries
            ``arr1`` (names) and ``arr2`` (embeddings).

    Returns:
        ``(images_name, images_emb)`` - the two arrays read from the archive.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if ``arr1`` or ``arr2`` is missing from the archive.
    """
    # NOTE(security): allow_pickle=True lets NumPy unpickle object arrays,
    # which can execute arbitrary code - only load archives you trust.
    # The context manager closes the underlying file handle; this function
    # is called for every recognised face, so an unclosed archive would leak
    # one handle per call.
    with np.load(root_fearure_path, allow_pickle=True) as data:
        images_name = data["arr1"]
        images_emb = data["arr2"]
    return images_name, images_emb
def recognition(face_image, index):
    """Identify a face crop against the stored embeddings and log new names.

    The crop is embedded, compared with every stored embedding via a dot
    product, and the best-scoring name is accepted when its score reaches
    the threshold. The first time a name is accepted during this run it is
    appended to ``recognized_names`` and written to ``csv_filename`` together
    with the current date and time.

    Args:
        face_image: BGR face crop as an image array.
        index: position of the face within its frame; only used in the
            printed message.

    Returns:
        ``(score, caption)`` where ``caption`` is the matched name, or
        ``"UNKNOWN"`` when the best score is below the threshold. An empty
        crop yields ``(0.0, "UNKNOWN")``.
    """
    match_threshold = 0.35

    # A zero-area crop (degenerate bounding box) cannot be colour-converted
    # or embedded; report it as unknown instead of raising.
    if face_image is None or face_image.size == 0:
        return 0.0, "UNKNOWN"

    # Get feature from face
    query_emb = get_feature(face_image, training=False)

    # Read features
    images_names, images_embs = read_features()

    scores = (query_emb @ images_embs.T)[0]
    best_index = np.argmax(scores)
    score = scores[best_index]

    if score >= match_threshold:
        caption = images_names[best_index]
        # Log each recognised name only once per run.
        if caption not in recognized_names:
            recognized_names.append(caption)
            now = datetime.datetime.now()
            date_text = now.strftime("%Y-%m-%d")
            time_text = now.strftime("%H:%M:%S")
            with open(csv_filename, 'a', newline='') as file:
                csv.writer(file).writerow([caption, date_text, time_text])
    else:
        caption = "UNKNOWN"

    print(f"Face {index}: Score: {score:.2f}, Name: {caption}")
    return score, caption
def create_csv_file(filename):
    """Create (or truncate) ``filename`` and write the CSV header row.

    Args:
        filename: path of the CSV file to write.
    """
    header = ["Name", "Date", "Time"]
    with open(filename, 'w', newline='') as handle:
        csv.writer(handle).writerow(header)
# Make sure the recognition log exists (with its header row) before any
# face is appended to it; an existing log is left untouched.
_csv_log_present = os.path.exists(csv_filename)
if not _csv_log_present:
    create_csv_file(csv_filename)
def recognize_from_images(image_folder):
    """Run face detection and recognition on every image in a folder.

    Files whose names end in ``png``, ``jpg`` or ``jpeg`` are read; each
    detected face is cropped and passed to ``recognition``. Files that
    cannot be decoded are reported and skipped.

    Args:
        image_folder: directory containing the images to process.

    Returns:
        None. A message is printed and nothing is processed when the folder
        does not exist.
    """
    if not os.path.exists(image_folder):
        print(f"Image folder '{image_folder}' doesn't exist.")
        return

    for image_name in os.listdir(image_folder):
        if not image_name.endswith(("png", 'jpg', 'jpeg')):
            continue

        image_path = os.path.join(image_folder, image_name)
        input_image = cv2.imread(image_path)
        # cv2.imread signals an unreadable/corrupt file by returning None
        # rather than raising; skip it so one bad file does not abort the run.
        if input_image is None:
            print(f"Could not read image '{image_path}', skipping.")
            continue

        # Get faces
        bboxs, _ = get_face(input_image)

        # Recognise each detected face from its bounding-box crop.
        for i, (x1, y1, x2, y2) in enumerate(bboxs):
            face_image = input_image[y1:y2, x1:x2]
            recognition(face_image, i)
def main():
    """Recognise faces in the ``test_image`` folder or from the default camera.

    If ``test_image`` exists and contains at least one file whose name ends
    in ``png``, ``jpg`` or ``jpeg``, those images are processed with
    ``recognize_from_images`` and the function returns.

    Otherwise camera 0 is opened and two stages run one after the other:

    1. A Tkinter preview driven by ``update_video``; it lasts until
       ``root.mainloop()`` returns.
    2. An OpenCV loop that detects and recognises faces in every frame,
       draws boxes, landmarks, captions and an FPS counter, writes the
       annotated frames to ``./static/results/face-recognition2.mp4`` and
       shows them in a window until ``q`` is pressed or the camera stops
       delivering frames.

    The camera, the video writer and all OpenCV windows are released when
    the function exits, including on error.
    """
    # update_video() reads the capture through the module-level name `cap`,
    # so the handle opened here has to be published as a global.
    global cap

    # Check if "test_image" folder is empty or not
    test_image_folder = "test_image"
    if os.path.exists(test_image_folder) and any(
        image_name.endswith(("png", 'jpg', 'jpeg'))
        for image_name in os.listdir(test_image_folder)
    ):
        # Recognize faces from images in the folder
        recognize_from_images(test_image_folder)
        return

    # Recognize faces from the camera
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Could not open camera 0.")
        cap.release()
        return

    video = None
    try:
        # ---- Stage 1: Tkinter preview ---------------------------------
        worker = threading.Thread(target=run_face_recognition)
        worker.start()
        update_video()
        root.mainloop()

        # ---- Stage 2: recognition + recording -------------------------
        # The capture must still be open here: its frame size is queried
        # and frames are read from it below.
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        size = (frame_width, frame_height)

        output_path = './static/results/face-recognition2.mp4'
        # Make sure the output directory exists before opening the writer.
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        video = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), 6, size)

        # Loop-invariant drawing settings.
        tl = 1  # line/font thickness
        clors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255)]

        # Start the FPS clock only now, so preview time is not counted.
        start = time.time_ns()
        frame_count = 0
        fps = -1

        # Read until video is completed
        while True:
            # Capture frame-by-frame; stop when no frame is delivered.
            grabbed, frame = cap.read()
            if not grabbed or frame is None:
                break

            # Get faces
            bboxs, landmarks = get_face(frame)

            for i, (x1, y1, x2, y2) in enumerate(bboxs):
                # Crop first, so the box and landmark dots drawn below do
                # not end up in the pixels handed to the recogniser.
                face_image = frame[y1:y2, x1:x2].copy()

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 146, 230), 2)

                # Landmarks: five (x, y) pairs, one colour each.
                for k, colour in enumerate(clors):
                    point_x = int(landmarks[i][2 * k])
                    point_y = int(landmarks[i][2 * k + 1])
                    cv2.circle(frame, (point_x, point_y), tl + 1, colour, -1)

                # A degenerate box yields an empty crop; nothing to recognise.
                if face_image.size == 0:
                    continue

                # Recognition: use the returned result for the caption.
                score, name = recognition(face_image, i)

                # Draw the name and score
                if name is None or name == "UNKNOWN" or score < 0.25:
                    caption = "UN_KNOWN"
                else:
                    caption = f"{name.split('_')[0].upper()}:{score:.2f}"

                t_size = cv2.getTextSize(caption, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
                cv2.rectangle(frame, (x1, y1), (x1 + t_size[0], y1 + t_size[1]), (0, 146, 230), -1)
                cv2.putText(frame, caption, (x1, y1 + t_size[1]), cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)

            # Count fps: refresh the estimate every 30 frames.
            frame_count += 1
            if frame_count >= 30:
                end = time.time_ns()
                fps = 1e9 * frame_count / (end - start)
                frame_count = 0
                start = time.time_ns()

            if fps > 0:
                fps_label = "FPS: %.2f" % fps
                cv2.putText(frame, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            video.write(frame)
            cv2.imshow("Face Recognition", frame)

            # Press Q on keyboard to exit
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
    finally:
        # Make sure to release resources, whatever happened above.
        if video is not None:
            video.release()
        cap.release()
        cv2.destroyAllWindows()
        cv2.waitKey(0)
# Start the application only when this file is executed as a script.
if __name__ == "__main__":
    main()