# PyTorch
import torch
from torchvision import transforms

# Other libs
import sys
import numpy as np
import os
import cv2
import time

sys.path.insert(0, "yolov5_face")

from models.experimental import attempt_load
from utils.datasets import letterbox
from utils.general import check_img_size, non_max_suppression_face, scale_coords

# Check device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the face-detection model
## Case 1:
# model = attempt_load("yolov5_face/yolov5s-face.pt", map_location=device)
## Case 2:
model = attempt_load("yolov5_face/yolov5n-0.5.pt", map_location=device)


# Resize image
def resize_image(img0, img_size):
    h0, w0 = img0.shape[:2]  # orig hw
    r = img_size / max(h0, w0)  # resize image to img_size

    if r != 1:  # always resize down, only resize up if training with augmentation
        interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
        img0 = cv2.resize(img0, (int(w0 * r), int(h0 * r)), interpolation=interp)

    imgsz = check_img_size(img_size, s=model.stride.max())  # check img_size
    img = letterbox(img0, new_shape=imgsz)[0]

    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1).copy()  # BGR to RGB, HWC to CHW
    img = torch.from_numpy(img).to(device)
    img = img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0

    return img


def scale_coords_landmarks(img1_shape, coords, img0_shape, ratio_pad=None):
    # Rescale the 5 landmark points (x, y pairs) from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2, 4, 6, 8]] -= pad[0]  # x padding
    coords[:, [1, 3, 5, 7, 9]] -= pad[1]  # y padding
    coords[:, :10] /= gain
    # clip_coords(coords, img0_shape)
    coords[:, 0].clamp_(0, img0_shape[1])  # x1
    coords[:, 1].clamp_(0, img0_shape[0])  # y1
    coords[:, 2].clamp_(0, img0_shape[1])  # x2
    coords[:, 3].clamp_(0, img0_shape[0])  # y2
    coords[:, 4].clamp_(0, img0_shape[1])  # x3
    coords[:, 5].clamp_(0, img0_shape[0])  # y3
    coords[:, 6].clamp_(0, img0_shape[1])  # x4
    coords[:, 7].clamp_(0, img0_shape[0])  # y4
    coords[:, 8].clamp_(0, img0_shape[1])  # x5
    coords[:, 9].clamp_(0, img0_shape[0])  # y5
    return coords


def get_face(input_image):
    # Parameters
    size_convert = 128
    conf_thres = 0.4
    iou_thres = 0.5

    # Resize image
    img = resize_image(input_image.copy(), size_convert)

    # Run yolov5-face
    with torch.no_grad():
        pred = model(img[None, :])[0]

    # Apply NMS
    det = non_max_suppression_face(pred, conf_thres, iou_thres)[0]

    # Scale boxes and landmarks back to the original image size
    bboxs = np.int32(scale_coords(img.shape[1:], det[:, :4], input_image.shape).round().cpu().numpy())
    landmarks = np.int32(scale_coords_landmarks(img.shape[1:], det[:, 5:15], input_image.shape).round().cpu().numpy())

    return bboxs, landmarks


def main():
    # Open camera
    cap = cv2.VideoCapture(0)
    start = time.time_ns()
    frame_count = 0
    fps = -1

    # Save video
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    size = (frame_width, frame_height)
    os.makedirs("results", exist_ok=True)  # make sure the output folder exists
    video = cv2.VideoWriter("results/face-detection.mp4", cv2.VideoWriter_fourcc(*"mp4v"), 30, size)

    # Read until the stream ends or 'q' is pressed
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            break

        # Get faces
        bboxs, landmarks = get_face(frame)
        h, w, c = frame.shape
        tl = 1 or round(0.002 * (h + w) / 2) + 1  # line/font thickness
        colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255)]

        # Draw boxes
        for i in range(len(bboxs)):
            # Face bounding box
            x1, y1, x2, y2 = bboxs[i]
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 146, 230), 2)

            # Landmarks (5 points)
            for x in range(5):
                point_x = int(landmarks[i][2 * x])
                point_y = int(landmarks[i][2 * x + 1])
                cv2.circle(frame, (point_x, point_y), tl + 1, colors[x], -1)

        # Count fps
        frame_count += 1
        if frame_count >= 30:
            end = time.time_ns()
            fps = 1e9 * frame_count / (end - start)
            frame_count = 0
            start = time.time_ns()

        if fps > 0:
            fps_label = "FPS: %.2f" % fps
            cv2.putText(frame, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Save video
        video.write(frame)

        # Show result
        cv2.imshow("Face Detection", frame)

        # Press Q on keyboard to exit
        if cv2.waitKey(25) & 0xFF == ord("q"):
            break

    video.release()
    cap.release()
    cv2.destroyAllWindows()
    cv2.waitKey(0)


if __name__ == "__main__":
    main()
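# --- Usage sketch (not part of the original script) ---
# get_face() can also be run on a single image instead of the webcam stream;
# "test.jpg" and "results/out.jpg" below are placeholder paths.
#
# image = cv2.imread("test.jpg")            # BGR image, HxWx3
# bboxs, landmarks = get_face(image)        # bounding boxes + 5-point landmarks
# for x1, y1, x2, y2 in bboxs:
#     cv2.rectangle(image, (x1, y1), (x2, y2), (0, 146, 230), 2)
# cv2.imwrite("results/out.jpg", image)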