Spaces:
Sleeping
Sleeping
| import cv2 | |
| import torch | |
| import numpy as np | |
| from PIL import Image | |
| try: | |
| from facenet_pytorch import MTCNN | |
| except ImportError: | |
| print("facenet_pytorch not installed. Please install it using: pip install facenet-pytorch") | |
| MTCNN = None | |
| class FaceDetector: | |
| def __init__(self, device='cuda', image_size=224, margin=0): | |
| self.device = device | |
| if MTCNN is not None: | |
| self.mtcnn = MTCNN( | |
| image_size=image_size, | |
| margin=margin, | |
| keep_all=True, # We'll select the largest ourselves | |
| device=device, | |
| post_process=False # We want the raw image, not normalized | |
| ) | |
| else: | |
| self.mtcnn = None | |
| def process_video(self, video_path, fps=5): | |
| """ | |
| Extract faces from video. | |
| Returns a list of (face_rgb, frame_idx) tuples. | |
| """ | |
| if self.mtcnn is None: | |
| raise ImportError("MTCNN not initialized.") | |
| cap = cv2.VideoCapture(video_path) | |
| video_fps = cap.get(cv2.CAP_PROP_FPS) | |
| frame_interval = int(max(1, round(video_fps / fps))) | |
| faces = [] | |
| frame_idx = 0 | |
| while cap.isOpened(): | |
| ret, frame = cap.read() | |
| if not ret: | |
| break | |
| if frame_idx % frame_interval == 0: | |
| # Convert BGR to RGB | |
| frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) | |
| pil_img = Image.fromarray(frame_rgb) | |
| # Detect faces | |
| boxes, _ = self.mtcnn.detect(pil_img) | |
| if boxes is not None and len(boxes) > 0: | |
| areas = [(b[2] - b[0]) * (b[3] - b[1]) for b in boxes] | |
| largest_idx = np.argmax(areas) | |
| box = boxes[largest_idx] | |
| b = [int(max(0, c)) for c in box] | |
| face_img = frame_rgb[b[1]:b[3], b[0]:b[2]] | |
| if face_img.size > 0: | |
| face_img = cv2.resize(face_img, (224, 224)) | |
| faces.append(face_img) | |
| frame_idx += 1 | |
| cap.release() | |
| return faces | |
| def process_image(self, image_path): | |
| """ | |
| Process a single image path. | |
| """ | |
| if self.mtcnn is None: | |
| raise ImportError("MTCNN not initialized.") | |
| frame = cv2.imread(image_path) | |
| if frame is None: | |
| return None | |
| frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) | |
| pil_img = Image.fromarray(frame_rgb) | |
| boxes, _ = self.mtcnn.detect(pil_img) | |
| if boxes is not None and len(boxes) > 0: | |
| areas = [(b[2] - b[0]) * (b[3] - b[1]) for b in boxes] | |
| largest_idx = np.argmax(areas) | |
| box = boxes[largest_idx] | |
| b = [int(max(0, c)) for c in box] | |
| face_img = frame_rgb[b[1]:b[3], b[0]:b[2]] | |
| if face_img.size > 0: | |
| face_img = cv2.resize(face_img, (224, 224)) | |
| return face_img | |
| return None | |