# Page header residue: ChitranshSahu -- "First Commit" (commit 7a77962)
import cv2
import torch
import numpy as np
from PIL import Image
try:
from facenet_pytorch import MTCNN
except ImportError:
print("facenet_pytorch not installed. Please install it using: pip install facenet-pytorch")
MTCNN = None
class FaceDetector:
    """Find the largest face in images or video frames using MTCNN.

    Detection is delegated to ``facenet_pytorch.MTCNN``. For each processed
    frame the detected box with the largest area is cropped out of the RGB
    frame and resized to a square of ``image_size`` pixels.

    If ``facenet_pytorch`` could not be imported, the instance is still
    constructed but ``self.mtcnn`` is ``None`` and the processing methods
    raise ``ImportError``.
    """

    def __init__(self, device='cuda', image_size=224, margin=0):
        """Create the detector.

        Args:
            device: Device identifier forwarded to MTCNN.
            image_size: Side length, in pixels, of the square face crops
                returned by ``process_video`` / ``process_image``. Also
                forwarded to MTCNN.
            margin: Forwarded to the MTCNN constructor. The crops produced
                by this class are cut directly from the detected boxes.
        """
        self.device = device
        # Remembered so the crops honour the requested size instead of a
        # hard-coded 224.
        self.image_size = image_size
        if MTCNN is not None:
            self.mtcnn = MTCNN(
                image_size=image_size,
                margin=margin,
                keep_all=True,  # We'll select the largest ourselves
                device=device,
                post_process=False,  # We want the raw image, not normalized
            )
        else:
            self.mtcnn = None

    def _largest_face(self, frame_rgb):
        """Return the largest detected face in ``frame_rgb``, or ``None``.

        ``frame_rgb`` is an RGB image array. The result is the crop of the
        largest-area detection resized to ``image_size`` x ``image_size``;
        ``None`` means no face was detected or the crop came out empty.
        """
        boxes, _ = self.mtcnn.detect(Image.fromarray(frame_rgb))
        if boxes is None or len(boxes) == 0:
            return None

        areas = [(b[2] - b[0]) * (b[3] - b[1]) for b in boxes]
        box = boxes[int(np.argmax(areas))]

        # Detections may extend past the top/left edge; negative indices
        # would wrap around in a NumPy slice, so clamp them to zero.
        # Overshoot on the bottom/right is truncated by slicing itself.
        x1, y1, x2, y2 = (int(max(0, c)) for c in box[:4])
        face_img = frame_rgb[y1:y2, x1:x2]
        if face_img.size == 0:
            return None

        side = int(self.image_size)
        return cv2.resize(face_img, (side, side))

    def process_video(self, video_path, fps=5):
        """Extract the largest face from frames sampled out of a video.

        Frames are sampled at roughly ``fps`` frames per second, based on
        the frame rate reported by the video. When that rate is missing or
        not a positive finite number, every frame is processed.

        Args:
            video_path: Path handed to ``cv2.VideoCapture``.
            fps: Target sampling rate; must be greater than zero.

        Returns:
            A list of RGB face crops, one per sampled frame in which a face
            was found, in frame order. Frame indices are not returned. The
            list is empty if the video cannot be opened or no face is found.

        Raises:
            ImportError: If MTCNN is unavailable.
            ValueError: If ``fps`` is not greater than zero.
        """
        if self.mtcnn is None:
            raise ImportError("MTCNN not initialized.")
        if not fps > 0:
            raise ValueError(f"fps must be greater than zero, got {fps!r}")

        cap = cv2.VideoCapture(video_path)
        faces = []
        try:
            video_fps = cap.get(cv2.CAP_PROP_FPS)
            if np.isfinite(video_fps) and video_fps > 0:
                frame_interval = int(max(1, round(video_fps / fps)))
            else:
                # Unknown frame rate: fall back to processing every frame.
                frame_interval = 1

            frame_idx = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                if frame_idx % frame_interval == 0:
                    # OpenCV decodes to BGR; the detector path works in RGB.
                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    face_img = self._largest_face(frame_rgb)
                    if face_img is not None:
                        faces.append(face_img)
                frame_idx += 1
        finally:
            # Release the capture even if detection raised mid-stream.
            cap.release()
        return faces

    def process_image(self, image_path):
        """Extract the largest face from a single image file.

        Args:
            image_path: Path handed to ``cv2.imread``.

        Returns:
            The RGB face crop resized to ``image_size`` x ``image_size``, or
            ``None`` if the image cannot be read or no face is found.

        Raises:
            ImportError: If MTCNN is unavailable.
        """
        if self.mtcnn is None:
            raise ImportError("MTCNN not initialized.")
        frame = cv2.imread(image_path)
        if frame is None:
            return None
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        return self._largest_face(frame_rgb)