# Upload residue (originally: joyjonesmark — "Initial deploy with models", commit e5abc2e)
"""
Face detection using MTCNN for the Emotion Recognition System.
"""
import cv2
import numpy as np
from typing import List, Tuple, Optional
from pathlib import Path
from PIL import Image
try:
from mtcnn import MTCNN
MTCNN_AVAILABLE = True
except ImportError:
MTCNN_AVAILABLE = False
print("Warning: MTCNN not installed. Install with: pip install mtcnn")
import sys
sys.path.append(str(Path(__file__).parent.parent.parent))
from src.config import IMAGE_SIZE, IMAGE_SIZE_TRANSFER
class FaceDetector:
    """
    Face detection and extraction for the Emotion Recognition System.

    Uses MTCNN when the package is importable; otherwise falls back to
    OpenCV's Haar cascade frontal-face detector.
    """

    def __init__(self, min_face_size: int = 20, confidence_threshold: float = 0.9):
        """
        Initialize the face detector.

        Args:
            min_face_size: Minimum face size (in pixels) to detect.
            confidence_threshold: Minimum MTCNN confidence for a detection
                to be kept (the Haar fallback has no confidence scores).
        """
        self.min_face_size = min_face_size
        self.confidence_threshold = confidence_threshold

        self.detector = None
        if MTCNN_AVAILABLE:
            try:
                # Newer MTCNN releases accept min_face_size directly.
                self.detector = MTCNN(min_face_size=min_face_size)
            except TypeError:
                try:
                    # Older releases take no constructor arguments.
                    self.detector = MTCNN()
                except Exception:
                    self.detector = None

        # Haar cascade fallback — always loaded so it is available even
        # when MTCNN construction failed above.
        cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        self.cascade = cv2.CascadeClassifier(cascade_path)

    def detect_faces(self, image: np.ndarray) -> List[dict]:
        """
        Detect faces in an image.

        Args:
            image: Input image. NOTE(review): a 3-channel input is assumed
                to be BGR (OpenCV convention) and is converted to RGB for
                MTCNN; an RGB input would get its channels swapped here.

        Returns:
            List of dicts with 'box' ([x, y, w, h]), 'confidence' and
            'keypoints' (empty dict on the Haar fallback path).
        """
        if len(image.shape) == 3 and image.shape[2] == 3:
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        else:
            rgb_image = image

        faces = []
        if self.detector is not None:
            # MTCNN path: filter detections by confidence.
            for detection in self.detector.detect_faces(rgb_image):
                if detection['confidence'] >= self.confidence_threshold:
                    faces.append({
                        'box': detection['box'],  # [x, y, width, height]
                        'confidence': detection['confidence'],
                        'keypoints': detection.get('keypoints', {}),
                    })
        else:
            # Haar cascade fallback operates on grayscale.
            gray = (cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)
                    if len(rgb_image.shape) == 3 else rgb_image)
            detected = self.cascade.detectMultiScale(
                gray,
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(self.min_face_size, self.min_face_size),
            )
            for (x, y, w, h) in detected:
                faces.append({
                    # Cast numpy ints to plain ints so boxes are uniform
                    # with the MTCNN path (and JSON-serializable).
                    'box': [int(x), int(y), int(w), int(h)],
                    'confidence': 1.0,  # Haar provides no confidence score
                    'keypoints': {},
                })
        return faces

    def extract_face(
        self,
        image: np.ndarray,
        box: List[int],
        target_size: Tuple[int, int] = IMAGE_SIZE,
        margin: float = 0.2,
        to_grayscale: bool = True
    ) -> np.ndarray:
        """
        Extract and preprocess a face region from an image.

        Args:
            image: Input image.
            box: Face bounding box [x, y, width, height].
            target_size: Target (width, height) for the extracted face.
            margin: Margin to add around the face, as a fraction of the
                face width/height.
            to_grayscale: Whether to convert the crop to grayscale.

        Returns:
            The cropped face resized to target_size.

        Raises:
            ValueError: If the (clamped) box has no overlap with the image.
        """
        x, y, w, h = box
        margin_x = int(w * margin)
        margin_y = int(h * margin)

        # Expand by the margin, then clamp to the image bounds.
        x1 = max(0, x - margin_x)
        y1 = max(0, y - margin_y)
        x2 = min(image.shape[1], x + w + margin_x)
        y2 = min(image.shape[0], y + h + margin_y)

        if x2 <= x1 or y2 <= y1:
            # A box entirely outside the image would make cv2.resize fail
            # with an opaque assertion; fail loudly and clearly instead.
            raise ValueError(f"Face box {box} lies outside the image bounds")

        face = image[y1:y2, x1:x2]

        if to_grayscale and len(face.shape) == 3:
            # NOTE(review): assumes a BGR crop, consistent with detect_faces.
            face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)

        # cv2.resize expects dsize as (width, height).
        face = cv2.resize(face, target_size)
        return face

    def detect_and_extract(
        self,
        image: np.ndarray,
        target_size: Tuple[int, int] = IMAGE_SIZE,
        to_grayscale: bool = True,
        return_all: bool = False
    ) -> Tuple[Optional[np.ndarray], List[dict]]:
        """
        Detect faces and extract them from an image.

        Args:
            image: Input image.
            target_size: Target (width, height) for extracted faces.
            to_grayscale: Whether to convert crops to grayscale.
            return_all: If True, return all faces; otherwise only the
                largest (by box area).

        Returns:
            (face, [face_info]) when return_all is False, or
            ([faces...], face_infos) when return_all is True.
            Returns (None, []) when no face is detected.
        """
        faces = self.detect_faces(image)
        if not faces:
            return None, []

        if return_all:
            extracted = [
                self.extract_face(
                    image, face_info['box'],
                    target_size=target_size,
                    to_grayscale=to_grayscale,
                )
                for face_info in faces
            ]
            return extracted, faces

        # Keep only the face with the largest bounding-box area.
        largest_face = max(faces, key=lambda f: f['box'][2] * f['box'][3])
        face = self.extract_face(
            image, largest_face['box'],
            target_size=target_size,
            to_grayscale=to_grayscale,
        )
        return face, [largest_face]

    def preprocess_for_model(
        self,
        face: np.ndarray,
        for_transfer_learning: bool = False
    ) -> np.ndarray:
        """
        Preprocess an extracted face for model prediction.

        Args:
            face: Extracted face image (2-D grayscale or 3-D color).
            for_transfer_learning: If True, resize to IMAGE_SIZE_TRANSFER
                and expand grayscale to 3 channels for RGB-input backbones.

        Returns:
            float32 array in [0, 1] with batch and channel dimensions,
            shape (1, H, W, C).
        """
        target_size = IMAGE_SIZE_TRANSFER if for_transfer_learning else IMAGE_SIZE

        # face.shape is (height, width) while target_size is (width, height);
        # compare in matching order so non-square targets behave correctly.
        if face.shape[:2] != (target_size[1], target_size[0]):
            face = cv2.resize(face, target_size)

        # Scale pixel values to [0, 1].
        face = face.astype(np.float32) / 255.0

        if len(face.shape) == 2:
            if for_transfer_learning:
                # Replicate the grayscale plane into 3 channels.
                face = np.stack([face, face, face], axis=-1)
            else:
                face = np.expand_dims(face, axis=-1)

        # Prepend the batch dimension.
        face = np.expand_dims(face, axis=0)
        return face

    def draw_detections(
        self,
        image: np.ndarray,
        faces: List[dict],
        emotions: Optional[List[str]] = None,
        confidences: Optional[List[float]] = None
    ) -> np.ndarray:
        """
        Draw face detections and emotion labels on a copy of the image.

        Args:
            image: Input image.
            faces: List of face detection results (from detect_faces).
            emotions: Optional emotion label per face.
            confidences: Optional confidence score per face.

        Returns:
            A new image with boxes (and labels, when provided) drawn.
        """
        result = image.copy()

        for i, face_info in enumerate(faces):
            x, y, w, h = face_info['box']
            cv2.rectangle(result, (x, y), (x + w, y + h), (0, 255, 0), 2)

            if emotions and i < len(emotions):
                label = emotions[i]
                if confidences and i < len(confidences):
                    label = f"{label}: {confidences[i]:.2f}"

                (label_w, label_h), _ = cv2.getTextSize(
                    label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2
                )
                # Clamp so the filled background never extends above row 0.
                top = max(0, y - label_h - 10)
                cv2.rectangle(
                    result, (x, top), (x + label_w, y), (0, 255, 0), -1
                )
                cv2.putText(
                    result, label, (x, y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2
                )

        return result
def load_image(image_path: str) -> np.ndarray:
    """
    Read an image file from disk with OpenCV.

    Args:
        image_path: Path to the image file.

    Returns:
        Image as numpy array (BGR format).

    Raises:
        ValueError: If the file cannot be decoded as an image.
    """
    loaded = cv2.imread(str(image_path))
    if loaded is not None:
        return loaded
    raise ValueError(f"Could not load image: {image_path}")
def load_image_pil(image_path: str) -> Image.Image:
    """
    Open an image file with PIL.

    Args:
        image_path: Path to the image file.

    Returns:
        The PIL Image object for the file.
    """
    pil_image = Image.open(image_path)
    return pil_image
if __name__ == "__main__":
    # Smoke test: report which detection backend will be used.
    # (The redundant `import sys` was removed — sys is already imported
    # at module level and was unused here.)
    detector = FaceDetector()
    print(f"MTCNN available: {MTCNN_AVAILABLE}")
    print(f"Using: {'MTCNN' if detector.detector else 'Haar Cascade'}")