"""Face detection using MTCNN for the Emotion Recognition System."""

import sys
from pathlib import Path
from typing import List, Optional, Tuple, Union

import cv2
import numpy as np
from PIL import Image

try:
    from mtcnn import MTCNN
    MTCNN_AVAILABLE = True
except ImportError:
    MTCNN_AVAILABLE = False
    print("Warning: MTCNN not installed. Install with: pip install mtcnn")

# Make the project root importable when this module is run directly.
sys.path.append(str(Path(__file__).parent.parent.parent))

from src.config import IMAGE_SIZE, IMAGE_SIZE_TRANSFER


class FaceDetector:
    """
    Face detection and extraction using MTCNN.

    Falls back to an OpenCV Haar Cascade when MTCNN is unavailable or
    fails to construct; the cascade is always loaded so the fallback is
    ready without re-initialization.
    """

    def __init__(self, min_face_size: int = 20, confidence_threshold: float = 0.9):
        """
        Initialize the face detector.

        Args:
            min_face_size: Minimum face size to detect (pixels).
            confidence_threshold: Minimum confidence for face detection
                (applies to MTCNN only; Haar provides no confidence).
        """
        self.min_face_size = min_face_size
        self.confidence_threshold = confidence_threshold

        if MTCNN_AVAILABLE:
            try:
                # Try newer MTCNN API that accepts min_face_size.
                self.detector = MTCNN(min_face_size=min_face_size)
            except TypeError:
                try:
                    # Try older MTCNN API without parameters.
                    self.detector = MTCNN()
                except Exception:
                    self.detector = None
        else:
            self.detector = None

        # Fallback to OpenCV Haar Cascade (always available with opencv).
        cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        self.cascade = cv2.CascadeClassifier(cascade_path)

    def detect_faces(self, image: np.ndarray) -> List[dict]:
        """
        Detect faces in an image.

        Args:
            image: Input image. A 3-channel image is assumed to be BGR
                (OpenCV convention) and is converted to RGB; a 2-D
                grayscale image is replicated to 3 channels for MTCNN.

        Returns:
            List of dictionaries with 'box' (x, y, w, h), 'confidence',
            and 'keypoints' (empty dict for the Haar fallback).
        """
        if image.ndim == 3 and image.shape[2] == 3:
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        elif image.ndim == 2:
            # MTCNN requires a 3-channel input; replicate the gray plane.
            rgb_image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        else:
            rgb_image = image

        faces = []

        if self.detector is not None:
            # Use MTCNN; keep only detections above the threshold.
            detections = self.detector.detect_faces(rgb_image)
            for detection in detections:
                if detection['confidence'] >= self.confidence_threshold:
                    faces.append({
                        'box': detection['box'],  # [x, y, width, height]
                        'confidence': detection['confidence'],
                        'keypoints': detection.get('keypoints', {})
                    })
        else:
            # Fallback to Haar Cascade, which operates on grayscale.
            gray = (cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)
                    if len(rgb_image.shape) == 3 else rgb_image)
            detected = self.cascade.detectMultiScale(
                gray,
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(self.min_face_size, self.min_face_size)
            )
            for (x, y, w, h) in detected:
                faces.append({
                    'box': [x, y, w, h],
                    'confidence': 1.0,  # Haar doesn't provide confidence
                    'keypoints': {}
                })

        return faces

    def extract_face(
        self,
        image: np.ndarray,
        box: List[int],
        target_size: Tuple[int, int] = IMAGE_SIZE,
        margin: float = 0.2,
        to_grayscale: bool = True
    ) -> np.ndarray:
        """
        Extract and preprocess a face region from an image.

        Args:
            image: Input image.
            box: Face bounding box [x, y, width, height].
            target_size: Target size for the extracted face.
            margin: Margin to add around the face (fraction of face size).
            to_grayscale: Whether to convert to grayscale.

        Returns:
            Preprocessed face image resized to ``target_size``.

        Raises:
            ValueError: If the box produces an empty crop (degenerate or
                fully outside the image), which cv2.resize cannot handle.
        """
        x, y, w, h = box

        # Expand the box by the requested margin, clamped to the image.
        margin_x = int(w * margin)
        margin_y = int(h * margin)
        x1 = max(0, x - margin_x)
        y1 = max(0, y - margin_y)
        x2 = min(image.shape[1], x + w + margin_x)
        y2 = min(image.shape[0], y + h + margin_y)

        face = image[y1:y2, x1:x2]
        if face.size == 0:
            # Guard: an empty crop would make cv2.resize raise an opaque
            # cv2.error; fail early with a meaningful message instead.
            raise ValueError(f"Empty face region for box {box}")

        if to_grayscale and len(face.shape) == 3:
            face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)

        face = cv2.resize(face, target_size)
        return face

    def detect_and_extract(
        self,
        image: np.ndarray,
        target_size: Tuple[int, int] = IMAGE_SIZE,
        to_grayscale: bool = True,
        return_all: bool = False
    ) -> Tuple[Optional[Union[np.ndarray, List[np.ndarray]]], List[dict]]:
        """
        Detect faces and extract them from an image.

        Args:
            image: Input image.
            target_size: Target size for extracted faces.
            to_grayscale: Whether to convert to grayscale.
            return_all: If True, return all faces; else return only the largest.

        Returns:
            Tuple of (extracted_face(s), face_info). The first element is
            ``None`` when no face is found, a single array when
            ``return_all`` is False, or a list of arrays otherwise.
        """
        faces = self.detect_faces(image)
        if not faces:
            return None, []

        if return_all:
            extracted = []
            for face_info in faces:
                face = self.extract_face(
                    image, face_info['box'],
                    target_size=target_size,
                    to_grayscale=to_grayscale
                )
                extracted.append(face)
            return extracted, faces

        # Return the largest face by bounding-box area.
        largest_face = max(faces, key=lambda f: f['box'][2] * f['box'][3])
        face = self.extract_face(
            image, largest_face['box'],
            target_size=target_size,
            to_grayscale=to_grayscale
        )
        return face, [largest_face]

    def preprocess_for_model(
        self,
        face: np.ndarray,
        for_transfer_learning: bool = False
    ) -> np.ndarray:
        """
        Preprocess an extracted face for model prediction.

        Args:
            face: Extracted face image.
            for_transfer_learning: If True, prepare for transfer learning
                models (larger input, 3 channels).

        Returns:
            Preprocessed face ready for model input, shape
            (1, H, W, C) with values scaled to [0, 1].
        """
        target_size = IMAGE_SIZE_TRANSFER if for_transfer_learning else IMAGE_SIZE

        if face.shape[:2] != target_size:
            face = cv2.resize(face, target_size)

        # Scale pixel values to [0, 1].
        face = face.astype(np.float32) / 255.0

        # Ensure a channel dimension exists for grayscale input.
        if len(face.shape) == 2:
            if for_transfer_learning:
                # Transfer models expect RGB; replicate the gray plane.
                face = np.stack([face, face, face], axis=-1)
            else:
                face = np.expand_dims(face, axis=-1)

        # Add batch dimension.
        face = np.expand_dims(face, axis=0)
        return face

    def draw_detections(
        self,
        image: np.ndarray,
        faces: List[dict],
        emotions: Optional[List[str]] = None,
        confidences: Optional[List[float]] = None
    ) -> np.ndarray:
        """
        Draw face detections and emotion labels on an image.

        Args:
            image: Input image (not modified; a copy is annotated).
            faces: List of face detection results.
            emotions: Optional list of emotion labels, parallel to ``faces``.
            confidences: Optional list of confidence scores, parallel to
                ``emotions``.

        Returns:
            Copy of the image with drawn detections.
        """
        result = image.copy()

        for i, face_info in enumerate(faces):
            x, y, w, h = face_info['box']
            cv2.rectangle(result, (x, y), (x + w, y + h), (0, 255, 0), 2)

            if emotions and i < len(emotions):
                label = emotions[i]
                if confidences and i < len(confidences):
                    label = f"{label}: {confidences[i]:.2f}"

                # Filled background so the text stays readable.
                (label_w, label_h), _ = cv2.getTextSize(
                    label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2
                )
                cv2.rectangle(
                    result,
                    (x, y - label_h - 10),
                    (x + label_w, y),
                    (0, 255, 0),
                    -1
                )
                cv2.putText(
                    result, label, (x, y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2
                )

        return result


def load_image(image_path: str) -> np.ndarray:
    """
    Load an image from file.

    Args:
        image_path: Path to the image file.

    Returns:
        Image as numpy array (BGR format).

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    image = cv2.imread(str(image_path))
    if image is None:
        raise ValueError(f"Could not load image: {image_path}")
    return image


def load_image_pil(image_path: str) -> Image.Image:
    """
    Load an image using PIL.

    Args:
        image_path: Path to the image file.

    Returns:
        PIL Image object.
    """
    return Image.open(image_path)


if __name__ == "__main__":
    # Smoke test: report which detection backend is active.
    detector = FaceDetector()
    print(f"MTCNN available: {MTCNN_AVAILABLE}")
    print(f"Using: {'MTCNN' if detector.detector else 'Haar Cascade'}")