File size: 9,797 Bytes
e5abc2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
"""
Face detection using MTCNN for the Emotion Recognition System.
"""
import cv2
import numpy as np
from typing import List, Tuple, Optional
from pathlib import Path
from PIL import Image

try:
    from mtcnn import MTCNN
    MTCNN_AVAILABLE = True
except ImportError:
    MTCNN_AVAILABLE = False
    print("Warning: MTCNN not installed. Install with: pip install mtcnn")

import sys
sys.path.append(str(Path(__file__).parent.parent.parent))
from src.config import IMAGE_SIZE, IMAGE_SIZE_TRANSFER


class FaceDetector:
    """
    Face detection and extraction using MTCNN, with an OpenCV Haar Cascade
    fallback when MTCNN is not installed or fails to initialize.
    """

    def __init__(self, min_face_size: int = 20, confidence_threshold: float = 0.9):
        """
        Initialize the face detector.

        Args:
            min_face_size: Minimum face size (pixels) to detect.
            confidence_threshold: Minimum confidence for MTCNN detections;
                Haar Cascade results are not filtered (it reports no score).
        """
        self.min_face_size = min_face_size
        self.confidence_threshold = confidence_threshold
        self.detector = None

        if MTCNN_AVAILABLE:
            try:
                # Newer MTCNN releases accept min_face_size as a kwarg.
                self.detector = MTCNN(min_face_size=min_face_size)
            except TypeError:
                try:
                    # Older MTCNN API takes no constructor parameters.
                    self.detector = MTCNN()
                except Exception:
                    self.detector = None

        if self.detector is None:
            # BUGFIX: the Haar cascade must be set up in EVERY path where
            # self.detector is None. Previously it was only created when
            # MTCNN was not importable, so if MTCNN was installed but both
            # constructor attempts failed, detect_faces() raised
            # AttributeError on self.cascade.
            cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
            self.cascade = cv2.CascadeClassifier(cascade_path)

    def detect_faces(self, image: np.ndarray) -> List[dict]:
        """
        Detect faces in an image.

        Args:
            image: Input image; assumed BGR when 3-channel (OpenCV
                convention) — it is converted to RGB before MTCNN.

        Returns:
            List of dicts with 'box' ([x, y, w, h]), 'confidence'
            (1.0 for Haar, which provides no score), and 'keypoints'
            (empty dict when the backend supplies none).
        """
        # MTCNN expects RGB; convert from OpenCV's BGR if 3-channel.
        if len(image.shape) == 3 and image.shape[2] == 3:
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        else:
            rgb_image = image

        faces = []

        if self.detector is not None:
            # MTCNN backend: keep only detections above the threshold.
            detections = self.detector.detect_faces(rgb_image)
            for detection in detections:
                if detection['confidence'] >= self.confidence_threshold:
                    faces.append({
                        'box': detection['box'],  # [x, y, width, height]
                        'confidence': detection['confidence'],
                        'keypoints': detection.get('keypoints', {})
                    })
        else:
            # Haar Cascade fallback operates on grayscale.
            gray = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY) if len(rgb_image.shape) == 3 else rgb_image
            detected = self.cascade.detectMultiScale(
                gray,
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(self.min_face_size, self.min_face_size)
            )
            for (x, y, w, h) in detected:
                faces.append({
                    'box': [x, y, w, h],
                    'confidence': 1.0,  # Haar doesn't provide confidence
                    'keypoints': {}
                })

        return faces

    def extract_face(
        self,
        image: np.ndarray,
        box: List[int],
        target_size: Tuple[int, int] = IMAGE_SIZE,
        margin: float = 0.2,
        to_grayscale: bool = True
    ) -> np.ndarray:
        """
        Extract and preprocess a face region from an image.

        Args:
            image: Input image.
            box: Face bounding box [x, y, width, height].
            target_size: Target (width, height) for the extracted face.
            margin: Margin around the face as a fraction of the face size.
            to_grayscale: Whether to convert the crop to grayscale
                (assumes BGR input when 3-channel).

        Returns:
            The cropped, optionally grayscaled, resized face image.

        Raises:
            ValueError: If the (margin-expanded, image-clamped) box yields
                an empty crop — e.g. a box entirely outside the image.
        """
        x, y, w, h = box

        # Expand the box by the requested margin on each side.
        margin_x = int(w * margin)
        margin_y = int(h * margin)

        # Clamp the expanded box to the image bounds.
        x1 = max(0, x - margin_x)
        y1 = max(0, y - margin_y)
        x2 = min(image.shape[1], x + w + margin_x)
        y2 = min(image.shape[0], y + h + margin_y)

        face = image[y1:y2, x1:x2]

        # Guard: cv2.resize raises an opaque cv2.error on empty input;
        # fail early with a clear message instead.
        if face.size == 0:
            raise ValueError(f"Empty face region for box {box}")

        if to_grayscale and len(face.shape) == 3:
            face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)

        face = cv2.resize(face, target_size)

        return face

    def detect_and_extract(
        self,
        image: np.ndarray,
        target_size: Tuple[int, int] = IMAGE_SIZE,
        to_grayscale: bool = True,
        return_all: bool = False
    ) -> Tuple[Optional[np.ndarray], List[dict]]:
        """
        Detect faces and extract them from an image.

        Args:
            image: Input image.
            target_size: Target size for extracted faces.
            to_grayscale: Whether to convert extracted faces to grayscale.
            return_all: If True, return all faces; else only the largest
                (by bounding-box area).

        Returns:
            (extracted_face(s), face_info) — (None, []) when no face is
            found; (list_of_faces, all_info) when return_all; otherwise
            (single_face, [largest_info]).
        """
        faces = self.detect_faces(image)

        if not faces:
            return None, []

        if return_all:
            extracted = []
            for face_info in faces:
                face = self.extract_face(
                    image, face_info['box'],
                    target_size=target_size,
                    to_grayscale=to_grayscale
                )
                extracted.append(face)
            return extracted, faces

        # Single-face mode: pick the largest detection by box area (w * h).
        largest_face = max(faces, key=lambda f: f['box'][2] * f['box'][3])
        face = self.extract_face(
            image, largest_face['box'],
            target_size=target_size,
            to_grayscale=to_grayscale
        )
        return face, [largest_face]

    def preprocess_for_model(
        self,
        face: np.ndarray,
        for_transfer_learning: bool = False
    ) -> np.ndarray:
        """
        Preprocess an extracted face for model prediction.

        Args:
            face: Extracted face image (grayscale or color, uint8 expected).
            for_transfer_learning: If True, resize to the transfer-learning
                input size and expand grayscale to 3 channels.

        Returns:
            Float32 array in [0, 1] with batch and channel dimensions,
            ready for model input.
        """
        target_size = IMAGE_SIZE_TRANSFER if for_transfer_learning else IMAGE_SIZE

        if face.shape[:2] != target_size:
            face = cv2.resize(face, target_size)

        # Scale pixel values to [0, 1].
        face = face.astype(np.float32) / 255.0

        if len(face.shape) == 2:
            if for_transfer_learning:
                # Pretrained backbones expect 3 channels: replicate gray.
                face = np.stack([face, face, face], axis=-1)
            else:
                face = np.expand_dims(face, axis=-1)

        # Leading batch dimension of 1.
        face = np.expand_dims(face, axis=0)

        return face

    def draw_detections(
        self,
        image: np.ndarray,
        faces: List[dict],
        emotions: Optional[List[str]] = None,
        confidences: Optional[List[float]] = None
    ) -> np.ndarray:
        """
        Draw face detections and emotion labels on a copy of an image.

        Args:
            image: Input image (left unmodified).
            faces: List of face detection results (as from detect_faces).
            emotions: Optional emotion label per face, aligned by index.
            confidences: Optional confidence score per face.

        Returns:
            A copy of the image with boxes (and labels, if given) drawn.
        """
        result = image.copy()

        for i, face_info in enumerate(faces):
            x, y, w, h = face_info['box']

            # Green bounding box around the face.
            cv2.rectangle(result, (x, y), (x + w, y + h), (0, 255, 0), 2)

            # Emotion label (with optional score) above the box.
            if emotions and i < len(emotions):
                label = emotions[i]
                if confidences and i < len(confidences):
                    label = f"{label}: {confidences[i]:.2f}"

                # Filled background sized to the rendered text.
                (label_w, label_h), _ = cv2.getTextSize(
                    label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2
                )
                cv2.rectangle(
                    result, (x, y - label_h - 10), (x + label_w, y), (0, 255, 0), -1
                )

                # Black text on the green background for contrast.
                cv2.putText(
                    result, label, (x, y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2
                )

        return result


def load_image(image_path: str) -> np.ndarray:
    """
    Load an image from disk with OpenCV.

    Args:
        image_path: Path to the image file.

    Returns:
        The image as a numpy array in BGR channel order.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    loaded = cv2.imread(str(image_path))
    if loaded is None:
        raise ValueError(f"Could not load image: {image_path}")
    return loaded


def load_image_pil(image_path: str) -> Image.Image:
    """
    Load an image from disk using PIL.

    Args:
        image_path: Path to the image file.

    Returns:
        The opened PIL Image object.
    """
    pil_image = Image.open(image_path)
    return pil_image


if __name__ == "__main__":
    # Smoke test: construct a detector and report which backend is active.
    # NOTE: removed a redundant `import sys` here — sys is already imported
    # at module level and was unused in this block anyway.
    detector = FaceDetector()
    print(f"MTCNN available: {MTCNN_AVAILABLE}")
    print(f"Using: {'MTCNN' if detector.detector else 'Haar Cascade'}")