Spaces:
Sleeping
Sleeping
| """ | |
| Face detection using MTCNN for the Emotion Recognition System. | |
| """ | |
| import cv2 | |
| import numpy as np | |
| from typing import List, Tuple, Optional | |
| from pathlib import Path | |
| from PIL import Image | |
| try: | |
| from mtcnn import MTCNN | |
| MTCNN_AVAILABLE = True | |
| except ImportError: | |
| MTCNN_AVAILABLE = False | |
| print("Warning: MTCNN not installed. Install with: pip install mtcnn") | |
| import sys | |
| sys.path.append(str(Path(__file__).parent.parent.parent)) | |
| from src.config import IMAGE_SIZE, IMAGE_SIZE_TRANSFER | |
class FaceDetector:
    """
    Face detection and extraction using MTCNN, with an OpenCV Haar Cascade
    fallback when MTCNN is unavailable or fails to initialize.
    """

    def __init__(self, min_face_size: int = 20, confidence_threshold: float = 0.9):
        """
        Initialize the face detector.

        Args:
            min_face_size: Minimum face size (in pixels) to detect.
            confidence_threshold: Minimum confidence for MTCNN detections.
        """
        self.min_face_size = min_face_size
        self.confidence_threshold = confidence_threshold
        self.detector = None
        if MTCNN_AVAILABLE:
            try:
                # Newer MTCNN releases accept constructor parameters.
                self.detector = MTCNN(min_face_size=min_face_size)
            except TypeError:
                try:
                    # Older MTCNN API takes no parameters.
                    self.detector = MTCNN()
                except Exception:
                    self.detector = None
        # Always load the Haar Cascade so a fallback exists even when the
        # MTCNN import succeeded but its constructor failed above (in that
        # case self.detector is None and detect_faces() uses self.cascade).
        cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        self.cascade = cv2.CascadeClassifier(cascade_path)

    def detect_faces(self, image: np.ndarray) -> List[dict]:
        """
        Detect faces in an image.

        Args:
            image: Input image; assumed BGR when 3-channel (OpenCV
                convention) and converted to RGB for MTCNN.

        Returns:
            List of dicts with 'box' ([x, y, width, height]), 'confidence',
            and 'keypoints' (empty dict for the Haar fallback).
        """
        # MTCNN expects RGB; OpenCV loads BGR, so convert 3-channel input.
        if len(image.shape) == 3 and image.shape[2] == 3:
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        else:
            rgb_image = image
        faces = []
        if self.detector is not None:
            # MTCNN path: keep only detections above the confidence threshold.
            for detection in self.detector.detect_faces(rgb_image):
                if detection['confidence'] >= self.confidence_threshold:
                    faces.append({
                        'box': detection['box'],  # [x, y, width, height]
                        'confidence': detection['confidence'],
                        'keypoints': detection.get('keypoints', {})
                    })
        else:
            # Haar Cascade fallback operates on grayscale images.
            gray = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY) if len(rgb_image.shape) == 3 else rgb_image
            detected = self.cascade.detectMultiScale(
                gray,
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(self.min_face_size, self.min_face_size)
            )
            for (x, y, w, h) in detected:
                faces.append({
                    'box': [x, y, w, h],
                    'confidence': 1.0,  # Haar doesn't provide confidence
                    'keypoints': {}
                })
        return faces

    def extract_face(
        self,
        image: np.ndarray,
        box: List[int],
        target_size: Tuple[int, int] = IMAGE_SIZE,
        margin: float = 0.2,
        to_grayscale: bool = True
    ) -> np.ndarray:
        """
        Extract and preprocess a face region from an image.

        Args:
            image: Input image.
            box: Face bounding box [x, y, width, height]; coordinates may be
                negative (MTCNN can return them) and are clamped to the image.
            target_size: Target (width, height) for the extracted face.
            margin: Margin to add around the face (fraction of face size).
            to_grayscale: Whether to convert to grayscale.

        Returns:
            Preprocessed face image resized to target_size.

        Raises:
            ValueError: If the clamped bounding box is empty.
        """
        x, y, w, h = box
        # Expand the box by the requested margin.
        margin_x = int(w * margin)
        margin_y = int(h * margin)
        # Clamp the expanded box to the image bounds.
        x1 = max(0, x - margin_x)
        y1 = max(0, y - margin_y)
        x2 = min(image.shape[1], x + w + margin_x)
        y2 = min(image.shape[0], y + h + margin_y)
        # Guard: cv2.resize fails with an opaque error on an empty crop.
        if x2 <= x1 or y2 <= y1:
            raise ValueError(f"Empty face region for box {box}")
        face = image[y1:y2, x1:x2]
        if to_grayscale and len(face.shape) == 3:
            face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
        # cv2.resize takes dsize as (width, height).
        face = cv2.resize(face, target_size)
        return face

    def detect_and_extract(
        self,
        image: np.ndarray,
        target_size: Tuple[int, int] = IMAGE_SIZE,
        to_grayscale: bool = True,
        return_all: bool = False
    ) -> Tuple[Optional[np.ndarray], List[dict]]:
        """
        Detect faces and extract them from an image.

        Args:
            image: Input image.
            target_size: Target size for extracted faces.
            to_grayscale: Whether to convert to grayscale.
            return_all: If True, return all faces; else only the largest.

        Returns:
            Tuple of (extracted_face(s), face_info). (None, []) when no face
            is detected.
        """
        faces = self.detect_faces(image)
        if not faces:
            return None, []
        if return_all:
            extracted = []
            for face_info in faces:
                face = self.extract_face(
                    image, face_info['box'],
                    target_size=target_size,
                    to_grayscale=to_grayscale
                )
                extracted.append(face)
            return extracted, faces
        else:
            # Pick the face with the largest bounding-box area.
            largest_face = max(faces, key=lambda f: f['box'][2] * f['box'][3])
            face = self.extract_face(
                image, largest_face['box'],
                target_size=target_size,
                to_grayscale=to_grayscale
            )
            return face, [largest_face]

    def preprocess_for_model(
        self,
        face: np.ndarray,
        for_transfer_learning: bool = False
    ) -> np.ndarray:
        """
        Preprocess an extracted face for model prediction.

        Args:
            face: Extracted face image (uint8 or float).
            for_transfer_learning: If True, prepare for transfer-learning
                models (larger input size, 3 channels).

        Returns:
            Float32 array in [0, 1] with batch and channel dimensions added.
        """
        target_size = IMAGE_SIZE_TRANSFER if for_transfer_learning else IMAGE_SIZE
        # face.shape[:2] is (height, width) while target_size is
        # (width, height) — compare the reversed tuple so non-square
        # targets don't trigger a spurious (or skip a needed) resize.
        if face.shape[:2] != (target_size[1], target_size[0]):
            face = cv2.resize(face, target_size)
        # Normalize pixel values to [0, 1].
        face = face.astype(np.float32) / 255.0
        # Add channel dimension if grayscale.
        if len(face.shape) == 2:
            if for_transfer_learning:
                # Transfer models expect 3 channels: replicate grayscale.
                face = np.stack([face, face, face], axis=-1)
            else:
                face = np.expand_dims(face, axis=-1)
        # Add batch dimension.
        face = np.expand_dims(face, axis=0)
        return face

    def draw_detections(
        self,
        image: np.ndarray,
        faces: List[dict],
        emotions: Optional[List[str]] = None,
        confidences: Optional[List[float]] = None
    ) -> np.ndarray:
        """
        Draw face detections and emotion labels on a copy of the image.

        Args:
            image: Input image.
            faces: List of face detection results (dicts with 'box').
            emotions: Optional list of emotion labels, one per face.
            confidences: Optional list of confidence scores, one per face.

        Returns:
            New image with boxes (and labels, if provided) drawn.
        """
        result = image.copy()
        for i, face_info in enumerate(faces):
            x, y, w, h = face_info['box']
            # Face bounding box.
            cv2.rectangle(result, (x, y), (x + w, y + h), (0, 255, 0), 2)
            if emotions and i < len(emotions):
                label = emotions[i]
                if confidences and i < len(confidences):
                    label = f"{label}: {confidences[i]:.2f}"
                (label_w, label_h), _ = cv2.getTextSize(
                    label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2
                )
                # Clamp so the label stays inside the frame when the face
                # touches the top edge of the image.
                y_base = max(y, label_h + 10)
                # Filled background for readability.
                cv2.rectangle(
                    result, (x, y_base - label_h - 10), (x + label_w, y_base), (0, 255, 0), -1
                )
                cv2.putText(
                    result, label, (x, y_base - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2
                )
        return result
def load_image(image_path: str) -> np.ndarray:
    """
    Read an image from disk via OpenCV.

    Args:
        image_path: Path to the image file.

    Returns:
        The image as a numpy array in BGR channel order.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    loaded = cv2.imread(str(image_path))
    if loaded is None:
        raise ValueError(f"Could not load image: {image_path}")
    return loaded
def load_image_pil(image_path: str) -> Image.Image:
    """
    Open an image file with PIL.

    Args:
        image_path: Path to the image file.

    Returns:
        The PIL Image object.
    """
    pil_image = Image.open(image_path)
    return pil_image
| if __name__ == "__main__": | |
| # Test face detection | |
| import sys | |
| detector = FaceDetector() | |
| print(f"MTCNN available: {MTCNN_AVAILABLE}") | |
| print(f"Using: {'MTCNN' if detector.detector else 'Haar Cascade'}") | |