# Upload residue (originally: joyjonesmark — "Initial deploy with models", commit e5abc2e)
"""
Face detection using MTCNN for the Emotion Recognition System.
"""
import cv2
import numpy as np
from typing import List, Tuple, Optional
from pathlib import Path
from PIL import Image
try:
from mtcnn import MTCNN
MTCNN_AVAILABLE = True
except ImportError:
MTCNN_AVAILABLE = False
print("Warning: MTCNN not installed. Install with: pip install mtcnn")
import sys
sys.path.append(str(Path(__file__).parent.parent.parent))
from src.config import IMAGE_SIZE, IMAGE_SIZE_TRANSFER
class FaceDetector:
    """
    Face detection and extraction for the Emotion Recognition System.

    Uses MTCNN when the package is importable; otherwise falls back to
    OpenCV's Haar cascade frontal-face detector.
    """

    def __init__(self, min_face_size: int = 20, confidence_threshold: float = 0.9):
        """
        Initialize the face detector.

        Args:
            min_face_size: Minimum face size (in pixels) to detect.
            confidence_threshold: Minimum MTCNN confidence for a detection
                to be kept (the Haar fallback has no confidence scores).
        """
        self.min_face_size = min_face_size
        self.confidence_threshold = confidence_threshold

        self.detector = None
        if MTCNN_AVAILABLE:
            try:
                # Newer MTCNN releases accept min_face_size directly.
                self.detector = MTCNN(min_face_size=min_face_size)
            except TypeError:
                try:
                    # Older releases take no constructor arguments.
                    self.detector = MTCNN()
                except Exception:
                    self.detector = None

        # Haar cascade fallback — always loaded so it is available even
        # when MTCNN construction failed above.
        cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        self.cascade = cv2.CascadeClassifier(cascade_path)

    def detect_faces(self, image: np.ndarray) -> List[dict]:
        """
        Detect faces in an image.

        Args:
            image: Input image. NOTE(review): a 3-channel input is assumed
                to be BGR (OpenCV convention) and is converted to RGB for
                MTCNN; an RGB input would get its channels swapped here.

        Returns:
            List of dicts with 'box' ([x, y, w, h]), 'confidence' and
            'keypoints' (empty dict on the Haar fallback path).
        """
        if len(image.shape) == 3 and image.shape[2] == 3:
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        else:
            rgb_image = image

        faces = []
        if self.detector is not None:
            # MTCNN path: filter detections by confidence.
            for detection in self.detector.detect_faces(rgb_image):
                if detection['confidence'] >= self.confidence_threshold:
                    faces.append({
                        'box': detection['box'],  # [x, y, width, height]
                        'confidence': detection['confidence'],
                        'keypoints': detection.get('keypoints', {}),
                    })
        else:
            # Haar cascade fallback operates on grayscale.
            gray = (cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)
                    if len(rgb_image.shape) == 3 else rgb_image)
            detected = self.cascade.detectMultiScale(
                gray,
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(self.min_face_size, self.min_face_size),
            )
            for (x, y, w, h) in detected:
                faces.append({
                    # Cast numpy ints to plain ints so boxes are uniform
                    # with the MTCNN path (and JSON-serializable).
                    'box': [int(x), int(y), int(w), int(h)],
                    'confidence': 1.0,  # Haar provides no confidence score
                    'keypoints': {},
                })
        return faces

    def extract_face(
        self,
        image: np.ndarray,
        box: List[int],
        target_size: Tuple[int, int] = IMAGE_SIZE,
        margin: float = 0.2,
        to_grayscale: bool = True
    ) -> np.ndarray:
        """
        Extract and preprocess a face region from an image.

        Args:
            image: Input image.
            box: Face bounding box [x, y, width, height].
            target_size: Target (width, height) for the extracted face.
            margin: Margin to add around the face, as a fraction of the
                face width/height.
            to_grayscale: Whether to convert the crop to grayscale.

        Returns:
            The cropped face resized to target_size.

        Raises:
            ValueError: If the (clamped) box has no overlap with the image.
        """
        x, y, w, h = box
        margin_x = int(w * margin)
        margin_y = int(h * margin)

        # Expand by the margin, then clamp to the image bounds.
        x1 = max(0, x - margin_x)
        y1 = max(0, y - margin_y)
        x2 = min(image.shape[1], x + w + margin_x)
        y2 = min(image.shape[0], y + h + margin_y)

        if x2 <= x1 or y2 <= y1:
            # A box entirely outside the image would make cv2.resize fail
            # with an opaque assertion; fail loudly and clearly instead.
            raise ValueError(f"Face box {box} lies outside the image bounds")

        face = image[y1:y2, x1:x2]

        if to_grayscale and len(face.shape) == 3:
            # NOTE(review): assumes a BGR crop, consistent with detect_faces.
            face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)

        # cv2.resize expects dsize as (width, height).
        face = cv2.resize(face, target_size)
        return face

    def detect_and_extract(
        self,
        image: np.ndarray,
        target_size: Tuple[int, int] = IMAGE_SIZE,
        to_grayscale: bool = True,
        return_all: bool = False
    ) -> Tuple[Optional[np.ndarray], List[dict]]:
        """
        Detect faces and extract them from an image.

        Args:
            image: Input image.
            target_size: Target (width, height) for extracted faces.
            to_grayscale: Whether to convert crops to grayscale.
            return_all: If True, return all faces; otherwise only the
                largest (by box area).

        Returns:
            (face, [face_info]) when return_all is False, or
            ([faces...], face_infos) when return_all is True.
            Returns (None, []) when no face is detected.
        """
        faces = self.detect_faces(image)
        if not faces:
            return None, []

        if return_all:
            extracted = [
                self.extract_face(
                    image, face_info['box'],
                    target_size=target_size,
                    to_grayscale=to_grayscale,
                )
                for face_info in faces
            ]
            return extracted, faces

        # Keep only the face with the largest bounding-box area.
        largest_face = max(faces, key=lambda f: f['box'][2] * f['box'][3])
        face = self.extract_face(
            image, largest_face['box'],
            target_size=target_size,
            to_grayscale=to_grayscale,
        )
        return face, [largest_face]

    def preprocess_for_model(
        self,
        face: np.ndarray,
        for_transfer_learning: bool = False
    ) -> np.ndarray:
        """
        Preprocess an extracted face for model prediction.

        Args:
            face: Extracted face image (2-D grayscale or 3-D color).
            for_transfer_learning: If True, resize to IMAGE_SIZE_TRANSFER
                and expand grayscale to 3 channels for RGB-input backbones.

        Returns:
            float32 array in [0, 1] with batch and channel dimensions,
            shape (1, H, W, C).
        """
        target_size = IMAGE_SIZE_TRANSFER if for_transfer_learning else IMAGE_SIZE

        # face.shape is (height, width) while target_size is (width, height);
        # compare in matching order so non-square targets behave correctly.
        if face.shape[:2] != (target_size[1], target_size[0]):
            face = cv2.resize(face, target_size)

        # Scale pixel values to [0, 1].
        face = face.astype(np.float32) / 255.0

        if len(face.shape) == 2:
            if for_transfer_learning:
                # Replicate the grayscale plane into 3 channels.
                face = np.stack([face, face, face], axis=-1)
            else:
                face = np.expand_dims(face, axis=-1)

        # Prepend the batch dimension.
        face = np.expand_dims(face, axis=0)
        return face

    def draw_detections(
        self,
        image: np.ndarray,
        faces: List[dict],
        emotions: Optional[List[str]] = None,
        confidences: Optional[List[float]] = None
    ) -> np.ndarray:
        """
        Draw face detections and emotion labels on a copy of the image.

        Args:
            image: Input image.
            faces: List of face detection results (from detect_faces).
            emotions: Optional emotion label per face.
            confidences: Optional confidence score per face.

        Returns:
            A new image with boxes (and labels, when provided) drawn.
        """
        result = image.copy()

        for i, face_info in enumerate(faces):
            x, y, w, h = face_info['box']
            cv2.rectangle(result, (x, y), (x + w, y + h), (0, 255, 0), 2)

            if emotions and i < len(emotions):
                label = emotions[i]
                if confidences and i < len(confidences):
                    label = f"{label}: {confidences[i]:.2f}"

                (label_w, label_h), _ = cv2.getTextSize(
                    label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2
                )
                # Clamp so the filled background never extends above row 0.
                top = max(0, y - label_h - 10)
                cv2.rectangle(
                    result, (x, top), (x + label_w, y), (0, 255, 0), -1
                )
                cv2.putText(
                    result, label, (x, y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2
                )

        return result
def load_image(image_path: str) -> np.ndarray:
    """
    Read an image file from disk with OpenCV.

    Args:
        image_path: Path to the image file.

    Returns:
        Image as numpy array (BGR format).

    Raises:
        ValueError: If the file cannot be decoded as an image.
    """
    loaded = cv2.imread(str(image_path))
    if loaded is not None:
        return loaded
    raise ValueError(f"Could not load image: {image_path}")
def load_image_pil(image_path: str) -> Image.Image:
    """
    Open an image file with PIL.

    Args:
        image_path: Path to the image file.

    Returns:
        The PIL Image object for the file.
    """
    pil_image = Image.open(image_path)
    return pil_image
if __name__ == "__main__":
    # Smoke test: report which detection backend will be used.
    # (The redundant `import sys` was removed — sys is already imported
    # at module level and was unused here.)
    detector = FaceDetector()
    print(f"MTCNN available: {MTCNN_AVAILABLE}")
    print(f"Using: {'MTCNN' if detector.detector else 'Haar Cascade'}")