"""
Face detection using MTCNN for the Emotion Recognition System.
"""
import cv2
import numpy as np
from typing import List, Tuple, Optional
from pathlib import Path
from PIL import Image
try:
from mtcnn import MTCNN
MTCNN_AVAILABLE = True
except ImportError:
MTCNN_AVAILABLE = False
print("Warning: MTCNN not installed. Install with: pip install mtcnn")
import sys
sys.path.append(str(Path(__file__).parent.parent.parent))
from src.config import IMAGE_SIZE, IMAGE_SIZE_TRANSFER
class FaceDetector:
    """
    Face detection and extraction using MTCNN, with an OpenCV Haar-cascade
    fallback when MTCNN is not installed or fails to initialize.
    """

    def __init__(self, min_face_size: int = 20, confidence_threshold: float = 0.9):
        """
        Initialize the face detector.

        Args:
            min_face_size: Minimum face size (in pixels) to detect.
            confidence_threshold: Minimum confidence for MTCNN detections;
                detections below this are discarded.
        """
        self.min_face_size = min_face_size
        self.confidence_threshold = confidence_threshold
        if MTCNN_AVAILABLE:
            try:
                # Try newer MTCNN API that accepts min_face_size
                self.detector = MTCNN(min_face_size=min_face_size)
            except TypeError:
                try:
                    # Try older MTCNN API without parameters
                    self.detector = MTCNN()
                except Exception:
                    # MTCNN present but unusable; detect_faces() will use Haar.
                    self.detector = None
        else:
            self.detector = None
        # The Haar cascade is always loaded so detect_faces() can fall back
        # to it whenever self.detector is None.
        cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        self.cascade = cv2.CascadeClassifier(cascade_path)

    def detect_faces(self, image: np.ndarray) -> List[dict]:
        """
        Detect faces in an image.

        Args:
            image: Input image. Assumed BGR when 3-channel (OpenCV
                convention); converted to RGB before MTCNN. A 2-D array is
                passed through unchanged (only the Haar path handles that
                case; MTCNN expects 3 channels — TODO confirm caller inputs).

        Returns:
            List of dictionaries with 'box' ([x, y, width, height]),
            'confidence', and 'keypoints'.
        """
        # Convert BGR to RGB if needed
        if len(image.shape) == 3 and image.shape[2] == 3:
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        else:
            rgb_image = image
        faces = []
        if self.detector is not None:
            # Use MTCNN; keep only detections above the confidence threshold.
            detections = self.detector.detect_faces(rgb_image)
            for detection in detections:
                if detection['confidence'] >= self.confidence_threshold:
                    faces.append({
                        'box': detection['box'],  # [x, y, width, height]
                        'confidence': detection['confidence'],
                        'keypoints': detection.get('keypoints', {})
                    })
        else:
            # Fallback to Haar Cascade (operates on grayscale).
            gray = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY) if len(rgb_image.shape) == 3 else rgb_image
            detected = self.cascade.detectMultiScale(
                gray,
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(self.min_face_size, self.min_face_size)
            )
            for (x, y, w, h) in detected:
                faces.append({
                    'box': [x, y, w, h],
                    'confidence': 1.0,  # Haar doesn't provide confidence
                    'keypoints': {}
                })
        return faces

    def extract_face(
        self,
        image: np.ndarray,
        box: List[int],
        target_size: Tuple[int, int] = IMAGE_SIZE,
        margin: float = 0.2,
        to_grayscale: bool = True
    ) -> np.ndarray:
        """
        Extract and preprocess a face region from an image.

        Args:
            image: Input image (BGR assumed when converting to grayscale).
            box: Face bounding box [x, y, width, height].
            target_size: Target (width, height) for the extracted face,
                as consumed by cv2.resize.
            margin: Margin added around the face, as a fraction of the
                face's width/height; clamped to the image borders.
            to_grayscale: Whether to convert the crop to grayscale.

        Returns:
            The cropped, optionally grayscaled, resized face image.
        """
        x, y, w, h = box
        # Margin in pixels on each axis.
        margin_x = int(w * margin)
        margin_y = int(h * margin)
        # Expand the box by the margin, clamped to image bounds (MTCNN can
        # report boxes with negative origins).
        x1 = max(0, x - margin_x)
        y1 = max(0, y - margin_y)
        x2 = min(image.shape[1], x + w + margin_x)
        y2 = min(image.shape[0], y + h + margin_y)
        # Extract face region
        face = image[y1:y2, x1:x2]
        # Convert to grayscale if needed
        if to_grayscale and len(face.shape) == 3:
            face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
        # Resize to target size (cv2.resize takes (width, height))
        face = cv2.resize(face, target_size)
        return face

    def detect_and_extract(
        self,
        image: np.ndarray,
        target_size: Tuple[int, int] = IMAGE_SIZE,
        to_grayscale: bool = True,
        return_all: bool = False
    ) -> Tuple[Optional[np.ndarray], List[dict]]:
        """
        Detect faces and extract them from an image.

        Args:
            image: Input image.
            target_size: Target size for extracted faces.
            to_grayscale: Whether to convert to grayscale.
            return_all: If True, return all faces; else return only the
                largest (by box area).

        Returns:
            Tuple of (extracted_face(s), face_info). When return_all is
            True the first element is a list of arrays; otherwise it is a
            single array, or None when no face is found.
        """
        faces = self.detect_faces(image)
        if not faces:
            return None, []
        if return_all:
            extracted = []
            for face_info in faces:
                face = self.extract_face(
                    image, face_info['box'],
                    target_size=target_size,
                    to_grayscale=to_grayscale
                )
                extracted.append(face)
            return extracted, faces
        else:
            # Return the largest face by bounding-box area (width * height).
            largest_face = max(faces, key=lambda f: f['box'][2] * f['box'][3])
            face = self.extract_face(
                image, largest_face['box'],
                target_size=target_size,
                to_grayscale=to_grayscale
            )
            return face, [largest_face]

    def preprocess_for_model(
        self,
        face: np.ndarray,
        for_transfer_learning: bool = False
    ) -> np.ndarray:
        """
        Preprocess an extracted face for model prediction.

        Args:
            face: Extracted face image (uint8, 2-D grayscale or 3-D color).
            for_transfer_learning: If True, resize to IMAGE_SIZE_TRANSFER
                and expand grayscale to 3 channels for RGB-input backbones.

        Returns:
            Float32 array in [0, 1] with shape (1, H, W, C), ready for
            model input.
        """
        target_size = IMAGE_SIZE_TRANSFER if for_transfer_learning else IMAGE_SIZE
        # ndarray.shape is (height, width) but target_size is (width, height)
        # as used by cv2.resize, so compare against the transposed tuple.
        # (For the usual square sizes the two comparisons are equivalent.)
        if face.shape[:2] != (target_size[1], target_size[0]):
            face = cv2.resize(face, target_size)
        # Normalize pixel values to [0, 1]
        face = face.astype(np.float32) / 255.0
        # Add channel dimension if grayscale
        if len(face.shape) == 2:
            if for_transfer_learning:
                # Convert to RGB by repeating the grayscale channel 3x
                face = np.stack([face, face, face], axis=-1)
            else:
                face = np.expand_dims(face, axis=-1)
        # Add batch dimension
        face = np.expand_dims(face, axis=0)
        return face

    def draw_detections(
        self,
        image: np.ndarray,
        faces: List[dict],
        emotions: Optional[List[str]] = None,
        confidences: Optional[List[float]] = None
    ) -> np.ndarray:
        """
        Draw face detections and emotion labels on a copy of an image.

        Args:
            image: Input image.
            faces: List of face detection results (as from detect_faces).
            emotions: Optional list of emotion labels, index-aligned with
                faces.
            confidences: Optional list of confidence scores, index-aligned
                with faces.

        Returns:
            A copy of the image with boxes and labels drawn; the input is
            not modified.
        """
        result = image.copy()
        for i, face_info in enumerate(faces):
            x, y, w, h = face_info['box']
            # Draw bounding rectangle
            cv2.rectangle(result, (x, y), (x + w, y + h), (0, 255, 0), 2)
            # Draw emotion label if provided
            if emotions and i < len(emotions):
                label = emotions[i]
                if confidences and i < len(confidences):
                    label = f"{label}: {confidences[i]:.2f}"
                # Draw filled label background above the box; clamp to the
                # top edge so labels stay visible for faces near y == 0.
                (label_w, label_h), _ = cv2.getTextSize(
                    label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2
                )
                label_top = max(0, y - label_h - 10)
                cv2.rectangle(
                    result, (x, label_top), (x + label_w, y), (0, 255, 0), -1
                )
                # Draw label text in black over the green background
                cv2.putText(
                    result, label, (x, max(label_h, y - 5)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2
                )
        return result
def load_image(image_path: str) -> np.ndarray:
    """
    Read an image file from disk in OpenCV's BGR format.

    Args:
        image_path: Path to the image file.

    Returns:
        The image as a numpy array (BGR channel order).

    Raises:
        ValueError: If the file does not exist or cannot be decoded.
    """
    loaded = cv2.imread(str(image_path))
    # cv2.imread signals failure by returning None rather than raising.
    if loaded is None:
        raise ValueError(f"Could not load image: {image_path}")
    return loaded
def load_image_pil(image_path: str) -> Image.Image:
    """
    Open an image file with PIL instead of OpenCV.

    Args:
        image_path: Path to the image file.

    Returns:
        A PIL Image object (lazily loaded by PIL).
    """
    pil_image = Image.open(image_path)
    return pil_image
if __name__ == "__main__":
# Test face detection
import sys
detector = FaceDetector()
print(f"MTCNN available: {MTCNN_AVAILABLE}")
print(f"Using: {'MTCNN' if detector.detector else 'Haar Cascade'}")