File size: 9,797 Bytes
e5abc2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
"""
Face detection using MTCNN for the Emotion Recognition System.
"""
import cv2
import numpy as np
from typing import List, Tuple, Optional
from pathlib import Path
from PIL import Image

try:
    from mtcnn import MTCNN
    MTCNN_AVAILABLE = True
except ImportError:
    MTCNN_AVAILABLE = False
    print("Warning: MTCNN not installed. Install with: pip install mtcnn")

import sys
sys.path.append(str(Path(__file__).parent.parent.parent))
from src.config import IMAGE_SIZE, IMAGE_SIZE_TRANSFER


class FaceDetector:
    """
    Face detection and extraction using MTCNN, with an OpenCV Haar Cascade
    fallback when MTCNN is not installed or fails to initialize.
    """

    def __init__(self, min_face_size: int = 20, confidence_threshold: float = 0.9):
        """
        Initialize the face detector.

        Args:
            min_face_size: Minimum face size (pixels) to detect.
            confidence_threshold: Minimum confidence for MTCNN detections;
                Haar Cascade results are not filtered (it reports no score).
        """
        self.min_face_size = min_face_size
        self.confidence_threshold = confidence_threshold
        self.detector = None

        if MTCNN_AVAILABLE:
            try:
                # Newer MTCNN releases accept min_face_size as a kwarg.
                self.detector = MTCNN(min_face_size=min_face_size)
            except TypeError:
                try:
                    # Older MTCNN API takes no constructor parameters.
                    self.detector = MTCNN()
                except Exception:
                    self.detector = None

        if self.detector is None:
            # BUGFIX: the Haar cascade must be set up in EVERY path where
            # self.detector is None. Previously it was only created when
            # MTCNN was not importable, so if MTCNN was installed but both
            # constructor attempts failed, detect_faces() raised
            # AttributeError on self.cascade.
            cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
            self.cascade = cv2.CascadeClassifier(cascade_path)

    def detect_faces(self, image: np.ndarray) -> List[dict]:
        """
        Detect faces in an image.

        Args:
            image: Input image; assumed BGR when 3-channel (OpenCV
                convention) — it is converted to RGB before MTCNN.

        Returns:
            List of dicts with 'box' ([x, y, w, h]), 'confidence'
            (1.0 for Haar, which provides no score), and 'keypoints'
            (empty dict when the backend supplies none).
        """
        # MTCNN expects RGB; convert from OpenCV's BGR if 3-channel.
        if len(image.shape) == 3 and image.shape[2] == 3:
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        else:
            rgb_image = image

        faces = []

        if self.detector is not None:
            # MTCNN backend: keep only detections above the threshold.
            detections = self.detector.detect_faces(rgb_image)
            for detection in detections:
                if detection['confidence'] >= self.confidence_threshold:
                    faces.append({
                        'box': detection['box'],  # [x, y, width, height]
                        'confidence': detection['confidence'],
                        'keypoints': detection.get('keypoints', {})
                    })
        else:
            # Haar Cascade fallback operates on grayscale.
            gray = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY) if len(rgb_image.shape) == 3 else rgb_image
            detected = self.cascade.detectMultiScale(
                gray,
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(self.min_face_size, self.min_face_size)
            )
            for (x, y, w, h) in detected:
                faces.append({
                    'box': [x, y, w, h],
                    'confidence': 1.0,  # Haar doesn't provide confidence
                    'keypoints': {}
                })

        return faces

    def extract_face(
        self,
        image: np.ndarray,
        box: List[int],
        target_size: Tuple[int, int] = IMAGE_SIZE,
        margin: float = 0.2,
        to_grayscale: bool = True
    ) -> np.ndarray:
        """
        Extract and preprocess a face region from an image.

        Args:
            image: Input image.
            box: Face bounding box [x, y, width, height].
            target_size: Target (width, height) for the extracted face.
            margin: Margin around the face as a fraction of the face size.
            to_grayscale: Whether to convert the crop to grayscale
                (assumes BGR input when 3-channel).

        Returns:
            The cropped, optionally grayscaled, resized face image.

        Raises:
            ValueError: If the (margin-expanded, image-clamped) box yields
                an empty crop — e.g. a box entirely outside the image.
        """
        x, y, w, h = box

        # Expand the box by the requested margin on each side.
        margin_x = int(w * margin)
        margin_y = int(h * margin)

        # Clamp the expanded box to the image bounds.
        x1 = max(0, x - margin_x)
        y1 = max(0, y - margin_y)
        x2 = min(image.shape[1], x + w + margin_x)
        y2 = min(image.shape[0], y + h + margin_y)

        face = image[y1:y2, x1:x2]

        # Guard: cv2.resize raises an opaque cv2.error on empty input;
        # fail early with a clear message instead.
        if face.size == 0:
            raise ValueError(f"Empty face region for box {box}")

        if to_grayscale and len(face.shape) == 3:
            face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)

        face = cv2.resize(face, target_size)

        return face

    def detect_and_extract(
        self,
        image: np.ndarray,
        target_size: Tuple[int, int] = IMAGE_SIZE,
        to_grayscale: bool = True,
        return_all: bool = False
    ) -> Tuple[Optional[np.ndarray], List[dict]]:
        """
        Detect faces and extract them from an image.

        Args:
            image: Input image.
            target_size: Target size for extracted faces.
            to_grayscale: Whether to convert extracted faces to grayscale.
            return_all: If True, return all faces; else only the largest
                (by bounding-box area).

        Returns:
            (extracted_face(s), face_info) — (None, []) when no face is
            found; (list_of_faces, all_info) when return_all; otherwise
            (single_face, [largest_info]).
        """
        faces = self.detect_faces(image)

        if not faces:
            return None, []

        if return_all:
            extracted = []
            for face_info in faces:
                face = self.extract_face(
                    image, face_info['box'],
                    target_size=target_size,
                    to_grayscale=to_grayscale
                )
                extracted.append(face)
            return extracted, faces

        # Single-face mode: pick the largest detection by box area (w * h).
        largest_face = max(faces, key=lambda f: f['box'][2] * f['box'][3])
        face = self.extract_face(
            image, largest_face['box'],
            target_size=target_size,
            to_grayscale=to_grayscale
        )
        return face, [largest_face]

    def preprocess_for_model(
        self,
        face: np.ndarray,
        for_transfer_learning: bool = False
    ) -> np.ndarray:
        """
        Preprocess an extracted face for model prediction.

        Args:
            face: Extracted face image (grayscale or color, uint8 expected).
            for_transfer_learning: If True, resize to the transfer-learning
                input size and expand grayscale to 3 channels.

        Returns:
            Float32 array in [0, 1] with batch and channel dimensions,
            ready for model input.
        """
        target_size = IMAGE_SIZE_TRANSFER if for_transfer_learning else IMAGE_SIZE

        if face.shape[:2] != target_size:
            face = cv2.resize(face, target_size)

        # Scale pixel values to [0, 1].
        face = face.astype(np.float32) / 255.0

        if len(face.shape) == 2:
            if for_transfer_learning:
                # Pretrained backbones expect 3 channels: replicate gray.
                face = np.stack([face, face, face], axis=-1)
            else:
                face = np.expand_dims(face, axis=-1)

        # Leading batch dimension of 1.
        face = np.expand_dims(face, axis=0)

        return face

    def draw_detections(
        self,
        image: np.ndarray,
        faces: List[dict],
        emotions: Optional[List[str]] = None,
        confidences: Optional[List[float]] = None
    ) -> np.ndarray:
        """
        Draw face detections and emotion labels on a copy of an image.

        Args:
            image: Input image (left unmodified).
            faces: List of face detection results (as from detect_faces).
            emotions: Optional emotion label per face, aligned by index.
            confidences: Optional confidence score per face.

        Returns:
            A copy of the image with boxes (and labels, if given) drawn.
        """
        result = image.copy()

        for i, face_info in enumerate(faces):
            x, y, w, h = face_info['box']

            # Green bounding box around the face.
            cv2.rectangle(result, (x, y), (x + w, y + h), (0, 255, 0), 2)

            # Emotion label (with optional score) above the box.
            if emotions and i < len(emotions):
                label = emotions[i]
                if confidences and i < len(confidences):
                    label = f"{label}: {confidences[i]:.2f}"

                # Filled background sized to the rendered text.
                (label_w, label_h), _ = cv2.getTextSize(
                    label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2
                )
                cv2.rectangle(
                    result, (x, y - label_h - 10), (x + label_w, y), (0, 255, 0), -1
                )

                # Black text on the green background for contrast.
                cv2.putText(
                    result, label, (x, y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2
                )

        return result


def load_image(image_path: str) -> np.ndarray:
    """
    Load an image from disk with OpenCV.

    Args:
        image_path: Path to the image file.

    Returns:
        The image as a numpy array in BGR channel order.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    loaded = cv2.imread(str(image_path))
    if loaded is None:
        raise ValueError(f"Could not load image: {image_path}")
    return loaded


def load_image_pil(image_path: str) -> Image.Image:
    """
    Load an image from disk using PIL.

    Args:
        image_path: Path to the image file.

    Returns:
        The opened PIL Image object.
    """
    pil_image = Image.open(image_path)
    return pil_image


if __name__ == "__main__":
    # Smoke test: construct a detector and report which backend is active.
    # NOTE: removed a redundant `import sys` here — sys is already imported
    # at module level and was unused in this block anyway.
    detector = FaceDetector()
    print(f"MTCNN available: {MTCNN_AVAILABLE}")
    print(f"Using: {'MTCNN' if detector.detector else 'Haar Cascade'}")