chariscait committed on
Commit
a73c7d6
·
verified ·
1 Parent(s): f7916f6

Upload face_detector.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. face_detector.py +240 -0
face_detector.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Face Emotion Detector — Real inference using EfficientNet or MobileNet.
2
+
3
+ Supports multiple backends:
4
+ 1. transformers (HuggingFace) — most accurate, GPU recommended
5
+ 2. ONNX Runtime — fastest CPU inference
6
+ 3. MediaPipe + OpenCV — lightweight fallback
7
+
8
+ No anger classification: FER 'angry' maps to 'disgust' in EmoSphere.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import time
14
+ import io
15
+ from pathlib import Path
16
+ from typing import Optional
17
+
18
+ import numpy as np
19
+
20
+ try:
21
+ import cv2
22
+ HAS_CV2 = True
23
+ except ImportError:
24
+ HAS_CV2 = False
25
+
26
+ try:
27
+ from PIL import Image
28
+ HAS_PIL = True
29
+ except ImportError:
30
+ HAS_PIL = False
31
+
32
+ try:
33
+ from transformers import pipeline
34
+ HAS_TRANSFORMERS = True
35
+ except ImportError:
36
+ HAS_TRANSFORMERS = False
37
+
38
+ try:
39
+ import mediapipe as mp
40
+ HAS_MEDIAPIPE = True
41
+ except ImportError:
42
+ HAS_MEDIAPIPE = False
43
+
44
+ from models import (
45
+ EmotionLabel, EMOTION_LABELS, EmotionScore,
46
+ EmotionDetectionResult, CulturalRegion, CULTURAL_ADJUSTMENT,
47
+ )
48
+
49
+
50
# Translation table: FER-style classifier label -> EmoSphere label.
# EmoSphere does NOT do anger detection, so 'angry' is folded into DISGUST.
FER_TO_EMOSPHERE = dict(
    angry=EmotionLabel.DISGUST,
    disgust=EmotionLabel.DISGUST,
    fear=EmotionLabel.FEAR,
    happy=EmotionLabel.JOY,
    sad=EmotionLabel.SADNESS,
    surprise=EmotionLabel.SURPRISE,
    neutral=EmotionLabel.NEUTRAL,
)

# Candidate HuggingFace checkpoints (tested, public, no auth needed).
FACE_MODELS = [
    "trpakov/vit-face-expression",  # ViT, good accuracy
    "dima806/facial_emotions_image_detection",  # EfficientNet based
]
67
+
68
+
69
class FaceEmotionDetector:
    """Face emotion detection backed by a HuggingFace image-classification pipeline.

    Typical use: construct, call :meth:`load` once, then call :meth:`detect`
    per image. If no classifier could be loaded (or Pillow is unavailable),
    :meth:`detect` falls back to randomly simulated scores.
    """

    def __init__(self, model_name: str | None = None, device: str = "cpu"):
        """Store configuration; nothing heavy is loaded until :meth:`load`.

        Args:
            model_name: HuggingFace model id. Defaults to ``FACE_MODELS[0]``.
            device: Device argument forwarded to ``transformers.pipeline``.
        """
        self.model_name = model_name or FACE_MODELS[0]
        self.device = device
        self.pipe = None
        self.face_cascade = None
        self.loaded = False

    def _build_pipeline(self, model_name: str):
        """Create an image-classification pipeline that returns every class."""
        return pipeline(
            "image-classification",
            model=model_name,
            device=self.device,
            top_k=None,  # Return all classes
        )

    def load(self) -> None:
        """Load the face cropper and the emotion classification pipeline.

        Idempotent: a second call returns immediately. Failures to load a
        model are reported on stdout and leave ``self.pipe`` as ``None``,
        which puts :meth:`detect` into simulation mode.
        """
        if self.loaded:
            return

        # Load face detector (OpenCV cascade for face cropping)
        if HAS_CV2:
            cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
            cascade = cv2.CascadeClassifier(cascade_path)
            # A missing or unreadable cascade file produces an *empty*
            # classifier that raises on detectMultiScale; treat that as
            # "no face detector" so detection falls back to the full image.
            self.face_cascade = None if cascade.empty() else cascade

        # Load emotion classifier
        if HAS_TRANSFORMERS:
            try:
                self.pipe = self._build_pipeline(self.model_name)
                print(f"[FaceDetector] Loaded model: {self.model_name}")
            except Exception as e:
                # Best-effort boundary: any load failure triggers the fallback.
                print(f"[FaceDetector] Failed to load {self.model_name}: {e}")
                # Try fallback model
                try:
                    self.pipe = self._build_pipeline(FACE_MODELS[1])
                    self.model_name = FACE_MODELS[1]
                    print(f"[FaceDetector] Loaded fallback: {self.model_name}")
                except Exception as e2:
                    print(f"[FaceDetector] All models failed: {e2}")
                    print("[FaceDetector] Running in simulation mode")
        else:
            print("[FaceDetector] transformers not available, simulation mode")

        self.loaded = True

    def _decode_image(self, image_data: bytes) -> Optional[Image.Image]:
        """Decode encoded image bytes to an RGB PIL image.

        Returns:
            The decoded image, or ``None`` when Pillow is unavailable or the
            bytes cannot be decoded.
        """
        if not HAS_PIL:
            return None
        try:
            return Image.open(io.BytesIO(image_data)).convert("RGB")
        except Exception:
            # Undecodable input is reported to the caller as None.
            return None

    def _to_pil(self, image_data: bytes | np.ndarray) -> Optional[Image.Image]:
        """Normalize supported inputs (encoded bytes or ndarray) to an RGB PIL image.

        Returns ``None`` when the input cannot be turned into an image.
        """
        if isinstance(image_data, (bytes, bytearray)):
            return self._decode_image(bytes(image_data))

        array = np.asarray(image_data)
        if array.size == 0:
            # .max() on an empty array raises; treat it as undecodable.
            return None
        # Arrays whose maximum is <= 1.0 are scaled up to the 0-255 range.
        if array.max() <= 1.0:
            array = array * 255
        try:
            # Convert to RGB so grayscale/RGBA arrays behave like decoded
            # bytes in the downstream RGB->gray conversion.
            return Image.fromarray(array.astype(np.uint8)).convert("RGB")
        except (TypeError, ValueError):
            return None

    def _detect_face(self, image: Image.Image) -> Optional[Image.Image]:
        """Detect and crop the largest face. Returns the crop or the full image.

        The full image is returned unchanged when OpenCV or the cascade is
        unavailable, or when no face is found.
        """
        if not HAS_CV2 or self.face_cascade is None:
            return image

        rgb = image if image.mode == "RGB" else image.convert("RGB")
        img_array = np.array(rgb)
        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
        faces = self.face_cascade.detectMultiScale(
            gray, scaleFactor=1.1, minNeighbors=5, minSize=(48, 48)
        )

        # `faces` may be an empty tuple or an ndarray, so test its length
        # rather than its truthiness.
        if len(faces) == 0:
            return image  # No face found, use full image

        # Use largest face (by bounding-box area)
        x, y, w, h = (int(v) for v in max(faces, key=lambda f: f[2] * f[3]))
        # Add 20% padding, clamped to the image bounds
        pad = int(max(w, h) * 0.2)
        height, width = img_array.shape[:2]
        box = (
            max(0, x - pad),
            max(0, y - pad),
            min(width, x + w + pad),
            min(height, y + h + pad),
        )
        return image.crop(box)

    def _map_scores(
        self, predictions: list[dict], cultural_region: CulturalRegion
    ) -> dict[EmotionLabel, float]:
        """Map model predictions to normalized EmoSphere emotion scores.

        Args:
            predictions: Dicts with ``"label"`` and ``"score"`` keys.
            cultural_region: Key into ``CULTURAL_ADJUSTMENT``; a missing key
                means no adjustment.

        Returns:
            A score per entry of ``EMOTION_LABELS``, normalized to sum to 1
            whenever any score is positive.
        """
        scores: dict[EmotionLabel, float] = {label: 0.0 for label in EMOTION_LABELS}

        for pred in predictions:
            model_label = pred["label"].lower().strip()
            emo_label = FER_TO_EMOSPHERE.get(model_label)
            if emo_label is not None:
                # Accumulate: several model labels can map to one EmoSphere
                # label (angry + disgust both go to disgust), so their
                # probability mass is summed rather than discarded.
                scores[emo_label] += pred["score"]

        # Fill labels the model cannot emit (love, calm) from heuristics.
        # The neutral-based calm estimate overrides the joy-based one when
        # both thresholds are met.
        if scores[EmotionLabel.JOY] > 0.3:
            scores[EmotionLabel.LOVE] = scores[EmotionLabel.JOY] * 0.15
            scores[EmotionLabel.CALM] = scores[EmotionLabel.JOY] * 0.1
        if scores[EmotionLabel.NEUTRAL] > 0.4:
            scores[EmotionLabel.CALM] = scores[EmotionLabel.NEUTRAL] * 0.3

        # Cultural adjustment: score ** (1 / factor), capped at 1.0. For
        # scores in (0, 1) a factor above 1 raises them and a factor below 1
        # lowers them. Non-positive factors are skipped because the exponent
        # would be undefined.
        factor = CULTURAL_ADJUSTMENT.get(cultural_region, 1.0)
        if factor != 1.0 and factor > 0:
            exponent = 1.0 / factor
            for label in EMOTION_LABELS:
                scores[label] = min(scores[label] ** exponent, 1.0)

        # Normalize
        total = sum(scores.values())
        if total > 0:
            scores = {k: v / total for k, v in scores.items()}

        return scores

    def _simulate(self) -> dict[EmotionLabel, float]:
        """Fallback simulation: random Dirichlet scores over ``EMOTION_LABELS``."""
        raw = np.random.dirichlet(np.ones(len(EMOTION_LABELS)) * 0.5)
        return {label: float(raw[i]) for i, label in enumerate(EMOTION_LABELS)}

    def detect(
        self,
        image_data: bytes | np.ndarray,
        cultural_region: CulturalRegion = CulturalRegion.UNIVERSAL,
    ) -> EmotionDetectionResult:
        """Detect emotion from a face image.

        Args:
            image_data: Encoded image bytes, or an image array. Arrays whose
                maximum is <= 1.0 are scaled by 255 before conversion.
            cultural_region: Region used for the cultural score adjustment.

        Returns:
            An ``EmotionDetectionResult`` with per-label scores. Scores are
            simulated when no model is loaded, Pillow is missing, or the
            input cannot be decoded.
        """
        # perf_counter is monotonic, unlike wall-clock time.time().
        start = time.perf_counter()

        scores: dict[EmotionLabel, float] | None = None
        if self.pipe is not None and HAS_PIL:
            # Real inference
            image = self._to_pil(image_data)
            if image is not None:
                # Detect and crop face, then run the model on the crop
                face = self._detect_face(image)
                predictions = self.pipe(face)
                scores = self._map_scores(predictions, cultural_region)

        if scores is None:
            scores = self._simulate()

        # Build result
        emotion_scores = [
            EmotionScore(label=label, score=scores[label], confidence=scores[label] * 0.9)
            for label in EMOTION_LABELS
        ]
        dominant = max(scores, key=scores.get)  # type: ignore

        return EmotionDetectionResult(
            dominant=dominant,
            dominant_score=scores[dominant],
            scores=emotion_scores,
            modality="face",
            confidence=scores[dominant] * 0.85,
            processing_time_ms=(time.perf_counter() - start) * 1000,
            cultural_region=cultural_region,
        )