Spaces:

chariscait
/

EmoSphere

Sleeping

App Files Files Community

chariscait committed on Apr 13

Commit

a73c7d6

verified ·

1 Parent(s): f7916f6

Upload face_detector.py with huggingface_hub

Browse files

Files changed (1) hide show

face_detector.py +240 -0

face_detector.py ADDED Viewed

	@@ -0,0 +1,240 @@

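+"""Face Emotion Detector — real inference with a pretrained image classifier.
+Backend implemented in the code shown here:
+  1. transformers (HuggingFace) image-classification pipeline (see FACE_MODELS),
+     with an optional OpenCV Haar-cascade step that crops the face first.
+If transformers is missing or no model loads, scores are randomly simulated.
+The ONNX Runtime and MediaPipe backends (and EfficientNet/MobileNet models)
+mentioned in earlier descriptions are not implemented in the code shown here;
+mediapipe is imported but unused.
+No anger classification: FER 'angry' maps to 'disgust' in EmoSphere.
+"""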
+from __future__ import annotations
+import time
+import io
+from pathlib import Path
+from typing import Optional
+import numpy as np
+try:
+    import cv2
+    HAS_CV2 = True
+except ImportError:
+    HAS_CV2 = False
+try:
+    from PIL import Image
+    HAS_PIL = True
+except ImportError:
+    HAS_PIL = False
+try:
+    from transformers import pipeline
+    HAS_TRANSFORMERS = True
+except ImportError:
+    HAS_TRANSFORMERS = False
+try:
+    import mediapipe as mp
+    HAS_MEDIAPIPE = True
+except ImportError:
+    HAS_MEDIAPIPE = False
+from models import (
+    EmotionLabel, EMOTION_LABELS, EmotionScore,
+    EmotionDetectionResult, CulturalRegion, CULTURAL_ADJUSTMENT,
+)
+# FER model label → EmoSphere label mapping
+# Note: 'angry' → 'disgust' (EmoSphere does NOT do anger detection)
+FER_TO_EMOSPHERE = {
+    "angry":    EmotionLabel.DISGUST,
+    "disgust":  EmotionLabel.DISGUST,
+    "fear":     EmotionLabel.FEAR,
+    "happy":    EmotionLabel.JOY,
+    "sad":      EmotionLabel.SADNESS,
+    "surprise": EmotionLabel.SURPRISE,
+    "neutral":  EmotionLabel.NEUTRAL,
+}
+# HuggingFace model options (tested, public, no auth needed)
+FACE_MODELS = [
+    "trpakov/vit-face-expression",           # ViT, good accuracy
+    "dima806/facial_emotions_image_detection", # EfficientNet based
+]
+class FaceEmotionDetector:
+    """Real face emotion detection with HuggingFace transformers."""
+    def __init__(self, model_name: str | None = None, device: str = "cpu"):
+        self.model_name = model_name or FACE_MODELS[0]
+        self.device = device
+        self.pipe = None
+        self.face_cascade = None
+        self.loaded = False
+    def load(self) -> None:
+        """Load the face emotion classification pipeline."""
+        if self.loaded:
+            return
+        # Load face detector (OpenCV cascade for face cropping)
+        if HAS_CV2:
+            cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
+            self.face_cascade = cv2.CascadeClassifier(cascade_path)
+        # Load emotion classifier
+        if HAS_TRANSFORMERS:
+            try:
+                self.pipe = pipeline(
+                    "image-classification",
+                    model=self.model_name,
+                    device=self.device,
+                    top_k=None,  # Return all classes
+                )
+                print(f"[FaceDetector] Loaded model: {self.model_name}")
+            except Exception as e:
+                print(f"[FaceDetector] Failed to load {self.model_name}: {e}")
+                # Try fallback model
+                try:
+                    self.pipe = pipeline(
+                        "image-classification",
+                        model=FACE_MODELS[1],
+                        device=self.device,
+                        top_k=None,
+                    )
+                    self.model_name = FACE_MODELS[1]
+                    print(f"[FaceDetector] Loaded fallback: {self.model_name}")
+                except Exception as e2:
+                    print(f"[FaceDetector] All models failed: {e2}")
+                    print("[FaceDetector] Running in simulation mode")
+        else:
+            print("[FaceDetector] transformers not available, simulation mode")
+        self.loaded = True
+    def _decode_image(self, image_data: bytes) -> Optional[Image.Image]:
+        """Decode bytes to PIL Image."""
+        if not HAS_PIL:
+            return None
+        try:
+            return Image.open(io.BytesIO(image_data)).convert("RGB")
+        except Exception:
+            return None
+    def _detect_face(self, image: Image.Image) -> Optional[Image.Image]:
+        """Detect and crop face from image. Returns cropped face or full image."""
+        if not HAS_CV2 or self.face_cascade is None:
+            return image
+        img_array = np.array(image)
+        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
+        faces = self.face_cascade.detectMultiScale(
+            gray, scaleFactor=1.1, minNeighbors=5, minSize=(48, 48)
+        )
+        if len(faces) == 0:
+            return image  # No face found, use full image
+        # Use largest face
+        x, y, w, h = max(faces, key=lambda f: f[2] * f[3])
+        # Add 20% padding
+        pad = int(max(w, h) * 0.2)
+        x1 = max(0, x - pad)
+        y1 = max(0, y - pad)
+        x2 = min(img_array.shape[1], x + w + pad)
+        y2 = min(img_array.shape[0], y + h + pad)
+        face_crop = image.crop((x1, y1, x2, y2))
+        return face_crop
+    def _map_scores(
+        self, predictions: list[dict], cultural_region: CulturalRegion
+    ) -> dict[EmotionLabel, float]:
+        """Map model predictions to EmoSphere emotion labels."""
+        scores: dict[EmotionLabel, float] = {label: 0.0 for label in EMOTION_LABELS}
+        for pred in predictions:
+            model_label = pred["label"].lower().strip()
+            score = pred["score"]
+            # Map to EmoSphere label
+            emo_label = FER_TO_EMOSPHERE.get(model_label)
+            if emo_label:
+                # Accumulate (angry + disgust both go to disgust)
+                scores[emo_label] = max(scores[emo_label], score)
+        # Fill unmapped labels (love, calm) from contextual hints
+        # Joy with low intensity → calm; high joy → love component
+        if scores[EmotionLabel.JOY] > 0.3:
+            scores[EmotionLabel.LOVE] = scores[EmotionLabel.JOY] * 0.15
+            scores[EmotionLabel.CALM] = scores[EmotionLabel.JOY] * 0.1
+        if scores[EmotionLabel.NEUTRAL] > 0.4:
+            scores[EmotionLabel.CALM] = scores[EmotionLabel.NEUTRAL] * 0.3
+        # Cultural adjustment
+        factor = CULTURAL_ADJUSTMENT.get(cultural_region, 1.0)
+        if factor != 1.0:
+            for label in EMOTION_LABELS:
+                scores[label] = min(scores[label] ** (1.0 / factor), 1.0)
+        # Normalize
+        total = sum(scores.values())
+        if total > 0:
+            scores = {k: v / total for k, v in scores.items()}
+        return scores
+    def _simulate(self) -> dict[EmotionLabel, float]:
+        """Fallback simulation when no model is available."""
+        raw = np.random.dirichlet(np.ones(len(EMOTION_LABELS)) * 0.5)
+        return {label: float(raw[i]) for i, label in enumerate(EMOTION_LABELS)}
+    def detect(
+        self,
+        image_data: bytes | np.ndarray,
+        cultural_region: CulturalRegion = CulturalRegion.UNIVERSAL,
+    ) -> EmotionDetectionResult:
+        """Detect emotion from face image."""
+        start = time.time()
+        if self.pipe is not None and HAS_PIL:
+            # Real inference
+            if isinstance(image_data, bytes):
+                image = self._decode_image(image_data)
+            else:
+                image = Image.fromarray(
+                    (image_data * 255).astype(np.uint8) if image_data.max() <= 1.0
+                    else image_data.astype(np.uint8)
+                )
+            if image is None:
+                scores = self._simulate()
+            else:
+                # Detect and crop face
+                face = self._detect_face(image)
+                # Run model
+                predictions = self.pipe(face)
+                scores = self._map_scores(predictions, cultural_region)
+        else:
+            scores = self._simulate()
+        # Build result
+        emotion_scores = [
+            EmotionScore(label=label, score=scores[label], confidence=scores[label] * 0.9)
+            for label in EMOTION_LABELS
+        ]
+        dominant = max(scores, key=scores.get)  # type: ignore
+        return EmotionDetectionResult(
+            dominant=dominant,
+            dominant_score=scores[dominant],
+            scores=emotion_scores,
+            modality="face",
+            confidence=scores[dominant] * 0.85,
+            processing_time_ms=(time.time() - start) * 1000,
+            cultural_region=cultural_region,
+        )