FaceInsight_AI / src /inference /emotion_detector.py
vaisagan's picture
Upload src/inference/emotion_detector.py with huggingface_hub
dfc458b verified
"""
Emotion detection using a PyTorch vision transformer (no TensorFlow needed).
Model: dima806/face_emotions_image_detection (ViT-tiny, ~28MB)
Labels: angry, disgust, fear, happy, sad, surprise, neutral
Downloaded from HuggingFace Hub on first use.
"""
from __future__ import annotations
import os
# Must be set before any transformers import to block TF (crashes without AVX on Rosetta).
# setdefault is used so a value already present in the environment is left alone.
for _env_name, _env_value in (
    ("USE_TF", "0"),
    ("USE_JAX", "0"),
    ("USE_TORCH", "1"),
    ("TRANSFORMERS_NO_TF", "1"),
    ("TRANSFORMERS_NO_JAX", "1"),
):
    os.environ.setdefault(_env_name, _env_value)
# Keep the module namespace free of the loop temporaries.
del _env_name, _env_value
from typing import Dict, Tuple
import numpy as np
import torch
from PIL import Image
# Emotion class names: lowercase form and the Title Case form used in results.
EMOTION_LABELS = ["angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"]
DISPLAY_LABELS = ["Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"]

# Map model output labels → display labels (capitalized).
# The first part is derived from DISPLAY_LABELS; the explicit entries that
# follow also cover noun-form variants such as "happiness" or "anger".
_LABEL_MAP = {
    **{name.lower(): name.capitalize() for name in DISPLAY_LABELS},
    "happiness": "Happy",
    "sadness": "Sad",
    "anger": "Angry",
    "neutral": "Neutral",
    "disgust": "Disgust",
    "fear": "Fear",
    "surprise": "Surprise",
}

# HuggingFace Hub id of the classifier loaded by EmotionDetector.
MODEL_ID = "dima806/face_emotions_image_detection"
class EmotionDetector:
    """
    Lightweight ViT-based facial expression recognizer.

    Input : face crop as RGB numpy array (any size).
    Output: dict mapping emotion label → probability.

    The underlying ``transformers`` pipeline is created lazily on the first
    call to :meth:`predict`, so constructing a detector is cheap.

    Args:
        model_id: HuggingFace Hub id of the image-classification model.
            ``None`` (the default) selects the module-level ``MODEL_ID``.
        device: Device argument forwarded to ``transformers.pipeline``.
            The default ``-1`` runs on CPU.
    """

    def __init__(self, model_id: str | None = None, device: int | str = -1) -> None:
        self._model_id = model_id
        self._device = device
        self._pipe = None  # lazy load

    def _load(self):
        """Build and return the image-classification pipeline."""
        # Prevent transformers from loading TensorFlow (crashes on machines without AVX).
        # Unlike the import-time defaults, these assignments overwrite any existing value.
        os.environ["USE_TF"] = "0"
        os.environ["USE_JAX"] = "0"
        os.environ["USE_TORCH"] = "1"
        os.environ["TRANSFORMERS_NO_TF"] = "1"
        os.environ["TRANSFORMERS_NO_JAX"] = "1"
        # Imported here, after the environment is prepared, rather than at module import.
        from transformers import pipeline

        # Resolve the default at call time so the module constant stays the single source.
        model_id = MODEL_ID if self._model_id is None else self._model_id

        print("[emotion] Loading emotion model (first run)…")
        pipe = pipeline(
            "image-classification",
            model=model_id,
            top_k=None,  # no top-k cut-off: predict() wants a score per label
            device=self._device,
        )
        print("[emotion] Loaded.")
        return pipe

    # ── public ────────────────────────────────────────────────────────────
    def predict(self, face_rgb: np.ndarray) -> Dict[str, float]:
        """
        Returns dict: { 'Happy': 0.87, 'Sad': 0.05, … }

        Every entry of ``DISPLAY_LABELS`` is present in the result; labels the
        model does not report keep a score of 0.0. A model label that is not in
        ``_LABEL_MAP`` is added under its capitalized name.
        """
        if self._pipe is None:
            self._pipe = self._load()

        pil = Image.fromarray(face_rgb).convert("RGB")
        results = self._pipe(pil)

        # Normalise labels → Title Case and fill any missing emotions
        out: Dict[str, float] = dict.fromkeys(DISPLAY_LABELS, 0.0)
        for entry in results:
            raw_label = entry["label"]
            label = _LABEL_MAP.get(raw_label.lower(), raw_label.capitalize())
            out[label] = float(entry["score"])
        return out

    def top_emotion(self, face_rgb: np.ndarray) -> Tuple[str, float]:
        """Return ``(label, probability)`` for the highest-scoring emotion."""
        probs = self.predict(face_rgb)
        label = max(probs, key=probs.get)
        return label, probs[label]