Spaces:
Sleeping
Sleeping
| """ | |
| Emotion detection using a PyTorch vision transformer (no TensorFlow needed). | |
| Model: dima806/face_emotions_image_detection (ViT-tiny, ~28MB) | |
| Labels: angry, disgust, fear, happy, sad, surprise, neutral | |
| Downloaded from HuggingFace Hub on first use. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| # Must be set before any transformers import to block TF (crashes without AVX on Rosetta) | |
| os.environ.setdefault("USE_TF", "0") | |
| os.environ.setdefault("USE_JAX", "0") | |
| os.environ.setdefault("USE_TORCH", "1") | |
| os.environ.setdefault("TRANSFORMERS_NO_TF", "1") | |
| os.environ.setdefault("TRANSFORMERS_NO_JAX", "1") | |
| from typing import Dict, Tuple | |
| import numpy as np | |
| import torch | |
| from PIL import Image | |
# Lower-case emotion names, listed in the same order as DISPLAY_LABELS.
EMOTION_LABELS = ["angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"]

# Capitalized names; these are the keys of the dict returned by
# EmotionDetector.predict().
DISPLAY_LABELS = ["Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"]

# Map model output labels (lower-cased) → display labels (capitalized).
_LABEL_MAP = dict(
    zip(
        map(str.lower, DISPLAY_LABELS),
        map(str.capitalize, DISPLAY_LABELS),
    )
)

# Noun-form variants ("happiness", "sadness", "anger") resolve to the same
# display labels; the remaining entries simply restate the base mapping.
_LABEL_MAP.update(
    happiness="Happy",
    sadness="Sad",
    anger="Angry",
    neutral="Neutral",
    disgust="Disgust",
    fear="Fear",
    surprise="Surprise",
)

# HuggingFace Hub identifier of the emotion-classification model.
MODEL_ID = "dima806/face_emotions_image_detection"
class EmotionDetector:
    """
    Lightweight ViT-based facial expression recognizer.

    Input : face crop as RGB numpy array (any non-zero size).
    Output: dict mapping emotion label → probability.

    The classification pipeline is created lazily on the first call to
    ``predict``, so constructing a detector does not load the model.
    """

    def __init__(self) -> None:
        self._pipe = None  # lazy load: built by the first predict() call

    def _load(self):
        """Create and return the image-classification pipeline for MODEL_ID.

        The backend-selecting environment variables are forced (not merely
        defaulted) right before ``transformers`` is imported here.
        """
        # Prevent transformers from loading TensorFlow (crashes on machines
        # without AVX).
        os.environ.update(
            {
                "USE_TF": "0",
                "USE_JAX": "0",
                "USE_TORCH": "1",
                "TRANSFORMERS_NO_TF": "1",
                "TRANSFORMERS_NO_JAX": "1",
            }
        )
        from transformers import pipeline

        print("[emotion] Loading emotion model (first run)…")
        pipe = pipeline(
            "image-classification",
            model=MODEL_ID,
            top_k=None,  # presumably "return every label" - see pipeline docs
            device=-1,  # CPU
        )
        print("[emotion] Loaded.")
        return pipe

    # ── public ──────────────────────────────────────────────────────────

    def predict(self, face_rgb: np.ndarray) -> Dict[str, float]:
        """Classify a face crop.

        Returns a dict such as ``{'Happy': 0.87, 'Sad': 0.05, …}``. Every
        entry of DISPLAY_LABELS is present (0.0 when the model reported no
        score for it); a label the model emits that is not in the label map
        is added under its capitalized name.

        Raises:
            ValueError: if ``face_rgb`` contains no pixels.
        """
        # Reject empty crops up front: this gives a clear error and avoids
        # loading the model for an input that cannot be classified.
        if face_rgb.size == 0:
            raise ValueError(
                f"face_rgb is empty (shape {face_rgb.shape}); nothing to classify"
            )

        if self._pipe is None:
            self._pipe = self._load()

        image = Image.fromarray(face_rgb).convert("RGB")

        # Normalise labels → Title Case and fill any missing emotions.
        out: Dict[str, float] = dict.fromkeys(DISPLAY_LABELS, 0.0)
        for result in self._pipe(image):
            raw_label = result["label"]
            display = _LABEL_MAP.get(raw_label.lower(), raw_label.capitalize())
            out[display] = float(result["score"])
        return out

    def top_emotion(self, face_rgb: np.ndarray) -> Tuple[str, float]:
        """Return ``(label, probability)`` for the highest-scoring emotion.

        Raises whatever ``predict`` raises for the same input.
        """
        probs = self.predict(face_rgb)
        label = max(probs, key=probs.get)
        return label, probs[label]