Spaces:

vaisagan
/

FaceInsight_AI

Sleeping

App Files Files Community

FaceInsight_AI / src /inference /emotion_detector.py

vaisagan

Upload src/inference/emotion_detector.py with huggingface_hub

dfc458b verified 7 days ago

raw

history blame contribute delete

3.22 kB

	"""
	Emotion detection using a PyTorch vision transformer (no TensorFlow needed).

	Model: dima806/face_emotions_image_detection (ViT-tiny, ~28MB)
	Labels: angry, disgust, fear, happy, sad, surprise, neutral
	Downloaded from HuggingFace Hub on first use.
	"""

	from __future__ import annotations

	import os
	# Backend defaults for `transformers`. These must be in the environment before
	# any transformers import to block TF (crashes without AVX on Rosetta).
	# setdefault() leaves a value that is already present in the environment alone.
	for _name, _default in (
	    ("USE_TF", "0"),
	    ("USE_JAX", "0"),
	    ("USE_TORCH", "1"),
	    ("TRANSFORMERS_NO_TF", "1"),
	    ("TRANSFORMERS_NO_JAX", "1"),
	):
	    os.environ.setdefault(_name, _default)
	del _name, _default  # keep the loop temporaries out of the module namespace

	from typing import Dict, Tuple

	import numpy as np
	import torch
	from PIL import Image


	# Canonical lowercase label names.
	EMOTION_LABELS = ["angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"]
	# Capitalized counterparts, in the same order as EMOTION_LABELS.
	DISPLAY_LABELS = [name.capitalize() for name in EMOTION_LABELS]

	# Lookup from a lowercase model output label to its display label.
	# The canonical names come first, followed by the noun-form variants.
	_LABEL_MAP = {
	    **dict(zip(EMOTION_LABELS, DISPLAY_LABELS)),
	    "happiness": "Happy",
	    "sadness": "Sad",
	    "anger": "Angry",
	}

	# HuggingFace Hub identifier of the emotion classification model.
	MODEL_ID = "dima806/face_emotions_image_detection"


	class EmotionDetector:
	    """
	    Lightweight ViT-based facial expression recognizer.

	    The ``image-classification`` pipeline for ``MODEL_ID`` is created lazily
	    on the first successful :meth:`predict` call and reused afterwards.

	    Input : face crop as RGB numpy array (any size).
	    Output: dict mapping emotion label → probability.
	    """

	    def __init__(self) -> None:
	        self._pipe = None  # lazy load: built by the first predict() call

	    def _load(self):
	        """Create and return the CPU ``image-classification`` pipeline."""
	        # Prevent transformers from loading TensorFlow (crashes on machines
	        # without AVX). The values are forced here, right before the import.
	        os.environ.update(
	            {
	                "USE_TF": "0",
	                "USE_JAX": "0",
	                "USE_TORCH": "1",
	                "TRANSFORMERS_NO_TF": "1",
	                "TRANSFORMERS_NO_JAX": "1",
	            }
	        )
	        # Imported here, not at module top, so the env vars above are already set.
	        from transformers import pipeline

	        print("[emotion] Loading emotion model (first run)…")
	        pipe = pipeline(
	            "image-classification",
	            model=MODEL_ID,
	            top_k=None,  # return a score for every label, not only the best one
	            device=-1,  # CPU
	        )
	        print("[emotion] Loaded.")
	        return pipe

	    # ── public ────────────────────────────────────────────────────────────

	    def predict(self, face_rgb: np.ndarray) -> Dict[str, float]:
	        """
	        Score one face crop.

	        Args:
	            face_rgb: Face crop as a numpy array; it is passed to
	                ``PIL.Image.fromarray`` and converted to RGB.

	        Returns:
	            Dict such as ``{'Happy': 0.87, 'Sad': 0.05, …}``. Every label in
	            ``DISPLAY_LABELS`` is present (0.0 when the pipeline reported no
	            score for it). A pipeline label with no entry in ``_LABEL_MAP``
	            is added under its capitalized name.

	        Raises:
	            ValueError: If ``face_rgb`` is ``None`` or contains no pixels.
	        """
	        # Reject unusable crops before the (expensive) lazy model load, and
	        # with a clearer message than the downstream libraries would give.
	        if face_rgb is None or np.size(face_rgb) == 0:
	            raise ValueError("face_rgb must be a non-empty image array")

	        if self._pipe is None:
	            self._pipe = self._load()

	        pil = Image.fromarray(face_rgb).convert("RGB")
	        results = self._pipe(pil)

	        # Normalise labels → Title Case and fill any missing emotions
	        out: Dict[str, float] = dict.fromkeys(DISPLAY_LABELS, 0.0)
	        for r in results:
	            raw = r["label"]
	            out[_LABEL_MAP.get(raw.lower(), raw.capitalize())] = float(r["score"])
	        return out

	    def top_emotion(self, face_rgb: np.ndarray) -> Tuple[str, float]:
	        """
	        Return the highest-scoring ``(label, score)`` pair from :meth:`predict`.

	        Raises:
	            ValueError: Propagated from :meth:`predict` for an empty crop.
	        """
	        probs = self.predict(face_rgb)
	        label = max(probs, key=probs.get)
	        return label, probs[label]