Spaces:

chariscait
/

EmoSphere

Running

App Files Files Community

EmoSphere / face_detector.py

chariscait

Restore face confidence levels

9d253fc verified about 1 month ago

raw

history blame contribute delete

8.21 kB

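	"""Face Emotion Detector — real inference with a HuggingFace image classifier.

	Backends referenced by this module:
	1. transformers (HuggingFace) — most accurate, GPU recommended; this is the
	   only inference backend the detector class actually uses
	2. ONNX Runtime — fastest CPU inference (not implemented in this module)
	3. MediaPipe + OpenCV — lightweight fallback (MediaPipe is only probed at
	   import time; OpenCV's Haar cascade is used for face cropping)

	When no classifier can be loaded, emotion scores are randomly simulated.
	"""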

	from __future__ import annotations

	import time
	import io
	from pathlib import Path
	from typing import Optional

	import numpy as np

	try:
	import cv2
	HAS_CV2 = True
	except ImportError:
	HAS_CV2 = False

	try:
	from PIL import Image
	HAS_PIL = True
	except ImportError:
	HAS_PIL = False

	try:
	from transformers import pipeline
	HAS_TRANSFORMERS = True
	except ImportError:
	HAS_TRANSFORMERS = False

	try:
	import mediapipe as mp
	HAS_MEDIAPIPE = True
	except ImportError:
	HAS_MEDIAPIPE = False

	from models import (
	EmotionLabel, EMOTION_LABELS, EmotionScore,
	EmotionDetectionResult, CulturalRegion, CULTURAL_ADJUSTMENT,
	)


	# Translation table: lowercase class names emitted by FER-style classifiers
	# on the left, the corresponding EmoSphere EmotionLabel member on the right.
	FER_TO_EMOSPHERE: dict[str, EmotionLabel] = {
	    "angry": EmotionLabel.ANGER,
	    "disgust": EmotionLabel.DISGUST,
	    "fear": EmotionLabel.FEAR,
	    "happy": EmotionLabel.JOY,
	    "sad": EmotionLabel.SADNESS,
	    "surprise": EmotionLabel.SURPRISE,
	    "neutral": EmotionLabel.NEUTRAL,
	}

	# Public HuggingFace checkpoints (tested, no auth token needed), listed in
	# order of preference: the first entry is the default model, later entries
	# serve as fallbacks.
	FACE_MODELS = [
	    "trpakov/vit-face-expression",  # ViT, good accuracy
	    "dima806/facial_emotions_image_detection",  # EfficientNet based
	]


	class FaceEmotionDetector:
	    """Face emotion detection backed by a HuggingFace image classifier.

	    Typical use is ``detector.load()`` once, then ``detector.detect(...)``
	    per image. ``detect`` never loads the model itself: while ``self.pipe``
	    is ``None`` (``load`` not called, every model failed to load, or
	    transformers/PIL are missing) it returns randomly simulated scores
	    instead of raising.
	    """

	    def __init__(self, model_name: str | None = None, device: str = "cpu"):
	        """Store configuration; nothing is loaded until :meth:`load`.

	        Args:
	            model_name: HuggingFace model id; defaults to ``FACE_MODELS[0]``.
	            device: Device argument forwarded to the transformers pipeline.
	        """
	        self.model_name = model_name or FACE_MODELS[0]
	        self.device = device
	        self.pipe = None  # transformers pipeline, set by load()
	        self.face_cascade = None  # OpenCV Haar cascade, set by load()
	        self.loaded = False

	    def load(self) -> None:
	        """Load the face cropper and the emotion classification pipeline.

	        Idempotent: returns immediately once a previous call has completed.
	        Failures are reported on stdout and leave the detector in
	        simulation mode rather than raising.
	        """
	        if self.loaded:
	            return

	        # Load face detector (OpenCV cascade for face cropping)
	        if HAS_CV2:
	            cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
	            cascade = cv2.CascadeClassifier(cascade_path)
	            # A missing/unreadable XML yields an empty classifier whose
	            # detectMultiScale() raises; treat that as "no face cropper".
	            self.face_cascade = None if cascade.empty() else cascade

	        # Load emotion classifier
	        if HAS_TRANSFORMERS:
	            self._load_pipeline()
	        else:
	            print("[FaceDetector] transformers not available, simulation mode")

	        self.loaded = True

	    def _load_pipeline(self) -> None:
	        """Try the configured model, then every other ``FACE_MODELS`` entry.

	        On success ``self.pipe`` is set and ``self.model_name`` names the
	        model that actually loaded. If every candidate fails, ``self.pipe``
	        stays ``None`` (simulation mode).
	        """
	        candidates = [self.model_name]
	        # Skip the configured model so a failed load is not retried verbatim.
	        candidates += [name for name in FACE_MODELS if name != self.model_name]

	        last_error: Exception | None = None
	        for name in candidates:
	            try:
	                pipe = pipeline(
	                    "image-classification",
	                    model=name,
	                    device=self.device,
	                    top_k=None,  # Return all classes
	                )
	            except Exception as exc:  # best-effort: download/config/backend errors
	                last_error = exc
	                print(f"[FaceDetector] Failed to load {name}: {exc}")
	                continue

	            self.pipe = pipe
	            if name == self.model_name:
	                print(f"[FaceDetector] Loaded model: {self.model_name}")
	            else:
	                self.model_name = name
	                print(f"[FaceDetector] Loaded fallback: {self.model_name}")
	            return

	        print(f"[FaceDetector] All models failed: {last_error}")
	        print("[FaceDetector] Running in simulation mode")

	    def _decode_image(self, image_data: bytes) -> Optional[Image.Image]:
	        """Decode encoded image bytes to an RGB PIL image.

	        Returns ``None`` when PIL is unavailable or the bytes cannot be
	        decoded.
	        """
	        if not HAS_PIL:
	            return None
	        try:
	            return Image.open(io.BytesIO(image_data)).convert("RGB")
	        except Exception:  # deliberate best-effort: any decode failure -> None
	            return None

	    def _to_image(self, image_data) -> Optional[Image.Image]:
	        """Convert bytes-like data or an array to an RGB PIL image.

	        Arrays whose maximum is <= 1.0 are scaled by 255 before the uint8
	        cast. Returns ``None`` for empty or unsupported input so the caller
	        can fall back the same way it does for undecodable bytes.
	        """
	        if isinstance(image_data, (bytes, bytearray, memoryview)):
	            return self._decode_image(bytes(image_data))

	        array = np.asarray(image_data)
	        if array.size == 0:
	            return None
	        if array.max() <= 1.0:
	            array = array * 255
	        try:
	            # convert("RGB") keeps grayscale/RGBA arrays compatible with the
	            # RGB->gray conversion done during face cropping.
	            return Image.fromarray(array.astype(np.uint8)).convert("RGB")
	        except (TypeError, ValueError):
	            return None

	    def _detect_face(self, image: Image.Image) -> Optional[Image.Image]:
	        """Detect and crop the largest face. Returns the crop or the full image.

	        The full image is returned unchanged when OpenCV or the cascade is
	        unavailable, or when no face is found.
	        """
	        if not HAS_CV2 or self.face_cascade is None:
	            return image

	        rgb = image if image.mode == "RGB" else image.convert("RGB")
	        img_array = np.array(rgb)
	        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
	        faces = self.face_cascade.detectMultiScale(
	            gray, scaleFactor=1.1, minNeighbors=5, minSize=(48, 48)
	        )

	        if len(faces) == 0:
	            return image  # No face found, use full image

	        # Use largest face (by bounding-box area)
	        x, y, w, h = (int(v) for v in max(faces, key=lambda f: f[2] * f[3]))
	        # Add 20% padding, clamped to the image bounds
	        pad = int(max(w, h) * 0.2)
	        height, width = img_array.shape[:2]
	        x1 = max(0, x - pad)
	        y1 = max(0, y - pad)
	        x2 = min(width, x + w + pad)
	        y2 = min(height, y + h + pad)

	        return image.crop((x1, y1, x2, y2))

	    def _map_scores(
	        self, predictions: list[dict], cultural_region: CulturalRegion
	    ) -> dict[EmotionLabel, float]:
	        """Map model predictions to EmoSphere emotion labels.

	        Args:
	            predictions: Items with a ``"label"`` string and a ``"score"``.
	            cultural_region: Key into ``CULTURAL_ADJUSTMENT``.

	        Returns:
	            One score per entry of ``EMOTION_LABELS``, normalized to sum to 1
	            unless every score is zero.
	        """
	        scores: dict[EmotionLabel, float] = {label: 0.0 for label in EMOTION_LABELS}

	        for pred in predictions:
	            emo_label = FER_TO_EMOSPHERE.get(pred["label"].lower().strip())
	            # "is not None" so a falsy enum member is not silently dropped.
	            if emo_label is not None:
	                # Keep the strongest score if several model labels collide.
	                scores[emo_label] = max(scores[emo_label], pred["score"])

	        # Derived hints only ever raise a score; they must not clobber a
	        # value the model produced itself (e.g. its own "angry" score).
	        disgust = scores[EmotionLabel.DISGUST]
	        if disgust > 0.2:
	            scores[EmotionLabel.ANGER] = max(scores[EmotionLabel.ANGER], disgust * 0.3)

	        # Labels with no model class (love, calm) are filled from joy/neutral.
	        joy = scores[EmotionLabel.JOY]
	        if joy > 0.3:
	            scores[EmotionLabel.LOVE] = max(scores[EmotionLabel.LOVE], joy * 0.15)
	            scores[EmotionLabel.CALM] = max(scores[EmotionLabel.CALM], joy * 0.1)
	        neutral = scores[EmotionLabel.NEUTRAL]
	        if neutral > 0.4:
	            scores[EmotionLabel.CALM] = max(scores[EmotionLabel.CALM], neutral * 0.3)

	        # Cultural adjustment
	        # NOTE(review): assumes CULTURAL_ADJUSTMENT holds one numeric factor
	        # per region — confirm. Non-positive factors are skipped because
	        # the exponent 1/factor would be undefined or raise.
	        factor = CULTURAL_ADJUSTMENT.get(cultural_region, 1.0)
	        if factor > 0 and factor != 1.0:
	            exponent = 1.0 / factor
	            for label in EMOTION_LABELS:
	                scores[label] = min(scores[label] ** exponent, 1.0)

	        # Normalize
	        total = sum(scores.values())
	        if total > 0:
	            scores = {k: v / total for k, v in scores.items()}

	        return scores

	    def _simulate(self) -> dict[EmotionLabel, float]:
	        """Fallback simulation when no model is available.

	        Draws one random Dirichlet sample, so results differ per call.
	        """
	        raw = np.random.dirichlet(np.ones(len(EMOTION_LABELS)) * 0.5)
	        return {label: float(raw[i]) for i, label in enumerate(EMOTION_LABELS)}

	    def detect(
	        self,
	        image_data: bytes | np.ndarray,
	        cultural_region: CulturalRegion = CulturalRegion.UNIVERSAL,
	    ) -> EmotionDetectionResult:
	        """Detect emotion from a face image.

	        Args:
	            image_data: Encoded image bytes, or an image array (arrays whose
	                maximum is <= 1.0 are scaled by 255).
	            cultural_region: Region used for the score adjustment.

	        Returns:
	            An ``EmotionDetectionResult`` with ``modality="face"``. Scores
	            are simulated when no pipeline is loaded, PIL is missing, or the
	            input cannot be turned into an image.
	        """
	        start = time.perf_counter()  # monotonic; safe for elapsed time

	        image = None
	        if self.pipe is not None and HAS_PIL:
	            image = self._to_image(image_data)

	        if image is None:
	            scores = self._simulate()
	        else:
	            # Detect and crop face, then run the model on the crop
	            face = self._detect_face(image)
	            predictions = self.pipe(face)
	            scores = self._map_scores(predictions, cultural_region)

	        # Build result
	        emotion_scores = [
	            EmotionScore(label=label, score=scores[label], confidence=scores[label] * 0.85)
	            for label in EMOTION_LABELS
	        ]
	        dominant = max(scores, key=lambda label: scores[label])

	        return EmotionDetectionResult(
	            dominant=dominant,
	            dominant_score=scores[dominant],
	            scores=emotion_scores,
	            modality="face",
	            confidence=scores[dominant] * 0.80,
	            processing_time_ms=(time.perf_counter() - start) * 1000,
	            cultural_region=cultural_region,
	        )