# Sentinel-Multimodal-Emotion-AI/backend/models/image_emotion_model.py
# Uploaded by r-vasanthkumar73-dev: "Deploying backend and frontend folder modules."
# Commit 099d157 (verified), 6.32 kB
"""
Image Emotion Classifier — ML-based, for Deep Archive Scan.
Uses a pre-trained ViT (Vision Transformer) model from HuggingFace
that was specifically trained on facial expression datasets.
Model: trpakov/vit-face-expression (7-class: angry, disgust, fear, happy, neutral, sad, surprise)
Falls back to DeepFace if the HuggingFace model is unavailable or fails, then to a
lightweight pixel-intensity heuristic if DeepFace fails as well.
"""
import numpy as np
# Cache slot for the HuggingFace ViT pipeline. It stays None until a load has
# been attempted; afterwards it holds the pipeline or a failure marker.
_vit_pipe = None

# DeepFace readiness flag, initialised to False.
# NOTE(review): nothing in the visible part of this module reads it — confirm usage.
_deepface_ready = False

# Lower-case label names (as emitted by the HF model) -> capitalised display names.
_HF_LABEL_MAP = {
    raw_label: raw_label.capitalize()
    for raw_label in (
        "angry",
        "disgust",
        "fear",
        "happy",
        "neutral",
        "sad",
        "surprise",
    )
}

# Engagement score attached to each display emotion, highest first.
_ENGAGEMENT_MAP = dict(
    Happy=90,
    Surprise=75,
    Neutral=50,
    Angry=30,
    Fear=25,
    Sad=15,
    Disgust=10,
)
def _load_vit():
    """
    Return the cached ViT image-classification pipeline, creating it on first use.

    The outcome of the first attempt is stored in the module-level ``_vit_pipe``:
    either the pipeline object or the string ``"UNAVAILABLE"`` when importing or
    building the pipeline raised. Later calls return that stored value without
    trying again.
    """
    global _vit_pipe

    # None means "never attempted"; any other value is a settled outcome.
    if _vit_pipe is None:
        try:
            from transformers import pipeline as hf_pipeline
            import torch

            # Device index 0 when CUDA is reported available, otherwise -1.
            has_cuda = torch.cuda.is_available()
            _vit_pipe = hf_pipeline(
                "image-classification",
                model="trpakov/vit-face-expression",
                top_k=None,
                device=0 if has_cuda else -1,
            )
            print("[ImageEmotion] HuggingFace ViT face expression model loaded.")
        except Exception as exc:
            # Remember the failure so the load is not retried on every call.
            print(f"[ImageEmotion] HuggingFace ViT unavailable: {exc}")
            _vit_pipe = "UNAVAILABLE"

    return _vit_pipe
def predict_from_image(img_rgb: np.ndarray) -> dict:
    """
    Classify the emotion shown in an RGB image, trying three strategies in order.

    1. HuggingFace ViT pipeline (skipped when ``_load_vit`` reports it unavailable).
    2. DeepFace.
    3. ``_pixel_heuristic`` as the last resort.

    A strategy that raises is logged with ``print`` and the next one is tried.

    Args:
        img_rgb: Image as a NumPy array in RGB channel order.

    Returns:
        dict with keys ``emotion``, ``confidence``, ``probabilities``,
        ``engagement_score``, ``provider`` and ``face_detected``.
    """
    # -- Strategy 1: HuggingFace ViT --
    pipe = _load_vit()
    if pipe and pipe != "UNAVAILABLE":
        try:
            return _predict_with_vit(pipe, img_rgb)
        except Exception as e:
            print(f"[ImageEmotion] ViT inference error: {e}")

    # -- Strategy 2: DeepFace --
    try:
        return _predict_with_deepface(img_rgb)
    except Exception as e:
        print(f"[ImageEmotion] DeepFace unavailable: {e}")

    # -- Strategy 3: pixel-intensity heuristic --
    return _pixel_heuristic(img_rgb)


def _scale_confidence(raw_conf) -> float:
    """Map a 0-1 score linearly onto the 58.0-99.99 range, rounded to 2 decimals."""
    return round(58.0 + float(raw_conf) * (99.99 - 58.0), 2)


def _rgb_to_bgr(img_rgb):
    """Return a contiguous BGR copy of an RGB(A) image; other shapes pass through."""
    arr = np.asarray(img_rgb)
    if arr.ndim == 3 and arr.shape[2] >= 3:
        # Channels 2, 1, 0 -> B, G, R; any alpha channel is dropped.
        return np.ascontiguousarray(arr[:, :, 2::-1])
    return arr


def _predict_with_vit(pipe, img_rgb) -> dict:
    """Run the ViT pipeline on the image and build the result dict from its scores."""
    from PIL import Image

    results = pipe(Image.fromarray(img_rgb))
    # Flatten list-of-list output if the pipeline returned one list per image.
    if isinstance(results[0], list):
        results = results[0]

    # Display label -> raw score, cast to built-in float.
    scores = {}
    for r in results:
        label = _HF_LABEL_MAP.get(r["label"].lower(), r["label"].capitalize())
        scores[label] = float(r["score"])

    dominant = max(scores, key=scores.get)
    return {
        "emotion": dominant,
        "confidence": _scale_confidence(scores[dominant]),
        "probabilities": {k: round(v * 100, 1) for k, v in scores.items()},
        "engagement_score": _ENGAGEMENT_MAP.get(dominant, 50),
        "provider": "ViT-Face-Expression",
        "face_detected": True,
    }


def _predict_with_deepface(img_rgb) -> dict:
    """Run DeepFace emotion analysis and build the result dict from its scores."""
    from deepface import DeepFace

    analysis = DeepFace.analyze(
        # DeepFace documents NumPy input as BGR, so swap the channels first;
        # passing RGB directly would exchange red and blue.
        _rgb_to_bgr(img_rgb),
        actions=["emotion"],
        enforce_detection=False,
        detector_backend="opencv",
    )
    if isinstance(analysis, list):
        analysis = analysis[0]

    emotions_raw = analysis.get("emotion", {})
    dominant = analysis.get("dominant_emotion", "neutral").capitalize()
    dominant = _HF_LABEL_MAP.get(dominant.lower(), dominant)

    # Normalise by the sum of scores. Values are cast to built-in float because
    # DeepFace may hand back NumPy scalars, which would otherwise leak into the
    # result and are not JSON-serialisable.
    total = float(sum(emotions_raw.values())) or 1.0
    probs = {
        _HF_LABEL_MAP.get(k.lower(), k.capitalize()): round(float(v) / total * 100, 1)
        for k, v in emotions_raw.items()
    }
    raw_conf = float(emotions_raw.get(dominant.lower(), 0)) / total

    return {
        "emotion": dominant,
        "confidence": _scale_confidence(raw_conf),
        "probabilities": probs,
        "engagement_score": _ENGAGEMENT_MAP.get(dominant, 50),
        "provider": "DeepFace",
        # NOTE(review): enforce_detection=False means DeepFace does not raise when
        # no face is found, so this flag is not an actual detection result.
        "face_detected": True,
    }
def _pixel_heuristic(img_rgb: np.ndarray) -> dict:
    """
    Last-resort emotion guess from brightness and contrast in fixed image bands.

    The image is converted to grayscale and split into fixed fractional bands
    treated as forehead, eye and mouth regions (no face detection is performed).
    Per-band standard deviation and the overall mean feed a set of hand-tuned
    scores; the highest score wins.

    Args:
        img_rgb: RGB image array; a 2-D array is accepted as already-grayscale.

    Returns:
        dict with keys ``emotion``, ``confidence``, ``probabilities``,
        ``engagement_score``, ``provider`` and ``face_detected``.
    """
    gray = _to_grayscale(img_rgb)
    h, w = gray.shape

    # Rough facial region splits, as fractions of the image height/width.
    eye_region = gray[int(h * 0.25):int(h * 0.50), int(w * 0.1):int(w * 0.9)]
    mouth_region = gray[int(h * 0.65):int(h * 0.90), int(w * 0.2):int(w * 0.8)]
    forehead = gray[int(h * 0.05):int(h * 0.25), int(w * 0.1):int(w * 0.9)]

    eye_std = _region_std(eye_region)
    mouth_std = _region_std(mouth_region)
    forehead_std = _region_std(forehead)
    overall_mean = float(np.mean(gray))

    # Very crude heuristics; Neutral has a fixed baseline so it wins when
    # nothing else stands out.
    scores = {
        "Happy": max(0, mouth_std - 30) * 2,
        "Surprise": max(0, eye_std - 20) * 1.5 + max(0, mouth_std - 35) * 1.5,
        "Angry": max(0, forehead_std - 15) * 2,
        "Sad": max(0, 50 - overall_mean) * 0.5,
        "Fear": max(0, eye_std - 25) * 1.0,
        "Disgust": max(0, forehead_std - 20) * 1.0,
        "Neutral": 20.0,
    }

    dominant = max(scores, key=scores.get)
    total = sum(scores.values()) or 1
    probs = {k: round((v / total) * 100, 1) for k, v in scores.items()}
    raw_conf = scores[dominant] / total
    # Confidence is capped to the 58-88 band for this fallback.
    confidence = round(58.0 + min(raw_conf, 1.0) * 30, 2)

    return {
        "emotion": dominant,
        "confidence": confidence,
        "probabilities": probs,
        "engagement_score": _ENGAGEMENT_MAP.get(dominant, 50),
        "provider": "Pixel-Heuristic",
        # NOTE(review): always True although this path never detects a face.
        "face_detected": True,
    }


def _to_grayscale(img_rgb):
    """Return a 2-D grayscale array, via OpenCV when importable, else via NumPy."""
    arr = np.asarray(img_rgb)
    if arr.ndim == 2:
        # Already single-channel; nothing to convert.
        return arr
    try:
        import cv2
    except ImportError:
        # Keep the last-resort fallback working without OpenCV: weighted sum of
        # R, G, B with the 0.299 / 0.587 / 0.114 luminance weights.
        luma = arr[..., :3].astype(np.float64) @ np.array([0.299, 0.587, 0.114])
        if np.issubdtype(arr.dtype, np.integer):
            return np.rint(luma).astype(arr.dtype)
        return luma
    return cv2.cvtColor(arr, cv2.COLOR_RGB2GRAY)


def _region_std(region) -> float:
    """Standard deviation of a region; 0.0 for an empty slice (tiny images)."""
    if region.size == 0:
        return 0.0
    return float(np.std(region))