"""
Image Emotion Classifier — ML-based, for Deep Archive Scan.

Uses a pre-trained ViT (Vision Transformer) model from HuggingFace that was
specifically trained on facial expression datasets.
Model: trpakov/vit-face-expression
(7-class: angry, disgust, fear, happy, neutral, sad, surprise)

Falls back to DeepFace, then to a lightweight pixel-intensity heuristic if
HuggingFace is unavailable.
"""
import numpy as np

_vit_pipe = None  # HuggingFace ViT pipeline (or the "UNAVAILABLE" sentinel)
_deepface_ready = False

# Sentinel stored in ``_vit_pipe`` once loading has failed, so the (slow)
# import/load is not retried on every call.
_UNAVAILABLE = "UNAVAILABLE"

# Raw model probabilities (0-1) are rescaled into [floor, ceiling] percent.
_CONF_FLOOR = 58.0
_CONF_CEILING = 99.99
# The pixel heuristic is only a rough guess, so its confidence tops out lower.
_HEURISTIC_CONF_CEILING = 88.0

# Standard luma weights, used when OpenCV is not installed.
_LUMA_WEIGHTS = np.array([0.299, 0.587, 0.114])

# Label maps (HF model uses these exact label names)
_HF_LABEL_MAP = {
    "angry": "Angry",
    "disgust": "Disgust",
    "fear": "Fear",
    "happy": "Happy",
    "neutral": "Neutral",
    "sad": "Sad",
    "surprise": "Surprise",
}

_ENGAGEMENT_MAP = {
    "Happy": 90,
    "Surprise": 75,
    "Neutral": 50,
    "Angry": 30,
    "Fear": 25,
    "Sad": 15,
    "Disgust": 10,
}


def _load_vit():
    """Return the cached ViT pipeline, loading it on first use.

    Returns the HuggingFace pipeline object, or the string ``"UNAVAILABLE"``
    when ``transformers``/``torch`` cannot be imported or the model cannot be
    loaded. The outcome (success or failure) is cached in ``_vit_pipe``.
    """
    global _vit_pipe
    if _vit_pipe is not None:
        return _vit_pipe
    try:
        from transformers import pipeline as hf_pipeline
        import torch

        device = 0 if torch.cuda.is_available() else -1
        _vit_pipe = hf_pipeline(
            "image-classification",
            model="trpakov/vit-face-expression",
            top_k=None,
            device=device,
        )
        print("[ImageEmotion] HuggingFace ViT face expression model loaded.")
    except Exception as e:
        print(f"[ImageEmotion] HuggingFace ViT unavailable: {e}")
        _vit_pipe = _UNAVAILABLE
    return _vit_pipe


def _canonical_label(raw_label: str) -> str:
    """Map a provider label (any case) to the capitalized label used here."""
    return _HF_LABEL_MAP.get(raw_label.lower(), raw_label.capitalize())


def _scale_confidence(raw_conf: float, ceiling: float = _CONF_CEILING) -> float:
    """Rescale a 0-1 probability into the ``58.0 .. ceiling`` percent range."""
    return round(_CONF_FLOOR + raw_conf * (ceiling - _CONF_FLOOR), 2)


def _build_result(dominant: str, confidence: float, probs: dict, provider: str) -> dict:
    """Assemble the result dict shared by all three strategies."""
    return {
        "emotion": dominant,
        "confidence": confidence,
        "probabilities": probs,
        "engagement_score": _ENGAGEMENT_MAP.get(dominant, 50),
        "provider": provider,
        "face_detected": True,
    }


def _result_from_vit(results) -> dict:
    """Turn the ViT pipeline output into a result dict.

    ``results`` is a list of ``{"label": ..., "score": ...}`` dicts, possibly
    wrapped in one extra list. Raises (``IndexError``/``ValueError``) on empty
    output; the caller treats that as an inference failure.
    """
    # Flatten list-of-list if needed
    if isinstance(results[0], list):
        results = results[0]

    scores = {_canonical_label(r["label"]): r["score"] for r in results}
    dominant = max(scores, key=scores.get)

    # Map to 58%→99.99% scale (same as live model)
    confidence = _scale_confidence(scores[dominant])
    probs = {label: round(score * 100, 1) for label, score in scores.items()}
    return _build_result(dominant, confidence, probs, "ViT-Face-Expression")


def _result_from_deepface(analysis) -> dict:
    """Turn a ``DeepFace.analyze`` result (dict or list of dicts) into a result dict."""
    if isinstance(analysis, list):
        analysis = analysis[0]

    emotions_raw = analysis.get("emotion", {})
    dominant = analysis.get("dominant_emotion", "neutral").capitalize()
    dominant = _HF_LABEL_MAP.get(dominant.lower(), dominant)

    # Normalize to 0-1. Values are cast to plain floats because the scores may
    # arrive as numpy scalars, which are not JSON-serializable.
    total = float(sum(emotions_raw.values())) or 1.0
    probs = {
        _canonical_label(name): round((float(value) / total) * 100, 1)
        for name, value in emotions_raw.items()
    }
    raw_conf = float(emotions_raw.get(dominant.lower(), 0)) / total
    return _build_result(dominant, _scale_confidence(raw_conf), probs, "DeepFace")


def _rgb_to_bgr(img_rgb: np.ndarray) -> np.ndarray:
    """Return a contiguous BGR copy of a 3-channel RGB image.

    Arrays that are not ``(H, W, 3)`` are returned unchanged.
    """
    img = np.asarray(img_rgb)
    if img.ndim == 3 and img.shape[2] == 3:
        return np.ascontiguousarray(img[:, :, ::-1])
    return img


def predict_from_image(img_rgb: np.ndarray) -> dict:
    """
    Main entry point. Takes an RGB numpy image array.

    Tries, in order: the HuggingFace ViT model, DeepFace, and finally the
    pixel-intensity heuristic. A failure in one strategy is logged and the
    next one is tried.

    Returns a dict with keys ``emotion``, ``confidence``, ``probabilities``,
    ``engagement_score``, ``provider`` and ``face_detected``.
    """
    # ── Strategy 1: HuggingFace ViT ─────────────────────────
    pipe = _load_vit()
    if pipe and pipe != _UNAVAILABLE:
        try:
            from PIL import Image

            pil_img = Image.fromarray(img_rgb)
            return _result_from_vit(pipe(pil_img))
        except Exception as e:
            print(f"[ImageEmotion] ViT inference error: {e}")

    # ── Strategy 2: DeepFace ────────────────────────────────
    try:
        from deepface import DeepFace

        analysis = DeepFace.analyze(
            # DeepFace documents numpy input as BGR, so swap the channels.
            _rgb_to_bgr(img_rgb),
            actions=["emotion"],
            enforce_detection=False,
            detector_backend="opencv",
        )
        return _result_from_deepface(analysis)
    except Exception as e:
        print(f"[ImageEmotion] DeepFace unavailable: {e}")

    # ── Strategy 3: Fallback — pixel intensity heuristic ────
    return _pixel_heuristic(img_rgb)


def _to_gray(img_rgb: np.ndarray) -> np.ndarray:
    """Convert an image to a 2-D grayscale array.

    2-D input (already grayscale) and single-channel 3-D input are passed
    through. Otherwise OpenCV is used when installed; without it, a numpy
    weighted sum of the first three channels approximates the conversion.
    """
    img = np.asarray(img_rgb)
    if img.ndim == 2:
        return img
    if img.ndim == 3 and img.shape[2] == 1:
        return img[:, :, 0]
    try:
        import cv2
    except ImportError:
        weighted = img[..., :3].astype(np.float64) @ _LUMA_WEIGHTS
        if np.issubdtype(img.dtype, np.integer):
            return np.rint(weighted).astype(img.dtype)
        return weighted
    return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)


def _region_std(region: np.ndarray) -> float:
    """Standard deviation of a region; 0.0 for an empty region (tiny images)."""
    return float(np.std(region)) if region.size else 0.0


def _pixel_heuristic(img_rgb: np.ndarray) -> dict:
    """
    Lightweight fallback: reads face region brightness, eye/mouth contrast
    to make a basic guess. Better than always returning Neutral 50%.

    The image is assumed to be a roughly cropped face; no detection is done.
    Confidence is limited to the 58-88 range.
    """
    gray = _to_gray(img_rgb)
    h, w = gray.shape

    # Rough facial region splits
    eye_region = gray[int(h * 0.25):int(h * 0.50), int(w * 0.1):int(w * 0.9)]
    mouth_region = gray[int(h * 0.65):int(h * 0.90), int(w * 0.2):int(w * 0.8)]
    forehead = gray[int(h * 0.05):int(h * 0.25), int(w * 0.1):int(w * 0.9)]

    eye_std = _region_std(eye_region)
    mouth_std = _region_std(mouth_region)
    forehead_std = _region_std(forehead)
    overall_mean = float(np.mean(gray))

    # Very crude heuristics — better than fixed Neutral
    scores = {
        "Happy": max(0, mouth_std - 30) * 2,
        "Surprise": max(0, eye_std - 20) * 1.5 + max(0, mouth_std - 35) * 1.5,
        "Angry": max(0, forehead_std - 15) * 2,
        "Sad": max(0, 50 - overall_mean) * 0.5,
        "Fear": max(0, eye_std - 25) * 1.0,
        "Disgust": max(0, forehead_std - 20) * 1.0,
        "Neutral": 20.0,
    }

    dominant = max(scores, key=scores.get)
    total = sum(scores.values()) or 1
    probs = {label: round((score / total) * 100, 1) for label, score in scores.items()}
    raw_conf = scores[dominant] / total
    confidence = _scale_confidence(min(raw_conf, 1.0), _HEURISTIC_CONF_CEILING)
    return _build_result(dominant, confidence, probs, "Pixel-Heuristic")