Spaces:
Running
Running
| """EmoSphere Emotion Engine — Real ML inference for emotion detection. | |
| Integrates three modality detectors with weighted fusion. | |
| All models run locally. No data leaves the device. | |
| No medical screening. No anger detection. No surveillance. | |
| """ | |
| from __future__ import annotations | |
| import time | |
| from typing import Optional | |
| import numpy as np | |
| from models import ( | |
| EmotionLabel, EMOTION_LABELS, EmotionScore, | |
| EmotionDetectionResult, FusedDetectionResult, | |
| CulturalRegion, | |
| ) | |
| from face_detector import FaceEmotionDetector | |
| from voice_detector import VoiceEmotionDetector | |
| from text_detector import TextEmotionDetector | |
| from posture_detector import PostureEmotionDetector | |
class EmotionFusionEngine:
    """Weighted average fusion of face + voice + text + posture modalities.

    Weights adapt based on modality confidence:
        face:    0.35 (most informative for basic emotions)
        voice:   0.25 (prosody reveals emotion intensity)
        text:    0.20 (semantic content)
        posture: 0.20 (body language and gestures)

    Each base weight is scaled by the modality's reported confidence and the
    scaled weights are renormalized to sum to 1 over the available modalities.
    """

    BASE_WEIGHTS = {
        "face": 0.35,
        "voice": 0.25,
        "text": 0.20,
        "posture": 0.20,
    }

    def fuse(
        self,
        face: Optional[EmotionDetectionResult] = None,
        voice: Optional[EmotionDetectionResult] = None,
        text: Optional[EmotionDetectionResult] = None,
        posture: Optional[EmotionDetectionResult] = None,
    ) -> FusedDetectionResult:
        """Fuse available modality results.

        Any subset of the four modalities may be provided; missing ones are
        simply excluded from the weighted blend. With no modalities at all, a
        fully-neutral result with zero confidence is returned so callers can
        distinguish "no data" from a genuine neutral reading.
        """
        # perf_counter is monotonic: wall-clock time.time() can jump (NTP,
        # DST) and yield negative or wildly wrong durations.
        start = time.perf_counter()
        # Explicit `is not None`: a supplied result object must never be
        # dropped just because it happens to evaluate falsy.
        available: list[tuple[str, EmotionDetectionResult]] = [
            (name, result)
            for name, result in (
                ("face", face),
                ("voice", voice),
                ("text", text),
                ("posture", posture),
            )
            if result is not None
        ]
        if not available:
            neutral_scores = [
                EmotionScore(
                    label=label,
                    score=1.0 if label == EmotionLabel.NEUTRAL else 0.0,
                    confidence=0.0,
                )
                for label in EMOTION_LABELS
            ]
            return FusedDetectionResult(
                dominant=EmotionLabel.NEUTRAL,
                dominant_score=1.0,
                scores=neutral_scores,
                modality_weights={},
                confidence=0.0,
                processing_time_ms=0.0,
            )
        # Confidence-adjusted weights. The 0.01 floor keeps a zero-confidence
        # modality from vanishing entirely (and the total from reaching 0).
        weights: dict[str, float] = {}
        for mod_name, result in available:
            base = self.BASE_WEIGHTS.get(mod_name, 0.2)
            weights[mod_name] = base * max(result.confidence, 0.01)
        total_w = sum(weights.values())
        if total_w > 0:
            weights = {k: v / total_w for k, v in weights.items()}
        # Weighted blend of per-label scores across modalities.
        fused: dict[EmotionLabel, float] = {label: 0.0 for label in EMOTION_LABELS}
        for mod_name, result in available:
            w = weights.get(mod_name, 0.0)
            for score in result.scores:
                fused[score.label] += score.score * w
        scores = [
            EmotionScore(label=label, score=fused[label], confidence=fused[label])
            for label in EMOTION_LABELS
        ]
        # __getitem__ as the key avoids the type-ignore that dict.get needs.
        dominant = max(fused, key=fused.__getitem__)
        return FusedDetectionResult(
            dominant=dominant,
            dominant_score=fused[dominant],
            scores=scores,
            face_result=face,
            voice_result=voice,
            text_result=text,
            posture_result=posture,
            modality_weights=weights,
            # Overall confidence: best single-modality confidence, slightly
            # discounted because fusion is still an estimate.
            confidence=max(r.confidence for _, r in available) * 0.95,
            processing_time_ms=(time.perf_counter() - start) * 1000,
        )
class EmotionEngine:
    """Main EmoSphere engine combining all detectors + fusion."""

    def __init__(self, device: str = "cpu"):
        self.device = device
        # One detector per modality, all pinned to the same device.
        self.face = FaceEmotionDetector(device=device)
        self.voice = VoiceEmotionDetector(device=device)
        self.text = TextEmotionDetector(device=device)
        self.posture = PostureEmotionDetector(device=device)
        self.fusion = EmotionFusionEngine()
        self._ready = False

    def initialize(self) -> None:
        """Load all models."""
        banner = "=" * 50
        print(banner)
        print(" EmoSphere Engine — Loading models...")
        print(banner)
        for detector in (self.face, self.voice, self.text, self.posture):
            detector.load()
        self._ready = True
        print(banner)
        print(" All models loaded and ready!")
        print(f" Face: {'transformer' if self.face.pipe else 'simulation'}")
        print(f" Voice: {'transformer' if self.voice.pipe else 'prosodic'}")
        print(f" Text: {self.text.model_type}")
        print(f" Posture: {'mediapipe' if self.posture.pose else 'heuristic'}")
        print(banner)

    def is_ready(self) -> bool:
        """Return True once initialize() has completed."""
        return self._ready

    def models_status(self) -> dict[str, bool]:
        """Return the per-modality `loaded` flag for each detector."""
        detectors = {
            "face": self.face,
            "voice": self.voice,
            "text": self.text,
            "posture": self.posture,
        }
        return {name: detector.loaded for name, detector in detectors.items()}
# Singleton: module-level engine shared by importers. Constructing it builds
# the four detectors (default device "cpu") but does NOT load model weights —
# callers must invoke engine.initialize() before querying readiness or fusing.
engine = EmotionEngine()