Spaces:

chariscait
/

EmoSphere

Running

App Files Community

chariscait committed on Apr 13

Commit

ed1c3e4

verified ·

1 Parent(s): 7f331f4

Upload emotion_engine.py with huggingface_hub

Browse files

Files changed (1) hide show

emotion_engine.py +16 -6

emotion_engine.py CHANGED Viewed

@@ -20,21 +20,24 @@ from models import (
 from face_detector import FaceEmotionDetector
 from voice_detector import VoiceEmotionDetector
 from text_detector import TextEmotionDetector
 class EmotionFusionEngine:
-    """Weighted average fusion of face + voice + text modalities.
     Weights adapt based on modality confidence:
-      face:  0.45 (most informative for basic emotions)
-      voice: 0.35 (prosody reveals emotion intensity)
-      text:  0.20 (semantic content)
     """
     BASE_WEIGHTS = {
-        "face": 0.45,
-        "voice": 0.35,
         "text": 0.20,
     }
     def fuse(
@@ -42,6 +45,7 @@ class EmotionFusionEngine:
         face: Optional[EmotionDetectionResult] = None,
         voice: Optional[EmotionDetectionResult] = None,
         text: Optional[EmotionDetectionResult] = None,
     ) -> FusedDetectionResult:
         """Fuse available modality results."""
         start = time.time()
@@ -50,6 +54,7 @@ class EmotionFusionEngine:
         if face: available.append(("face", face))
         if voice: available.append(("voice", voice))
         if text: available.append(("text", text))
         if not available:
             neutral_scores = [
@@ -95,6 +100,7 @@ class EmotionFusionEngine:
             face_result=face,
             voice_result=voice,
             text_result=text,
             modality_weights=weights,
             confidence=max(r.confidence for _, r in available) * 0.95,
             processing_time_ms=(time.time() - start) * 1000,
@@ -109,6 +115,7 @@ class EmotionEngine:
         self.face = FaceEmotionDetector(device=device)
         self.voice = VoiceEmotionDetector(device=device)
         self.text = TextEmotionDetector(device=device)
         self.fusion = EmotionFusionEngine()
         self._ready = False
@@ -120,12 +127,14 @@ class EmotionEngine:
         self.face.load()
         self.voice.load()
         self.text.load()
         self._ready = True
         print("=" * 50)
         print("  All models loaded and ready!")
         print(f"  Face: {'transformer' if self.face.pipe else 'simulation'}")
         print(f"  Voice: {'transformer' if self.voice.pipe else 'prosodic'}")
         print(f"  Text: {self.text.model_type}")
         print("=" * 50)
     @property
@@ -138,6 +147,7 @@ class EmotionEngine:
             "face": self.face.loaded,
             "voice": self.voice.loaded,
             "text": self.text.loaded,
         }

 from face_detector import FaceEmotionDetector
 from voice_detector import VoiceEmotionDetector
 from text_detector import TextEmotionDetector
+from posture_detector import PostureEmotionDetector
 class EmotionFusionEngine:
+    """Weighted average fusion of face + voice + text + posture modalities.
     Weights adapt based on modality confidence:
+      face:    0.35 (most informative for basic emotions)
+      voice:   0.25 (prosody reveals emotion intensity)
+      text:    0.20 (semantic content)
+      posture: 0.20 (body language and gestures)
     """
     BASE_WEIGHTS = {
+        "face": 0.35,
+        "voice": 0.25,
         "text": 0.20,
+        "posture": 0.20,
     }
     def fuse(
         face: Optional[EmotionDetectionResult] = None,
         voice: Optional[EmotionDetectionResult] = None,
         text: Optional[EmotionDetectionResult] = None,
+        posture: Optional[EmotionDetectionResult] = None,
     ) -> FusedDetectionResult:
         """Fuse available modality results."""
         start = time.time()
         if face: available.append(("face", face))
         if voice: available.append(("voice", voice))
         if text: available.append(("text", text))
+        if posture: available.append(("posture", posture))
         if not available:
             neutral_scores = [
             face_result=face,
             voice_result=voice,
             text_result=text,
+            posture_result=posture,
             modality_weights=weights,
             confidence=max(r.confidence for _, r in available) * 0.95,
             processing_time_ms=(time.time() - start) * 1000,
         self.face = FaceEmotionDetector(device=device)
         self.voice = VoiceEmotionDetector(device=device)
         self.text = TextEmotionDetector(device=device)
+        self.posture = PostureEmotionDetector(device=device)
         self.fusion = EmotionFusionEngine()
         self._ready = False
         self.face.load()
         self.voice.load()
         self.text.load()
+        self.posture.load()
         self._ready = True
         print("=" * 50)
         print("  All models loaded and ready!")
         print(f"  Face: {'transformer' if self.face.pipe else 'simulation'}")
         print(f"  Voice: {'transformer' if self.voice.pipe else 'prosodic'}")
         print(f"  Text: {self.text.model_type}")
+        print(f"  Posture: {'mediapipe' if self.posture.pose else 'heuristic'}")
         print("=" * 50)
     @property
             "face": self.face.loaded,
             "voice": self.voice.loaded,
             "text": self.text.loaded,
+            "posture": self.posture.loaded,
         }