Spaces:
Running
Running
Upload emotion_engine.py with huggingface_hub
Browse files- emotion_engine.py +16 -6
emotion_engine.py
CHANGED
|
@@ -20,21 +20,24 @@ from models import (
|
|
| 20 |
from face_detector import FaceEmotionDetector
|
| 21 |
from voice_detector import VoiceEmotionDetector
|
| 22 |
from text_detector import TextEmotionDetector
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
class EmotionFusionEngine:
|
| 26 |
-
"""Weighted average fusion of face + voice + text modalities.
|
| 27 |
|
| 28 |
Weights adapt based on modality confidence:
|
| 29 |
-
face:
|
| 30 |
-
voice:
|
| 31 |
-
text:
|
|
|
|
| 32 |
"""
|
| 33 |
|
| 34 |
BASE_WEIGHTS = {
|
| 35 |
-
"face": 0.
|
| 36 |
-
"voice": 0.
|
| 37 |
"text": 0.20,
|
|
|
|
| 38 |
}
|
| 39 |
|
| 40 |
def fuse(
|
|
@@ -42,6 +45,7 @@ class EmotionFusionEngine:
|
|
| 42 |
face: Optional[EmotionDetectionResult] = None,
|
| 43 |
voice: Optional[EmotionDetectionResult] = None,
|
| 44 |
text: Optional[EmotionDetectionResult] = None,
|
|
|
|
| 45 |
) -> FusedDetectionResult:
|
| 46 |
"""Fuse available modality results."""
|
| 47 |
start = time.time()
|
|
@@ -50,6 +54,7 @@ class EmotionFusionEngine:
|
|
| 50 |
if face: available.append(("face", face))
|
| 51 |
if voice: available.append(("voice", voice))
|
| 52 |
if text: available.append(("text", text))
|
|
|
|
| 53 |
|
| 54 |
if not available:
|
| 55 |
neutral_scores = [
|
|
@@ -95,6 +100,7 @@ class EmotionFusionEngine:
|
|
| 95 |
face_result=face,
|
| 96 |
voice_result=voice,
|
| 97 |
text_result=text,
|
|
|
|
| 98 |
modality_weights=weights,
|
| 99 |
confidence=max(r.confidence for _, r in available) * 0.95,
|
| 100 |
processing_time_ms=(time.time() - start) * 1000,
|
|
@@ -109,6 +115,7 @@ class EmotionEngine:
|
|
| 109 |
self.face = FaceEmotionDetector(device=device)
|
| 110 |
self.voice = VoiceEmotionDetector(device=device)
|
| 111 |
self.text = TextEmotionDetector(device=device)
|
|
|
|
| 112 |
self.fusion = EmotionFusionEngine()
|
| 113 |
self._ready = False
|
| 114 |
|
|
@@ -120,12 +127,14 @@ class EmotionEngine:
|
|
| 120 |
self.face.load()
|
| 121 |
self.voice.load()
|
| 122 |
self.text.load()
|
|
|
|
| 123 |
self._ready = True
|
| 124 |
print("=" * 50)
|
| 125 |
print(" All models loaded and ready!")
|
| 126 |
print(f" Face: {'transformer' if self.face.pipe else 'simulation'}")
|
| 127 |
print(f" Voice: {'transformer' if self.voice.pipe else 'prosodic'}")
|
| 128 |
print(f" Text: {self.text.model_type}")
|
|
|
|
| 129 |
print("=" * 50)
|
| 130 |
|
| 131 |
@property
|
|
@@ -138,6 +147,7 @@ class EmotionEngine:
|
|
| 138 |
"face": self.face.loaded,
|
| 139 |
"voice": self.voice.loaded,
|
| 140 |
"text": self.text.loaded,
|
|
|
|
| 141 |
}
|
| 142 |
|
| 143 |
|
|
|
|
| 20 |
from face_detector import FaceEmotionDetector
|
| 21 |
from voice_detector import VoiceEmotionDetector
|
| 22 |
from text_detector import TextEmotionDetector
|
| 23 |
+
from posture_detector import PostureEmotionDetector
|
| 24 |
|
| 25 |
|
| 26 |
class EmotionFusionEngine:
|
| 27 |
+
"""Weighted average fusion of face + voice + text + posture modalities.
|
| 28 |
|
| 29 |
Weights adapt based on modality confidence:
|
| 30 |
+
face: 0.35 (most informative for basic emotions)
|
| 31 |
+
voice: 0.25 (prosody reveals emotion intensity)
|
| 32 |
+
text: 0.20 (semantic content)
|
| 33 |
+
posture: 0.20 (body language and gestures)
|
| 34 |
"""
|
| 35 |
|
| 36 |
BASE_WEIGHTS = {
|
| 37 |
+
"face": 0.35,
|
| 38 |
+
"voice": 0.25,
|
| 39 |
"text": 0.20,
|
| 40 |
+
"posture": 0.20,
|
| 41 |
}
|
| 42 |
|
| 43 |
def fuse(
|
|
|
|
| 45 |
face: Optional[EmotionDetectionResult] = None,
|
| 46 |
voice: Optional[EmotionDetectionResult] = None,
|
| 47 |
text: Optional[EmotionDetectionResult] = None,
|
| 48 |
+
posture: Optional[EmotionDetectionResult] = None,
|
| 49 |
) -> FusedDetectionResult:
|
| 50 |
"""Fuse available modality results."""
|
| 51 |
start = time.time()
|
|
|
|
| 54 |
if face: available.append(("face", face))
|
| 55 |
if voice: available.append(("voice", voice))
|
| 56 |
if text: available.append(("text", text))
|
| 57 |
+
if posture: available.append(("posture", posture))
|
| 58 |
|
| 59 |
if not available:
|
| 60 |
neutral_scores = [
|
|
|
|
| 100 |
face_result=face,
|
| 101 |
voice_result=voice,
|
| 102 |
text_result=text,
|
| 103 |
+
posture_result=posture,
|
| 104 |
modality_weights=weights,
|
| 105 |
confidence=max(r.confidence for _, r in available) * 0.95,
|
| 106 |
processing_time_ms=(time.time() - start) * 1000,
|
|
|
|
| 115 |
self.face = FaceEmotionDetector(device=device)
|
| 116 |
self.voice = VoiceEmotionDetector(device=device)
|
| 117 |
self.text = TextEmotionDetector(device=device)
|
| 118 |
+
self.posture = PostureEmotionDetector(device=device)
|
| 119 |
self.fusion = EmotionFusionEngine()
|
| 120 |
self._ready = False
|
| 121 |
|
|
|
|
| 127 |
self.face.load()
|
| 128 |
self.voice.load()
|
| 129 |
self.text.load()
|
| 130 |
+
self.posture.load()
|
| 131 |
self._ready = True
|
| 132 |
print("=" * 50)
|
| 133 |
print(" All models loaded and ready!")
|
| 134 |
print(f" Face: {'transformer' if self.face.pipe else 'simulation'}")
|
| 135 |
print(f" Voice: {'transformer' if self.voice.pipe else 'prosodic'}")
|
| 136 |
print(f" Text: {self.text.model_type}")
|
| 137 |
+
print(f" Posture: {'mediapipe' if self.posture.pose else 'heuristic'}")
|
| 138 |
print("=" * 50)
|
| 139 |
|
| 140 |
@property
|
|
|
|
| 147 |
"face": self.face.loaded,
|
| 148 |
"voice": self.voice.loaded,
|
| 149 |
"text": self.text.loaded,
|
| 150 |
+
"posture": self.posture.loaded,
|
| 151 |
}
|
| 152 |
|
| 153 |
|