Spaces:
Sleeping
Sleeping
| """EmoSphere Fuzzy Fusion Engine — Mamdani fuzzy inference for 9 emotions. | |
| Ported from Hermyon's FuzzyFusionEngine but adapted for EmoSphere's | |
| consumer-grade 9-emotion label set (joy, sadness, surprise, fear, | |
| disgust, anger, neutral, love, calm). | |
| Pipeline: | |
| 1. Fuzzification: map detector scores to 5 membership levels | |
| 2. Rule evaluation: fire agreement/conflict IF-THEN rules | |
| 3. Defuzzification: centroid method to produce crisp scores | |
| 4. Normalization: output valid probability distribution | |
| """ | |
| from __future__ import annotations | |
| from dataclasses import dataclass, field | |
| from typing import Callable, Optional | |
| import numpy as np | |
| from models import ( | |
| EmotionLabel, | |
| EMOTION_LABELS, | |
| EmotionScore, | |
| EmotionDetectionResult, | |
| FusedDetectionResult, | |
| CulturalRegion, | |
| ) | |
| # ===================================================================== | |
| # Trapezoidal Membership Functions (5 levels) | |
| # ===================================================================== | |
# Representative crisp value (centroid) of each fuzzy level, listed from the
# weakest level to the strongest.
LEVEL_CENTROIDS = dict(
    absent=0.0,
    low=0.15,
    moderate=0.35,
    high=0.60,
    very_high=0.85,
)
# Level names in ascending order; taken from the insertion order of the
# centroid table so the two can never drift apart.
FUZZY_LEVELS = list(LEVEL_CENTROIDS)
| def _trapezoid(x: float, a: float, b: float, c: float, d: float) -> float: | |
| """Standard trapezoidal membership function.""" | |
| if x <= a or x >= d: | |
| return 0.0 | |
| if b <= x <= c: | |
| return 1.0 | |
| if a < x < b: | |
| return (x - a) / (b - a) | |
| return (d - x) / (d - c) | |
| def _left_shoulder(x: float, a: float, b: float) -> float: | |
| if x <= a: | |
| return 1.0 | |
| if x >= b: | |
| return 0.0 | |
| return (b - x) / (b - a) | |
| def _right_shoulder(x: float, a: float, b: float) -> float: | |
| if x <= a: | |
| return 0.0 | |
| if x >= b: | |
| return 1.0 | |
| return (x - a) / (b - a) | |
def fuzzify(crisp_value: float) -> dict[str, float]:
    """Map a crisp score to its membership degree in each of the 5 levels.

    Neighbouring levels overlap on the intervals 0.05-0.12, 0.20-0.30,
    0.45-0.55 and 0.70-0.80, so a value inside one of those ranges belongs
    partly to two levels.

    Args:
        crisp_value: Score to fuzzify, expected in ``[0, 1]``.

    Returns:
        Dict with the keys ``absent``, ``low``, ``moderate``, ``high`` and
        ``very_high`` mapped to membership degrees.
    """
    score = crisp_value
    memberships: dict[str, float] = {}
    # The outermost levels are open-ended shoulders; the inner three are
    # full trapezoids.
    memberships["absent"] = _left_shoulder(score, 0.05, 0.12)
    memberships["low"] = _trapezoid(score, 0.05, 0.12, 0.20, 0.30)
    memberships["moderate"] = _trapezoid(score, 0.20, 0.30, 0.45, 0.55)
    memberships["high"] = _trapezoid(score, 0.45, 0.55, 0.70, 0.80)
    memberships["very_high"] = _right_shoulder(score, 0.70, 0.80)
    return memberships
| # ===================================================================== | |
| # Fuzzy Rule Definitions | |
| # ===================================================================== | |
@dataclass
class FuzzyRule:
    """One IF-THEN rule of the fuzzy rule base.

    Declared as a dataclass so that rules can be built with keyword
    arguments (``FuzzyRule(name=..., category=..., ...)``); a plain class
    with bare annotations has no such constructor.
    """

    # Unique rule identifier, e.g. "R01_genuine_joy".
    name: str
    # "agreement" or "conflict".
    category: str
    # (face_fuzzy, voice_fuzzy, text_fuzzy, posture_fuzzy) -> activation.
    # Each argument is a modality's fuzzified output, or None when that
    # modality is unavailable.
    condition: Callable
    # EmotionLabel.value -> (target level name, weight modifier).
    consequent: dict[str, tuple[str, float]]
    # Rules with a larger priority sort first among fired rules.
    priority: int = 1
| def _get(fuzzy_states: Optional[dict], emotion: str, level: str) -> float: | |
| """Safely get a fuzzy membership degree from a modality's fuzzified output.""" | |
| if fuzzy_states is None: | |
| return 0.0 | |
| state = fuzzy_states.get(emotion, {}) | |
| return state.get(level, 0.0) | |
def _above(fuzzy_states: Optional[dict], emotion: str, min_level: str) -> float:
    """Return the strongest membership at ``min_level`` or any higher level.

    Args:
        fuzzy_states: Mapping emotion -> {level -> membership}, or ``None``
            when the modality is unavailable.
        emotion: Emotion key to look up.
        min_level: Lowest level (a ``FUZZY_LEVELS`` entry) to consider.

    Returns:
        The maximum membership over the qualifying levels; 0.0 when the
        modality is unavailable or the emotion is missing.

    Raises:
        ValueError: If ``min_level`` is not one of ``FUZZY_LEVELS``.
    """
    if fuzzy_states is None:
        return 0.0
    memberships = fuzzy_states.get(emotion, {})
    first = FUZZY_LEVELS.index(min_level)
    qualifying = FUZZY_LEVELS[first:]
    return max(memberships.get(level_name, 0.0) for level_name in qualifying)
def _build_rules() -> list[FuzzyRule]:
    """Build the consumer-grade fuzzy rule set for 9 emotions.

    Every rule condition is a callable taking the fuzzified outputs of the
    four modalities, in the order ``(face, voice, text, posture)``, and
    returning an activation strength. A modality argument may be ``None``;
    ``_above`` then contributes 0.0 for it. ``min`` acts as fuzzy AND and
    ``max`` as fuzzy OR.

    Each consequent maps an emotion value to ``(target level, weight
    modifier)``.

    Returns:
        The rules in declaration order (R01 ... R20, with R13/R14/R15
        expanded into one rule per emotion).
    """
    rules: list[FuzzyRule] = []
    # ── AGREEMENT RULES ──────────────────────────────────────────
    # R1: Genuine joy -- face at least "high", voice and text at least
    # "moderate" joy.
    rules.append(FuzzyRule(
        name="R01_genuine_joy",
        category="agreement",
        condition=lambda f, v, t, p: min(
            _above(f, "joy", "high"),
            _above(v, "joy", "moderate"),
            _above(t, "joy", "moderate"),
        ),
        consequent={
            "joy": ("very_high", 1.3),
            "love": ("moderate", 0.8),
        },
        priority=2,
    ))
    # R2: Deep sadness -- confirmed by face, voice and text.
    rules.append(FuzzyRule(
        name="R02_deep_sadness",
        category="agreement",
        condition=lambda f, v, t, p: min(
            _above(f, "sadness", "high"),
            _above(v, "sadness", "moderate"),
            _above(t, "sadness", "moderate"),
        ),
        consequent={
            "sadness": ("very_high", 1.3),
            "neutral": ("absent", 0.3),
        },
        priority=2,
    ))
    # R3: Confirmed fear -- face and voice, plus either posture or text.
    rules.append(FuzzyRule(
        name="R03_confirmed_fear",
        category="agreement",
        condition=lambda f, v, t, p: min(
            _above(f, "fear", "moderate"),
            _above(v, "fear", "moderate"),
            max(_above(p, "fear", "moderate") if p else 0.0,
                _above(t, "fear", "moderate")),
        ),
        consequent={
            "fear": ("very_high", 1.4),
            "calm": ("absent", 0.2),
        },
        priority=2,
    ))
    # R4: Confirmed disgust -- face and voice at least "moderate"; text only
    # needs to reach "low".
    rules.append(FuzzyRule(
        name="R04_confirmed_disgust",
        category="agreement",
        condition=lambda f, v, t, p: min(
            _above(f, "disgust", "moderate"),
            _above(v, "disgust", "moderate"),
            _above(t, "disgust", "low"),
        ),
        consequent={
            "disgust": ("very_high", 1.3),
        },
        priority=2,
    ))
    # R5: Genuine surprise -- face and voice agree (text is not consulted).
    rules.append(FuzzyRule(
        name="R05_genuine_surprise",
        category="agreement",
        condition=lambda f, v, t, p: min(
            _above(f, "surprise", "high"),
            _above(v, "surprise", "moderate"),
        ),
        consequent={
            "surprise": ("very_high", 1.2),
        },
        priority=2,
    ))
    # R6: Deep love -- text shows strong love and the face is gentle (joy or
    # calm). The face term has a floor of 0.2, so the rule can still fire
    # weakly without facial support. Voice is not consulted.
    rules.append(FuzzyRule(
        name="R06_deep_love",
        category="agreement",
        condition=lambda f, v, t, p: min(
            _above(t, "love", "high"),
            max(_above(f, "joy", "low"), _above(f, "calm", "low"), 0.2),
        ),
        consequent={
            "love": ("very_high", 1.3),
            "joy": ("moderate", 0.8),
            "calm": ("moderate", 0.7),
        },
        priority=2,
    ))
    # R7: Deep calm -- face and voice calm; text either calm or neutral.
    rules.append(FuzzyRule(
        name="R07_deep_calm",
        category="agreement",
        condition=lambda f, v, t, p: min(
            _above(f, "calm", "moderate"),
            _above(v, "calm", "moderate"),
            max(_above(t, "calm", "low"), _above(t, "neutral", "moderate")),
        ),
        consequent={
            "calm": ("very_high", 1.2),
            "neutral": ("moderate", 0.7),
        },
        priority=1,
    ))
    # ── CONFLICT RULES ───────────────────────────────────────────
    # R8: Smile masking sadness -- face happy but voice sad.
    rules.append(FuzzyRule(
        name="R08_smile_masking_sadness",
        category="conflict",
        condition=lambda f, v, t, p: min(
            _above(f, "joy", "moderate"),
            _above(v, "sadness", "moderate"),
        ),
        consequent={
            "sadness": ("high", 1.4),
            "joy": ("low", 0.4),
        },
        priority=3,
    ))
    # R9: Suppressed disgust -- face neutral but voice shows disgust; the
    # consequent also raises anger.
    rules.append(FuzzyRule(
        name="R09_suppressed_disgust",
        category="conflict",
        condition=lambda f, v, t, p: min(
            _above(f, "neutral", "high"),
            _above(v, "disgust", "moderate"),
        ),
        consequent={
            "disgust": ("high", 1.5),
            "anger": ("moderate", 1.2),
            "neutral": ("low", 0.3),
        },
        priority=3,
    ))
    # R10: Hidden anxiety -- posture shows fear while the face is neutral.
    # Only the face is checked for neutrality; voice is not consulted.
    rules.append(FuzzyRule(
        name="R10_hidden_anxiety",
        category="conflict",
        condition=lambda f, v, t, p: min(
            _above(p, "fear", "moderate") if p else 0.0,
            _above(f, "neutral", "moderate"),
        ),
        consequent={
            "fear": ("high", 1.4),
            "neutral": ("low", 0.3),
        },
        priority=3,
    ))
    # R11: Social desirability -- face happy (voice joy has a floor of 0.15,
    # so it is effectively optional) but text reveals sadness, fear or
    # disgust.
    rules.append(FuzzyRule(
        name="R11_social_desirability",
        category="conflict",
        condition=lambda f, v, t, p: min(
            _above(f, "joy", "moderate"),
            max(_above(v, "joy", "low"), 0.15),
            max(
                _above(t, "sadness", "moderate"),
                _above(t, "fear", "moderate"),
                _above(t, "disgust", "moderate"),
            ),
        ),
        consequent={
            "sadness": ("high", 1.3),
            "fear": ("moderate", 1.2),
            "joy": ("low", 0.4),
        },
        priority=3,
    ))
    # R12: Confirmed anger -- face plus at least one of voice or text.
    # This is an agreement rule even though it is declared after the
    # conflict rules.
    rules.append(FuzzyRule(
        name="R12_confirmed_anger",
        category="agreement",
        condition=lambda f, v, t, p: min(
            _above(f, "anger", "moderate"),
            max(_above(v, "anger", "moderate"),
                _above(t, "anger", "moderate")),
        ),
        consequent={
            "anger": ("very_high", 1.3),
            "calm": ("absent", 0.2),
        },
        priority=2,
    ))
    # ── VOICE/TEXT OVERRIDE RULES ────────────────────────────────
    # These ensure that voice and text can strongly influence the
    # result even when face shows a different emotion.
    # R13: Voice-driven emotion -- fires (scaled by 0.8) only when the
    # voice's "high"-or-above membership exceeds the face's "low"-or-above
    # membership for the same emotion. `e=emo` binds the loop variable at
    # definition time so each lambda keeps its own emotion.
    for emo in ["joy", "sadness", "fear", "surprise", "anger", "disgust"]:
        rules.append(FuzzyRule(
            name=f"R13_voice_{emo}",
            category="agreement",
            condition=lambda f, v, t, p, e=emo: (
                _above(v, e, "high") * 0.8
                if _above(v, e, "high") > _above(f, e, "low")
                else 0.0
            ),
            consequent={
                emo: ("high", 1.2),
            },
            priority=2,
        ))
    # R14: Text/speech-driven emotion -- same shape as R13 but driven by
    # text, and additionally covering "love".
    for emo in ["joy", "sadness", "fear", "surprise", "anger", "disgust", "love"]:
        rules.append(FuzzyRule(
            name=f"R14_text_{emo}",
            category="agreement",
            condition=lambda f, v, t, p, e=emo: (
                _above(t, e, "high") * 0.8
                if _above(t, e, "high") > _above(f, e, "low")
                else 0.0
            ),
            consequent={
                emo: ("high", 1.2),
            },
            priority=2,
        ))
    # R15: Posture-driven emotion -- posture strongly indicates the emotion
    # (scaled by 0.7); never fires when posture is unavailable.
    for emo in ["fear", "anger", "sadness", "calm"]:
        rules.append(FuzzyRule(
            name=f"R15_posture_{emo}",
            category="agreement",
            condition=lambda f, v, t, p, e=emo: (
                _above(p, e, "high") * 0.7 if p else 0.0
            ),
            consequent={
                emo: ("moderate", 1.1),
            },
            priority=1,
        ))
    # R16: Face contradicted -- face shows strong joy but voice and text
    # together indicate sadness, anger or fear. This dampens joy.
    # NOTE(review): the consequent raises sadness even when the
    # contradiction comes from anger or fear -- confirm this is intended.
    rules.append(FuzzyRule(
        name="R16_face_contradicted_joy",
        category="conflict",
        condition=lambda f, v, t, p: min(
            _above(f, "joy", "high"),
            max(
                min(_above(v, "sadness", "moderate"), _above(t, "sadness", "low")),
                min(_above(v, "anger", "moderate"), _above(t, "anger", "low")),
                min(_above(v, "fear", "moderate"), _above(t, "fear", "low")),
            ),
        ),
        consequent={
            "joy": ("low", 0.3),
            "sadness": ("moderate", 1.2),
        },
        priority=3,
    ))
    # R17: Face contradicted -- face strongly neutral but voice (sadness,
    # anger, fear) or text (sadness, anger) shows a strong negative emotion.
    # Only lowers neutral; text fear is not part of the condition.
    rules.append(FuzzyRule(
        name="R17_face_contradicted_neutral",
        category="conflict",
        condition=lambda f, v, t, p: min(
            _above(f, "neutral", "high"),
            max(
                _above(v, "sadness", "high"),
                _above(v, "anger", "high"),
                _above(v, "fear", "high"),
                _above(t, "sadness", "high"),
                _above(t, "anger", "high"),
            ),
        ),
        consequent={
            "neutral": ("low", 0.3),
        },
        priority=3,
    ))
    # R18-R20: Voice and text agree -- trust them. The face is not consulted
    # by these conditions, and the activation is scaled by 0.9.
    rules.append(FuzzyRule(
        name="R18_voice_text_agree_surprise",
        category="agreement",
        condition=lambda f, v, t, p: min(
            _above(v, "surprise", "moderate"),
            _above(t, "surprise", "low"),
        ) * 0.9,
        consequent={
            "surprise": ("high", 1.3),
        },
        priority=2,
    ))
    rules.append(FuzzyRule(
        name="R19_voice_text_agree_sadness",
        category="agreement",
        condition=lambda f, v, t, p: min(
            _above(v, "sadness", "moderate"),
            _above(t, "sadness", "low"),
        ) * 0.9,
        consequent={
            "sadness": ("high", 1.3),
        },
        priority=2,
    ))
    rules.append(FuzzyRule(
        name="R20_voice_text_agree_anger",
        category="agreement",
        condition=lambda f, v, t, p: min(
            _above(v, "anger", "moderate"),
            _above(t, "anger", "low"),
        ) * 0.9,
        consequent={
            "anger": ("high", 1.3),
        },
        priority=2,
    ))
    return rules
| # ===================================================================== | |
| # Defuzzifier (Centroid Method) | |
| # ===================================================================== | |
def _defuzzify_centroid(
    base_fuzzy: dict[str, dict[str, float]],
    fired_rules: list[tuple[FuzzyRule, float]],
    base_crisp: dict[EmotionLabel, float],
) -> dict[EmotionLabel, float]:
    """Turn the baseline scores plus fired rules into a normalized distribution.

    For each emotion the score is built in three steps:

    1. Start from the baseline crisp score.
    2. If any fired rule targets the emotion, blend towards the largest
       ``centroid * activation * weight`` target among those rules, with a
       blend factor of ``min(activation * 1.3, 1.0)``.
    3. Mix in 20% of the centre of gravity of the baseline fuzzy memberships.

    Negative scores are clipped to 0 and the result is scaled to sum to 1.

    Args:
        base_fuzzy: Emotion value -> {level -> membership} for the baseline.
        fired_rules: ``(rule, activation)`` pairs of the rules that fired.
        base_crisp: Baseline crisp score per emotion label.

    Returns:
        Score per emotion label summing to 1; a uniform distribution when
        every score is 0.
    """
    # Per emotion value: (largest effective target, activation of its rule).
    winners: dict[str, tuple[float, float]] = {}
    for rule, strength in fired_rules:
        for emotion_key, (level_name, modifier) in rule.consequent.items():
            candidate = LEVEL_CENTROIDS.get(level_name, 0.35) * strength * modifier
            incumbent = winners.get(emotion_key)
            if incumbent is None or candidate > incumbent[0]:
                winners[emotion_key] = (candidate, strength)

    fused: dict[EmotionLabel, float] = {}
    for label in EMOTION_LABELS:
        key = label.value
        score = base_crisp.get(label, 0.0)

        # Rule blending -- rules are the primary decision mechanism.
        winner = winners.get(key)
        if winner is not None:
            target, strength = winner
            blend = min(strength * 1.3, 1.0)
            score = (1.0 - blend) * score + blend * target

        # Centroid refinement from the baseline memberships (lighter touch).
        weighted = 0.0
        mass = 0.0
        for level_name, mu in base_fuzzy.get(key, {}).items():
            if mu > 0 and level_name in LEVEL_CENTROIDS:
                weighted += mu * LEVEL_CENTROIDS[level_name]
                mass += mu
        if mass > 0:
            score = 0.8 * score + 0.2 * (weighted / mass)

        fused[label] = max(score, 0.0)

    # Normalize to a probability distribution.
    total = sum(fused.values())
    if total > 0:
        return {label: score / total for label, score in fused.items()}
    uniform = 1.0 / len(EMOTION_LABELS)
    return {label: uniform for label in EMOTION_LABELS}
| # ===================================================================== | |
| # FuzzyFusionEngine | |
| # ===================================================================== | |
class FuzzyFusionEngine:
    """Mamdani-style fuzzy inference for EmoSphere's 9 emotions.

    Replaces simple weighted averaging with fuzzy rule-based fusion that
    can detect cross-modal agreement, conflict, and masking patterns.

    Modality weights (equal contribution when all active):
        face: 0.25, voice: 0.25, text: 0.25, posture: 0.25

    ``fuse`` gives each supplied modality a weight of ``1 / n``, which
    matches ``BASE_WEIGHTS`` when all four are present. The fuzzy rules are
    the primary decision mechanism -- these weights only provide the initial
    crisp baseline that rules then modify.

    The engine remembers the names of the rules fired by the most recent
    ``fuse`` call; see ``fired_rule_names``.
    """

    # Nominal weights with all four modalities active. Kept as a public
    # constant; ``fuse`` computes its own equal weights and does not read it.
    BASE_WEIGHTS = {
        "face": 0.25,
        "voice": 0.25,
        "text": 0.25,
        "posture": 0.25,
    }

    def __init__(self):
        """Build the rule base and start with an empty fired-rule record."""
        self.rules = _build_rules()
        self._last_fired_rule_names: list[str] = []

    def fuse(
        self,
        face: Optional[EmotionDetectionResult] = None,
        voice: Optional[EmotionDetectionResult] = None,
        text: Optional[EmotionDetectionResult] = None,
        posture: Optional[EmotionDetectionResult] = None,
    ) -> FusedDetectionResult:
        """Fuse available modality results using fuzzy inference.

        Args:
            face: Face detector result, or ``None`` if unavailable.
            voice: Voice detector result, or ``None`` if unavailable.
            text: Text detector result, or ``None`` if unavailable.
            posture: Posture detector result, or ``None`` if unavailable.

        Returns:
            The fused result. With no modality supplied, a fully neutral
            result with confidence 0.0 is returned.
        """
        import logging
        import time

        # perf_counter is monotonic, so the elapsed time cannot go negative
        # or jump when the wall clock is adjusted.
        start = time.perf_counter()
        self._last_fired_rule_names = []

        candidates = (
            ("face", face),
            ("voice", voice),
            ("text", text),
            ("posture", posture),
        )
        available: list[tuple[str, EmotionDetectionResult]] = [
            (mod_name, result) for mod_name, result in candidates if result
        ]
        if not available:
            neutral_scores = [
                EmotionScore(label=label, score=1.0 if label == EmotionLabel.NEUTRAL else 0.0, confidence=0.0)
                for label in EMOTION_LABELS
            ]
            return FusedDetectionResult(
                dominant=EmotionLabel.NEUTRAL,
                dominant_score=1.0,
                scores=neutral_scores,
                modality_weights={},
                confidence=0.0,
                processing_time_ms=0.0,
            )

        # -- Step 1: Extract score dicts from each modality --
        modality_scores: dict[str, dict[EmotionLabel, float]] = {
            mod_name: {s.label: s.score for s in result.scores}
            for mod_name, result in available
        }

        # -- Step 2: Equal weights for all available modalities --
        n = len(modality_scores)
        weights: dict[str, float] = {mod: 1.0 / n for mod in modality_scores}

        # -- Step 3: Weighted baseline blend --
        base_crisp: dict[EmotionLabel, float] = {label: 0.0 for label in EMOTION_LABELS}
        for mod_name, mod_scores in modality_scores.items():
            w = weights.get(mod_name, 0.0)
            for label in EMOTION_LABELS:
                base_crisp[label] += mod_scores.get(label, 0.0) * w

        # -- Step 4: Fuzzification (per modality and for the baseline) --
        modality_fuzzy: dict[str, dict[str, dict[str, float]]] = {
            mod_name: {
                label.value: fuzzify(mod_scores.get(label, 0.0))
                for label in EMOTION_LABELS
            }
            for mod_name, mod_scores in modality_scores.items()
        }
        base_fuzzy: dict[str, dict[str, float]] = {
            label.value: fuzzify(base_crisp[label]) for label in EMOTION_LABELS
        }

        # -- Step 5: Rule evaluation --
        face_fuzzy = modality_fuzzy.get("face")
        voice_fuzzy = modality_fuzzy.get("voice")
        text_fuzzy = modality_fuzzy.get("text")
        posture_fuzzy = modality_fuzzy.get("posture")
        fired_rules: list[tuple[FuzzyRule, float]] = []
        for rule in self.rules:
            try:
                activation = rule.condition(face_fuzzy, voice_fuzzy, text_fuzzy, posture_fuzzy)
                activation = max(0.0, min(1.0, float(activation)))
            except (TypeError, KeyError, ValueError) as exc:
                # Best effort: one broken rule must not abort the fusion,
                # but leave a trace so the failure is diagnosable.
                logging.getLogger(__name__).debug(
                    "Fuzzy rule %s failed to evaluate: %s", rule.name, exc
                )
                continue
            # Ignore negligible activations.
            if activation > 0.05:
                fired_rules.append((rule, activation))
        fired_rules.sort(key=lambda x: (x[0].priority, x[1]), reverse=True)
        self._last_fired_rule_names = [rule.name for rule, _ in fired_rules]

        # -- Step 6: Defuzzification --
        fused = _defuzzify_centroid(base_fuzzy, fired_rules, base_crisp)

        # -- Build result --
        fused_scores = [
            EmotionScore(label=label, score=fused[label], confidence=fused[label])
            for label in EMOTION_LABELS
        ]
        dominant = max(fused, key=fused.get)  # type: ignore
        return FusedDetectionResult(
            dominant=dominant,
            dominant_score=fused[dominant],
            scores=fused_scores,
            face_result=face,
            voice_result=voice,
            text_result=text,
            posture_result=posture,
            modality_weights=weights,
            confidence=max(r.confidence for _, r in available) * 0.95,
            processing_time_ms=(time.perf_counter() - start) * 1000,
        )

    def fired_rule_names(self) -> list[str]:
        """Return the names of the rules fired by the most recent ``fuse`` call.

        The names are ordered by descending priority, then descending
        activation. The list is empty before the first fusion and after a
        fusion in which no rule fired or no modality was supplied. A copy is
        returned, so callers may modify it freely.
        """
        return list(getattr(self, "_last_fired_rule_names", []))

    def num_rules(self) -> int:
        """Return the number of rules in the rule base."""
        return len(self.rules)