""" emotion_engine.py — Dual-Layer Emotional Intelligence for Maya Architecture: Layer 1: Audio emotion via SpeechBrain wav2vec2-IEMOCAP (Acoustic) Layer 2: Text emotion via keyword matching (Semantic) Fusion: Fused reading adapts Maya's LLM prompt and TTS parameters. """ import asyncio import numpy as np import torch import io from dataclasses import dataclass, field from typing import Optional from enum import Enum # ── EMOTION TYPES ──────────────────────────────────────────────────────────── class Emotion(str, Enum): CALM = "calm" HAPPY = "happy" CONFUSED = "confused" URGENT = "urgent" # Pain / Emergency FRUSTRATED = "frustrated" ANGRY = "angry" @dataclass class EmotionResult: emotion: Emotion = Emotion.CALM confidence: float = 0.0 audio_emotion: Optional[str] = None text_emotion: Optional[str] = None triggered_words: list = field(default_factory=list) should_escalate: bool = False @dataclass class ResponseProfile: emotion: Emotion tts_pace: float prompt_suffix: str gujarati_opener: str hindi_opener: str english_opener: str # ── RESPONSE PROFILES ──────────────────────────────────────────────────────── RESPONSE_PROFILES: dict[Emotion, ResponseProfile] = { Emotion.CALM: ResponseProfile( Emotion.CALM, 0.92, "", "", "", "" ), Emotion.HAPPY: ResponseProfile( Emotion.HAPPY, 0.95, "\nThe caller sounds happy. Match their energy with warmth.", "ખૂબ સારું! ", "बहुत अच्छा! ", "Wonderful! " ), Emotion.CONFUSED: ResponseProfile( Emotion.CONFUSED, 0.80, "\nThe caller sounds confused. Use simple language and ask ONE question at a time.", "ભલે, ચિંતા ન કરો. ", "ठीक है, चिंता मत करो। ", "No problem, let me help. " ), Emotion.URGENT: ResponseProfile( Emotion.URGENT, 1.00, "\nCRITICAL: The caller is in pain or has an emergency. Acknowledge this, get their name/phone, and say the doctor will call back IMMEDIATELY. Skip standard booking.", "હું સમજી ગઈ, આ ખૂબ અગત્યની વાત છે. ", "मैं समझ गई, यह बहुत ज़रूरी है। ", "I understand, this sounds urgent. " ), Emotion.FRUSTRATED: ResponseProfile( Emotion.FRUSTRATED, 0.88, "\nThe caller sounds frustrated. Keep responses short (max 2 sentences) and offer to have the owner call them back.", "માફ કરશો, હું સમજી ગઈ. ", "माफ़ करें, मैं समझ गई। ", "I apologize for the inconvenience. " ), Emotion.ANGRY: ResponseProfile( Emotion.ANGRY, 0.82, "\nCRITICAL: The caller is ANGRY. Be extremely apologetic and brief. Say the owner will call them personally within minutes.", "મને ખૂબ જ ખેદ છે. ", "मुझे बहुत खेद है। ", "I sincerely apologize. " ), } # ── TEXT KEYWORDS ──────────────────────────────────────────────────────────── TEXT_KEYWORDS: dict[Emotion, dict[str, list[str]]] = { Emotion.URGENT: { "gujarati": ["દુઃખાવો", "પીડા", "ઇમર્જન્સી", "તાત્કાલિક", "અત્યારે જ", "હમણાં જ", "દુઃખે છે", "તકલીફ"], "hindi": ["दर्द", "तकलीफ", "पीड़ा", "इमरजेंसी", "तुरंत", "अभी", "जल्दी", "अर्जेंट"], "english": ["pain", "emergency", "urgent", "immediately", "hurts", "bleeding", "critical"] }, Emotion.FRUSTRATED: { "gujarati": ["કેમ નથી", "કેટલી વાર", "ફરીથી", "સમજાતું નથી", "બરાબર નથી"], "hindi": ["क्यों नहीं", "कितनी बार", "फिर से", "समझ नहीं", "ठीक नहीं"], "english": ["why not", "how long", "again", "already told", "frustrated"] }, Emotion.ANGRY: { "gujarati": ["ગુસ્સો", "ફરિયાદ", "બેકાર", "ખરાબ", "નકામું"], "hindi": ["गुस्सा", "शिकायत", "बेकार", "घटिया", "बकवास"], "english": ["angry", "complain", "useless", "terrible", "worst"] }, Emotion.HAPPY: { "gujarati": ["આભાર", "ધન્યવાદ", "ઉત્તમ", "ખૂબ સરસ", "સારું"], "hindi": ["धन्यवाद", "शुक्रिया", "बहुत अच्छा", "बढ़िया", "सही है"], "english": ["thanks", "thank you", "great", "perfect", "good"] } } # ── AUDIO EMOTION DETECTOR ─────────────────────────────────────────────────── class AudioEmotionDetector: AUDIO_EMOTION_MAP = { "hap": Emotion.HAPPY, "neu": Emotion.CALM, "ang": Emotion.ANGRY, "sad": Emotion.CONFUSED, "fru": Emotion.FRUSTRATED, "fea": Emotion.URGENT } def __init__(self): self._classifier = None self._loaded = False self._failed = False # Permanent failure flag — avoids retry-on-import-error def _ensure_loaded(self): if self._loaded or self._failed: return try: from speechbrain.inference.interfaces import foreign_class self._classifier = foreign_class( source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP", pymodule_file="custom_interface.py", classname="CustomEncoderWav2vec2Classifier", run_opts={"device": "cpu"} ) self._loaded = True print("[Emotion-Audio] SpeechBrain model loaded.") except Exception as e: self._failed = True print(f"[Emotion-Audio] Model unavailable, falling back to text-only: {e}") def classify(self, audio_bytes: bytes) -> tuple[Emotion, float]: self._ensure_loaded() # Gracefully return CALM if model is unavailable if self._failed or not self._loaded or not audio_bytes: return Emotion.CALM, 0.0 try: audio_np = np.frombuffer(audio_bytes, dtype=np.float32) if len(audio_np) < 8000: return Emotion.CALM, 0.0 audio_tensor = torch.FloatTensor(audio_np).unsqueeze(0) out_prob, score, _, text_lab = self._classifier.classify_batch(audio_tensor) raw_label = text_lab[0].lower().strip() confidence = float(score[0].item()) return self.AUDIO_EMOTION_MAP.get(raw_label, Emotion.CALM), confidence except Exception as e: print(f"[Emotion-Audio] classify error: {e}") return Emotion.CALM, 0.0 # ── MAIN ENGINE ─────────────────────────────────────────────────────────────── class EmotionEngine: def __init__(self): self._audio = AudioEmotionDetector() async def analyze_turn(self, audio: bytes, transcript: str, lang: str) -> EmotionResult: res = EmotionResult() # Text Layer t_lower = transcript.lower() for emo, lang_map in TEXT_KEYWORDS.items(): words = lang_map.get(lang, []) + lang_map.get("english", []) matches = [w for w in words if w in t_lower] if matches: res.text_emotion = emo.value res.triggered_words = matches res.emotion = emo res.confidence = 0.8 break # Audio Layer (Async) if not res.text_emotion: loop = asyncio.get_event_loop() a_emo, a_conf = await loop.run_in_executor(None, self._audio.classify, audio) res.audio_emotion = a_emo.value res.emotion = a_emo res.confidence = a_conf res.should_escalate = res.emotion in [Emotion.ANGRY, Emotion.URGENT] print(f"[Emotion] Detected: {res.emotion.value} (conf={res.confidence:.2f})") return res