"""Text Emotion Detector — Real inference using DistilBERT/BERT. Two modes: 1. HuggingFace text-classification pipeline (accurate, needs model download) 2. Keyword + emoji lexicon analysis (fast, no dependencies, instant) Both run locally. No data sent anywhere. """ from __future__ import annotations import re import time from typing import Optional import numpy as np try: from transformers import pipeline HAS_TRANSFORMERS = True except ImportError: HAS_TRANSFORMERS = False from models import ( EmotionLabel, EMOTION_LABELS, EmotionScore, EmotionDetectionResult, CulturalRegion, ) # ── Text emotion models (HuggingFace, public) ─────────────────────── TEXT_MODELS = [ "j-hartmann/emotion-english-distilroberta-base", # 7-class, excellent "SamLowe/roberta-base-go_emotions", # 28-class GoEmotions "bhadresh-savani/distilbert-base-uncased-emotion", # 6-class ] # GoEmotions → EmoSphere mapping (for multi-label models) GOEMOTIONS_MAP = { "admiration": EmotionLabel.LOVE, "amusement": EmotionLabel.JOY, "anger": EmotionLabel.ANGER, "annoyance": EmotionLabel.ANGER, "approval": EmotionLabel.JOY, "caring": EmotionLabel.LOVE, "confusion": EmotionLabel.SURPRISE, "curiosity": EmotionLabel.SURPRISE, "desire": EmotionLabel.LOVE, "disappointment": EmotionLabel.SADNESS, "disapproval": EmotionLabel.DISGUST, "disgust": EmotionLabel.DISGUST, "embarrassment": EmotionLabel.FEAR, "excitement": EmotionLabel.JOY, "fear": EmotionLabel.FEAR, "gratitude": EmotionLabel.LOVE, "grief": EmotionLabel.SADNESS, "joy": EmotionLabel.JOY, "love": EmotionLabel.LOVE, "nervousness": EmotionLabel.FEAR, "optimism": EmotionLabel.JOY, "pride": EmotionLabel.JOY, "realization": EmotionLabel.SURPRISE, "relief": EmotionLabel.CALM, "remorse": EmotionLabel.SADNESS, "sadness": EmotionLabel.SADNESS, "surprise": EmotionLabel.SURPRISE, "neutral": EmotionLabel.NEUTRAL, } # Simple 6/7-class model mapping SIMPLE_MAP = { "anger": EmotionLabel.ANGER, "angry": EmotionLabel.ANGER, "disgust": EmotionLabel.DISGUST, "fear": EmotionLabel.FEAR, "happy": EmotionLabel.JOY, "joy": EmotionLabel.JOY, "love": EmotionLabel.LOVE, "sad": EmotionLabel.SADNESS, "sadness": EmotionLabel.SADNESS, "surprise": EmotionLabel.SURPRISE, "neutral": EmotionLabel.NEUTRAL, } # ── Keyword Lexicons ───────────────────────────────────────────────── KEYWORDS: dict[EmotionLabel, list[str]] = { EmotionLabel.JOY: [ "happy", "glad", "excited", "wonderful", "great", "amazing", "awesome", "fantastic", "yay", "smile", "laugh", "fun", "enjoy", "pleased", "delighted", "cheerful", "thrilled", "blessed", "grateful", "ecstatic", "brilliant", "perfect", "excellent", "magnificent", "joyful", "elated", "overjoyed", "euphoric", "blissful", "merry", "jubilant", "lively", ], EmotionLabel.SADNESS: [ "sad", "unhappy", "depressed", "lonely", "miss", "cry", "tears", "heartbreak", "sorry", "grief", "loss", "disappointed", "miserable", "gloomy", "melancholy", "devastated", "hopeless", "pain", "hurt", "sorrow", "mourning", "regret", "aching", "broken", "empty", "despair", "forlorn", "downcast", "dejected", "somber", ], EmotionLabel.SURPRISE: [ "wow", "omg", "surprised", "unexpected", "shocking", "unbelievable", "incredible", "suddenly", "whoa", "astonished", "stunned", "remarkable", "extraordinary", "unforeseen", "startled", "amazed", ], EmotionLabel.FEAR: [ "afraid", "scared", "worried", "anxious", "nervous", "terrified", "panic", "dread", "uneasy", "concern", "fearful", "frightened", "stressed", "overwhelmed", "tense", "apprehensive", "alarmed", "phobia", "nightmare", "horror", "creepy", "threatening", ], EmotionLabel.DISGUST: [ "gross", "disgusting", "horrible", "terrible", "awful", "nasty", "repulsive", "yuck", "ugh", "revolting", "sick", "unpleasant", "vile", "offensive", "repugnant", "loathsome", "ghastly", ], EmotionLabel.ANGER: [ "angry", "furious", "annoyed", "frustrated", "rage", "mad", "irritated", "outraged", "livid", "hostile", "enraged", "infuriated", "aggravated", "resentful", "bitter", "hate", "fury", "wrath", "temper", "irate", ], EmotionLabel.NEUTRAL: [ "okay", "fine", "alright", "normal", "regular", "usual", "average", "standard", "nothing", "so-so", "meh", "whatever", "indifferent", ], EmotionLabel.LOVE: [ "love", "adore", "cherish", "darling", "sweetheart", "heart", "romantic", "affection", "caring", "tender", "passion", "beloved", "soulmate", "dear", "treasure", "devotion", "embrace", "kiss", "hug", "warmth", "intimate", "partner", "together", "forever", ], EmotionLabel.CALM: [ "calm", "peaceful", "relaxed", "serene", "tranquil", "zen", "mindful", "quiet", "gentle", "soothing", "meditate", "breathe", "harmony", "still", "content", "composed", "balanced", "centered", "grounded", "patient", "ease", "restful", "untroubled", ], } # ── Multilingual Keywords (Greek, Spanish, French, German, etc.) ───── MULTILINGUAL_KEYWORDS: dict[EmotionLabel, list[str]] = { EmotionLabel.JOY: [ # Greek "χαρά", "χαρούμενος", "χαρούμενη", "ευτυχισμένος", "ευτυχισμένη", "ευτυχία", "χαίρομαι", "υπέροχα", "τέλεια", "φανταστικά", "γέλιο", "γελάω", "χαμογελώ", "χαμόγελο", "ωραία", "εξαιρετικά", # Spanish "feliz", "alegre", "contento", "maravilloso", "genial", "risa", # French "heureux", "heureuse", "joie", "magnifique", "formidable", # German "glücklich", "froh", "wunderbar", "fantastisch", "freude", ], EmotionLabel.SADNESS: [ # Greek "λυπημένος", "λυπημένη", "λύπη", "στεναχωρημένος", "στεναχώρια", "κλαίω", "δάκρυα", "πόνος", "μοναξιά", "μόνος", "μόνη", "θλίψη", "απογοητευμένος", "δυστυχισμένος", "απελπισία", # Spanish "triste", "tristeza", "llorar", "dolor", "soledad", # French "triste", "tristesse", "pleurer", "douleur", "chagrin", ], EmotionLabel.SURPRISE: [ # Greek "έκπληξη", "εκπληκτικό", "εκπληκτικός", "εκπληκτική", "εκπλήσσομαι", "απίστευτο", "αναπάντεχο", "ξαφνικά", "δεν το περίμενα", "σοκ", "εντυπωσιακό", "παράξενο", "εκπληκτη", # Spanish "sorpresa", "sorprendido", "increíble", "inesperado", # French "surprise", "surpris", "incroyable", "inattendu", ], EmotionLabel.FEAR: [ # Greek "φόβος", "φοβάμαι", "τρομαγμένος", "τρομαγμένη", "ανησυχία", "ανήσυχος", "αγχωμένος", "άγχος", "πανικός", "τρόμος", "φοβερό", "ανησυχώ", "στρες", # Spanish "miedo", "asustado", "nervioso", "ansiedad", "pánico", # French "peur", "effrayé", "anxieux", "angoisse", "panique", ], EmotionLabel.ANGER: [ # Greek "θυμός", "θυμωμένος", "θυμωμένη", "εκνευρισμένος", "εκνευρισμένη", "οργή", "εξοργισμένος", "νεύρα", "μίσος", "μισώ", "αγανακτισμένος", "εξαγριωμένος", "τσαντίλα", # Spanish "enojado", "furioso", "rabia", "odio", "ira", # French "colère", "furieux", "enragé", "haine", "irrité", ], EmotionLabel.DISGUST: [ # Greek "αηδία", "αηδιαστικό", "αποκρουστικό", "φρικτό", "απαίσιο", "σιχαμερό", "αρρωστημένο", "χάλια", # Spanish "asco", "asqueroso", "repugnante", "horrible", # French "dégoût", "dégoûtant", "horrible", "répugnant", ], EmotionLabel.LOVE: [ # Greek "αγάπη", "αγαπώ", "αγαπημένος", "αγαπημένη", "ερωτευμένος", "ερωτευμένη", "τρυφερότητα", "αγκαλιά", "φιλί", "καρδιά", "λατρεύω", "στοργή", "αφοσίωση", # Spanish "amor", "te quiero", "cariño", "corazón", "ternura", # French "amour", "aimer", "tendresse", "coeur", "chéri", ], EmotionLabel.CALM: [ # Greek "ηρεμία", "ήρεμος", "ήρεμη", "χαλαρός", "χαλαρή", "γαλήνη", "ήσυχος", "ειρηνικός", "ξεκούραση", "ψυχραιμία", # Spanish "calma", "tranquilo", "relajado", "sereno", "paz", # French "calme", "tranquille", "détendu", "serein", "paix", ], EmotionLabel.NEUTRAL: [ # Greek "εντάξει", "μια χαρά", "κανονικά", "συνήθως", "απλά", "τίποτα", "ουδέτερο", # Spanish "bien", "normal", "regular", # French "bien", "normal", "ordinaire", ], } # Emoji patterns EMOJI_PATTERNS: dict[EmotionLabel, re.Pattern] = { EmotionLabel.JOY: re.compile(r'[\U0001F600-\U0001F606\U0001F609\U0001F60A\U0001F60B\U0001F60E\U0001F929\U0001F973\U0001F389\U0001F38A]'), EmotionLabel.SADNESS: re.compile(r'[\U0001F622\U0001F62D\U0001F61E\U0001F614\U0001F494\U0001F63F\U0001F97A\U0001F629]'), EmotionLabel.SURPRISE: re.compile(r'[\U0001F632\U0001F62E\U0001F92F\U0001F631\U0001F633]'), EmotionLabel.FEAR: re.compile(r'[\U0001F630\U0001F628\U0001F627\U0001F61F\U0001F62C]'), EmotionLabel.DISGUST: re.compile(r'[\U0001F922\U0001F92E]'), EmotionLabel.ANGER: re.compile(r'[\U0001F621\U0001F624\U0001F620\U0001F92C]'), EmotionLabel.LOVE: re.compile(r'[\U00002764\U0001F495\U0001F970\U0001F60D\U0001F497\U0001F496\U0001F498\U0001F49D\U0001F618]'), EmotionLabel.CALM: re.compile(r'[\U0001F60C\U0001F9D8\U0000262E\U0001F54A\U0001F33F\U0001F343]'), } class TextEmotionDetector: """Text emotion detection with transformer model + keyword fallback.""" def __init__(self, model_name: str | None = None, device: str = "cpu"): self.model_name = model_name or TEXT_MODELS[0] self.device = device self.pipe = None self.model_type = "keyword" # "transformer" or "keyword" self.loaded = False def load(self) -> None: if self.loaded: return if HAS_TRANSFORMERS: try: self.pipe = pipeline( "text-classification", model=self.model_name, device=self.device, top_k=None, ) self.model_type = "transformer" print(f"[TextDetector] Loaded model: {self.model_name}") except Exception as e: print(f"[TextDetector] Model load failed: {e}") print("[TextDetector] Using keyword analysis") else: print("[TextDetector] transformers not available, keyword mode") self.loaded = True def _keyword_analysis(self, text: str) -> dict[EmotionLabel, float]: """Keyword + emoji + punctuation based emotion scoring.""" lower = text.lower() scores: dict[EmotionLabel, float] = {label: 0.0 for label in EMOTION_LABELS} scores[EmotionLabel.NEUTRAL] = 0.08 # baseline # Keyword matching (English) for label, keywords in KEYWORDS.items(): count = sum(1 for kw in keywords if kw in lower) scores[label] += count * 0.12 # Multilingual keyword matching (Greek, Spanish, French, German, etc.) for label, keywords in MULTILINGUAL_KEYWORDS.items(): count = sum(1 for kw in keywords if kw in lower) scores[label] += count * 0.15 # slightly higher weight for exact multilingual match # Emoji matching for label, pattern in EMOJI_PATTERNS.items(): matches = pattern.findall(text) scores[label] += len(matches) * 0.25 # Punctuation features excl = text.count('!') ques = text.count('?') caps_words = sum(1 for w in text.split() if w.isupper() and len(w) > 1) scores[EmotionLabel.SURPRISE] += excl * 0.04 scores[EmotionLabel.JOY] += excl * 0.025 scores[EmotionLabel.SURPRISE] += ques * 0.03 scores[EmotionLabel.JOY] += caps_words * 0.03 # Negation awareness (simple) negations = ["not", "no", "never", "don't", "doesn't", "didn't", "won't", "can't", "couldn't", "wouldn't", "shouldn't", "isn't", "aren't"] has_negation = any(neg in lower.split() for neg in negations) if has_negation: # Negation can flip positive emotions if scores[EmotionLabel.JOY] > scores[EmotionLabel.SADNESS]: scores[EmotionLabel.SADNESS] += scores[EmotionLabel.JOY] * 0.3 scores[EmotionLabel.JOY] *= 0.5 # Normalize total = sum(scores.values()) if total > 0: scores = {k: v / total for k, v in scores.items()} return scores def _map_transformer_scores(self, predictions: list[dict]) -> dict[EmotionLabel, float]: """Map transformer predictions to EmoSphere labels.""" scores: dict[EmotionLabel, float] = {label: 0.0 for label in EMOTION_LABELS} for pred in predictions: model_label = pred["label"].lower().strip() score = pred["score"] # Try GoEmotions mapping first, then simple mapping emo_label = GOEMOTIONS_MAP.get(model_label) or SIMPLE_MAP.get(model_label) if emo_label: scores[emo_label] = max(scores[emo_label], score) # Ensure calm gets some weight if scores[EmotionLabel.NEUTRAL] > 0.3: scores[EmotionLabel.CALM] = max(scores[EmotionLabel.CALM], scores[EmotionLabel.NEUTRAL] * 0.2) total = sum(scores.values()) if total > 0: scores = {k: v / total for k, v in scores.items()} return scores def detect( self, text: str, cultural_region: CulturalRegion = CulturalRegion.UNIVERSAL, ) -> EmotionDetectionResult: """Detect emotion from text.""" start = time.time() if not text.strip(): scores = {label: (1.0 if label == EmotionLabel.NEUTRAL else 0.0) for label in EMOTION_LABELS} elif self.pipe is not None: try: raw = self.pipe(text[:512]) # Truncate to model max # Pipeline with top_k=None returns list[list[dict]] or list[dict] predictions = raw[0] if raw and isinstance(raw[0], list) else raw scores = self._map_transformer_scores(predictions) except Exception as e: print(f"[TextDetector] Inference error: {e}, falling back to keywords") scores = self._keyword_analysis(text) else: scores = self._keyword_analysis(text) # Blend with keyword analysis for robustness if self.model_type == "transformer" and text.strip(): kw_scores = self._keyword_analysis(text) # Detect if text is non-Latin (Greek, Arabic, Chinese, etc.) non_latin_chars = sum(1 for c in text if ord(c) > 0x024F and c.isalpha()) total_alpha = sum(1 for c in text if c.isalpha()) or 1 is_non_english = (non_latin_chars / total_alpha) > 0.3 if is_non_english: # For non-English: 30% model, 70% keywords (model is English-only) for label in EMOTION_LABELS: scores[label] = scores[label] * 0.3 + kw_scores[label] * 0.7 else: # For English: 75% model, 25% keywords for label in EMOTION_LABELS: scores[label] = scores[label] * 0.75 + kw_scores[label] * 0.25 total = sum(scores.values()) if total > 0: scores = {k: v / total for k, v in scores.items()} emotion_scores = [ EmotionScore(label=label, score=scores[label], confidence=scores[label]) for label in EMOTION_LABELS ] dominant = max(scores, key=scores.get) # type: ignore return EmotionDetectionResult( dominant=dominant, dominant_score=scores[dominant], scores=emotion_scores, modality="text", confidence=scores[dominant] * (0.85 if self.model_type == "transformer" else 0.65), processing_time_ms=(time.time() - start) * 1000, cultural_region=cultural_region, )