Spaces:
Running
Running
Add Greek/Spanish/French/German multilingual keywords + non-English keyword priority
82d2d36 verified | """Text Emotion Detector — Real inference using DistilBERT/BERT. | |
| Two modes: | |
| 1. HuggingFace text-classification pipeline (accurate, needs model download) | |
| 2. Keyword + emoji lexicon analysis (fast, no dependencies, instant) | |
| Both run locally. No data sent anywhere. | |
| """ | |
| from __future__ import annotations | |
| import re | |
| import time | |
| from typing import Optional | |
| import numpy as np | |
| try: | |
| from transformers import pipeline | |
| HAS_TRANSFORMERS = True | |
| except ImportError: | |
| HAS_TRANSFORMERS = False | |
| from models import ( | |
| EmotionLabel, EMOTION_LABELS, EmotionScore, | |
| EmotionDetectionResult, CulturalRegion, | |
| ) | |
# ── Text emotion models (HuggingFace, public) ───────────────────────
# Candidate HuggingFace model identifiers. TextEmotionDetector falls back to
# the first entry when it is constructed without an explicit model name.
TEXT_MODELS = [
    "j-hartmann/emotion-english-distilroberta-base",  # 7-class, excellent
    "SamLowe/roberta-base-go_emotions",  # 28-class GoEmotions
    "bhadresh-savani/distilbert-base-uncased-emotion",  # 6-class
]
# GoEmotions → EmoSphere mapping (for multi-label models).
# Keys are lowercase model label names; the detector lowercases and strips a
# predicted label before looking it up here. Several fine-grained labels
# collapse onto the same EmoSphere label (e.g. "anger" and "annoyance" both
# map to ANGER).
GOEMOTIONS_MAP = {
    "admiration": EmotionLabel.LOVE,
    "amusement": EmotionLabel.JOY,
    "anger": EmotionLabel.ANGER,
    "annoyance": EmotionLabel.ANGER,
    "approval": EmotionLabel.JOY,
    "caring": EmotionLabel.LOVE,
    "confusion": EmotionLabel.SURPRISE,
    "curiosity": EmotionLabel.SURPRISE,
    "desire": EmotionLabel.LOVE,
    "disappointment": EmotionLabel.SADNESS,
    "disapproval": EmotionLabel.DISGUST,
    "disgust": EmotionLabel.DISGUST,
    "embarrassment": EmotionLabel.FEAR,
    "excitement": EmotionLabel.JOY,
    "fear": EmotionLabel.FEAR,
    "gratitude": EmotionLabel.LOVE,
    "grief": EmotionLabel.SADNESS,
    "joy": EmotionLabel.JOY,
    "love": EmotionLabel.LOVE,
    "nervousness": EmotionLabel.FEAR,
    "optimism": EmotionLabel.JOY,
    "pride": EmotionLabel.JOY,
    "realization": EmotionLabel.SURPRISE,
    "relief": EmotionLabel.CALM,
    "remorse": EmotionLabel.SADNESS,
    "sadness": EmotionLabel.SADNESS,
    "surprise": EmotionLabel.SURPRISE,
    "neutral": EmotionLabel.NEUTRAL,
}
# Simple 6/7-class model mapping.
# Consulted after GOEMOTIONS_MAP when a predicted label is resolved. Keys that
# appear in both tables map to the same EmoSphere label, so this table mainly
# contributes the adjective forms "angry", "happy" and "sad".
SIMPLE_MAP = {
    "anger": EmotionLabel.ANGER,
    "angry": EmotionLabel.ANGER,
    "disgust": EmotionLabel.DISGUST,
    "fear": EmotionLabel.FEAR,
    "happy": EmotionLabel.JOY,
    "joy": EmotionLabel.JOY,
    "love": EmotionLabel.LOVE,
    "sad": EmotionLabel.SADNESS,
    "sadness": EmotionLabel.SADNESS,
    "surprise": EmotionLabel.SURPRISE,
    "neutral": EmotionLabel.NEUTRAL,
}
# ── Keyword Lexicons ─────────────────────────────────────────────────
# English lexicon: one list of cue words per emotion label. Every entry is
# lowercase because the scorer compares entries against lowercased input.
KEYWORDS: dict[EmotionLabel, list[str]] = {
    EmotionLabel.JOY: [
        "happy", "glad", "excited", "wonderful", "great", "amazing", "awesome",
        "fantastic", "yay", "smile", "laugh", "fun", "enjoy", "pleased",
        "delighted", "cheerful", "thrilled", "blessed", "grateful", "ecstatic",
        "brilliant", "perfect", "excellent", "magnificent", "joyful", "elated",
        "overjoyed", "euphoric", "blissful", "merry", "jubilant", "lively",
    ],
    EmotionLabel.SADNESS: [
        "sad", "unhappy", "depressed", "lonely", "miss", "cry", "tears",
        "heartbreak", "sorry", "grief", "loss", "disappointed", "miserable",
        "gloomy", "melancholy", "devastated", "hopeless", "pain", "hurt",
        "sorrow", "mourning", "regret", "aching", "broken", "empty",
        "despair", "forlorn", "downcast", "dejected", "somber",
    ],
    EmotionLabel.SURPRISE: [
        "wow", "omg", "surprised", "unexpected", "shocking", "unbelievable",
        "incredible", "suddenly", "whoa", "astonished", "stunned",
        "remarkable", "extraordinary", "unforeseen", "startled", "amazed",
    ],
    EmotionLabel.FEAR: [
        "afraid", "scared", "worried", "anxious", "nervous", "terrified",
        "panic", "dread", "uneasy", "concern", "fearful", "frightened",
        "stressed", "overwhelmed", "tense", "apprehensive", "alarmed",
        "phobia", "nightmare", "horror", "creepy", "threatening",
    ],
    EmotionLabel.DISGUST: [
        "gross", "disgusting", "horrible", "terrible", "awful", "nasty",
        "repulsive", "yuck", "ugh", "revolting", "sick", "unpleasant",
        "vile", "offensive", "repugnant", "loathsome", "ghastly",
    ],
    EmotionLabel.ANGER: [
        "angry", "furious", "annoyed", "frustrated", "rage", "mad", "irritated",
        "outraged", "livid", "hostile", "enraged", "infuriated", "aggravated",
        "resentful", "bitter", "hate", "fury", "wrath", "temper", "irate",
    ],
    EmotionLabel.NEUTRAL: [
        "okay", "fine", "alright", "normal", "regular", "usual", "average",
        "standard", "nothing", "so-so", "meh", "whatever", "indifferent",
    ],
    EmotionLabel.LOVE: [
        "love", "adore", "cherish", "darling", "sweetheart", "heart",
        "romantic", "affection", "caring", "tender", "passion", "beloved",
        "soulmate", "dear", "treasure", "devotion", "embrace", "kiss",
        "hug", "warmth", "intimate", "partner", "together", "forever",
    ],
    EmotionLabel.CALM: [
        "calm", "peaceful", "relaxed", "serene", "tranquil", "zen",
        "mindful", "quiet", "gentle", "soothing", "meditate", "breathe",
        "harmony", "still", "content", "composed", "balanced", "centered",
        "grounded", "patient", "ease", "restful", "untroubled",
    ],
}
# ── Multilingual Keywords (Greek, Spanish, French; German for JOY only) ─────
# Non-English lexicon: one list of lowercase cue words/phrases per emotion.
# Each term appears at most once per list. A term shared by several languages
# (e.g. "triste", "horrible", "bien", "normal") is listed under the first
# language only, so a single word in the input is never counted twice when
# the list is scanned.
MULTILINGUAL_KEYWORDS: dict[EmotionLabel, list[str]] = {
    EmotionLabel.JOY: [
        # Greek
        "χαρά", "χαρούμενος", "χαρούμενη", "ευτυχισμένος", "ευτυχισμένη",
        "ευτυχία", "χαίρομαι", "υπέροχα", "τέλεια", "φανταστικά", "γέλιο",
        "γελάω", "χαμογελώ", "χαμόγελο", "ωραία", "εξαιρετικά",
        # Spanish
        "feliz", "alegre", "contento", "maravilloso", "genial", "risa",
        # French
        "heureux", "heureuse", "joie", "magnifique", "formidable",
        # German
        "glücklich", "froh", "wunderbar", "fantastisch", "freude",
    ],
    EmotionLabel.SADNESS: [
        # Greek
        "λυπημένος", "λυπημένη", "λύπη", "στεναχωρημένος", "στεναχώρια",
        "κλαίω", "δάκρυα", "πόνος", "μοναξιά", "μόνος", "μόνη",
        "θλίψη", "απογοητευμένος", "δυστυχισμένος", "απελπισία",
        # Spanish ("triste" is also the French form)
        "triste", "tristeza", "llorar", "dolor", "soledad",
        # French
        "tristesse", "pleurer", "douleur", "chagrin",
    ],
    EmotionLabel.SURPRISE: [
        # Greek
        "έκπληξη", "εκπληκτικό", "εκπληκτικός", "εκπληκτική", "εκπλήσσομαι",
        "απίστευτο", "αναπάντεχο", "ξαφνικά", "δεν το περίμενα", "σοκ",
        "εντυπωσιακό", "παράξενο", "εκπληκτη",
        # Spanish
        "sorpresa", "sorprendido", "increíble", "inesperado",
        # French
        "surprise", "surpris", "incroyable", "inattendu",
    ],
    EmotionLabel.FEAR: [
        # Greek
        "φόβος", "φοβάμαι", "τρομαγμένος", "τρομαγμένη", "ανησυχία",
        "ανήσυχος", "αγχωμένος", "άγχος", "πανικός", "τρόμος",
        "φοβερό", "ανησυχώ", "στρες",
        # Spanish
        "miedo", "asustado", "nervioso", "ansiedad", "pánico",
        # French
        "peur", "effrayé", "anxieux", "angoisse", "panique",
    ],
    EmotionLabel.ANGER: [
        # Greek
        "θυμός", "θυμωμένος", "θυμωμένη", "εκνευρισμένος", "εκνευρισμένη",
        "οργή", "εξοργισμένος", "νεύρα", "μίσος", "μισώ",
        "αγανακτισμένος", "εξαγριωμένος", "τσαντίλα",
        # Spanish
        "enojado", "furioso", "rabia", "odio", "ira",
        # French
        "colère", "furieux", "enragé", "haine", "irrité",
    ],
    EmotionLabel.DISGUST: [
        # Greek
        "αηδία", "αηδιαστικό", "αποκρουστικό", "φρικτό", "απαίσιο",
        "σιχαμερό", "αρρωστημένο", "χάλια",
        # Spanish ("horrible" is also the French form)
        "asco", "asqueroso", "repugnante", "horrible",
        # French
        "dégoût", "dégoûtant", "répugnant",
    ],
    EmotionLabel.LOVE: [
        # Greek
        "αγάπη", "αγαπώ", "αγαπημένος", "αγαπημένη", "ερωτευμένος",
        "ερωτευμένη", "τρυφερότητα", "αγκαλιά", "φιλί", "καρδιά",
        "λατρεύω", "στοργή", "αφοσίωση",
        # Spanish
        "amor", "te quiero", "cariño", "corazón", "ternura",
        # French
        "amour", "aimer", "tendresse", "coeur", "chéri",
    ],
    EmotionLabel.CALM: [
        # Greek
        "ηρεμία", "ήρεμος", "ήρεμη", "χαλαρός", "χαλαρή",
        "γαλήνη", "ήσυχος", "ειρηνικός", "ξεκούραση", "ψυχραιμία",
        # Spanish
        "calma", "tranquilo", "relajado", "sereno", "paz",
        # French
        "calme", "tranquille", "détendu", "serein", "paix",
    ],
    EmotionLabel.NEUTRAL: [
        # Greek
        "εντάξει", "μια χαρά", "κανονικά", "συνήθως", "απλά",
        "τίποτα", "ουδέτερο",
        # Spanish ("bien" and "normal" are also the French forms)
        "bien", "normal", "regular",
        # French
        "ordinaire",
    ],
}
# Emoji patterns.
# One pre-compiled character class per emotion; each class matches a single
# code point, so findall() yields one hit per emoji occurrence in the text.
# There is no entry for EmotionLabel.NEUTRAL.
EMOJI_PATTERNS: dict[EmotionLabel, re.Pattern] = {
    EmotionLabel.JOY: re.compile(r'[\U0001F600-\U0001F606\U0001F609\U0001F60A\U0001F60B\U0001F60E\U0001F929\U0001F973\U0001F389\U0001F38A]'),
    EmotionLabel.SADNESS: re.compile(r'[\U0001F622\U0001F62D\U0001F61E\U0001F614\U0001F494\U0001F63F\U0001F97A\U0001F629]'),
    EmotionLabel.SURPRISE: re.compile(r'[\U0001F632\U0001F62E\U0001F92F\U0001F631\U0001F633]'),
    EmotionLabel.FEAR: re.compile(r'[\U0001F630\U0001F628\U0001F627\U0001F61F\U0001F62C]'),
    EmotionLabel.DISGUST: re.compile(r'[\U0001F922\U0001F92E]'),
    EmotionLabel.ANGER: re.compile(r'[\U0001F621\U0001F624\U0001F620\U0001F92C]'),
    EmotionLabel.LOVE: re.compile(r'[\U00002764\U0001F495\U0001F970\U0001F60D\U0001F497\U0001F496\U0001F498\U0001F49D\U0001F618]'),
    EmotionLabel.CALM: re.compile(r'[\U0001F60C\U0001F9D8\U0000262E\U0001F54A\U0001F33F\U0001F343]'),
}
class TextEmotionDetector:
    """Text emotion detection with a transformer model and a keyword fallback.

    ``load()`` tries to build a HuggingFace ``text-classification`` pipeline.
    Until it has succeeded (or if it fails) ``detect()`` scores text with the
    keyword/emoji lexicons only; once a pipeline is available its scores are
    blended with the lexicon scores.
    """

    # Whitespace-delimited tokens that trigger the simple negation rule.
    _NEGATIONS = frozenset({
        "not", "no", "never", "don't", "doesn't", "didn't", "won't",
        "can't", "couldn't", "wouldn't", "shouldn't", "isn't", "aren't",
    })

    # keyword -> compiled word-start pattern; shared by all instances so each
    # keyword is compiled only once per process.
    _keyword_pattern_cache = {}

    def __init__(self, model_name: str | None = None, device: str = "cpu"):
        """Store configuration; no model is loaded until ``load()`` is called.

        Args:
            model_name: HuggingFace model identifier. Defaults to the first
                entry of ``TEXT_MODELS``.
            device: Device argument forwarded to the pipeline constructor.
        """
        self.model_name = model_name or TEXT_MODELS[0]
        self.device = device
        self.pipe = None
        self.model_type = "keyword"  # "transformer" or "keyword"
        self.loaded = False

    def load(self) -> None:
        """Try to create the transformer pipeline; safe to call repeatedly.

        On any failure (or when ``transformers`` is not installed) the
        detector stays in keyword mode. ``loaded`` is set either way, so a
        failed load is not retried on later calls.
        """
        if self.loaded:
            return
        if HAS_TRANSFORMERS:
            try:
                self.pipe = pipeline(
                    "text-classification",
                    model=self.model_name,
                    device=self.device,
                    top_k=None,
                )
                self.model_type = "transformer"
                print(f"[TextDetector] Loaded model: {self.model_name}")
            except Exception as e:
                # Deliberate best-effort: any load problem degrades to the
                # lexicon scorer instead of propagating.
                print(f"[TextDetector] Model load failed: {e}")
                print("[TextDetector] Using keyword analysis")
        else:
            print("[TextDetector] transformers not available, keyword mode")
        self.loaded = True

    @classmethod
    def _count_keyword_hits(cls, keywords: list[str], lowered: str) -> int:
        """Count the distinct keywords that occur in *lowered* at a word start.

        A keyword must begin where a word begins, so "ugh" no longer fires
        inside "laugh"/"though", "hate" inside "whatever" or "happy" inside
        "unhappy". Suffixes are still allowed, so inflected forms such as
        "laughing" or "loved" keep matching. Duplicate entries in *keywords*
        are counted once.
        """
        cache = cls._keyword_pattern_cache
        hits = 0
        for keyword in set(keywords):
            pattern = cache.get(keyword)
            if pattern is None:
                # (?<!\w) is Unicode-aware for str patterns, so the anchor
                # also works for the Greek and accented entries.
                pattern = re.compile(r"(?<!\w)" + re.escape(keyword))
                cache[keyword] = pattern
            if pattern.search(lowered):
                hits += 1
        return hits

    def _keyword_analysis(self, text: str) -> dict[EmotionLabel, float]:
        """Keyword + emoji + punctuation based emotion scoring.

        Returns a score per label; the scores are normalised to sum to 1
        whenever their total is positive.
        """
        lower = text.lower()
        scores: dict[EmotionLabel, float] = {label: 0.0 for label in EMOTION_LABELS}
        scores[EmotionLabel.NEUTRAL] = 0.08  # baseline

        # Keyword matching (English)
        for label, keywords in KEYWORDS.items():
            scores[label] += self._count_keyword_hits(keywords, lower) * 0.12

        # Multilingual keyword matching, weighted slightly higher per hit
        for label, keywords in MULTILINGUAL_KEYWORDS.items():
            scores[label] += self._count_keyword_hits(keywords, lower) * 0.15

        # Emoji matching: every occurrence counts
        for label, pattern in EMOJI_PATTERNS.items():
            scores[label] += len(pattern.findall(text)) * 0.25

        # Punctuation features
        excl = text.count('!')
        ques = text.count('?')
        caps_words = sum(1 for w in text.split() if w.isupper() and len(w) > 1)
        scores[EmotionLabel.SURPRISE] += excl * 0.04
        scores[EmotionLabel.JOY] += excl * 0.025
        scores[EmotionLabel.SURPRISE] += ques * 0.03
        scores[EmotionLabel.JOY] += caps_words * 0.03

        # Negation awareness (simple): look for a negation among the
        # whitespace-delimited tokens. The text is split once, not once per
        # negation word.
        tokens = set(lower.split())
        if not tokens.isdisjoint(self._NEGATIONS):
            # Negation can flip positive emotions: shift part of JOY to
            # SADNESS, but only when JOY currently outweighs SADNESS.
            if scores[EmotionLabel.JOY] > scores[EmotionLabel.SADNESS]:
                scores[EmotionLabel.SADNESS] += scores[EmotionLabel.JOY] * 0.3
                scores[EmotionLabel.JOY] *= 0.5

        # Normalize
        total = sum(scores.values())
        if total > 0:
            scores = {k: v / total for k, v in scores.items()}
        return scores

    def _map_transformer_scores(self, predictions: list[dict]) -> dict[EmotionLabel, float]:
        """Map transformer predictions to EmoSphere labels.

        Args:
            predictions: Dicts with a ``"label"`` string and a ``"score"``.

        Returns:
            A score per EmoSphere label, normalised to sum to 1 when the
            total is positive. When several model labels map to the same
            EmoSphere label, the highest score is kept.
        """
        scores: dict[EmotionLabel, float] = {label: 0.0 for label in EMOTION_LABELS}
        for pred in predictions:
            model_label = pred["label"].lower().strip()
            score = pred["score"]
            # Try GoEmotions mapping first, then simple mapping. Compare with
            # None so the lookup does not depend on enum members being truthy.
            emo_label = GOEMOTIONS_MAP.get(model_label)
            if emo_label is None:
                emo_label = SIMPLE_MAP.get(model_label)
            if emo_label is not None:
                scores[emo_label] = max(scores[emo_label], score)

        # Ensure calm gets some weight when the model is strongly neutral
        if scores[EmotionLabel.NEUTRAL] > 0.3:
            scores[EmotionLabel.CALM] = max(
                scores[EmotionLabel.CALM], scores[EmotionLabel.NEUTRAL] * 0.2
            )

        total = sum(scores.values())
        if total > 0:
            scores = {k: v / total for k, v in scores.items()}
        return scores

    def detect(
        self,
        text: str,
        cultural_region: CulturalRegion = CulturalRegion.UNIVERSAL,
    ) -> EmotionDetectionResult:
        """Detect emotion from text.

        Blank text yields a pure NEUTRAL distribution. Otherwise the loaded
        pipeline (if any) scores the text and the result is blended with the
        lexicon scores; without a pipeline, or when inference raises, the
        lexicon scores are used alone.

        Args:
            text: Input text.
            cultural_region: Passed through unchanged to the result.

        Returns:
            An ``EmotionDetectionResult`` with per-label scores, the dominant
            label and the processing time in milliseconds.
        """
        start = time.perf_counter()  # monotonic clock for elapsed time
        has_text = bool(text.strip())
        fell_back = False  # True when inference raised and lexicon scores were used

        if not has_text:
            scores = {
                label: (1.0 if label == EmotionLabel.NEUTRAL else 0.0)
                for label in EMOTION_LABELS
            }
        elif self.pipe is not None:
            try:
                # Cap the input at 512 characters (a character cap, not a
                # token cap) to bound the sequence passed to the model.
                raw = self.pipe(text[:512])
                # Pipeline with top_k=None returns list[list[dict]] or list[dict]
                predictions = raw[0] if raw and isinstance(raw[0], list) else raw
                scores = self._map_transformer_scores(predictions)
            except Exception as e:
                print(f"[TextDetector] Inference error: {e}, falling back to keywords")
                scores = self._keyword_analysis(text)
                fell_back = True
        else:
            scores = self._keyword_analysis(text)

        # Scores only count as model-backed when inference did not fall back;
        # this avoids blending lexicon scores with themselves and reporting
        # transformer-level confidence for a lexicon-only result.
        model_scored = self.model_type == "transformer" and not fell_back

        # Blend with keyword analysis for robustness
        if model_scored and has_text:
            kw_scores = self._keyword_analysis(text)
            # Detect mostly non-Latin-script text (Greek, Arabic, Chinese, ...):
            # letters above U+024F lie outside the Latin blocks.
            non_latin_chars = sum(1 for c in text if ord(c) > 0x024F and c.isalpha())
            total_alpha = sum(1 for c in text if c.isalpha()) or 1
            is_non_english = (non_latin_chars / total_alpha) > 0.3
            # Non-Latin text: 30% model, 70% keywords (model is English-only).
            # Otherwise: 75% model, 25% keywords.
            model_weight = 0.3 if is_non_english else 0.75
            keyword_weight = 0.7 if is_non_english else 0.25
            for label in EMOTION_LABELS:
                scores[label] = scores[label] * model_weight + kw_scores[label] * keyword_weight

        total = sum(scores.values())
        if total > 0:
            scores = {k: v / total for k, v in scores.items()}

        emotion_scores = [
            EmotionScore(label=label, score=scores[label], confidence=scores[label])
            for label in EMOTION_LABELS
        ]
        dominant = max(scores, key=scores.get)  # type: ignore
        return EmotionDetectionResult(
            dominant=dominant,
            dominant_score=scores[dominant],
            scores=emotion_scores,
            modality="text",
            confidence=scores[dominant] * (0.85 if model_scored else 0.65),
            processing_time_ms=(time.perf_counter() - start) * 1000,
            cultural_region=cultural_region,
        )