# Spaces: chariscait / EmoSphere (Running) — File size: 17,498 Bytes

"""Text Emotion Detector — real inference using DistilRoBERTa/RoBERTa/DistilBERT models.

Two modes:
  1. HuggingFace text-classification pipeline (accurate, needs model download)
  2. Keyword + emoji lexicon analysis (fast, no dependencies, instant)

Both run locally. No data sent anywhere.
"""

from __future__ import annotations

import re
import time
from typing import Optional

import numpy as np

try:
    from transformers import pipeline
    HAS_TRANSFORMERS = True
except ImportError:
    HAS_TRANSFORMERS = False

from models import (
    EmotionLabel, EMOTION_LABELS, EmotionScore,
    EmotionDetectionResult, CulturalRegion,
)


# ── Text emotion models (HuggingFace, public) ───────────────────────
# Candidate Hugging Face model ids for the "text-classification" pipeline.
# TextEmotionDetector falls back to the first entry when it is constructed
# without an explicit model name.
TEXT_MODELS = [
    "j-hartmann/emotion-english-distilroberta-base",  # 7-class, excellent
    "SamLowe/roberta-base-go_emotions",               # 28-class GoEmotions
    "bhadresh-savani/distilbert-base-uncased-emotion", # 6-class
]

# GoEmotions → EmoSphere mapping (for multi-label models)
# Maps the 28 lowercase GoEmotions label strings onto EmoSphere labels.
# Keys must stay lowercase: the detector lowercases and strips the model's
# label before looking it up here. Several source labels share one target
# (e.g. "anger" and "annoyance" both map to ANGER).
GOEMOTIONS_MAP = {
    "admiration": EmotionLabel.LOVE,
    "amusement": EmotionLabel.JOY,
    "anger": EmotionLabel.ANGER,
    "annoyance": EmotionLabel.ANGER,
    "approval": EmotionLabel.JOY,
    "caring": EmotionLabel.LOVE,
    "confusion": EmotionLabel.SURPRISE,
    "curiosity": EmotionLabel.SURPRISE,
    "desire": EmotionLabel.LOVE,
    "disappointment": EmotionLabel.SADNESS,
    "disapproval": EmotionLabel.DISGUST,
    "disgust": EmotionLabel.DISGUST,
    "embarrassment": EmotionLabel.FEAR,
    "excitement": EmotionLabel.JOY,
    "fear": EmotionLabel.FEAR,
    "gratitude": EmotionLabel.LOVE,
    "grief": EmotionLabel.SADNESS,
    "joy": EmotionLabel.JOY,
    "love": EmotionLabel.LOVE,
    "nervousness": EmotionLabel.FEAR,
    "optimism": EmotionLabel.JOY,
    "pride": EmotionLabel.JOY,
    "realization": EmotionLabel.SURPRISE,
    "relief": EmotionLabel.CALM,  # the only entry that maps to CALM
    "remorse": EmotionLabel.SADNESS,
    "sadness": EmotionLabel.SADNESS,
    "surprise": EmotionLabel.SURPRISE,
    "neutral": EmotionLabel.NEUTRAL,
}

# Simple 6/7-class model mapping
# Fallback label map, consulted by the detector only after GOEMOTIONS_MAP.
# Most keys here also exist in GOEMOTIONS_MAP with the same target, so in
# practice this table contributes the adjective forms "angry", "happy", "sad".
SIMPLE_MAP = {
    "anger":    EmotionLabel.ANGER,
    "angry":    EmotionLabel.ANGER,
    "disgust":  EmotionLabel.DISGUST,
    "fear":     EmotionLabel.FEAR,
    "happy":    EmotionLabel.JOY,
    "joy":      EmotionLabel.JOY,
    "love":     EmotionLabel.LOVE,
    "sad":      EmotionLabel.SADNESS,
    "sadness":  EmotionLabel.SADNESS,
    "surprise": EmotionLabel.SURPRISE,
    "neutral":  EmotionLabel.NEUTRAL,
}


# ── Keyword Lexicons ─────────────────────────────────────────────────
# English lexicon: EmoSphere label -> cue words.
# TextEmotionDetector._keyword_analysis counts how many entries of each list
# occur in the lowercased input, so every entry must itself be lowercase.
KEYWORDS: dict[EmotionLabel, list[str]] = {
    EmotionLabel.JOY: [
        "happy", "glad", "excited", "wonderful", "great", "amazing", "awesome",
        "fantastic", "yay", "smile", "laugh", "fun", "enjoy", "pleased",
        "delighted", "cheerful", "thrilled", "blessed", "grateful", "ecstatic",
        "brilliant", "perfect", "excellent", "magnificent", "joyful", "elated",
        "overjoyed", "euphoric", "blissful", "merry", "jubilant", "lively",
    ],
    EmotionLabel.SADNESS: [
        "sad", "unhappy", "depressed", "lonely", "miss", "cry", "tears",
        "heartbreak", "sorry", "grief", "loss", "disappointed", "miserable",
        "gloomy", "melancholy", "devastated", "hopeless", "pain", "hurt",
        "sorrow", "mourning", "regret", "aching", "broken", "empty",
        "despair", "forlorn", "downcast", "dejected", "somber",
    ],
    EmotionLabel.SURPRISE: [
        "wow", "omg", "surprised", "unexpected", "shocking", "unbelievable",
        "incredible", "suddenly", "whoa", "astonished", "stunned",
        "remarkable", "extraordinary", "unforeseen", "startled", "amazed",
    ],
    EmotionLabel.FEAR: [
        "afraid", "scared", "worried", "anxious", "nervous", "terrified",
        "panic", "dread", "uneasy", "concern", "fearful", "frightened",
        "stressed", "overwhelmed", "tense", "apprehensive", "alarmed",
        "phobia", "nightmare", "horror", "creepy", "threatening",
    ],
    EmotionLabel.DISGUST: [
        "gross", "disgusting", "horrible", "terrible", "awful", "nasty",
        "repulsive", "yuck", "ugh", "revolting", "sick", "unpleasant",
        "vile", "offensive", "repugnant", "loathsome", "ghastly",
    ],
    EmotionLabel.ANGER: [
        "angry", "furious", "annoyed", "frustrated", "rage", "mad", "irritated",
        "outraged", "livid", "hostile", "enraged", "infuriated", "aggravated",
        "resentful", "bitter", "hate", "fury", "wrath", "temper", "irate",
    ],
    EmotionLabel.NEUTRAL: [
        "okay", "fine", "alright", "normal", "regular", "usual", "average",
        "standard", "nothing", "so-so", "meh", "whatever", "indifferent",
    ],
    EmotionLabel.LOVE: [
        "love", "adore", "cherish", "darling", "sweetheart", "heart",
        "romantic", "affection", "caring", "tender", "passion", "beloved",
        "soulmate", "dear", "treasure", "devotion", "embrace", "kiss",
        "hug", "warmth", "intimate", "partner", "together", "forever",
    ],
    EmotionLabel.CALM: [
        "calm", "peaceful", "relaxed", "serene", "tranquil", "zen",
        "mindful", "quiet", "gentle", "soothing", "meditate", "breathe",
        "harmony", "still", "content", "composed", "balanced", "centered",
        "grounded", "patient", "ease", "restful", "untroubled",
    ],
}

# ── Multilingual Keywords (Greek, Spanish, French; German for JOY only) ─
# EmoSphere label -> lowercase cue words/phrases in languages other than
# English. Each word is listed once per label: the keyword scorer counts list
# entries, so a word shared by two languages (e.g. "triste") must not be
# repeated or it would be weighted twice.
MULTILINGUAL_KEYWORDS: dict[EmotionLabel, list[str]] = {
    EmotionLabel.JOY: [
        # Greek
        "χαρά", "χαρούμενος", "χαρούμενη", "ευτυχισμένος", "ευτυχισμένη",
        "ευτυχία", "χαίρομαι", "υπέροχα", "τέλεια", "φανταστικά", "γέλιο",
        "γελάω", "χαμογελώ", "χαμόγελο", "ωραία", "εξαιρετικά",
        # Spanish
        "feliz", "alegre", "contento", "maravilloso", "genial", "risa",
        # French
        "heureux", "heureuse", "joie", "magnifique", "formidable",
        # German
        "glücklich", "froh", "wunderbar", "fantastisch", "freude",
    ],
    EmotionLabel.SADNESS: [
        # Greek
        "λυπημένος", "λυπημένη", "λύπη", "στεναχωρημένος", "στεναχώρια",
        "κλαίω", "δάκρυα", "πόνος", "μοναξιά", "μόνος", "μόνη",
        "θλίψη", "απογοητευμένος", "δυστυχισμένος", "απελπισία",
        # Spanish ("triste" is also the French form)
        "triste", "tristeza", "llorar", "dolor", "soledad",
        # French
        "tristesse", "pleurer", "douleur", "chagrin",
    ],
    EmotionLabel.SURPRISE: [
        # Greek ("εκπληκτη" is kept for unaccented input; "έκπληκτη" is the
        # correctly accented spelling)
        "έκπληξη", "εκπληκτικό", "εκπληκτικός", "εκπληκτική", "εκπλήσσομαι",
        "απίστευτο", "αναπάντεχο", "ξαφνικά", "δεν το περίμενα", "σοκ",
        "εντυπωσιακό", "παράξενο", "εκπληκτη", "έκπληκτη",
        # Spanish
        "sorpresa", "sorprendido", "increíble", "inesperado",
        # French
        "surprise", "surpris", "incroyable", "inattendu",
    ],
    EmotionLabel.FEAR: [
        # Greek
        "φόβος", "φοβάμαι", "τρομαγμένος", "τρομαγμένη", "ανησυχία",
        "ανήσυχος", "αγχωμένος", "άγχος", "πανικός", "τρόμος",
        "φοβερό", "ανησυχώ", "στρες",
        # Spanish
        "miedo", "asustado", "nervioso", "ansiedad", "pánico",
        # French
        "peur", "effrayé", "anxieux", "angoisse", "panique",
    ],
    EmotionLabel.ANGER: [
        # Greek
        "θυμός", "θυμωμένος", "θυμωμένη", "εκνευρισμένος", "εκνευρισμένη",
        "οργή", "εξοργισμένος", "νεύρα", "μίσος", "μισώ",
        "αγανακτισμένος", "εξαγριωμένος", "τσαντίλα",
        # Spanish
        "enojado", "furioso", "rabia", "odio", "ira",
        # French
        "colère", "furieux", "enragé", "haine", "irrité",
    ],
    EmotionLabel.DISGUST: [
        # Greek
        "αηδία", "αηδιαστικό", "αποκρουστικό", "φρικτό", "απαίσιο",
        "σιχαμερό", "αρρωστημένο", "χάλια",
        # Spanish ("horrible" is also the French form)
        "asco", "asqueroso", "repugnante", "horrible",
        # French
        "dégoût", "dégoûtant", "répugnant",
    ],
    EmotionLabel.LOVE: [
        # Greek
        "αγάπη", "αγαπώ", "αγαπημένος", "αγαπημένη", "ερωτευμένος",
        "ερωτευμένη", "τρυφερότητα", "αγκαλιά", "φιλί", "καρδιά",
        "λατρεύω", "στοργή", "αφοσίωση",
        # Spanish
        "amor", "te quiero", "cariño", "corazón", "ternura",
        # French
        "amour", "aimer", "tendresse", "coeur", "chéri",
    ],
    EmotionLabel.CALM: [
        # Greek
        "ηρεμία", "ήρεμος", "ήρεμη", "χαλαρός", "χαλαρή",
        "γαλήνη", "ήσυχος", "ειρηνικός", "ξεκούραση", "ψυχραιμία",
        # Spanish
        "calma", "tranquilo", "relajado", "sereno", "paz",
        # French
        "calme", "tranquille", "détendu", "serein", "paix",
    ],
    EmotionLabel.NEUTRAL: [
        # Greek
        "εντάξει", "μια χαρά", "κανονικά", "συνήθως", "απλά",
        "τίποτα", "ουδέτερο",
        # Spanish ("bien" and "normal" are also the French forms)
        "bien", "normal", "regular",
        # French
        "ordinaire",
    ],
}

# Emoji patterns
# EmoSphere label -> compiled character class of emoji code points.
# Each pattern matches exactly one code point per hit, so `findall` yields one
# match per emoji occurrence. There is deliberately no NEUTRAL entry here.
# LOVE matches the bare heart U+2764; a trailing variation selector (U+FE0F),
# if present, is not part of the character class and is simply ignored.
EMOJI_PATTERNS: dict[EmotionLabel, re.Pattern] = {
    EmotionLabel.JOY:      re.compile(r'[\U0001F600-\U0001F606\U0001F609\U0001F60A\U0001F60B\U0001F60E\U0001F929\U0001F973\U0001F389\U0001F38A]'),
    EmotionLabel.SADNESS:  re.compile(r'[\U0001F622\U0001F62D\U0001F61E\U0001F614\U0001F494\U0001F63F\U0001F97A\U0001F629]'),
    EmotionLabel.SURPRISE: re.compile(r'[\U0001F632\U0001F62E\U0001F92F\U0001F631\U0001F633]'),
    EmotionLabel.FEAR:     re.compile(r'[\U0001F630\U0001F628\U0001F627\U0001F61F\U0001F62C]'),
    EmotionLabel.DISGUST:  re.compile(r'[\U0001F922\U0001F92E]'),
    EmotionLabel.ANGER:    re.compile(r'[\U0001F621\U0001F624\U0001F620\U0001F92C]'),
    EmotionLabel.LOVE:     re.compile(r'[\U00002764\U0001F495\U0001F970\U0001F60D\U0001F497\U0001F496\U0001F498\U0001F49D\U0001F618]'),
    EmotionLabel.CALM:     re.compile(r'[\U0001F60C\U0001F9D8\U0000262E\U0001F54A\U0001F33F\U0001F343]'),
}


class TextEmotionDetector:
    """Text emotion detection with a transformer model plus keyword fallback.

    Call :meth:`load` once to try to build the Hugging Face pipeline. Until
    then, or if loading fails, :meth:`detect` scores text with the keyword,
    emoji and punctuation heuristics only.
    """

    # Negation cues, compared against whole lowercased tokens.
    _NEGATIONS = frozenset({
        "not", "no", "never", "don't", "doesn't", "didn't", "won't",
        "can't", "couldn't", "wouldn't", "shouldn't", "isn't", "aren't",
    })

    def __init__(self, model_name: str | None = None, device: str = "cpu"):
        """Store configuration; no model is loaded until :meth:`load`.

        Args:
            model_name: Hugging Face model id; defaults to ``TEXT_MODELS[0]``.
            device: Device argument forwarded to ``transformers.pipeline``.
        """
        self.model_name = model_name or TEXT_MODELS[0]
        self.device = device
        self.pipe = None
        self.model_type = "keyword"  # "transformer" or "keyword"
        self.loaded = False

    def load(self) -> None:
        """Try to build the text-classification pipeline (idempotent).

        Any failure is reported on stdout and leaves the detector in keyword
        mode; this method never raises for a model that cannot be loaded.
        """
        if self.loaded:
            return

        if HAS_TRANSFORMERS:
            try:
                self.pipe = pipeline(
                    "text-classification",
                    model=self.model_name,
                    device=self.device,
                    top_k=None,  # return a score for every class
                )
                self.model_type = "transformer"
                print(f"[TextDetector] Loaded model: {self.model_name}")
            except Exception as e:
                # Deliberate best-effort: keyword mode remains usable.
                print(f"[TextDetector] Model load failed: {e}")
                print("[TextDetector] Using keyword analysis")
        else:
            print("[TextDetector] transformers not available, keyword mode")

        self.loaded = True

    @staticmethod
    def _contains_keyword(text: str, keyword: str) -> bool:
        """Return True if *keyword* occurs in *text* at the start of a word.

        The character before the match must not be alphanumeric, so "ugh"
        no longer fires inside "laugh" nor "happy" inside "unhappy".
        Suffixes are still allowed ("love" matches "loved"), which keeps the
        crude stemming the lexicons rely on.
        """
        if not keyword:
            return False
        pos = text.find(keyword)
        while pos != -1:
            if pos == 0 or not text[pos - 1].isalnum():
                return True
            pos = text.find(keyword, pos + 1)
        return False

    @staticmethod
    def _count_keywords(text: str, keywords: list[str]) -> int:
        """Count the distinct entries of *keywords* found in *text*."""
        # dict.fromkeys drops repeated entries so no word is counted twice.
        return sum(
            1
            for kw in dict.fromkeys(keywords)
            if TextEmotionDetector._contains_keyword(text, kw)
        )

    def _keyword_analysis(self, text: str) -> dict[EmotionLabel, float]:
        """Score *text* from keywords, emoji and punctuation.

        Returns:
            A score per label in ``EMOTION_LABELS``, normalised to sum to 1
            whenever the raw total is positive.
        """
        lower = text.lower()
        scores: dict[EmotionLabel, float] = {label: 0.0 for label in EMOTION_LABELS}
        scores[EmotionLabel.NEUTRAL] = 0.08  # baseline

        # English keywords weigh 0.12 per hit, multilingual ones 0.15.
        for lexicon, weight in ((KEYWORDS, 0.12), (MULTILINGUAL_KEYWORDS, 0.15)):
            for label, keywords in lexicon.items():
                hits = TextEmotionDetector._count_keywords(lower, keywords)
                scores[label] += hits * weight

        # Emoji matching (on the original text; case is irrelevant for emoji)
        for label, pattern in EMOJI_PATTERNS.items():
            scores[label] += len(pattern.findall(text)) * 0.25

        # Punctuation features
        excl = text.count('!')
        ques = text.count('?')
        caps_words = sum(1 for w in text.split() if w.isupper() and len(w) > 1)

        scores[EmotionLabel.SURPRISE] += excl * 0.04
        scores[EmotionLabel.JOY] += excl * 0.025
        scores[EmotionLabel.SURPRISE] += ques * 0.03
        scores[EmotionLabel.JOY] += caps_words * 0.03

        # Negation awareness (simple). Tokenise on word characters so that
        # trailing punctuation ("not.", "no!") and curly apostrophes
        # ("don’t") do not hide a negation.
        tokens = set(
            re.findall(r"[^\W_]+(?:'[^\W_]+)*", lower.replace("\u2019", "'"))
        )
        if not TextEmotionDetector._NEGATIONS.isdisjoint(tokens):
            # Negation can flip positive emotions
            if scores[EmotionLabel.JOY] > scores[EmotionLabel.SADNESS]:
                scores[EmotionLabel.SADNESS] += scores[EmotionLabel.JOY] * 0.3
                scores[EmotionLabel.JOY] *= 0.5

        # Normalize
        total = sum(scores.values())
        if total > 0:
            scores = {k: v / total for k, v in scores.items()}

        return scores

    def _map_transformer_scores(self, predictions: list[dict]) -> dict[EmotionLabel, float]:
        """Map transformer predictions to EmoSphere labels.

        Args:
            predictions: Dicts with a ``"label"`` string and a ``"score"``.

        Returns:
            A normalised score per label in ``EMOTION_LABELS``. When several
            model labels map to the same EmoSphere label the highest score
            wins; unrecognised model labels are ignored.
        """
        scores: dict[EmotionLabel, float] = {label: 0.0 for label in EMOTION_LABELS}

        for pred in predictions:
            model_label = pred["label"].lower().strip()
            score = pred["score"]

            # Try GoEmotions mapping first, then simple mapping. Compare
            # against None explicitly so the lookup does not depend on the
            # truthiness of the enum member.
            emo_label = GOEMOTIONS_MAP.get(model_label)
            if emo_label is None:
                emo_label = SIMPLE_MAP.get(model_label)
            if emo_label is None:
                continue
            scores[emo_label] = max(scores[emo_label], score)

        # Ensure calm gets some weight
        if scores[EmotionLabel.NEUTRAL] > 0.3:
            scores[EmotionLabel.CALM] = max(
                scores[EmotionLabel.CALM], scores[EmotionLabel.NEUTRAL] * 0.2
            )

        total = sum(scores.values())
        if total > 0:
            scores = {k: v / total for k, v in scores.items()}
        return scores

    def detect(
        self,
        text: str,
        cultural_region: CulturalRegion = CulturalRegion.UNIVERSAL,
    ) -> EmotionDetectionResult:
        """Detect emotion from text.

        Blank input yields NEUTRAL with score 1.0. With a loaded pipeline the
        model scores are blended with the keyword scores; if inference
        raises, the result comes from keywords alone and is reported with the
        keyword-mode confidence factor.

        Args:
            text: Input text.
            cultural_region: Passed through unchanged to the result.

        Returns:
            An ``EmotionDetectionResult`` with per-label scores, the dominant
            label, a confidence value and the elapsed time in milliseconds.
        """
        start = time.perf_counter()  # monotonic, unlike time.time()
        has_text = bool(text.strip())
        # True while the result may be attributed to the transformer.
        model_active = self.model_type == "transformer"

        if not has_text:
            scores = {
                label: (1.0 if label == EmotionLabel.NEUTRAL else 0.0)
                for label in EMOTION_LABELS
            }
        elif self.pipe is not None:
            try:
                # The slice bounds input by characters only; truncation=True
                # makes the tokenizer enforce the model's token limit, which
                # a 512-character non-Latin string can still exceed.
                raw = self.pipe(text[:512], truncation=True)
                # Pipeline with top_k=None returns list[list[dict]] or list[dict]
                predictions = raw[0] if raw and isinstance(raw[0], list) else raw
                scores = self._map_transformer_scores(predictions)
            except Exception as e:
                print(f"[TextDetector] Inference error: {e}, falling back to keywords")
                scores = self._keyword_analysis(text)
                model_active = False  # this result is keyword-only
        else:
            scores = self._keyword_analysis(text)

        # Blend with keyword analysis for robustness
        if model_active and has_text:
            kw_scores = self._keyword_analysis(text)
            # Detect if text is non-Latin (Greek, Arabic, Chinese, etc.)
            non_latin_chars = sum(1 for c in text if ord(c) > 0x024F and c.isalpha())
            total_alpha = sum(1 for c in text if c.isalpha()) or 1
            is_non_english = (non_latin_chars / total_alpha) > 0.3

            # Non-English: 30% model, 70% keywords (model is English-only).
            # English: 75% model, 25% keywords.
            model_w, kw_w = (0.3, 0.7) if is_non_english else (0.75, 0.25)
            scores = {
                label: scores[label] * model_w + kw_scores[label] * kw_w
                for label in EMOTION_LABELS
            }
            total = sum(scores.values())
            if total > 0:
                scores = {k: v / total for k, v in scores.items()}

        emotion_scores = [
            EmotionScore(label=label, score=scores[label], confidence=scores[label])
            for label in EMOTION_LABELS
        ]
        dominant = max(scores, key=lambda label: scores[label])

        return EmotionDetectionResult(
            dominant=dominant,
            dominant_score=scores[dominant],
            scores=emotion_scores,
            modality="text",
            confidence=scores[dominant] * (0.85 if model_active else 0.65),
            processing_time_ms=(time.perf_counter() - start) * 1000,
            cultural_region=cultural_region,
        )