# EmoSphere / text_detector.py
# chariscait's picture
# Add Greek/Spanish/French/German multilingual keywords + non-English keyword priority
# 82d2d36 verified
"""Text Emotion Detector — Real inference using DistilBERT/BERT.
Two modes:
1. HuggingFace text-classification pipeline (accurate, needs model download)
2. Keyword + emoji lexicon analysis (fast, no dependencies, instant)
Both run locally. No data sent anywhere.
"""
from __future__ import annotations
import re
import time
from typing import Optional
import numpy as np
try:
from transformers import pipeline
HAS_TRANSFORMERS = True
except ImportError:
HAS_TRANSFORMERS = False
from models import (
EmotionLabel, EMOTION_LABELS, EmotionScore,
EmotionDetectionResult, CulturalRegion,
)
# ── Text emotion models (HuggingFace, public) ───────────────────────
# Candidate HuggingFace checkpoints, in order of preference; index 0 is the
# one TextEmotionDetector picks when no model name is supplied.
TEXT_MODELS = [
    # 7-class, excellent
    "j-hartmann/emotion-english-distilroberta-base",
    # 28-class GoEmotions
    "SamLowe/roberta-base-go_emotions",
    # 6-class
    "bhadresh-savani/distilbert-base-uncased-emotion",
]
# GoEmotions → EmoSphere mapping (for multi-label models)
# Lookup table: lower-cased GoEmotions class name -> EmoSphere label.
# Several fine-grained GoEmotions classes collapse onto the same EmoSphere
# label (e.g. "anger" and "annoyance" both become ANGER).
GOEMOTIONS_MAP = {
    "admiration": EmotionLabel.LOVE,
    "amusement": EmotionLabel.JOY,
    "anger": EmotionLabel.ANGER,
    "annoyance": EmotionLabel.ANGER,
    "approval": EmotionLabel.JOY,
    "caring": EmotionLabel.LOVE,
    "confusion": EmotionLabel.SURPRISE,
    "curiosity": EmotionLabel.SURPRISE,
    "desire": EmotionLabel.LOVE,
    "disappointment": EmotionLabel.SADNESS,
    "disapproval": EmotionLabel.DISGUST,
    "disgust": EmotionLabel.DISGUST,
    "embarrassment": EmotionLabel.FEAR,
    "excitement": EmotionLabel.JOY,
    "fear": EmotionLabel.FEAR,
    "gratitude": EmotionLabel.LOVE,
    "grief": EmotionLabel.SADNESS,
    "joy": EmotionLabel.JOY,
    "love": EmotionLabel.LOVE,
    "nervousness": EmotionLabel.FEAR,
    "optimism": EmotionLabel.JOY,
    "pride": EmotionLabel.JOY,
    "realization": EmotionLabel.SURPRISE,
    "relief": EmotionLabel.CALM,
    "remorse": EmotionLabel.SADNESS,
    "sadness": EmotionLabel.SADNESS,
    "surprise": EmotionLabel.SURPRISE,
    "neutral": EmotionLabel.NEUTRAL,
}
# Simple 6/7-class model mapping
# Lookup table for the small (6/7-class) classifiers: lower-cased model label
# -> EmoSphere label. Both noun and adjective spellings are accepted
# ("anger"/"angry", "sadness"/"sad", "joy"/"happy").
SIMPLE_MAP = {
    "anger": EmotionLabel.ANGER,
    "angry": EmotionLabel.ANGER,
    "disgust": EmotionLabel.DISGUST,
    "fear": EmotionLabel.FEAR,
    "happy": EmotionLabel.JOY,
    "joy": EmotionLabel.JOY,
    "love": EmotionLabel.LOVE,
    "sad": EmotionLabel.SADNESS,
    "sadness": EmotionLabel.SADNESS,
    "surprise": EmotionLabel.SURPRISE,
    "neutral": EmotionLabel.NEUTRAL,
}
# ── Keyword Lexicons ─────────────────────────────────────────────────
# English lexicon: for each emotion label, the lower-case cue words that the
# keyword scorer looks for in the (lower-cased) input text.
KEYWORDS: dict[EmotionLabel, list[str]] = {
    EmotionLabel.JOY: [
        "happy", "glad", "excited", "wonderful", "great", "amazing",
        "awesome", "fantastic", "yay", "smile", "laugh", "fun", "enjoy",
        "pleased", "delighted", "cheerful", "thrilled", "blessed",
        "grateful", "ecstatic", "brilliant", "perfect", "excellent",
        "magnificent", "joyful", "elated", "overjoyed", "euphoric",
        "blissful", "merry", "jubilant", "lively",
    ],
    EmotionLabel.SADNESS: [
        "sad", "unhappy", "depressed", "lonely", "miss", "cry", "tears",
        "heartbreak", "sorry", "grief", "loss", "disappointed",
        "miserable", "gloomy", "melancholy", "devastated", "hopeless",
        "pain", "hurt", "sorrow", "mourning", "regret", "aching",
        "broken", "empty", "despair", "forlorn", "downcast", "dejected",
        "somber",
    ],
    EmotionLabel.SURPRISE: [
        "wow", "omg", "surprised", "unexpected", "shocking",
        "unbelievable", "incredible", "suddenly", "whoa", "astonished",
        "stunned", "remarkable", "extraordinary", "unforeseen",
        "startled", "amazed",
    ],
    EmotionLabel.FEAR: [
        "afraid", "scared", "worried", "anxious", "nervous", "terrified",
        "panic", "dread", "uneasy", "concern", "fearful", "frightened",
        "stressed", "overwhelmed", "tense", "apprehensive", "alarmed",
        "phobia", "nightmare", "horror", "creepy", "threatening",
    ],
    EmotionLabel.DISGUST: [
        "gross", "disgusting", "horrible", "terrible", "awful", "nasty",
        "repulsive", "yuck", "ugh", "revolting", "sick", "unpleasant",
        "vile", "offensive", "repugnant", "loathsome", "ghastly",
    ],
    EmotionLabel.ANGER: [
        "angry", "furious", "annoyed", "frustrated", "rage", "mad",
        "irritated", "outraged", "livid", "hostile", "enraged",
        "infuriated", "aggravated", "resentful", "bitter", "hate", "fury",
        "wrath", "temper", "irate",
    ],
    EmotionLabel.NEUTRAL: [
        "okay", "fine", "alright", "normal", "regular", "usual",
        "average", "standard", "nothing", "so-so", "meh", "whatever",
        "indifferent",
    ],
    EmotionLabel.LOVE: [
        "love", "adore", "cherish", "darling", "sweetheart", "heart",
        "romantic", "affection", "caring", "tender", "passion", "beloved",
        "soulmate", "dear", "treasure", "devotion", "embrace", "kiss",
        "hug", "warmth", "intimate", "partner", "together", "forever",
    ],
    EmotionLabel.CALM: [
        "calm", "peaceful", "relaxed", "serene", "tranquil", "zen",
        "mindful", "quiet", "gentle", "soothing", "meditate", "breathe",
        "harmony", "still", "content", "composed", "balanced", "centered",
        "grounded", "patient", "ease", "restful", "untroubled",
    ],
}
# ── Multilingual Keywords (Greek, Spanish, French; German for JOY only) ─────
# Non-English lexicon: for each emotion label, lower-case cue words/phrases
# grouped by language. Every entry appears at most once per label: a word that
# is spelled the same in two languages (e.g. "triste" in Spanish and French)
# is listed only under the first language, so that a single occurrence in the
# text is not counted twice by the keyword scorer.
MULTILINGUAL_KEYWORDS: dict[EmotionLabel, list[str]] = {
    EmotionLabel.JOY: [
        # Greek
        "χαρά", "χαρούμενος", "χαρούμενη", "ευτυχισμένος", "ευτυχισμένη",
        "ευτυχία", "χαίρομαι", "υπέροχα", "τέλεια", "φανταστικά", "γέλιο",
        "γελάω", "χαμογελώ", "χαμόγελο", "ωραία", "εξαιρετικά",
        # Spanish
        "feliz", "alegre", "contento", "maravilloso", "genial", "risa",
        # French
        "heureux", "heureuse", "joie", "magnifique", "formidable",
        # German
        "glücklich", "froh", "wunderbar", "fantastisch", "freude",
    ],
    EmotionLabel.SADNESS: [
        # Greek
        "λυπημένος", "λυπημένη", "λύπη", "στεναχωρημένος", "στεναχώρια",
        "κλαίω", "δάκρυα", "πόνος", "μοναξιά", "μόνος", "μόνη",
        "θλίψη", "απογοητευμένος", "δυστυχισμένος", "απελπισία",
        # Spanish ("triste" is also the French form)
        "triste", "tristeza", "llorar", "dolor", "soledad",
        # French
        "tristesse", "pleurer", "douleur", "chagrin",
    ],
    EmotionLabel.SURPRISE: [
        # Greek
        "έκπληξη", "εκπληκτικό", "εκπληκτικός", "εκπληκτική", "εκπλήσσομαι",
        "απίστευτο", "αναπάντεχο", "ξαφνικά", "δεν το περίμενα", "σοκ",
        "εντυπωσιακό", "παράξενο", "εκπληκτη",
        # Spanish
        "sorpresa", "sorprendido", "increíble", "inesperado",
        # French
        "surprise", "surpris", "incroyable", "inattendu",
    ],
    EmotionLabel.FEAR: [
        # Greek
        "φόβος", "φοβάμαι", "τρομαγμένος", "τρομαγμένη", "ανησυχία",
        "ανήσυχος", "αγχωμένος", "άγχος", "πανικός", "τρόμος",
        "φοβερό", "ανησυχώ", "στρες",
        # Spanish
        "miedo", "asustado", "nervioso", "ansiedad", "pánico",
        # French
        "peur", "effrayé", "anxieux", "angoisse", "panique",
    ],
    EmotionLabel.ANGER: [
        # Greek
        "θυμός", "θυμωμένος", "θυμωμένη", "εκνευρισμένος", "εκνευρισμένη",
        "οργή", "εξοργισμένος", "νεύρα", "μίσος", "μισώ",
        "αγανακτισμένος", "εξαγριωμένος", "τσαντίλα",
        # Spanish
        "enojado", "furioso", "rabia", "odio", "ira",
        # French
        "colère", "furieux", "enragé", "haine", "irrité",
    ],
    EmotionLabel.DISGUST: [
        # Greek
        "αηδία", "αηδιαστικό", "αποκρουστικό", "φρικτό", "απαίσιο",
        "σιχαμερό", "αρρωστημένο", "χάλια",
        # Spanish ("horrible" is also the French form)
        "asco", "asqueroso", "repugnante", "horrible",
        # French
        "dégoût", "dégoûtant", "répugnant",
    ],
    EmotionLabel.LOVE: [
        # Greek
        "αγάπη", "αγαπώ", "αγαπημένος", "αγαπημένη", "ερωτευμένος",
        "ερωτευμένη", "τρυφερότητα", "αγκαλιά", "φιλί", "καρδιά",
        "λατρεύω", "στοργή", "αφοσίωση",
        # Spanish
        "amor", "te quiero", "cariño", "corazón", "ternura",
        # French
        "amour", "aimer", "tendresse", "coeur", "chéri",
    ],
    EmotionLabel.CALM: [
        # Greek
        "ηρεμία", "ήρεμος", "ήρεμη", "χαλαρός", "χαλαρή",
        "γαλήνη", "ήσυχος", "ειρηνικός", "ξεκούραση", "ψυχραιμία",
        # Spanish
        "calma", "tranquilo", "relajado", "sereno", "paz",
        # French
        "calme", "tranquille", "détendu", "serein", "paix",
    ],
    EmotionLabel.NEUTRAL: [
        # Greek
        "εντάξει", "μια χαρά", "κανονικά", "συνήθως", "απλά",
        "τίποτα", "ουδέτερο",
        # Spanish ("bien" and "normal" are also the French forms)
        "bien", "normal", "regular",
        # French
        "ordinaire",
    ],
}
# Emoji patterns
# One pre-compiled character-class regex per emotion label; each class lists
# the emoji code points that count as evidence for that label. There is no
# entry for NEUTRAL.
EMOJI_PATTERNS: dict[EmotionLabel, re.Pattern] = {
    EmotionLabel.JOY: re.compile(
        r'[\U0001F600-\U0001F606\U0001F609\U0001F60A\U0001F60B\U0001F60E\U0001F929\U0001F973\U0001F389\U0001F38A]'
    ),
    EmotionLabel.SADNESS: re.compile(
        r'[\U0001F622\U0001F62D\U0001F61E\U0001F614\U0001F494\U0001F63F\U0001F97A\U0001F629]'
    ),
    EmotionLabel.SURPRISE: re.compile(
        r'[\U0001F632\U0001F62E\U0001F92F\U0001F631\U0001F633]'
    ),
    EmotionLabel.FEAR: re.compile(
        r'[\U0001F630\U0001F628\U0001F627\U0001F61F\U0001F62C]'
    ),
    EmotionLabel.DISGUST: re.compile(
        r'[\U0001F922\U0001F92E]'
    ),
    EmotionLabel.ANGER: re.compile(
        r'[\U0001F621\U0001F624\U0001F620\U0001F92C]'
    ),
    EmotionLabel.LOVE: re.compile(
        r'[\U00002764\U0001F495\U0001F970\U0001F60D\U0001F497\U0001F496\U0001F498\U0001F49D\U0001F618]'
    ),
    EmotionLabel.CALM: re.compile(
        r'[\U0001F60C\U0001F9D8\U0000262E\U0001F54A\U0001F33F\U0001F343]'
    ),
}
class TextEmotionDetector:
    """Text emotion detection with transformer model + keyword fallback.

    ``load()`` tries to build a HuggingFace ``text-classification`` pipeline.
    If it is never called, or loading fails, ``detect()`` scores text with the
    keyword/emoji lexicons only. When the pipeline is available its scores are
    blended with the lexicon scores.
    """

    # Word tokens that mark a statement as negated.
    _NEGATIONS = frozenset({
        "not", "no", "never", "don't", "doesn't", "didn't", "won't",
        "can't", "couldn't", "wouldn't", "shouldn't", "isn't", "aren't",
    })
    # Tokenizer used for the negation lookup; the apostrophe is kept so that
    # contractions such as "don't" stay a single token.
    _TOKEN_RE = re.compile(r"[\w']+")
    # keyword -> compiled pattern, shared by all instances and filled lazily so
    # each keyword is compiled at most once per process.
    _pattern_cache: dict[str, re.Pattern] = {}

    def __init__(self, model_name: str | None = None, device: str = "cpu"):
        """Store configuration; no model is loaded until ``load()`` is called.

        Args:
            model_name: HuggingFace model id; defaults to ``TEXT_MODELS[0]``.
            device: Device argument forwarded to ``transformers.pipeline``.
        """
        self.model_name = model_name or TEXT_MODELS[0]
        self.device = device
        self.pipe = None
        self.model_type = "keyword"  # "transformer" or "keyword"
        self.loaded = False

    def load(self) -> None:
        """Try to create the transformer pipeline (idempotent).

        Any failure is reported on stdout and leaves the detector in keyword
        mode; ``loaded`` is set either way so the attempt is not repeated.
        """
        if self.loaded:
            return
        if HAS_TRANSFORMERS:
            try:
                self.pipe = pipeline(
                    "text-classification",
                    model=self.model_name,
                    device=self.device,
                    top_k=None,  # return a score for every class
                )
                self.model_type = "transformer"
                print(f"[TextDetector] Loaded model: {self.model_name}")
            except Exception as e:
                # Deliberate best-effort: fall back to the lexicon scorer.
                print(f"[TextDetector] Model load failed: {e}")
                print("[TextDetector] Using keyword analysis")
        else:
            print("[TextDetector] transformers not available, keyword mode")
        self.loaded = True

    @classmethod
    def _count_keyword_hits(cls, keywords, text: str, skip=()) -> int:
        """Count the distinct keywords that occur in *text* at a word start.

        A keyword matches only where it is not preceded by a word character,
        so "hate" no longer fires inside "whatever", while inflected forms
        such as "laughing" for "laugh" still match.

        Args:
            keywords: Iterable of lower-case keywords/phrases; duplicates are
                counted once.
            text: Lower-cased text to search.
            skip: Keywords to ignore (already counted elsewhere).

        Returns:
            Number of distinct keywords found.
        """
        hits = 0
        for kw in dict.fromkeys(keywords):  # de-duplicate, keep order
            if kw in skip:
                continue
            pattern = cls._pattern_cache.get(kw)
            if pattern is None:
                pattern = re.compile(r"(?<!\w)" + re.escape(kw))
                cls._pattern_cache[kw] = pattern
            if pattern.search(text):
                hits += 1
        return hits

    def _keyword_analysis(self, text: str) -> dict[EmotionLabel, float]:
        """Keyword + emoji + punctuation based emotion scoring.

        Returns:
            A score per label in ``EMOTION_LABELS``; the scores sum to 1.
        """
        lower = text.lower()
        scores: dict[EmotionLabel, float] = {label: 0.0 for label in EMOTION_LABELS}
        scores[EmotionLabel.NEUTRAL] = 0.08  # baseline

        # Keyword matching (English): 0.12 per distinct keyword found.
        for label, keywords in KEYWORDS.items():
            scores[label] += self._count_keyword_hits(keywords, lower) * 0.12

        # Multilingual keyword matching: 0.15 per distinct keyword (slightly
        # higher weight). Words already present in the English list for the
        # same label are skipped so one word is never counted twice.
        for label, keywords in MULTILINGUAL_KEYWORDS.items():
            english = frozenset(KEYWORDS.get(label, ()))
            scores[label] += self._count_keyword_hits(keywords, lower, skip=english) * 0.15

        # Emoji matching: 0.25 per emoji occurrence.
        for label, pattern in EMOJI_PATTERNS.items():
            scores[label] += len(pattern.findall(text)) * 0.25

        # Punctuation features
        excl = text.count('!')
        ques = text.count('?')
        caps_words = sum(1 for w in text.split() if w.isupper() and len(w) > 1)
        scores[EmotionLabel.SURPRISE] += excl * 0.04
        scores[EmotionLabel.JOY] += excl * 0.025
        scores[EmotionLabel.SURPRISE] += ques * 0.03
        scores[EmotionLabel.JOY] += caps_words * 0.03

        # Negation awareness (simple). Tokenize once with a regex so that
        # trailing punctuation ("no,", "never!") and typographic apostrophes
        # ("don’t") do not hide a negation word.
        tokens = {
            tok.strip("'")
            for tok in self._TOKEN_RE.findall(lower.replace("\u2019", "'"))
        }
        if not self._NEGATIONS.isdisjoint(tokens):
            # Negation can flip positive emotions
            if scores[EmotionLabel.JOY] > scores[EmotionLabel.SADNESS]:
                scores[EmotionLabel.SADNESS] += scores[EmotionLabel.JOY] * 0.3
                scores[EmotionLabel.JOY] *= 0.5

        # Normalize
        total = sum(scores.values())
        if total > 0:
            scores = {k: v / total for k, v in scores.items()}
        return scores

    def _map_transformer_scores(self, predictions: list[dict]) -> dict[EmotionLabel, float]:
        """Map transformer predictions to EmoSphere labels.

        Args:
            predictions: Dicts with a ``"label"`` and a ``"score"`` entry.

        Returns:
            A score per label in ``EMOTION_LABELS``, normalized to sum to 1
            when at least one prediction could be mapped (all zeros otherwise).
        """
        scores: dict[EmotionLabel, float] = {label: 0.0 for label in EMOTION_LABELS}
        for pred in predictions:
            model_label = pred["label"].lower().strip()
            # Try GoEmotions mapping first, then simple mapping. Explicit
            # None checks so that a falsy enum member could not be dropped.
            emo_label = GOEMOTIONS_MAP.get(model_label)
            if emo_label is None:
                emo_label = SIMPLE_MAP.get(model_label)
            if emo_label is None:
                continue
            # Several model classes may share a target; keep the strongest.
            scores[emo_label] = max(scores[emo_label], pred["score"])

        # Ensure calm gets some weight
        if scores[EmotionLabel.NEUTRAL] > 0.3:
            scores[EmotionLabel.CALM] = max(
                scores[EmotionLabel.CALM], scores[EmotionLabel.NEUTRAL] * 0.2
            )

        total = sum(scores.values())
        if total > 0:
            scores = {k: v / total for k, v in scores.items()}
        return scores

    def detect(
        self,
        text: str,
        cultural_region: CulturalRegion = CulturalRegion.UNIVERSAL,
    ) -> EmotionDetectionResult:
        """Detect emotion from text.

        Args:
            text: Input text; ``None`` or whitespace-only input yields a
                fully NEUTRAL result.
            cultural_region: Passed through unchanged to the result.

        Returns:
            An ``EmotionDetectionResult`` with per-label scores, the dominant
            label, a confidence value and the processing time in milliseconds.
        """
        start = time.perf_counter()  # monotonic clock for elapsed time
        text = text or ""
        has_text = bool(text.strip())
        keyword_fallback = False  # True when inference failed for this call

        if not has_text:
            scores = {
                label: (1.0 if label == EmotionLabel.NEUTRAL else 0.0)
                for label in EMOTION_LABELS
            }
        elif self.pipe is not None:
            try:
                # Cap the character count, and additionally let the tokenizer
                # truncate: 512 characters can still exceed the model's token
                # limit for text that tokenizes into many pieces.
                raw = self.pipe(text[:512], truncation=True)
                # Pipeline with top_k=None returns list[list[dict]] or list[dict]
                predictions = raw[0] if raw and isinstance(raw[0], list) else raw
                scores = self._map_transformer_scores(predictions)
            except Exception as e:
                print(f"[TextDetector] Inference error: {e}, falling back to keywords")
                scores = self._keyword_analysis(text)
                keyword_fallback = True
        else:
            scores = self._keyword_analysis(text)

        # The result counts as model-backed only if inference did not fail.
        model_in_use = self.model_type == "transformer" and not keyword_fallback

        # Blend with keyword analysis for robustness
        if model_in_use and has_text:
            kw_scores = self._keyword_analysis(text)
            # Detect if text is non-Latin (Greek, Arabic, Chinese, etc.)
            non_latin_chars = sum(1 for c in text if ord(c) > 0x024F and c.isalpha())
            total_alpha = sum(1 for c in text if c.isalpha()) or 1
            is_non_english = (non_latin_chars / total_alpha) > 0.3
            if is_non_english:
                # For non-English: 30% model, 70% keywords (model is English-only)
                model_w, kw_w = 0.3, 0.7
            else:
                # For English: 75% model, 25% keywords
                model_w, kw_w = 0.75, 0.25
            for label in EMOTION_LABELS:
                scores[label] = scores[label] * model_w + kw_scores[label] * kw_w
            total = sum(scores.values())
            if total > 0:
                scores = {k: v / total for k, v in scores.items()}

        emotion_scores = [
            EmotionScore(label=label, score=scores[label], confidence=scores[label])
            for label in EMOTION_LABELS
        ]
        dominant = max(scores, key=scores.get)  # type: ignore
        return EmotionDetectionResult(
            dominant=dominant,
            dominant_score=scores[dominant],
            scores=emotion_scores,
            modality="text",
            confidence=scores[dominant] * (0.85 if model_in_use else 0.65),
            processing_time_ms=(time.perf_counter() - start) * 1000,
            cultural_region=cultural_region,
        )