Spaces:

chariscait
/

EmoSphere

Running

App Files Files Community

EmoSphere / text_detector.py

chariscait

Add Greek/Spanish/French/German multilingual keywords + non-English keyword priority

82d2d36 verified about 1 month ago

raw

history blame contribute delete

17.5 kB

	"""Text Emotion Detector — Real inference using DistilBERT/BERT.

	Two modes:
	1. HuggingFace text-classification pipeline (accurate, needs model download)
	2. Keyword + emoji lexicon analysis (fast, no dependencies, instant)

	Both run locally. No data sent anywhere.
	"""

	from __future__ import annotations

	import re
	import time
	from typing import Optional

	import numpy as np

	try:
	from transformers import pipeline
	HAS_TRANSFORMERS = True
	except ImportError:
	HAS_TRANSFORMERS = False

	from models import (
	EmotionLabel, EMOTION_LABELS, EmotionScore,
	EmotionDetectionResult, CulturalRegion,
	)


	# ── Text emotion models (HuggingFace, public) ───────────────────────
	# Candidate HuggingFace checkpoints. The first entry is the default model
	# used by TextEmotionDetector when no model_name is supplied.
	TEXT_MODELS = [
	    # 7-class, excellent
	    "j-hartmann/emotion-english-distilroberta-base",
	    # 28-class GoEmotions
	    "SamLowe/roberta-base-go_emotions",
	    # 6-class
	    "bhadresh-savani/distilbert-base-uncased-emotion",
	]

	# GoEmotions → EmoSphere mapping (for multi-label models)
	# Lookup table: lowercase GoEmotions label name -> EmoSphere EmotionLabel.
	# TextEmotionDetector._map_transformer_scores lowercases and strips the
	# model's label before looking it up here, so keys must stay lowercase.
	# Several source labels collapse onto one EmoSphere label; "relief" is the
	# only one mapped to CALM.
	GOEMOTIONS_MAP = {
	"admiration": EmotionLabel.LOVE,
	"amusement": EmotionLabel.JOY,
	"anger": EmotionLabel.ANGER,
	"annoyance": EmotionLabel.ANGER,
	"approval": EmotionLabel.JOY,
	"caring": EmotionLabel.LOVE,
	"confusion": EmotionLabel.SURPRISE,
	"curiosity": EmotionLabel.SURPRISE,
	"desire": EmotionLabel.LOVE,
	"disappointment": EmotionLabel.SADNESS,
	"disapproval": EmotionLabel.DISGUST,
	"disgust": EmotionLabel.DISGUST,
	"embarrassment": EmotionLabel.FEAR,
	"excitement": EmotionLabel.JOY,
	"fear": EmotionLabel.FEAR,
	"gratitude": EmotionLabel.LOVE,
	"grief": EmotionLabel.SADNESS,
	"joy": EmotionLabel.JOY,
	"love": EmotionLabel.LOVE,
	"nervousness": EmotionLabel.FEAR,
	"optimism": EmotionLabel.JOY,
	"pride": EmotionLabel.JOY,
	"realization": EmotionLabel.SURPRISE,
	"relief": EmotionLabel.CALM,
	"remorse": EmotionLabel.SADNESS,
	"sadness": EmotionLabel.SADNESS,
	"surprise": EmotionLabel.SURPRISE,
	"neutral": EmotionLabel.NEUTRAL,
	}

	# Simple 6/7-class model mapping
	# Fallback lookup table: lowercase label name -> EmoSphere EmotionLabel.
	# Consulted by TextEmotionDetector._map_transformer_scores when
	# GOEMOTIONS_MAP has no entry for a label. The keys not already present in
	# GOEMOTIONS_MAP are the short aliases "angry", "happy" and "sad".
	SIMPLE_MAP = {
	"anger": EmotionLabel.ANGER,
	"angry": EmotionLabel.ANGER,
	"disgust": EmotionLabel.DISGUST,
	"fear": EmotionLabel.FEAR,
	"happy": EmotionLabel.JOY,
	"joy": EmotionLabel.JOY,
	"love": EmotionLabel.LOVE,
	"sad": EmotionLabel.SADNESS,
	"sadness": EmotionLabel.SADNESS,
	"surprise": EmotionLabel.SURPRISE,
	"neutral": EmotionLabel.NEUTRAL,
	}


	# ── Keyword Lexicons ─────────────────────────────────────────────────
	# English lexicon: EmotionLabel -> lowercase keywords.
	# TextEmotionDetector._keyword_analysis lowercases the input and adds 0.12
	# to a label's raw score for every listed keyword it finds, so entries must
	# be lowercase and should not be repeated within a label.
	KEYWORDS: dict[EmotionLabel, list[str]] = {
	EmotionLabel.JOY: [
	"happy", "glad", "excited", "wonderful", "great", "amazing", "awesome",
	"fantastic", "yay", "smile", "laugh", "fun", "enjoy", "pleased",
	"delighted", "cheerful", "thrilled", "blessed", "grateful", "ecstatic",
	"brilliant", "perfect", "excellent", "magnificent", "joyful", "elated",
	"overjoyed", "euphoric", "blissful", "merry", "jubilant", "lively",
	],
	EmotionLabel.SADNESS: [
	"sad", "unhappy", "depressed", "lonely", "miss", "cry", "tears",
	"heartbreak", "sorry", "grief", "loss", "disappointed", "miserable",
	"gloomy", "melancholy", "devastated", "hopeless", "pain", "hurt",
	"sorrow", "mourning", "regret", "aching", "broken", "empty",
	"despair", "forlorn", "downcast", "dejected", "somber",
	],
	EmotionLabel.SURPRISE: [
	"wow", "omg", "surprised", "unexpected", "shocking", "unbelievable",
	"incredible", "suddenly", "whoa", "astonished", "stunned",
	"remarkable", "extraordinary", "unforeseen", "startled", "amazed",
	],
	EmotionLabel.FEAR: [
	"afraid", "scared", "worried", "anxious", "nervous", "terrified",
	"panic", "dread", "uneasy", "concern", "fearful", "frightened",
	"stressed", "overwhelmed", "tense", "apprehensive", "alarmed",
	"phobia", "nightmare", "horror", "creepy", "threatening",
	],
	EmotionLabel.DISGUST: [
	"gross", "disgusting", "horrible", "terrible", "awful", "nasty",
	"repulsive", "yuck", "ugh", "revolting", "sick", "unpleasant",
	"vile", "offensive", "repugnant", "loathsome", "ghastly",
	],
	EmotionLabel.ANGER: [
	"angry", "furious", "annoyed", "frustrated", "rage", "mad", "irritated",
	"outraged", "livid", "hostile", "enraged", "infuriated", "aggravated",
	"resentful", "bitter", "hate", "fury", "wrath", "temper", "irate",
	],
	# NEUTRAL additionally receives a fixed 0.08 baseline in _keyword_analysis.
	EmotionLabel.NEUTRAL: [
	"okay", "fine", "alright", "normal", "regular", "usual", "average",
	"standard", "nothing", "so-so", "meh", "whatever", "indifferent",
	],
	EmotionLabel.LOVE: [
	"love", "adore", "cherish", "darling", "sweetheart", "heart",
	"romantic", "affection", "caring", "tender", "passion", "beloved",
	"soulmate", "dear", "treasure", "devotion", "embrace", "kiss",
	"hug", "warmth", "intimate", "partner", "together", "forever",
	],
	EmotionLabel.CALM: [
	"calm", "peaceful", "relaxed", "serene", "tranquil", "zen",
	"mindful", "quiet", "gentle", "soothing", "meditate", "breathe",
	"harmony", "still", "content", "composed", "balanced", "centered",
	"grounded", "patient", "ease", "restful", "untroubled",
	],
	}

	# ── Multilingual Keywords (Greek, Spanish, French; German for JOY only) ─────
	# EmotionLabel -> lowercase keywords in languages other than English.
	# TextEmotionDetector._keyword_analysis adds 0.15 to a label's raw score for
	# every listed keyword it finds in the lowercased input. Each entry counts
	# once per list position, so a word shared by two languages must be listed
	# only once per label; otherwise it is scored twice.
	MULTILINGUAL_KEYWORDS: dict[EmotionLabel, list[str]] = {
	    EmotionLabel.JOY: [
	        # Greek
	        "χαρά", "χαρούμενος", "χαρούμενη", "ευτυχισμένος", "ευτυχισμένη",
	        "ευτυχία", "χαίρομαι", "υπέροχα", "τέλεια", "φανταστικά", "γέλιο",
	        "γελάω", "χαμογελώ", "χαμόγελο", "ωραία", "εξαιρετικά",
	        # Spanish
	        "feliz", "alegre", "contento", "maravilloso", "genial", "risa",
	        # French
	        "heureux", "heureuse", "joie", "magnifique", "formidable",
	        # German
	        "glücklich", "froh", "wunderbar", "fantastisch", "freude",
	    ],
	    EmotionLabel.SADNESS: [
	        # Greek
	        "λυπημένος", "λυπημένη", "λύπη", "στεναχωρημένος", "στεναχώρια",
	        "κλαίω", "δάκρυα", "πόνος", "μοναξιά", "μόνος", "μόνη",
	        "θλίψη", "απογοητευμένος", "δυστυχισμένος", "απελπισία",
	        # Spanish ("triste" is also the French word; listed once)
	        "triste", "tristeza", "llorar", "dolor", "soledad",
	        # French
	        "tristesse", "pleurer", "douleur", "chagrin",
	    ],
	    EmotionLabel.SURPRISE: [
	        # Greek
	        "έκπληξη", "εκπληκτικό", "εκπληκτικός", "εκπληκτική", "εκπλήσσομαι",
	        "απίστευτο", "αναπάντεχο", "ξαφνικά", "δεν το περίμενα", "σοκ",
	        "εντυπωσιακό", "παράξενο", "εκπληκτη",
	        # Spanish
	        "sorpresa", "sorprendido", "increíble", "inesperado",
	        # French
	        "surprise", "surpris", "incroyable", "inattendu",
	    ],
	    EmotionLabel.FEAR: [
	        # Greek
	        "φόβος", "φοβάμαι", "τρομαγμένος", "τρομαγμένη", "ανησυχία",
	        "ανήσυχος", "αγχωμένος", "άγχος", "πανικός", "τρόμος",
	        "φοβερό", "ανησυχώ", "στρες",
	        # Spanish
	        "miedo", "asustado", "nervioso", "ansiedad", "pánico",
	        # French
	        "peur", "effrayé", "anxieux", "angoisse", "panique",
	    ],
	    EmotionLabel.ANGER: [
	        # Greek
	        "θυμός", "θυμωμένος", "θυμωμένη", "εκνευρισμένος", "εκνευρισμένη",
	        "οργή", "εξοργισμένος", "νεύρα", "μίσος", "μισώ",
	        "αγανακτισμένος", "εξαγριωμένος", "τσαντίλα",
	        # Spanish
	        "enojado", "furioso", "rabia", "odio", "ira",
	        # French
	        "colère", "furieux", "enragé", "haine", "irrité",
	    ],
	    EmotionLabel.DISGUST: [
	        # Greek
	        "αηδία", "αηδιαστικό", "αποκρουστικό", "φρικτό", "απαίσιο",
	        "σιχαμερό", "αρρωστημένο", "χάλια",
	        # Spanish ("horrible" is also the French word; listed once)
	        "asco", "asqueroso", "repugnante", "horrible",
	        # French
	        "dégoût", "dégoûtant", "répugnant",
	    ],
	    EmotionLabel.LOVE: [
	        # Greek
	        "αγάπη", "αγαπώ", "αγαπημένος", "αγαπημένη", "ερωτευμένος",
	        "ερωτευμένη", "τρυφερότητα", "αγκαλιά", "φιλί", "καρδιά",
	        "λατρεύω", "στοργή", "αφοσίωση",
	        # Spanish
	        "amor", "te quiero", "cariño", "corazón", "ternura",
	        # French
	        "amour", "aimer", "tendresse", "coeur", "chéri",
	    ],
	    EmotionLabel.CALM: [
	        # Greek
	        "ηρεμία", "ήρεμος", "ήρεμη", "χαλαρός", "χαλαρή",
	        "γαλήνη", "ήσυχος", "ειρηνικός", "ξεκούραση", "ψυχραιμία",
	        # Spanish
	        "calma", "tranquilo", "relajado", "sereno", "paz",
	        # French
	        "calme", "tranquille", "détendu", "serein", "paix",
	    ],
	    EmotionLabel.NEUTRAL: [
	        # Greek
	        "εντάξει", "μια χαρά", "κανονικά", "συνήθως", "απλά",
	        "τίποτα", "ουδέτερο",
	        # Spanish ("bien" and "normal" are also French; listed once)
	        "bien", "normal", "regular",
	        # French
	        "ordinaire",
	    ],
	}

	# Emoji patterns
	# One pre-compiled character-class regex per label, listing the emoji code
	# points that count towards it. TextEmotionDetector._keyword_analysis runs
	# findall() on the original (non-lowercased) text and adds 0.25 to the
	# label's raw score per matched character.
	# There is no entry for EmotionLabel.NEUTRAL.
	EMOJI_PATTERNS: dict[EmotionLabel, re.Pattern] = {
	EmotionLabel.JOY: re.compile(r'[\U0001F600-\U0001F606\U0001F609\U0001F60A\U0001F60B\U0001F60E\U0001F929\U0001F973\U0001F389\U0001F38A]'),
	EmotionLabel.SADNESS: re.compile(r'[\U0001F622\U0001F62D\U0001F61E\U0001F614\U0001F494\U0001F63F\U0001F97A\U0001F629]'),
	EmotionLabel.SURPRISE: re.compile(r'[\U0001F632\U0001F62E\U0001F92F\U0001F631\U0001F633]'),
	EmotionLabel.FEAR: re.compile(r'[\U0001F630\U0001F628\U0001F627\U0001F61F\U0001F62C]'),
	EmotionLabel.DISGUST: re.compile(r'[\U0001F922\U0001F92E]'),
	EmotionLabel.ANGER: re.compile(r'[\U0001F621\U0001F624\U0001F620\U0001F92C]'),
	EmotionLabel.LOVE: re.compile(r'[\U00002764\U0001F495\U0001F970\U0001F60D\U0001F497\U0001F496\U0001F498\U0001F49D\U0001F618]'),
	EmotionLabel.CALM: re.compile(r'[\U0001F60C\U0001F9D8\U0000262E\U0001F54A\U0001F33F\U0001F343]'),
	}


	class TextEmotionDetector:
	    """Text emotion detection with transformer model + keyword fallback.

	    ``load()`` tries once to build a HuggingFace ``text-classification``
	    pipeline. ``detect()`` uses that pipeline when it exists and blends its
	    output with the keyword/emoji lexicon scores; without a pipeline, or when
	    inference raises, it relies on the lexicon scores alone.
	    """

	    # Negation markers; compared against whole tokens, never substrings.
	    _NEGATIONS = frozenset({
	        "not", "no", "never", "don't", "doesn't", "didn't", "won't",
	        "can't", "couldn't", "wouldn't", "shouldn't", "isn't", "aren't",
	    })

	    # Compiled keyword patterns, built on first use and shared by instances.
	    _lexicon_cache = None

	    def __init__(self, model_name: str | None = None, device: str = "cpu"):
	        """Store the configuration; nothing is loaded until ``load()``.

	        Args:
	            model_name: HuggingFace model id; falls back to ``TEXT_MODELS[0]``
	                when omitted or empty.
	            device: Value forwarded as ``device=`` to ``transformers.pipeline``.
	        """
	        self.model_name = model_name or TEXT_MODELS[0]
	        self.device = device
	        self.pipe = None
	        self.model_type = "keyword"  # "transformer" or "keyword"
	        self.loaded = False

	    def load(self) -> None:
	        """Build the transformer pipeline if possible; otherwise stay in keyword mode.

	        Runs at most once per instance: ``loaded`` is set even when the model
	        could not be created, so a failing load is not retried.
	        """
	        if self.loaded:
	            return

	        if HAS_TRANSFORMERS:
	            try:
	                self.pipe = pipeline(
	                    "text-classification",
	                    model=self.model_name,
	                    device=self.device,
	                    top_k=None,
	                )
	                self.model_type = "transformer"
	                print(f"[TextDetector] Loaded model: {self.model_name}")
	            except Exception as e:
	                # Deliberate best-effort: any load problem degrades to keywords.
	                print(f"[TextDetector] Model load failed: {e}")
	                print("[TextDetector] Using keyword analysis")
	        else:
	            print("[TextDetector] transformers not available, keyword mode")

	        self.loaded = True

	    @classmethod
	    def _compiled_lexicons(cls):
	        """Return ``[(weight, {label: [pattern, ...]}), ...]`` for both lexicons.

	        Every keyword is compiled to a regex anchored at a word start
	        (``(?<!\\w)``). That still matches inflected forms ("laugh" in
	        "laughing") but no longer fires in the middle of unrelated words
	        ("ugh" in "laugh", "hate" in "whatever", "ease" in "please").
	        Duplicate entries within one label's list are collapsed so a repeated
	        keyword is not scored twice.

	        The result is cached on the class; changes made to ``KEYWORDS`` or
	        ``MULTILINGUAL_KEYWORDS`` after the first call are not picked up.
	        """
	        if cls._lexicon_cache is None:
	            weighted_lexicons = (
	                (KEYWORDS, 0.12),
	                # slightly higher weight for exact multilingual match
	                (MULTILINGUAL_KEYWORDS, 0.15),
	            )
	            cls._lexicon_cache = [
	                (
	                    weight,
	                    {
	                        label: [
	                            re.compile(r"(?<!\w)" + re.escape(keyword))
	                            for keyword in dict.fromkeys(keywords)
	                        ]
	                        for label, keywords in lexicon.items()
	                    },
	                )
	                for lexicon, weight in weighted_lexicons
	            ]
	        return cls._lexicon_cache

	    @staticmethod
	    def _is_mostly_non_latin(text: str) -> bool:
	        """Return True when over 30% of the letters lie above U+024F.

	        This detects non-Latin scripts only; Latin-script languages such as
	        Spanish or French are not flagged.
	        """
	        letters = [ch for ch in text if ch.isalpha()]
	        non_latin = sum(1 for ch in letters if ord(ch) > 0x024F)
	        return non_latin / (len(letters) or 1) > 0.3

	    def _keyword_analysis(self, text: str) -> dict[EmotionLabel, float]:
	        """Keyword + emoji + punctuation based emotion scoring.

	        Args:
	            text: Raw input text.

	        Returns:
	            A score per label in ``EMOTION_LABELS``, normalised to sum to 1.
	        """
	        lower = text.lower()
	        scores: dict[EmotionLabel, float] = {label: 0.0 for label in EMOTION_LABELS}
	        scores[EmotionLabel.NEUTRAL] = 0.08  # baseline

	        # Keyword matching: English lexicon first, then the multilingual one.
	        # Each distinct keyword counts once, however often it occurs.
	        for weight, patterns_by_label in TextEmotionDetector._compiled_lexicons():
	            for label, patterns in patterns_by_label.items():
	                hits = sum(1 for pattern in patterns if pattern.search(lower))
	                scores[label] += hits * weight

	        # Emoji matching (every occurrence counts).
	        for label, pattern in EMOJI_PATTERNS.items():
	            scores[label] += len(pattern.findall(text)) * 0.25

	        # Punctuation features
	        excl = text.count("!")
	        ques = text.count("?")
	        caps_words = sum(1 for w in text.split() if w.isupper() and len(w) > 1)

	        scores[EmotionLabel.SURPRISE] += excl * 0.04
	        scores[EmotionLabel.JOY] += excl * 0.025
	        scores[EmotionLabel.SURPRISE] += ques * 0.03
	        scores[EmotionLabel.JOY] += caps_words * 0.03

	        # Negation awareness (simple). Tokenise once, ignoring surrounding
	        # punctuation and normalising the curly apostrophe, so "not," / "no!"
	        # / "don’t" are recognised as well.
	        tokens = {
	            token.strip("'")
	            for token in re.findall(r"[\w']+", lower.replace("\u2019", "'"))
	        }
	        if not TextEmotionDetector._NEGATIONS.isdisjoint(tokens):
	            # Negation can flip positive emotions
	            if scores[EmotionLabel.JOY] > scores[EmotionLabel.SADNESS]:
	                scores[EmotionLabel.SADNESS] += scores[EmotionLabel.JOY] * 0.3
	                scores[EmotionLabel.JOY] *= 0.5

	        # Normalize
	        total = sum(scores.values())
	        if total > 0:
	            scores = {k: v / total for k, v in scores.items()}

	        return scores

	    def _map_transformer_scores(self, predictions: list[dict]) -> dict[EmotionLabel, float]:
	        """Map transformer predictions to EmoSphere labels.

	        Args:
	            predictions: Dicts with a ``"label"`` and a ``"score"`` entry.

	        Returns:
	            A score per label in ``EMOTION_LABELS``, normalised to sum to 1
	            (all zeros when no prediction label could be mapped).
	        """
	        scores: dict[EmotionLabel, float] = {label: 0.0 for label in EMOTION_LABELS}

	        for pred in predictions:
	            model_label = pred["label"].lower().strip()
	            score = pred["score"]

	            # Try GoEmotions mapping first, then simple mapping.
	            emo_label = GOEMOTIONS_MAP.get(model_label)
	            if emo_label is None:
	                emo_label = SIMPLE_MAP.get(model_label)
	            # Compare with None explicitly: members of an enum mixed with
	            # int/str follow the mixed-in type's truthiness and may be falsy.
	            if emo_label is not None:
	                # Several model labels can share a target; keep the strongest.
	                scores[emo_label] = max(scores[emo_label], score)

	        # Ensure calm gets some weight
	        if scores[EmotionLabel.NEUTRAL] > 0.3:
	            scores[EmotionLabel.CALM] = max(
	                scores[EmotionLabel.CALM], scores[EmotionLabel.NEUTRAL] * 0.2
	            )

	        total = sum(scores.values())
	        if total > 0:
	            scores = {k: v / total for k, v in scores.items()}
	        return scores

	    def detect(
	        self,
	        text: str,
	        cultural_region: CulturalRegion = CulturalRegion.UNIVERSAL,
	    ) -> EmotionDetectionResult:
	        """Detect emotion from text.

	        Args:
	            text: Input text; blank text yields a pure NEUTRAL result.
	            cultural_region: Copied unchanged into the result.

	        Returns:
	            An ``EmotionDetectionResult`` with per-label scores, the dominant
	            label and the elapsed processing time in milliseconds.
	        """
	        # perf_counter is monotonic, unlike time.time(), so the elapsed time
	        # cannot be distorted by wall-clock adjustments.
	        start = time.perf_counter()
	        has_text = bool(text.strip())

	        # True when the scores come from the lexicons alone.
	        keyword_only = self.model_type != "transformer"

	        if not has_text:
	            scores = {
	                label: (1.0 if label == EmotionLabel.NEUTRAL else 0.0)
	                for label in EMOTION_LABELS
	            }
	        elif self.pipe is not None:
	            try:
	                raw = self.pipe(text[:512])  # Truncate to model max
	                # Pipeline with top_k=None returns list[list[dict]] or list[dict]
	                predictions = raw[0] if raw and isinstance(raw[0], list) else raw
	                scores = self._map_transformer_scores(predictions)
	            except Exception as e:
	                print(f"[TextDetector] Inference error: {e}, falling back to keywords")
	                scores = self._keyword_analysis(text)
	                # The model contributed nothing: skip the blend below and
	                # report the keyword-mode confidence factor.
	                keyword_only = True
	        else:
	            scores = self._keyword_analysis(text)

	        # Blend with keyword analysis for robustness
	        if has_text and not keyword_only:
	            kw_scores = self._keyword_analysis(text)
	            if self._is_mostly_non_latin(text):
	                # For non-English: 30% model, 70% keywords (model is English-only)
	                model_share, keyword_share = 0.3, 0.7
	            else:
	                # For English: 75% model, 25% keywords
	                model_share, keyword_share = 0.75, 0.25
	            scores = {
	                label: scores[label] * model_share + kw_scores[label] * keyword_share
	                for label in EMOTION_LABELS
	            }
	            total = sum(scores.values())
	            if total > 0:
	                scores = {k: v / total for k, v in scores.items()}

	        emotion_scores = [
	            EmotionScore(label=label, score=scores[label], confidence=scores[label])
	            for label in EMOTION_LABELS
	        ]
	        dominant = max(scores, key=scores.get)  # type: ignore

	        return EmotionDetectionResult(
	            dominant=dominant,
	            dominant_score=scores[dominant],
	            scores=emotion_scores,
	            modality="text",
	            confidence=scores[dominant] * (0.65 if keyword_only else 0.85),
	            processing_time_ms=(time.perf_counter() - start) * 1000,
	            cultural_region=cultural_region,
	        )