Spaces:

chariscait
/

EmoSphere

Running

App Files Files Community

chariscait committed on Apr 13

Commit

82d2d36

verified ·

1 Parent(s): 2170a8d

Add Greek/Spanish/French/German multilingual keywords + non-English keyword priority

Browse files

Files changed (1) hide show

text_detector.py +112 -4

text_detector.py CHANGED Viewed

@@ -137,6 +137,99 @@ KEYWORDS: dict[EmotionLabel, list[str]] = {
     ],
 }
 # Emoji patterns
 EMOJI_PATTERNS: dict[EmotionLabel, re.Pattern] = {
     EmotionLabel.JOY:      re.compile(r'[\U0001F600-\U0001F606\U0001F609\U0001F60A\U0001F60B\U0001F60E\U0001F929\U0001F973\U0001F389\U0001F38A]'),
@@ -188,11 +281,16 @@ class TextEmotionDetector:
         scores: dict[EmotionLabel, float] = {label: 0.0 for label in EMOTION_LABELS}
         scores[EmotionLabel.NEUTRAL] = 0.08  # baseline
-        # Keyword matching
         for label, keywords in KEYWORDS.items():
             count = sum(1 for kw in keywords if kw in lower)
             scores[label] += count * 0.12
         # Emoji matching
         for label, pattern in EMOJI_PATTERNS.items():
             matches = pattern.findall(text)
@@ -272,9 +370,19 @@ class TextEmotionDetector:
         # Blend with keyword analysis for robustness
         if self.model_type == "transformer" and text.strip():
             kw_scores = self._keyword_analysis(text)
-            # 80% model, 20% keywords for better emoji/punctuation handling
-            for label in EMOTION_LABELS:
-                scores[label] = scores[label] * 0.8 + kw_scores[label] * 0.2
             total = sum(scores.values())
             if total > 0:
                 scores = {k: v / total for k, v in scores.items()}

     ],
 }
+# ── Multilingual Keywords (Greek, Spanish, French, German, etc.) ─────
+MULTILINGUAL_KEYWORDS: dict[EmotionLabel, list[str]] = {
+    EmotionLabel.JOY: [
+        # Greek
+        "χαρά", "χαρούμενος", "χαρούμενη", "ευτυχισμένος", "ευτυχισμένη",
+        "ευτυχία", "χαίρομαι", "υπέροχα", "τέλεια", "φανταστικά", "γέλιο",
+        "γελάω", "χαμογελώ", "χαμόγελο", "ωραία", "εξαιρετικά",
+        # Spanish
+        "feliz", "alegre", "contento", "maravilloso", "genial", "risa",
+        # French
+        "heureux", "heureuse", "joie", "magnifique", "formidable",
+        # German
+        "glücklich", "froh", "wunderbar", "fantastisch", "freude",
+    ],
+    EmotionLabel.SADNESS: [
+        # Greek
+        "λυπημένος", "λυπημένη", "λύπη", "στεναχωρημένος", "στεναχώρια",
+        "κλαίω", "δάκρυα", "πόνος", "μοναξιά", "μόνος", "μόνη",
+        "θλίψη", "απογοητευμένος", "δυστυχισμένος", "απελπισία",
+        # Spanish
+        "triste", "tristeza", "llorar", "dolor", "soledad",
+        # French
+        "triste", "tristesse", "pleurer", "douleur", "chagrin",
+    ],
+    EmotionLabel.SURPRISE: [
+        # Greek
+        "έκπληξη", "εκπληκτικό", "εκπληκτικός", "εκπληκτική", "εκπλήσσομαι",
+        "απίστευτο", "αναπάντεχο", "ξαφνικά", "δεν το περίμενα", "σοκ",
+        "εντυπωσιακό", "παράξενο", "εκπληκτη",
+        # Spanish
+        "sorpresa", "sorprendido", "increíble", "inesperado",
+        # French
+        "surprise", "surpris", "incroyable", "inattendu",
+    ],
+    EmotionLabel.FEAR: [
+        # Greek
+        "φόβος", "φοβάμαι", "τρομαγμένος", "τρομαγμένη", "ανησυχία",
+        "ανήσυχος", "αγχωμένος", "άγχος", "πανικός", "τρόμος",
+        "φοβερό", "ανησυχώ", "στρες",
+        # Spanish
+        "miedo", "asustado", "nervioso", "ansiedad", "pánico",
+        # French
+        "peur", "effrayé", "anxieux", "angoisse", "panique",
+    ],
+    EmotionLabel.ANGER: [
+        # Greek
+        "θυμός", "θυμωμένος", "θυμωμένη", "εκνευρισμένος", "εκνευρισμένη",
+        "οργή", "εξοργισμένος", "νεύρα", "μίσος", "μισώ",
+        "αγανακτισμένος", "εξαγριωμένος", "τσαντίλα",
+        # Spanish
+        "enojado", "furioso", "rabia", "odio", "ira",
+        # French
+        "colère", "furieux", "enragé", "haine", "irrité",
+    ],
+    EmotionLabel.DISGUST: [
+        # Greek
+        "αηδία", "αηδιαστικό", "αποκρουστικό", "φρικτό", "απαίσιο",
+        "σιχαμερό", "αρρωστημένο", "χάλια",
+        # Spanish
+        "asco", "asqueroso", "repugnante", "horrible",
+        # French
+        "dégoût", "dégoûtant", "horrible", "répugnant",
+    ],
+    EmotionLabel.LOVE: [
+        # Greek
+        "αγάπη", "αγαπώ", "αγαπημένος", "αγαπημένη", "ερωτευμένος",
+        "ερωτευμένη", "τρυφερότητα", "αγκαλιά", "φιλί", "καρδιά",
+        "λατρεύω", "στοργή", "αφοσίωση",
+        # Spanish
+        "amor", "te quiero", "cariño", "corazón", "ternura",
+        # French
+        "amour", "aimer", "tendresse", "coeur", "chéri",
+    ],
+    EmotionLabel.CALM: [
+        # Greek
+        "ηρεμία", "ήρεμος", "ήρεμη", "χαλαρός", "χαλαρή",
+        "γαλήνη", "ήσυχος", "ειρηνικός", "ξεκούραση", "ψυχραιμία",
+        # Spanish
+        "calma", "tranquilo", "relajado", "sereno", "paz",
+        # French
+        "calme", "tranquille", "détendu", "serein", "paix",
+    ],
+    EmotionLabel.NEUTRAL: [
+        # Greek
+        "εντάξει", "μια χαρά", "κανονικά", "συνήθως", "απλά",
+        "τίποτα", "ουδέτερο",
+        # Spanish
+        "bien", "normal", "regular",
+        # French
+        "bien", "normal", "ordinaire",
+    ],
+}
 # Emoji patterns
 EMOJI_PATTERNS: dict[EmotionLabel, re.Pattern] = {
     EmotionLabel.JOY:      re.compile(r'[\U0001F600-\U0001F606\U0001F609\U0001F60A\U0001F60B\U0001F60E\U0001F929\U0001F973\U0001F389\U0001F38A]'),
         scores: dict[EmotionLabel, float] = {label: 0.0 for label in EMOTION_LABELS}
         scores[EmotionLabel.NEUTRAL] = 0.08  # baseline
+        # Keyword matching (English)
         for label, keywords in KEYWORDS.items():
             count = sum(1 for kw in keywords if kw in lower)
             scores[label] += count * 0.12
+        # Multilingual keyword matching (Greek, Spanish, French, German, etc.)
+        for label, keywords in MULTILINGUAL_KEYWORDS.items():
+            count = sum(1 for kw in keywords if kw in lower)
+            scores[label] += count * 0.15  # slightly higher weight for exact multilingual match
         # Emoji matching
         for label, pattern in EMOJI_PATTERNS.items():
             matches = pattern.findall(text)
         # Blend with keyword analysis for robustness
         if self.model_type == "transformer" and text.strip():
             kw_scores = self._keyword_analysis(text)
+            # Detect if text is non-Latin (Greek, Arabic, Chinese, etc.)
+            non_latin_chars = sum(1 for c in text if ord(c) > 0x024F and c.isalpha())
+            total_alpha = sum(1 for c in text if c.isalpha()) or 1
+            is_non_english = (non_latin_chars / total_alpha) > 0.3
+            if is_non_english:
+                # For non-English: 30% model, 70% keywords (model is English-only)
+                for label in EMOTION_LABELS:
+                    scores[label] = scores[label] * 0.3 + kw_scores[label] * 0.7
+            else:
+                # For English: 75% model, 25% keywords
+                for label in EMOTION_LABELS:
+                    scores[label] = scores[label] * 0.75 + kw_scores[label] * 0.25
             total = sum(scores.values())
             if total > 0:
                 scores = {k: v / total for k, v in scores.items()}