Spaces:

sentimentanalyzer01
/

sentiment_analyzer

Runtime error

App Files Files Community

sentimentanalyzer01 committed on Mar 21

Commit

3e55ee4

verified ·

1 Parent(s): 71c6108

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -21

app.py CHANGED Viewed

@@ -77,26 +77,71 @@ class OntologyEmotionModel:
     def _parse_rusentilex(self, content):
         lines = content.splitlines()
-        for line in lines[1:]:
-            parts = line.strip().split(',')
-            if len(parts) >= 3:
-                word = parts[0].strip().lower()
-                sentiment = parts[2].strip().lower()
-                lemma = self.morph.parse(word)[0].normal_form
-                if sentiment == 'positive':
-                    self.sentiment_lexicon[lemma] = 'радость'
-                elif sentiment == 'negative':
-                    self.sentiment_lexicon[lemma] = 'грусть'
     def _load_rusentilex(self):
-        url = "https://raw.githubusercontent.com/nicolay-r/sentiment-relation-classifiers/master/data/rusentilex.csv"
-        try:
-            r = requests.get(url, timeout=10)
-            if r.status_code == 200:
-                self._parse_rusentilex(r.text)
-                print("RuSentiLex загружен")
-        except Exception as e:
-            print(f"RuSentiLex не загружен: {e}")
     def init_ontology_level1(self):
         self.emotion_definitions = {
@@ -219,6 +264,16 @@ class OntologyEmotionModel:
         lemmas = [p.normal_form for p in parsed]
         pos_tags = [p.tag.POS for p in parsed]
         for category, rule in self.linguistic_rules.items():
             if 'words' in rule:
                 for word in rule['words']:
@@ -303,14 +358,32 @@ class OntologyEmotionModel:
         original_confidence = prediction['confidence']
         adj = rule_analysis['adjustments']
         rules = rule_analysis['rules_applied']
         conf_mult = 1.0 + adj['arousal'] * 0.2 + adj['uncertainty'] * 0.1 - abs(adj['valence']) * 0.1
         conf_mult = np.clip(conf_mult, 0.5, 1.5)
         new_confidence = original_confidence * conf_mult
-        # Ограничиваем максимум 1.0 (100%)
-        new_confidence = min(new_confidence, 1.0)
         new_emotion = original_emotion
         # Инверсия на основе правил
         for rule in rules:
             if rule.startswith("инверсия негатива:"):
@@ -334,6 +407,14 @@ class OntologyEmotionModel:
         # Восклицания
         if any('восклицание' in r for r in rules):
             new_confidence = min(new_confidence * 1.2, 1.0)
         return {
             'emotion': new_emotion,
@@ -409,7 +490,7 @@ class EmotionBERT(nn.Module):
         return logits
 # ============================================================
-# КАСКАДНЫЙ КЛАССИФИКАТОР (исправленный)
 # ============================================================
 class CascadeEmotionClassifier:

     def _parse_rusentilex(self, content):
         lines = content.splitlines()
+        added = 0
+        for line in lines[1:]:  # пропускаем заголовок
+            try:
+                parts = line.strip().split(',')
+                if len(parts) >= 3:
+                    word = parts[0].strip().lower()
+                    sentiment = parts[2].strip().lower()
+                    lemma = self.morph.parse(word)[0].normal_form
+                    if sentiment == 'positive':
+                        self.sentiment_lexicon[lemma] = 'радость'
+                        added += 1
+                    elif sentiment == 'negative':
+                        self.sentiment_lexicon[lemma] = 'грусть'
+                        added += 1
+            except Exception as e:
+                continue
+        print(f"  Добавлено слов из RuSentiLex: {added}")
     def _load_rusentilex(self):
+        """Загружает RuSentiLex из локального файла в папке model"""
+        import os
+        # Пути для поиска файла RuSentiLex
+        possible_paths = [
+            'model/rusentilex.csv',
+            'rusentilex.csv',
+            '/app/model/rusentilex.csv',
+            os.path.join(os.path.dirname(__file__), 'model', 'rusentilex.csv')
+        ]
+        loaded = False
+        print("📂 Поиск RuSentiLex...")
+        # Пробуем загрузить из локального файла
+        for path in possible_paths:
+            if os.path.exists(path):
+                try:
+                    with open(path, 'r', encoding='utf-8') as f:
+                        content = f.read()
+                    self._parse_rusentilex(content)
+                    print(f"✅ RuSentiLex загружен из файла: {path}")
+                    loaded = True
+                    break
+                except Exception as e:
+                    print(f"⚠️ Ошибка при загрузке {path}: {e}")
+        # Если локально не нашли, пробуем скачать из интернета
+        if not loaded:
+            print("⚠️ Локальный файл RuSentiLex не найден, пробуем скачать...")
+            url = "https://raw.githubusercontent.com/nicolay-r/sentiment-relation-classifiers/master/data/rusentilex.csv"
+            try:
+                r = requests.get(url, timeout=10)
+                if r.status_code == 200:
+                    self._parse_rusentilex(r.text)
+                    print("✅ RuSentiLex загружен из репозитория")
+                    loaded = True
+            except Exception as e:
+                print(f"⚠️ Не удалось загрузить RuSentiLex из репозитория: {e}")
+        if not loaded:
+            print("⚠️ RuSentiLex не загружен. Используется только статистический лексикон.")
+        # Выводим статистику
+        print(f"📊 Всего слов в лексиконе: {len(self.sentiment_lexicon)}")
     def init_ontology_level1(self):
         self.emotion_definitions = {
         lemmas = [p.normal_form for p in parsed]
         pos_tags = [p.tag.POS for p in parsed]
+        # Проверка на слова из лексикона
+        for lemma in lemmas:
+            sentiment = self.sentiment_lexicon.get(lemma, 'neutral')
+            if sentiment == 'радость':
+                rules_applied.append(f"позитивное слово: {lemma}")
+                adjustments['valence'] += 0.2
+            elif sentiment in ('грусть', 'злость', 'страх'):
+                rules_applied.append(f"негативное слово: {lemma}")
+                adjustments['valence'] -= 0.2
         for category, rule in self.linguistic_rules.items():
             if 'words' in rule:
                 for word in rule['words']:
         original_confidence = prediction['confidence']
         adj = rule_analysis['adjustments']
         rules = rule_analysis['rules_applied']
+        # Сохраняем исходную уверенность для проверки коррекции
+        original_confidence_value = original_confidence
+        was_corrected = len(rules) > 0
         conf_mult = 1.0 + adj['arousal'] * 0.2 + adj['uncertainty'] * 0.1 - abs(adj['valence']) * 0.1
         conf_mult = np.clip(conf_mult, 0.5, 1.5)
         new_confidence = original_confidence * conf_mult
         new_emotion = original_emotion
+        # Если есть негативные слова и нет позитивных, корректируем эмоцию
+        has_negative = any('негативное слово' in r for r in rules)
+        has_positive = any('позитивное слово' in r for r in rules)
+        if has_negative and not has_positive:
+            if original_emotion == 'радость':
+                new_emotion = 'грусть'
+                new_confidence *= 0.8
+                rules.append("коррекция: негативные слова без позитивных")
+            elif original_emotion == 'сарказм':
+                new_emotion = 'грусть'
+                new_confidence *= 0.9
+        elif has_positive and not has_negative and original_emotion in ('грусть', 'злость', 'страх'):
+            new_emotion = 'радость'
+            rules.append("коррекция: позитивные слова")
         # Инверсия на основе правил
         for rule in rules:
             if rule.startswith("инверсия негатива:"):
         # Восклицания
         if any('восклицание' in r for r in rules):
             new_confidence = min(new_confidence * 1.2, 1.0)
+        # Если онтология не применила коррекции, а уверенность была менее 90%,
+        # то повышаем уверенность на 10% (но не более 100%)
+        if not was_corrected and original_confidence_value < 0.9:
+            new_confidence = min(new_confidence * 1.10, 1.0)
+        # Ограничиваем максимум 1.0 (100%)
+        new_confidence = min(new_confidence, 1.0)
         return {
             'emotion': new_emotion,
         return logits
 # ============================================================
+# КАСКАДНЫЙ КЛАССИФИКАТОР
 # ============================================================
 class CascadeEmotionClassifier: