Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -77,26 +77,71 @@ class OntologyEmotionModel:
|
|
| 77 |
|
| 78 |
def _parse_rusentilex(self, content):
|
| 79 |
lines = content.splitlines()
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
def _load_rusentilex(self):
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
def init_ontology_level1(self):
|
| 102 |
self.emotion_definitions = {
|
|
@@ -219,6 +264,16 @@ class OntologyEmotionModel:
|
|
| 219 |
lemmas = [p.normal_form for p in parsed]
|
| 220 |
pos_tags = [p.tag.POS for p in parsed]
|
| 221 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
for category, rule in self.linguistic_rules.items():
|
| 223 |
if 'words' in rule:
|
| 224 |
for word in rule['words']:
|
|
@@ -303,14 +358,32 @@ class OntologyEmotionModel:
|
|
| 303 |
original_confidence = prediction['confidence']
|
| 304 |
adj = rule_analysis['adjustments']
|
| 305 |
rules = rule_analysis['rules_applied']
|
|
|
|
|
|
|
|
|
|
|
|
|
| 306 |
|
| 307 |
conf_mult = 1.0 + adj['arousal'] * 0.2 + adj['uncertainty'] * 0.1 - abs(adj['valence']) * 0.1
|
| 308 |
conf_mult = np.clip(conf_mult, 0.5, 1.5)
|
| 309 |
new_confidence = original_confidence * conf_mult
|
| 310 |
-
# Ограничиваем максимум 1.0 (100%)
|
| 311 |
-
new_confidence = min(new_confidence, 1.0)
|
| 312 |
new_emotion = original_emotion
|
| 313 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
# Инверсия на основе правил
|
| 315 |
for rule in rules:
|
| 316 |
if rule.startswith("инверсия негатива:"):
|
|
@@ -334,6 +407,14 @@ class OntologyEmotionModel:
|
|
| 334 |
# Восклицания
|
| 335 |
if any('восклицание' in r for r in rules):
|
| 336 |
new_confidence = min(new_confidence * 1.2, 1.0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
|
| 338 |
return {
|
| 339 |
'emotion': new_emotion,
|
|
@@ -409,7 +490,7 @@ class EmotionBERT(nn.Module):
|
|
| 409 |
return logits
|
| 410 |
|
| 411 |
# ============================================================
|
| 412 |
-
# КАСКАДНЫЙ КЛАССИФИКАТОР
|
| 413 |
# ============================================================
|
| 414 |
|
| 415 |
class CascadeEmotionClassifier:
|
|
|
|
| 77 |
|
| 78 |
def _parse_rusentilex(self, content):
    """Fill ``self.sentiment_lexicon`` from RuSentiLex CSV text.

    The first line of *content* is treated as a header and skipped. Each
    following line is split on commas; the first field is used as the word
    and the third field as its sentiment label. A word labelled
    ``positive`` is stored under its lemma as 'радость', a word labelled
    ``negative`` as 'грусть'. Rows with any other label, fewer than three
    fields, or an empty word are ignored. A later row for the same lemma
    overwrites an earlier one.

    Args:
        content: Full text of the lexicon file.

    Returns:
        None. The lexicon is updated in place and a summary is printed.
    """
    # NOTE(review): the column layout (word in field 0, label in field 2)
    # is taken from the indices used here; confirm against the actual file.
    emotion_by_sentiment = {'positive': 'радость', 'negative': 'грусть'}

    added = 0
    skipped = 0
    for line in content.splitlines()[1:]:  # skip the header row
        parts = line.strip().split(',')
        if len(parts) < 3:
            continue

        word = parts[0].strip().lower()
        emotion = emotion_by_sentiment.get(parts[2].strip().lower())
        # Check the label before lemmatizing so rows that would be
        # discarded anyway never reach the morphological analyzer, and
        # never register an empty string as a lexicon key.
        if emotion is None or not word:
            continue

        try:
            lemma = self.morph.parse(word)[0].normal_form
            self.sentiment_lexicon[lemma] = emotion
        except Exception:
            # Loading is best-effort: one bad row must not abort the whole
            # lexicon, but the failure is counted instead of being hidden.
            skipped += 1
            continue
        added += 1

    print(f" Добавлено слов из RuSentiLex: {added}")
    if skipped:
        print(f" Пропущено строк RuSentiLex из-за ошибок: {skipped}")
|
| 97 |
|
| 98 |
def _load_rusentilex(self):
    """Load the RuSentiLex lexicon from a local file, else from the network.

    Candidate file locations are tried in order; the first one that exists
    and is read without error is passed to ``self._parse_rusentilex`` and
    the search stops. If no local file could be loaded, the lexicon is
    requested over HTTP with a 10-second timeout. Every failure is reported
    with ``print`` and never raised, so the caller continues with whatever
    ``self.sentiment_lexicon`` already contains.

    Returns:
        None. Progress and the final lexicon size are printed.
    """
    import os

    # Locations searched for the lexicon file, in priority order.
    possible_paths = [
        'model/rusentilex.csv',
        'rusentilex.csv',
        '/app/model/rusentilex.csv',
        os.path.join(os.path.dirname(__file__), 'model', 'rusentilex.csv'),
    ]

    loaded = False

    print("📂 Поиск RuSentiLex...")

    # Try the local copies first.
    for path in possible_paths:
        if not os.path.exists(path):
            continue
        try:
            with open(path, 'r', encoding='utf-8') as f:
                content = f.read()
            self._parse_rusentilex(content)
        except Exception as e:
            # Best-effort: report the problem and move on to the next path.
            print(f"⚠️ Ошибка при загрузке {path}: {e}")
        else:
            print(f"✅ RuSentiLex загружен из файла: {path}")
            loaded = True
            break

    # Nothing usable on disk: fall back to downloading the lexicon.
    if not loaded:
        print("⚠️ Локальный файл RuSentiLex не найден, пробуем скачать...")
        url = "https://raw.githubusercontent.com/nicolay-r/sentiment-relation-classifiers/master/data/rusentilex.csv"
        try:
            r = requests.get(url, timeout=10)
            if r.status_code == 200:
                self._parse_rusentilex(r.text)
                print("✅ RuSentiLex загружен из репозитория")
                loaded = True
            else:
                # A non-200 answer is a failure too; say why instead of
                # falling through silently.
                print(f"⚠️ Не удалось загрузить RuSentiLex из репозитория: HTTP {r.status_code}")
        except Exception as e:
            print(f"⚠️ Не удалось загрузить RuSentiLex из репозитория: {e}")

    if not loaded:
        print("⚠️ RuSentiLex не загружен. Используется только статистический лексикон.")

    # Report the resulting lexicon size.
    print(f"📊 Всего слов в лексиконе: {len(self.sentiment_lexicon)}")
|
| 145 |
|
| 146 |
def init_ontology_level1(self):
|
| 147 |
self.emotion_definitions = {
|
|
|
|
| 264 |
lemmas = [p.normal_form for p in parsed]
|
| 265 |
pos_tags = [p.tag.POS for p in parsed]
|
| 266 |
|
| 267 |
+
# Проверка на слова из лексикона
|
| 268 |
+
for lemma in lemmas:
|
| 269 |
+
sentiment = self.sentiment_lexicon.get(lemma, 'neutral')
|
| 270 |
+
if sentiment == 'радость':
|
| 271 |
+
rules_applied.append(f"позитивное слово: {lemma}")
|
| 272 |
+
adjustments['valence'] += 0.2
|
| 273 |
+
elif sentiment in ('грусть', 'злость', 'страх'):
|
| 274 |
+
rules_applied.append(f"негативное слово: {lemma}")
|
| 275 |
+
adjustments['valence'] -= 0.2
|
| 276 |
+
|
| 277 |
for category, rule in self.linguistic_rules.items():
|
| 278 |
if 'words' in rule:
|
| 279 |
for word in rule['words']:
|
|
|
|
| 358 |
original_confidence = prediction['confidence']
|
| 359 |
adj = rule_analysis['adjustments']
|
| 360 |
rules = rule_analysis['rules_applied']
|
| 361 |
+
|
| 362 |
+
# Сохраняем исходную уверенность для проверки коррекции
|
| 363 |
+
original_confidence_value = original_confidence
|
| 364 |
+
was_corrected = len(rules) > 0
|
| 365 |
|
| 366 |
conf_mult = 1.0 + adj['arousal'] * 0.2 + adj['uncertainty'] * 0.1 - abs(adj['valence']) * 0.1
|
| 367 |
conf_mult = np.clip(conf_mult, 0.5, 1.5)
|
| 368 |
new_confidence = original_confidence * conf_mult
|
|
|
|
|
|
|
| 369 |
new_emotion = original_emotion
|
| 370 |
|
| 371 |
+
# Если есть негативные слова и нет позитивных, корректируем эмоцию
|
| 372 |
+
has_negative = any('негативное слово' in r for r in rules)
|
| 373 |
+
has_positive = any('позитивное слово' in r for r in rules)
|
| 374 |
+
|
| 375 |
+
if has_negative and not has_positive:
|
| 376 |
+
if original_emotion == 'радость':
|
| 377 |
+
new_emotion = 'грусть'
|
| 378 |
+
new_confidence *= 0.8
|
| 379 |
+
rules.append("коррекция: негативные слова без позитивных")
|
| 380 |
+
elif original_emotion == 'сарказм':
|
| 381 |
+
new_emotion = 'грусть'
|
| 382 |
+
new_confidence *= 0.9
|
| 383 |
+
elif has_positive and not has_negative and original_emotion in ('грусть', 'злость', 'страх'):
|
| 384 |
+
new_emotion = 'радость'
|
| 385 |
+
rules.append("коррекция: позитивные слова")
|
| 386 |
+
|
| 387 |
# Инверсия на основе правил
|
| 388 |
for rule in rules:
|
| 389 |
if rule.startswith("инверсия негатива:"):
|
|
|
|
| 407 |
# Восклицания
|
| 408 |
if any('восклицание' in r for r in rules):
|
| 409 |
new_confidence = min(new_confidence * 1.2, 1.0)
|
| 410 |
+
|
| 411 |
+
# Если онтология не применила коррекции, а уверенность была менее 90%,
|
| 412 |
+
# то повышаем уверенность на 10% (но не более 100%)
|
| 413 |
+
if not was_corrected and original_confidence_value < 0.9:
|
| 414 |
+
new_confidence = min(new_confidence * 1.10, 1.0)
|
| 415 |
+
|
| 416 |
+
# Ограничиваем максимум 1.0 (100%)
|
| 417 |
+
new_confidence = min(new_confidence, 1.0)
|
| 418 |
|
| 419 |
return {
|
| 420 |
'emotion': new_emotion,
|
|
|
|
| 490 |
return logits
|
| 491 |
|
| 492 |
# ============================================================
|
| 493 |
+
# КАСКАДНЫЙ КЛАССИФИКАТОР
|
| 494 |
# ============================================================
|
| 495 |
|
| 496 |
class CascadeEmotionClassifier:
|