Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -41,7 +41,7 @@ def clean_russian_text(text):
|
|
| 41 |
return text
|
| 42 |
|
| 43 |
# ============================================================
|
| 44 |
-
# ПОЛНЫЙ КЛАСС ОНТОЛОГИИ
|
| 45 |
# ============================================================
|
| 46 |
|
| 47 |
class OntologyEmotionModel:
|
|
@@ -139,12 +139,42 @@ class OntologyEmotionModel:
|
|
| 139 |
|
| 140 |
def init_ontology_level2(self):
|
| 141 |
self.linguistic_rules = {
|
| 142 |
-
'усилители': {
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
}
|
| 149 |
|
| 150 |
def add_empirical_knowledge(self, text: str, emotion: str, confidence: float):
|
|
@@ -194,7 +224,8 @@ class OntologyEmotionModel:
|
|
| 194 |
for word in rule['words']:
|
| 195 |
if word in lemmas:
|
| 196 |
rules_applied.append(f"{category}: {word}")
|
| 197 |
-
effect = rule['effect']
|
|
|
|
| 198 |
if effect == 'increase_arousal':
|
| 199 |
adjustments['arousal'] += weight
|
| 200 |
elif effect == 'decrease_arousal':
|
|
@@ -238,6 +269,16 @@ class OntologyEmotionModel:
|
|
| 238 |
adjustments['sarcasm'] += 0.5
|
| 239 |
rules_applied.append(f"контраст тональности: позитив {pos_words[:2]} vs негатив {neg_words[:2]}")
|
| 240 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
return {'rules_applied': rules_applied, 'adjustments': adjustments, 'lemmas': lemmas}
|
| 242 |
|
| 243 |
def calculate_disagreement(self, pred1: Dict, pred2: Dict) -> float:
|
|
@@ -266,24 +307,39 @@ class OntologyEmotionModel:
|
|
| 266 |
conf_mult = 1.0 + adj['arousal'] * 0.2 + adj['uncertainty'] * 0.1 - abs(adj['valence']) * 0.1
|
| 267 |
conf_mult = np.clip(conf_mult, 0.5, 1.5)
|
| 268 |
new_confidence = original_confidence * conf_mult
|
|
|
|
|
|
|
| 269 |
new_emotion = original_emotion
|
| 270 |
|
|
|
|
| 271 |
for rule in rules:
|
| 272 |
if rule.startswith("инверсия негатива:"):
|
| 273 |
new_emotion = 'радость'
|
| 274 |
break
|
| 275 |
elif rule.startswith("инверсия позитива:"):
|
| 276 |
-
|
|
|
|
|
|
|
|
|
|
| 277 |
break
|
| 278 |
|
| 279 |
-
|
|
|
|
|
|
|
| 280 |
new_emotion = 'сарказм'
|
| 281 |
-
new_confidence
|
|
|
|
|
|
|
| 282 |
|
|
|
|
| 283 |
if any('восклицание' in r for r in rules):
|
| 284 |
new_confidence = min(new_confidence * 1.2, 1.0)
|
| 285 |
|
| 286 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
|
| 288 |
def get_ontology_analysis(self, text: str, model_prediction: Dict) -> Dict:
|
| 289 |
rule_analysis = self.apply_linguistic_rules(text)
|
|
@@ -352,6 +408,10 @@ class EmotionBERT(nn.Module):
|
|
| 352 |
return logits, conf
|
| 353 |
return logits
|
| 354 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
class CascadeEmotionClassifier:
|
| 356 |
def __init__(self, lstm_model, bert_model, vocab, tokenizer, label_encoder, ontology_model, threshold=0.95, device='cpu', max_length_lstm=100, max_length_bert=128):
|
| 357 |
self.lstm_model = lstm_model
|
|
@@ -532,6 +592,8 @@ async def predict(text: str = Form(...)):
|
|
| 532 |
return JSONResponse({"error": "Введите хотя бы 3 символа."}, status_code=400)
|
| 533 |
try:
|
| 534 |
result = classifier.predict(text)
|
|
|
|
|
|
|
| 535 |
rules_display = []
|
| 536 |
for rule in result['rules_applied'][:10]:
|
| 537 |
if ':' in rule:
|
|
@@ -539,13 +601,14 @@ async def predict(text: str = Form(...)):
|
|
| 539 |
rules_display.append(f"<span class='rule-tag'>{cat}: {val}</span>")
|
| 540 |
else:
|
| 541 |
rules_display.append(f"<span class='rule-tag'>{rule}</span>")
|
|
|
|
| 542 |
return JSONResponse({
|
| 543 |
"success": True,
|
| 544 |
"emotion": result['predicted_emotion'],
|
| 545 |
"confidence": f"{result['confidence']*100:.1f}%",
|
| 546 |
"used_model": result['used_model'],
|
| 547 |
"rules": "".join(rules_display) if rules_display else "Нет правил",
|
| 548 |
-
"was_corrected": result['was_corrected_by_ontology']
|
| 549 |
})
|
| 550 |
except Exception as e:
|
| 551 |
return JSONResponse({"error": str(e)}, status_code=500)
|
|
|
|
| 41 |
return text
|
| 42 |
|
| 43 |
# ============================================================
|
| 44 |
+
# ПОЛНЫЙ КЛАСС ОНТОЛОГИИ (исправленный)
|
| 45 |
# ============================================================
|
| 46 |
|
| 47 |
class OntologyEmotionModel:
|
|
|
|
| 139 |
|
| 140 |
def init_ontology_level2(self):
    """Populate ``self.linguistic_rules`` with the level-2 linguistic rule table.

    The table maps a rule-category name to a dict with these keys:

    * ``'words'`` or ``'patterns'`` -- literal markers, or raw regex strings,
      that trigger the rule;
    * ``'effect'`` -- name of the adjustment the rule requests;
    * ``'weight'`` -- numeric magnitude attached to that effect;
    * ``'learnable'`` -- boolean flag stored with every rule.

    Returns nothing; the only side effect is assigning the attribute.
    """
    self.linguistic_rules = {
        # Intensifiers.
        'усилители': {
            'words': ['очень', 'сильно', 'крайне', 'чрезвычайно', 'невероятно', 'абсолютно'],
            'effect': 'increase_arousal',
            'weight': 0.3,
            'learnable': True,
        },
        # Downtoners.
        # NOTE(review): the weight is already negative; confirm the consumer
        # adds it rather than subtracting it for 'decrease_arousal'.
        'ослабители': {
            'words': ['слегка', 'немного', 'чуть-чуть', 'отчасти', 'несколько'],
            'effect': 'decrease_arousal',
            'weight': -0.2,
            'learnable': True,
        },
        # Negations.
        'отрицания': {
            'words': ['не', 'ни', 'нет', 'нельзя', 'невозможно'],
            'effect': 'negation',
            'weight': -0.5,
            'learnable': True,
        },
        # Exclamations.
        # NOTE(review): r'\?+' is also the only pattern of the interrogative
        # category below, so a run of '?' matches both categories -- confirm
        # this overlap is intended.
        'восклицания': {
            'patterns': [r'!+', r'\?+'],
            'effect': 'increase_arousal',
            'weight': 0.4,
            'learnable': True,
        },
        # Interrogatives.
        'вопросительные': {
            'patterns': [r'\?+'],
            'effect': 'uncertainty',
            'weight': 0.2,
            'learnable': True,
        },
        # Sarcasm markers.
        # NOTE(review): several entries are multi-word phrases; if the consumer
        # tests membership against a list of single-token lemmas they can
        # never match -- verify against the rule-application code.
        'сарказм_маркеры': {
            'words': ['какой', 'такой', 'прям', 'ага', 'ну да', 'конечно', 'отличная работа', 'прекрасно', 'замечательно', 'как всегда'],
            'effect': 'sarcasm',
            'weight': 0.6,
            'learnable': True,
        },
    }
|
| 179 |
|
| 180 |
def add_empirical_knowledge(self, text: str, emotion: str, confidence: float):
|
|
|
|
| 224 |
for word in rule['words']:
|
| 225 |
if word in lemmas:
|
| 226 |
rules_applied.append(f"{category}: {word}")
|
| 227 |
+
effect = rule['effect']
|
| 228 |
+
weight = rule['weight']
|
| 229 |
if effect == 'increase_arousal':
|
| 230 |
adjustments['arousal'] += weight
|
| 231 |
elif effect == 'decrease_arousal':
|
|
|
|
| 269 |
adjustments['sarcasm'] += 0.5
|
| 270 |
rules_applied.append(f"контраст тональности: позитив {pos_words[:2]} vs негатив {neg_words[:2]}")
|
| 271 |
|
| 272 |
+
# Дополнительная проверка на саркастические фразы
|
| 273 |
+
sarcasm_phrases = ['конечно', 'ага', 'ну да', 'как всегда', 'отличная работа', 'прекрасно', 'замечательно']
|
| 274 |
+
for phrase in sarcasm_phrases:
|
| 275 |
+
if phrase in text.lower():
|
| 276 |
+
adjustments['sarcasm'] += 0.6
|
| 277 |
+
rules_applied.append(f"саркастическая фраза: {phrase}")
|
| 278 |
+
|
| 279 |
+
if adjustments['sarcasm'] > 0.5:
|
| 280 |
+
rules_applied.append("обнаружен сарказм")
|
| 281 |
+
|
| 282 |
return {'rules_applied': rules_applied, 'adjustments': adjustments, 'lemmas': lemmas}
|
| 283 |
|
| 284 |
def calculate_disagreement(self, pred1: Dict, pred2: Dict) -> float:
|
|
|
|
| 307 |
conf_mult = 1.0 + adj['arousal'] * 0.2 + adj['uncertainty'] * 0.1 - abs(adj['valence']) * 0.1
|
| 308 |
conf_mult = np.clip(conf_mult, 0.5, 1.5)
|
| 309 |
new_confidence = original_confidence * conf_mult
|
| 310 |
+
# Ограничиваем максимум 1.0 (100%)
|
| 311 |
+
new_confidence = min(new_confidence, 1.0)
|
| 312 |
new_emotion = original_emotion
|
| 313 |
|
| 314 |
+
# Инверсия на основе правил
|
| 315 |
for rule in rules:
|
| 316 |
if rule.startswith("инверсия негатива:"):
|
| 317 |
new_emotion = 'радость'
|
| 318 |
break
|
| 319 |
elif rule.startswith("инверсия позитива:"):
|
| 320 |
+
if adj['arousal'] > 0.3:
|
| 321 |
+
new_emotion = 'злость'
|
| 322 |
+
else:
|
| 323 |
+
new_emotion = 'грусть'
|
| 324 |
break
|
| 325 |
|
| 326 |
+
# Сарказм (контраст + маркеры)
|
| 327 |
+
sarcasm_flag = adj['sarcasm'] > 0.5
|
| 328 |
+
if sarcasm_flag:
|
| 329 |
new_emotion = 'сарказм'
|
| 330 |
+
new_confidence = min(new_confidence * 0.8, 0.9)
|
| 331 |
+
if "саркастическая фраза" in str(rules):
|
| 332 |
+
new_confidence = min(new_confidence * 1.1, 0.95)
|
| 333 |
|
| 334 |
+
# Восклицания
|
| 335 |
if any('восклицание' in r for r in rules):
|
| 336 |
new_confidence = min(new_confidence * 1.2, 1.0)
|
| 337 |
|
| 338 |
+
return {
|
| 339 |
+
'emotion': new_emotion,
|
| 340 |
+
'confidence': new_confidence,
|
| 341 |
+
'rules_applied': rules
|
| 342 |
+
}
|
| 343 |
|
| 344 |
def get_ontology_analysis(self, text: str, model_prediction: Dict) -> Dict:
|
| 345 |
rule_analysis = self.apply_linguistic_rules(text)
|
|
|
|
| 408 |
return logits, conf
|
| 409 |
return logits
|
| 410 |
|
| 411 |
+
# ============================================================
|
| 412 |
+
# КАСКАДНЫЙ КЛАССИФИКАТОР (исправленный)
|
| 413 |
+
# ============================================================
|
| 414 |
+
|
| 415 |
class CascadeEmotionClassifier:
|
| 416 |
def __init__(self, lstm_model, bert_model, vocab, tokenizer, label_encoder, ontology_model, threshold=0.95, device='cpu', max_length_lstm=100, max_length_bert=128):
|
| 417 |
self.lstm_model = lstm_model
|
|
|
|
| 592 |
return JSONResponse({"error": "Введите хотя бы 3 символа."}, status_code=400)
|
| 593 |
try:
|
| 594 |
result = classifier.predict(text)
|
| 595 |
+
|
| 596 |
+
# Форматируем правила для отображения
|
| 597 |
rules_display = []
|
| 598 |
for rule in result['rules_applied'][:10]:
|
| 599 |
if ':' in rule:
|
|
|
|
| 601 |
rules_display.append(f"<span class='rule-tag'>{cat}: {val}</span>")
|
| 602 |
else:
|
| 603 |
rules_display.append(f"<span class='rule-tag'>{rule}</span>")
|
| 604 |
+
|
| 605 |
return JSONResponse({
|
| 606 |
"success": True,
|
| 607 |
"emotion": result['predicted_emotion'],
|
| 608 |
"confidence": f"{result['confidence']*100:.1f}%",
|
| 609 |
"used_model": result['used_model'],
|
| 610 |
"rules": "".join(rules_display) if rules_display else "Нет правил",
|
| 611 |
+
"was_corrected": str(result['was_corrected_by_ontology'])
|
| 612 |
})
|
| 613 |
except Exception as e:
|
| 614 |
return JSONResponse({"error": str(e)}, status_code=500)
|