Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,14 +6,12 @@ import torch
|
|
| 6 |
import torch.nn as nn
|
| 7 |
import numpy as np
|
| 8 |
import re
|
| 9 |
-
import pandas as pd
|
| 10 |
-
import io
|
| 11 |
from typing import Dict, List, Any, Optional
|
| 12 |
from collections import defaultdict, Counter
|
| 13 |
import networkx as nx
|
| 14 |
import pymorphy3
|
| 15 |
import requests
|
| 16 |
-
from fastapi import FastAPI, Request, Form, HTTPException
|
| 17 |
from fastapi.responses import HTMLResponse, JSONResponse
|
| 18 |
from fastapi.templating import Jinja2Templates
|
| 19 |
import uvicorn
|
|
@@ -93,20 +91,27 @@ class OntologyEmotionModel:
|
|
| 93 |
elif sentiment == 'negative':
|
| 94 |
self.sentiment_lexicon[lemma] = 'грусть'
|
| 95 |
added += 1
|
| 96 |
-
except Exception:
|
| 97 |
continue
|
| 98 |
print(f" Добавлено слов из RuSentiLex: {added}")
|
| 99 |
|
| 100 |
def _load_rusentilex(self):
|
| 101 |
"""Загружает RuSentiLex из локального файла в папке model"""
|
|
|
|
|
|
|
|
|
|
| 102 |
possible_paths = [
|
| 103 |
'model/rusentilex.csv',
|
| 104 |
'rusentilex.csv',
|
| 105 |
'/app/model/rusentilex.csv',
|
| 106 |
os.path.join(os.path.dirname(__file__), 'model', 'rusentilex.csv')
|
| 107 |
]
|
|
|
|
| 108 |
loaded = False
|
|
|
|
| 109 |
print("📂 Поиск RuSentiLex...")
|
|
|
|
|
|
|
| 110 |
for path in possible_paths:
|
| 111 |
if os.path.exists(path):
|
| 112 |
try:
|
|
@@ -118,6 +123,8 @@ class OntologyEmotionModel:
|
|
| 118 |
break
|
| 119 |
except Exception as e:
|
| 120 |
print(f"⚠️ Ошибка при загрузке {path}: {e}")
|
|
|
|
|
|
|
| 121 |
if not loaded:
|
| 122 |
print("⚠️ Локальный файл RuSentiLex не найден, пробуем скачать...")
|
| 123 |
url = "https://raw.githubusercontent.com/nicolay-r/sentiment-relation-classifiers/master/data/rusentilex.csv"
|
|
@@ -129,8 +136,11 @@ class OntologyEmotionModel:
|
|
| 129 |
loaded = True
|
| 130 |
except Exception as e:
|
| 131 |
print(f"⚠️ Не удалось загрузить RuSentiLex из репозитория: {e}")
|
|
|
|
| 132 |
if not loaded:
|
| 133 |
print("⚠️ RuSentiLex не загружен. Используется только статистический лексикон.")
|
|
|
|
|
|
|
| 134 |
print(f"📊 Всего слов в лексиконе: {len(self.sentiment_lexicon)}")
|
| 135 |
|
| 136 |
def init_ontology_level1(self):
|
|
@@ -349,6 +359,7 @@ class OntologyEmotionModel:
|
|
| 349 |
adj = rule_analysis['adjustments']
|
| 350 |
rules = rule_analysis['rules_applied']
|
| 351 |
|
|
|
|
| 352 |
original_confidence_value = original_confidence
|
| 353 |
was_corrected = len(rules) > 0
|
| 354 |
|
|
@@ -357,6 +368,7 @@ class OntologyEmotionModel:
|
|
| 357 |
new_confidence = original_confidence * conf_mult
|
| 358 |
new_emotion = original_emotion
|
| 359 |
|
|
|
|
| 360 |
has_negative = any('негативное слово' in r for r in rules)
|
| 361 |
has_positive = any('позитивное слово' in r for r in rules)
|
| 362 |
|
|
@@ -372,6 +384,7 @@ class OntologyEmotionModel:
|
|
| 372 |
new_emotion = 'радость'
|
| 373 |
rules.append("коррекция: позитивные слова")
|
| 374 |
|
|
|
|
| 375 |
for rule in rules:
|
| 376 |
if rule.startswith("инверсия негатива:"):
|
| 377 |
new_emotion = 'радость'
|
|
@@ -383,6 +396,7 @@ class OntologyEmotionModel:
|
|
| 383 |
new_emotion = 'грусть'
|
| 384 |
break
|
| 385 |
|
|
|
|
| 386 |
sarcasm_flag = adj['sarcasm'] > 0.5
|
| 387 |
if sarcasm_flag:
|
| 388 |
new_emotion = 'сарказм'
|
|
@@ -390,12 +404,16 @@ class OntologyEmotionModel:
|
|
| 390 |
if "саркастическая фраза" in str(rules):
|
| 391 |
new_confidence = min(new_confidence * 1.1, 0.95)
|
| 392 |
|
|
|
|
| 393 |
if any('восклицание' in r for r in rules):
|
| 394 |
new_confidence = min(new_confidence * 1.2, 1.0)
|
| 395 |
|
|
|
|
|
|
|
| 396 |
if not was_corrected and original_confidence_value < 0.9:
|
| 397 |
new_confidence = min(new_confidence * 1.10, 1.0)
|
| 398 |
|
|
|
|
| 399 |
new_confidence = min(new_confidence, 1.0)
|
| 400 |
|
| 401 |
return {
|
|
@@ -511,6 +529,7 @@ class CascadeEmotionClassifier:
|
|
| 511 |
lstm_emo = self.label_encoder.inverse_transform([lstm_pred])[0]
|
| 512 |
lstm_pred_dict = {'emotion': lstm_emo, 'confidence': lstm_conf.item(), 'probabilities': lstm_probs[0].cpu().numpy().tolist()}
|
| 513 |
|
|
|
|
| 514 |
lstm_onto = self.ontology_model.get_ontology_analysis(text_clean, lstm_pred_dict)
|
| 515 |
|
| 516 |
if lstm_onto['adjusted_prediction']['confidence'] >= self.threshold:
|
|
@@ -518,7 +537,6 @@ class CascadeEmotionClassifier:
|
|
| 518 |
final = lstm_onto['adjusted_prediction']
|
| 519 |
used = "LSTM + онтология"
|
| 520 |
rules_applied = lstm_onto['rule_analysis']['rules_applied']
|
| 521 |
-
class_probs = {emo: float(prob) for emo, prob in zip(self.label_encoder.classes_, final.get('probabilities', lstm_pred_dict['probabilities']))}
|
| 522 |
else:
|
| 523 |
self.stats['bert'] += 1
|
| 524 |
enc = self.tokenizer(text_clean, truncation=True, padding=True, max_length=self.max_length_bert, return_tensors='pt').to(self.device)
|
|
@@ -529,11 +547,11 @@ class CascadeEmotionClassifier:
|
|
| 529 |
bert_emo = self.label_encoder.inverse_transform([bert_pred])[0]
|
| 530 |
bert_pred_dict = {'emotion': bert_emo, 'confidence': bert_conf.item(), 'probabilities': bert_probs[0].cpu().numpy().tolist()}
|
| 531 |
|
|
|
|
| 532 |
bert_onto = self.ontology_model.get_ontology_analysis(text_clean, bert_pred_dict)
|
| 533 |
final = bert_onto['adjusted_prediction']
|
| 534 |
used = "BERT + онтология"
|
| 535 |
rules_applied = bert_onto['rule_analysis']['rules_applied']
|
| 536 |
-
class_probs = {emo: float(prob) for emo, prob in zip(self.label_encoder.classes_, final.get('probabilities', bert_pred_dict['probabilities']))}
|
| 537 |
|
| 538 |
return {
|
| 539 |
'text': text,
|
|
@@ -541,8 +559,7 @@ class CascadeEmotionClassifier:
|
|
| 541 |
'confidence': float(final['confidence']),
|
| 542 |
'used_model': used,
|
| 543 |
'rules_applied': rules_applied,
|
| 544 |
-
'was_corrected_by_ontology': len(rules_applied) > 0
|
| 545 |
-
'class_probabilities': class_probs
|
| 546 |
}
|
| 547 |
|
| 548 |
# ============================================================
|
|
@@ -553,17 +570,21 @@ def load_model():
|
|
| 553 |
print("Загрузка модели...")
|
| 554 |
model_dir = 'model'
|
| 555 |
|
|
|
|
| 556 |
with open(f'{model_dir}/model_info.json', 'r', encoding='utf-8') as f:
|
| 557 |
model_info = json.load(f)
|
| 558 |
|
|
|
|
| 559 |
with open(f'{model_dir}/vocab.json', 'r', encoding='utf-8') as f:
|
| 560 |
vocab = json.load(f)
|
| 561 |
|
|
|
|
| 562 |
print("📂 Создание label_encoder...")
|
| 563 |
label_encoder = LabelEncoder()
|
| 564 |
label_encoder.classes_ = np.array(model_info['classes'])
|
| 565 |
print(f"✅ label_encoder создан, классы: {list(label_encoder.classes_)}")
|
| 566 |
|
|
|
|
| 567 |
print("📂 Создание онтологии...")
|
| 568 |
ontology_model = OntologyEmotionModel(
|
| 569 |
emotions=list(label_encoder.classes_),
|
|
@@ -572,6 +593,7 @@ def load_model():
|
|
| 572 |
)
|
| 573 |
print("✅ Онтология создана")
|
| 574 |
|
|
|
|
| 575 |
print("📂 Загрузка LSTM...")
|
| 576 |
lstm_model = EmotionLSTM(
|
| 577 |
vocab_size=len(vocab),
|
|
@@ -585,6 +607,7 @@ def load_model():
|
|
| 585 |
lstm_model.load_state_dict(checkpoint['model_state_dict'])
|
| 586 |
print("✅ LSTM загружена")
|
| 587 |
|
|
|
|
| 588 |
print("📂 Загрузка BERT...")
|
| 589 |
bert_model = EmotionBERT(
|
| 590 |
bert_model_name=model_info['bert_model_name'],
|
|
@@ -594,6 +617,7 @@ def load_model():
|
|
| 594 |
bert_model.load_state_dict(torch.load(f'{model_dir}/bert_model.pth', map_location=device, weights_only=False))
|
| 595 |
print("✅ BERT загружена")
|
| 596 |
|
|
|
|
| 597 |
print("📂 Загрузка токенизатора...")
|
| 598 |
try:
|
| 599 |
tokenizer = BertTokenizer.from_pretrained(model_dir)
|
|
@@ -604,6 +628,7 @@ def load_model():
|
|
| 604 |
tokenizer = BertTokenizer.from_pretrained('DeepPavlov/rubert-base-cased')
|
| 605 |
print("✅ Токенизатор загружен из Hugging Face")
|
| 606 |
|
|
|
|
| 607 |
print("📂 Создание каскадного классификатора...")
|
| 608 |
cascade = CascadeEmotionClassifier(
|
| 609 |
lstm_model=lstm_model,
|
|
@@ -649,6 +674,7 @@ async def predict(text: str = Form(...)):
|
|
| 649 |
try:
|
| 650 |
result = classifier.predict(text)
|
| 651 |
|
|
|
|
| 652 |
rules_display = []
|
| 653 |
for rule in result['rules_applied'][:10]:
|
| 654 |
if ':' in rule:
|
|
@@ -657,107 +683,17 @@ async def predict(text: str = Form(...)):
|
|
| 657 |
else:
|
| 658 |
rules_display.append(f"<span class='rule-tag'>{rule}</span>")
|
| 659 |
|
| 660 |
-
# Формируем вероятности для отображения
|
| 661 |
-
probs_display = []
|
| 662 |
-
for emo, prob in result['class_probabilities'].items():
|
| 663 |
-
percentage = prob * 100
|
| 664 |
-
probs_display.append(f"""
|
| 665 |
-
<div class="prob-item">
|
| 666 |
-
<span class="prob-label">{emo}</span>
|
| 667 |
-
<div class="prob-bar-container">
|
| 668 |
-
<div class="prob-bar" style="width: {percentage}%"></div>
|
| 669 |
-
</div>
|
| 670 |
-
<span class="prob-value">{percentage:.1f}%</span>
|
| 671 |
-
</div>
|
| 672 |
-
""")
|
| 673 |
-
|
| 674 |
return JSONResponse({
|
| 675 |
"success": True,
|
| 676 |
"emotion": result['predicted_emotion'],
|
| 677 |
"confidence": f"{result['confidence']*100:.1f}%",
|
| 678 |
"used_model": result['used_model'],
|
| 679 |
"rules": "".join(rules_display) if rules_display else "Нет правил",
|
| 680 |
-
"was_corrected": str(result['was_corrected_by_ontology'])
|
| 681 |
-
"probabilities": "".join(probs_display)
|
| 682 |
})
|
| 683 |
except Exception as e:
|
| 684 |
return JSONResponse({"error": str(e)}, status_code=500)
|
| 685 |
|
| 686 |
-
@app.post("/upload")
|
| 687 |
-
async def upload_csv(
|
| 688 |
-
file: UploadFile = File(...),
|
| 689 |
-
text_column: str = Form("text")
|
| 690 |
-
):
|
| 691 |
-
if not classifier:
|
| 692 |
-
raise HTTPException(status_code=503, detail="Модель не загружена")
|
| 693 |
-
|
| 694 |
-
# 1. Проверка расширения
|
| 695 |
-
if not file.filename.endswith('.csv'):
|
| 696 |
-
raise HTTPException(400, "Поддерживаются только CSV файлы")
|
| 697 |
-
|
| 698 |
-
# 2. Ограничение размера (100 МБ)
|
| 699 |
-
contents = await file.read()
|
| 700 |
-
if len(contents) > 100 * 1024 * 1024:
|
| 701 |
-
raise HTTPException(400, "Файл слишком большой (максимум 100 МБ)")
|
| 702 |
-
|
| 703 |
-
# 3. Чтение CSV (пробуем utf-8, потом cp1251)
|
| 704 |
-
try:
|
| 705 |
-
df = pd.read_csv(io.BytesIO(contents), encoding='utf-8')
|
| 706 |
-
except UnicodeDecodeError:
|
| 707 |
-
try:
|
| 708 |
-
df = pd.read_csv(io.BytesIO(contents), encoding='cp1251')
|
| 709 |
-
except Exception as e:
|
| 710 |
-
raise HTTPException(400, f"Ошибка чтения CSV: {str(e)}")
|
| 711 |
-
|
| 712 |
-
# 4. Проверка наличия столбца
|
| 713 |
-
if text_column not in df.columns:
|
| 714 |
-
raise HTTPException(400, f"Столбец '{text_column}' не найден в файле")
|
| 715 |
-
|
| 716 |
-
# 5. Ограничение количества строк (максимум 1000, чтобы не превысить лимиты времени)
|
| 717 |
-
MAX_ROWS = 1000
|
| 718 |
-
if len(df) > MAX_ROWS:
|
| 719 |
-
raise HTTPException(400, f"Файл содержит более {MAX_ROWS} строк, что превышает лимит")
|
| 720 |
-
|
| 721 |
-
# 6. Предобработка: берём только непустые тексты
|
| 722 |
-
texts = df[text_column].fillna('').astype(str).tolist()
|
| 723 |
-
texts = [t for t in texts if t.strip()]
|
| 724 |
-
|
| 725 |
-
if not texts:
|
| 726 |
-
raise HTTPException(400, "Нет валидных текстов для анализа")
|
| 727 |
-
|
| 728 |
-
# 7. Обработка каждой строки
|
| 729 |
-
import time
|
| 730 |
-
start = time.time()
|
| 731 |
-
results = []
|
| 732 |
-
|
| 733 |
-
for text in texts:
|
| 734 |
-
pred = classifier.predict(text)
|
| 735 |
-
results.append({
|
| 736 |
-
"text": text[:200], # обрезаем для экономии места
|
| 737 |
-
"emotion": pred['predicted_emotion'],
|
| 738 |
-
"confidence": pred['confidence'],
|
| 739 |
-
"probabilities": pred['class_probabilities']
|
| 740 |
-
})
|
| 741 |
-
|
| 742 |
-
elapsed = time.time() - start
|
| 743 |
-
|
| 744 |
-
# 8. Агрегированная статистика
|
| 745 |
-
emotion_counts = {}
|
| 746 |
-
for r in results:
|
| 747 |
-
emo = r['emotion']
|
| 748 |
-
emotion_counts[emo] = emotion_counts.get(emo, 0) + 1
|
| 749 |
-
|
| 750 |
-
avg_confidence = sum(r['confidence'] for r in results) / len(results)
|
| 751 |
-
|
| 752 |
-
# 9. Ответ
|
| 753 |
-
return JSONResponse({
|
| 754 |
-
"total_processed": len(results),
|
| 755 |
-
"processing_time": round(elapsed, 2),
|
| 756 |
-
"average_confidence": round(avg_confidence, 2),
|
| 757 |
-
"emotion_counts": emotion_counts,
|
| 758 |
-
"details": results
|
| 759 |
-
})
|
| 760 |
-
|
| 761 |
@app.get("/health")
|
| 762 |
async def health_check():
|
| 763 |
return {"status": "healthy", "model_loaded": classifier is not None}
|
|
|
|
| 6 |
import torch.nn as nn
|
| 7 |
import numpy as np
|
| 8 |
import re
|
|
|
|
|
|
|
| 9 |
from typing import Dict, List, Any, Optional
|
| 10 |
from collections import defaultdict, Counter
|
| 11 |
import networkx as nx
|
| 12 |
import pymorphy3
|
| 13 |
import requests
|
| 14 |
+
from fastapi import FastAPI, Request, Form, HTTPException
|
| 15 |
from fastapi.responses import HTMLResponse, JSONResponse
|
| 16 |
from fastapi.templating import Jinja2Templates
|
| 17 |
import uvicorn
|
|
|
|
| 91 |
elif sentiment == 'negative':
|
| 92 |
self.sentiment_lexicon[lemma] = 'грусть'
|
| 93 |
added += 1
|
| 94 |
+
except Exception as e:
|
| 95 |
continue
|
| 96 |
print(f" Добавлено слов из RuSentiLex: {added}")
|
| 97 |
|
| 98 |
def _load_rusentilex(self):
|
| 99 |
"""Загружает RuSentiLex из локального файла в папке model"""
|
| 100 |
+
import os
|
| 101 |
+
|
| 102 |
+
# Пути для поиска файла RuSentiLex
|
| 103 |
possible_paths = [
|
| 104 |
'model/rusentilex.csv',
|
| 105 |
'rusentilex.csv',
|
| 106 |
'/app/model/rusentilex.csv',
|
| 107 |
os.path.join(os.path.dirname(__file__), 'model', 'rusentilex.csv')
|
| 108 |
]
|
| 109 |
+
|
| 110 |
loaded = False
|
| 111 |
+
|
| 112 |
print("📂 Поиск RuSentiLex...")
|
| 113 |
+
|
| 114 |
+
# Пробуем загрузить из локального файла
|
| 115 |
for path in possible_paths:
|
| 116 |
if os.path.exists(path):
|
| 117 |
try:
|
|
|
|
| 123 |
break
|
| 124 |
except Exception as e:
|
| 125 |
print(f"⚠️ Ошибка при загрузке {path}: {e}")
|
| 126 |
+
|
| 127 |
+
# Если локально не нашли, пробуем скачать из интернета
|
| 128 |
if not loaded:
|
| 129 |
print("⚠️ Локальный файл RuSentiLex не найден, пробуем скачать...")
|
| 130 |
url = "https://raw.githubusercontent.com/nicolay-r/sentiment-relation-classifiers/master/data/rusentilex.csv"
|
|
|
|
| 136 |
loaded = True
|
| 137 |
except Exception as e:
|
| 138 |
print(f"⚠️ Не удалось загрузить RuSentiLex из репозитория: {e}")
|
| 139 |
+
|
| 140 |
if not loaded:
|
| 141 |
print("⚠️ RuSentiLex не загружен. Используется только статистический лексикон.")
|
| 142 |
+
|
| 143 |
+
# Выводим статистику
|
| 144 |
print(f"📊 Всего слов в лексиконе: {len(self.sentiment_lexicon)}")
|
| 145 |
|
| 146 |
def init_ontology_level1(self):
|
|
|
|
| 359 |
adj = rule_analysis['adjustments']
|
| 360 |
rules = rule_analysis['rules_applied']
|
| 361 |
|
| 362 |
+
# Сохраняем исходную уверенность для проверки коррекции
|
| 363 |
original_confidence_value = original_confidence
|
| 364 |
was_corrected = len(rules) > 0
|
| 365 |
|
|
|
|
| 368 |
new_confidence = original_confidence * conf_mult
|
| 369 |
new_emotion = original_emotion
|
| 370 |
|
| 371 |
+
# Если есть негативные слова и нет позитивных, корректируем эмоцию
|
| 372 |
has_negative = any('негативное слово' in r for r in rules)
|
| 373 |
has_positive = any('позитивное слово' in r for r in rules)
|
| 374 |
|
|
|
|
| 384 |
new_emotion = 'радость'
|
| 385 |
rules.append("коррекция: позитивные слова")
|
| 386 |
|
| 387 |
+
# Инверсия на основе правил
|
| 388 |
for rule in rules:
|
| 389 |
if rule.startswith("инверсия негатива:"):
|
| 390 |
new_emotion = 'радость'
|
|
|
|
| 396 |
new_emotion = 'грусть'
|
| 397 |
break
|
| 398 |
|
| 399 |
+
# Сарказм (контраст + маркеры)
|
| 400 |
sarcasm_flag = adj['sarcasm'] > 0.5
|
| 401 |
if sarcasm_flag:
|
| 402 |
new_emotion = 'сарказм'
|
|
|
|
| 404 |
if "саркастическая фраза" in str(rules):
|
| 405 |
new_confidence = min(new_confidence * 1.1, 0.95)
|
| 406 |
|
| 407 |
+
# Восклицания
|
| 408 |
if any('восклицание' in r for r in rules):
|
| 409 |
new_confidence = min(new_confidence * 1.2, 1.0)
|
| 410 |
|
| 411 |
+
# Если онтология не применила коррекции, а уверенность была менее 90%,
|
| 412 |
+
# то повышаем уверенность на 10% (но не более 100%)
|
| 413 |
if not was_corrected and original_confidence_value < 0.9:
|
| 414 |
new_confidence = min(new_confidence * 1.10, 1.0)
|
| 415 |
|
| 416 |
+
# Ограничиваем максимум 1.0 (100%)
|
| 417 |
new_confidence = min(new_confidence, 1.0)
|
| 418 |
|
| 419 |
return {
|
|
|
|
| 529 |
lstm_emo = self.label_encoder.inverse_transform([lstm_pred])[0]
|
| 530 |
lstm_pred_dict = {'emotion': lstm_emo, 'confidence': lstm_conf.item(), 'probabilities': lstm_probs[0].cpu().numpy().tolist()}
|
| 531 |
|
| 532 |
+
# Применяем онтологию к LSTM
|
| 533 |
lstm_onto = self.ontology_model.get_ontology_analysis(text_clean, lstm_pred_dict)
|
| 534 |
|
| 535 |
if lstm_onto['adjusted_prediction']['confidence'] >= self.threshold:
|
|
|
|
| 537 |
final = lstm_onto['adjusted_prediction']
|
| 538 |
used = "LSTM + онтология"
|
| 539 |
rules_applied = lstm_onto['rule_analysis']['rules_applied']
|
|
|
|
| 540 |
else:
|
| 541 |
self.stats['bert'] += 1
|
| 542 |
enc = self.tokenizer(text_clean, truncation=True, padding=True, max_length=self.max_length_bert, return_tensors='pt').to(self.device)
|
|
|
|
| 547 |
bert_emo = self.label_encoder.inverse_transform([bert_pred])[0]
|
| 548 |
bert_pred_dict = {'emotion': bert_emo, 'confidence': bert_conf.item(), 'probabilities': bert_probs[0].cpu().numpy().tolist()}
|
| 549 |
|
| 550 |
+
# Применяем онтологию к BERT
|
| 551 |
bert_onto = self.ontology_model.get_ontology_analysis(text_clean, bert_pred_dict)
|
| 552 |
final = bert_onto['adjusted_prediction']
|
| 553 |
used = "BERT + онтология"
|
| 554 |
rules_applied = bert_onto['rule_analysis']['rules_applied']
|
|
|
|
| 555 |
|
| 556 |
return {
|
| 557 |
'text': text,
|
|
|
|
| 559 |
'confidence': float(final['confidence']),
|
| 560 |
'used_model': used,
|
| 561 |
'rules_applied': rules_applied,
|
| 562 |
+
'was_corrected_by_ontology': len(rules_applied) > 0
|
|
|
|
| 563 |
}
|
| 564 |
|
| 565 |
# ============================================================
|
|
|
|
| 570 |
print("Загрузка модели...")
|
| 571 |
model_dir = 'model'
|
| 572 |
|
| 573 |
+
# Загружаем информацию о модели
|
| 574 |
with open(f'{model_dir}/model_info.json', 'r', encoding='utf-8') as f:
|
| 575 |
model_info = json.load(f)
|
| 576 |
|
| 577 |
+
# Загружаем vocab
|
| 578 |
with open(f'{model_dir}/vocab.json', 'r', encoding='utf-8') as f:
|
| 579 |
vocab = json.load(f)
|
| 580 |
|
| 581 |
+
# СОЗДАЁМ label_encoder из model_info
|
| 582 |
print("📂 Создание label_encoder...")
|
| 583 |
label_encoder = LabelEncoder()
|
| 584 |
label_encoder.classes_ = np.array(model_info['classes'])
|
| 585 |
print(f"✅ label_encoder создан, классы: {list(label_encoder.classes_)}")
|
| 586 |
|
| 587 |
+
# СОЗДАЁМ онтологию
|
| 588 |
print("📂 Создание онтологии...")
|
| 589 |
ontology_model = OntologyEmotionModel(
|
| 590 |
emotions=list(label_encoder.classes_),
|
|
|
|
| 593 |
)
|
| 594 |
print("✅ Онтология создана")
|
| 595 |
|
| 596 |
+
# LSTM
|
| 597 |
print("📂 Загрузка LSTM...")
|
| 598 |
lstm_model = EmotionLSTM(
|
| 599 |
vocab_size=len(vocab),
|
|
|
|
| 607 |
lstm_model.load_state_dict(checkpoint['model_state_dict'])
|
| 608 |
print("✅ LSTM загружена")
|
| 609 |
|
| 610 |
+
# BERT
|
| 611 |
print("📂 Загрузка BERT...")
|
| 612 |
bert_model = EmotionBERT(
|
| 613 |
bert_model_name=model_info['bert_model_name'],
|
|
|
|
| 617 |
bert_model.load_state_dict(torch.load(f'{model_dir}/bert_model.pth', map_location=device, weights_only=False))
|
| 618 |
print("✅ BERT загружена")
|
| 619 |
|
| 620 |
+
# Токенизатор
|
| 621 |
print("📂 Загрузка токенизатора...")
|
| 622 |
try:
|
| 623 |
tokenizer = BertTokenizer.from_pretrained(model_dir)
|
|
|
|
| 628 |
tokenizer = BertTokenizer.from_pretrained('DeepPavlov/rubert-base-cased')
|
| 629 |
print("✅ Токенизатор загружен из Hugging Face")
|
| 630 |
|
| 631 |
+
# Каскад
|
| 632 |
print("📂 Создание каскадного классификатора...")
|
| 633 |
cascade = CascadeEmotionClassifier(
|
| 634 |
lstm_model=lstm_model,
|
|
|
|
| 674 |
try:
|
| 675 |
result = classifier.predict(text)
|
| 676 |
|
| 677 |
+
# Форматируем правила для отображения
|
| 678 |
rules_display = []
|
| 679 |
for rule in result['rules_applied'][:10]:
|
| 680 |
if ':' in rule:
|
|
|
|
| 683 |
else:
|
| 684 |
rules_display.append(f"<span class='rule-tag'>{rule}</span>")
|
| 685 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 686 |
return JSONResponse({
|
| 687 |
"success": True,
|
| 688 |
"emotion": result['predicted_emotion'],
|
| 689 |
"confidence": f"{result['confidence']*100:.1f}%",
|
| 690 |
"used_model": result['used_model'],
|
| 691 |
"rules": "".join(rules_display) if rules_display else "Нет правил",
|
| 692 |
+
"was_corrected": str(result['was_corrected_by_ontology'])
|
|
|
|
| 693 |
})
|
| 694 |
except Exception as e:
|
| 695 |
return JSONResponse({"error": str(e)}, status_code=500)
|
| 696 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 697 |
@app.get("/health")
|
| 698 |
async def health_check():
|
| 699 |
return {"status": "healthy", "model_loaded": classifier is not None}
|