import os
import sys
import json
import pickle
import re
import warnings
from collections import defaultdict
from typing import Dict, List, Any, Optional

import numpy as np
import torch
import torch.nn as nn
import networkx as nx
import pymorphy3
import uvicorn
from fastapi import FastAPI, Request, Form, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.templating import Jinja2Templates
from transformers import BertTokenizer, BertModel

warnings.filterwarnings('ignore')

# Pick the compute device once at import time; everything below follows it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Используется устройство: {device}")

# ============================================================
# HELPER FUNCTIONS
# ============================================================


def clean_russian_text(text):
    """Normalize a Russian text snippet for model input.

    Lower-cases the text, strips URLs and e-mail addresses, maps common
    emoticons to marker words, removes unsupported characters and collapses
    whitespace. Non-string input yields "".
    """
    if not isinstance(text, str):
        return ""
    text = text.lower()
    text = re.sub(r'http\S+|www\S+|https\S+', '', text)  # URLs
    text = re.sub(r'\S+@\S+', '', text)                  # e-mail addresses
    # BUG FIX (two issues in the original mapping):
    #   1. The text is already lower-cased, so the ':D' key could never
    #      match — the key must be ':d'.
    #   2. The bare ')' entry was replaced before ';)' was checked, so
    #      ';)' could never match. Multi-character smileys now come first.
    # NOTE(review): the bare ')' / '(' entries also rewrite ordinary
    # parentheses; kept for compatibility with the trained models.
    smileys = {
        ':)': ' смайлик_радость ',
        ':(': ' смайлик_грусть ',
        ':d': ' смайлик_смех ',
        ';)': ' смайлик_подмигивание ',
        ')': ' смайлик_радость ',
        '(': ' смайлик_грусть ',
    }
    for smiley, replacement in smileys.items():
        text = text.replace(smiley, replacement)
    text = re.sub(r'[^\w\sа-яё.,!?;:)(-]', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

# ============================================================
# MODEL CLASSES (simplified for inference)
# ============================================================


class EmotionLSTM(nn.Module):
    """Bidirectional LSTM sentence classifier over a word-index vocabulary."""

    def __init__(self, vocab_size, embed_dim=100, hidden_dim=256,
                 num_classes=5, dropout=0.3, num_layers=2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(
            embed_dim, hidden_dim, num_layers=num_layers,
            batch_first=True, bidirectional=True,
            dropout=dropout if num_layers > 1 else 0,
        )
        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim * 2, 128), nn.ReLU(), nn.Dropout(dropout),
            nn.Linear(128, 64), nn.ReLU(),
            nn.Linear(64, num_classes),
        )

    def forward(self, x, return_confidence=False):
        """Return logits for a batch of index sequences.

        With return_confidence=True also returns the max softmax
        probability per sample.
        """
        embedded = self.embedding(x)
        lstm_out, (hidden, cell) = self.lstm(embedded)
        # Concatenate last forward (-2) and last backward (-1) hidden states.
        lstm_last = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        features = self.dropout(lstm_last)
        logits = self.classifier(features)
        if return_confidence:
            probs = torch.softmax(logits, dim=1)
            conf, _ = torch.max(probs, dim=1)
            return logits, conf
        return logits


class EmotionBERT(nn.Module):
    """BERT encoder with an MLP head over the [CLS] token."""

    def __init__(self, bert_model_name, num_classes, dropout=0.3):
        super().__init__()
        self.bert = BertModel.from_pretrained(bert_model_name)
        hidden = self.bert.config.hidden_size
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(hidden, 256), nn.ReLU(), nn.Dropout(dropout),
            nn.Linear(256, 128), nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, input_ids, attention_mask, return_confidence=False):
        """Return logits (and optionally max softmax confidence per sample)."""
        out = self.bert(input_ids, attention_mask, return_dict=True)
        cls = out.last_hidden_state[:, 0, :]  # [CLS] embedding
        logits = self.classifier(cls)
        if return_confidence:
            probs = torch.softmax(logits, dim=1)
            conf, _ = torch.max(probs, dim=1)
            return logits, conf
        return logits


class OntologyEmotionModel:
    """Linguistic-rule / ontology layer applied on top of neural predictions."""

    def __init__(self, emotions: List[str]):
        self.emotions = emotions
        self.morph = pymorphy3.MorphAnalyzer()
        self.ontology_graph = nx.DiGraph()
        self.sentiment_lexicon = {}
        # Cue-word rule classes. Keys and words are runtime data used in the
        # lemma matching below and therefore stay in Russian.
        self.linguistic_rules = {
            'усилители': {'words': ['очень', 'сильно', 'крайне'], 'weight': 0.3},
            'ослабители': {'words': ['слегка', 'немного', 'чуть-чуть'], 'weight': -0.2},
            'отрицания': {'words': ['не', 'ни', 'нет'], 'weight': -0.5},
        }
        self.init_ontology_level1()

    def init_ontology_level1(self):
        """Populate the graph with valence/arousal nodes for known emotions."""
        self.emotion_definitions = {
            'радость': {'valence': 'positive', 'arousal': 'high'},
            'грусть': {'valence': 'negative', 'arousal': 'low'},
            'злость': {'valence': 'negative', 'arousal': 'high'},
            'страх': {'valence': 'negative', 'arousal': 'high'},
            'сарказм': {'valence': 'negative', 'arousal': 'high'},
        }
        for emotion in self.emotions:
            if emotion in self.emotion_definitions:
                self.ontology_graph.add_node(emotion, **self.emotion_definitions[emotion])

    def apply_linguistic_rules(self, text: str) -> Dict:
        """Lemmatize *text* and report which cue-word rules fire."""
        rules_applied = []
        words = text.lower().split()
        lemmas = [self.morph.parse(w)[0].normal_form for w in words]
        for category, rule in self.linguistic_rules.items():
            for word in rule['words']:
                if word in lemmas:
                    rules_applied.append(f"{category}: {word}")
        return {'rules_applied': rules_applied, 'lemmas': lemmas}

    def adjust_prediction_with_rules(self, prediction: Dict, rule_analysis: Dict) -> Dict:
        """Attach the fired rules to a prediction.

        Currently a pass-through: the rule weights are not applied to the
        confidence value.
        """
        return {
            'emotion': prediction['emotion'],
            'confidence': prediction['confidence'],
            'rules_applied': rule_analysis['rules_applied'],
        }

    def get_ontology_analysis(self, text: str, model_prediction: Dict) -> Dict:
        """Run the rule analysis and return it with the adjusted prediction."""
        rule_analysis = self.apply_linguistic_rules(text)
        adjusted = self.adjust_prediction_with_rules(model_prediction, rule_analysis)
        return {'rule_analysis': rule_analysis, 'adjusted_prediction': adjusted}

    def get_statistics(self) -> Dict:
        """Return basic counts about the ontology and rule set."""
        return {
            'ontology_nodes': len(self.ontology_graph.nodes),
            'linguistic_rules': len(self.linguistic_rules),
        }


class CascadeEmotionClassifier:
    """Two-stage classifier: fast LSTM first, BERT fallback on low confidence.

    If the ontology-adjusted LSTM confidence is >= *threshold*, its answer is
    final; otherwise the (slower) BERT model decides.
    """

    def __init__(self, lstm_model, bert_model, vocab, tokenizer, label_encoder,
                 ontology_model, threshold=0.95, device='cpu',
                 max_length_lstm=100, max_length_bert=128):
        self.lstm_model = lstm_model
        self.bert_model = bert_model
        self.vocab = vocab
        self.tokenizer = tokenizer
        self.label_encoder = label_encoder
        self.ontology_model = ontology_model
        self.threshold = threshold
        self.device = device
        self.max_length_lstm = max_length_lstm
        self.max_length_bert = max_length_bert
        self.lstm_model.eval()
        self.bert_model.eval()
        self.lstm_model.to(device)
        self.bert_model.to(device)
        # Routing counters exposed through the /stats endpoint.
        self.stats = {'total': 0, 'lstm': 0, 'bert': 0}

    def text_to_sequence(self, text):
        """Convert text to a fixed-length list of vocab indices (pad/truncate)."""
        words = str(text).split()[:self.max_length_lstm]
        # NOTE(review): the '' vocab keys look like mangled '<UNK>' / '<PAD>'
        # token names — verify against the saved vocab.json before changing.
        sequence = [self.vocab.get(word, self.vocab.get('', 1)) for word in words]
        if len(sequence) < self.max_length_lstm:
            sequence += [self.vocab.get('', 0)] * (self.max_length_lstm - len(sequence))
        return sequence[:self.max_length_lstm]

    def predict(self, text):
        """Classify *text* and return a result dict for the web layer.

        Result keys: text, predicted_emotion, confidence, used_model,
        rules_applied, class_probabilities.
        """
        self.stats['total'] += 1
        text_clean = clean_russian_text(text)

        # --- Stage 1: LSTM ---
        seq = torch.LongTensor([self.text_to_sequence(text_clean)]).to(self.device)
        with torch.no_grad():
            lstm_logits, lstm_conf = self.lstm_model(seq, return_confidence=True)
            lstm_probs = torch.softmax(lstm_logits, dim=1)
            lstm_pred = lstm_probs.argmax().item()
        lstm_emo = self.label_encoder.inverse_transform([lstm_pred])[0]
        lstm_pred_dict = {
            'emotion': lstm_emo,
            'confidence': lstm_conf.item(),
            'probabilities': lstm_probs[0].cpu().numpy().tolist(),
        }

        # Apply the ontology rule layer to the LSTM answer.
        lstm_onto = self.ontology_model.get_ontology_analysis(text_clean, lstm_pred_dict)
        lstm_adjusted = lstm_onto['adjusted_prediction']

        if lstm_adjusted['confidence'] >= self.threshold:
            self.stats['lstm'] += 1
            final = lstm_adjusted
            final_onto = lstm_onto
            final_probs = lstm_pred_dict['probabilities']
            used_model = "LSTM с онтологией"
        else:
            # --- Stage 2: BERT fallback ---
            self.stats['bert'] += 1
            enc = self.tokenizer(
                text_clean, truncation=True, padding=True,
                max_length=self.max_length_bert, return_tensors='pt',
            ).to(self.device)
            with torch.no_grad():
                bert_logits, bert_conf = self.bert_model(
                    enc['input_ids'], enc['attention_mask'], return_confidence=True
                )
                bert_probs = torch.softmax(bert_logits, dim=1)
                bert_pred = bert_probs.argmax().item()
            bert_emo = self.label_encoder.inverse_transform([bert_pred])[0]
            bert_pred_dict = {
                'emotion': bert_emo,
                'confidence': bert_conf.item(),
                'probabilities': bert_probs[0].cpu().numpy().tolist(),
            }
            bert_onto = self.ontology_model.get_ontology_analysis(text_clean, bert_pred_dict)
            final = bert_onto['adjusted_prediction']
            final_onto = bert_onto
            # BUG FIX: the adjusted prediction dict never carries a
            # 'probabilities' key, so the original
            # `final.get('probabilities', lstm_pred_dict['probabilities'])`
            # always fell back to the LSTM probabilities even when BERT made
            # the final decision. Track the winning model's probabilities
            # explicitly instead.
            final_probs = bert_pred_dict['probabilities']
            used_model = "BERT с онтологией"

        return {
            'text': text,
            'predicted_emotion': final['emotion'],
            'confidence': float(final['confidence']),
            'used_model': used_model,
            'rules_applied': final_onto['rule_analysis']['rules_applied'],
            'class_probabilities': {
                emo: float(prob)
                for emo, prob in zip(self.label_encoder.classes_, final_probs)
            },
        }

# ============================================================
# MODEL LOADING
# ============================================================


def load_model():
    """Load all artifacts from ./model and assemble the cascade classifier.

    Returns (CascadeEmotionClassifier, model_info dict).
    """
    print("Загрузка модели...")
    model_dir = 'model'

    # Hyper-parameters / metadata saved at training time.
    with open(f'{model_dir}/model_info.json', 'r', encoding='utf-8') as f:
        model_info = json.load(f)

    # Word -> index vocabulary for the LSTM branch.
    with open(f'{model_dir}/vocab.json', 'r', encoding='utf-8') as f:
        vocab = json.load(f)

    # Label encoder and ontology are pickled objects.
    # NOTE(review): pickle.load is unsafe on untrusted files; these must be
    # trusted local artifacts shipped with the application.
    with open(f'{model_dir}/label_encoder.pkl', 'rb') as f:
        label_encoder = pickle.load(f)
    with open(f'{model_dir}/ontology_model.pkl', 'rb') as f:
        ontology_model = pickle.load(f)

    # Build and load the LSTM.
    lstm_model = EmotionLSTM(
        vocab_size=len(vocab),
        embed_dim=model_info.get('embed_dim', 100),
        hidden_dim=256,
        num_classes=model_info['num_classes'],
        dropout=0.3,
        num_layers=2,
    )
    checkpoint = torch.load(f'{model_dir}/lstm_model.pth',
                            map_location=device, weights_only=True)
    lstm_model.load_state_dict(checkpoint['model_state_dict'])

    # Build and load BERT.
    bert_model = EmotionBERT(
        bert_model_name=model_info['bert_model_name'],
        num_classes=model_info['num_classes'],
        dropout=0.3,
    )
    bert_model.load_state_dict(
        torch.load(f'{model_dir}/bert_model.pth',
                   map_location=device, weights_only=True)
    )

    # Tokenizer saved alongside the models.
    tokenizer = BertTokenizer.from_pretrained(model_dir)

    # Assemble the cascade.
    cascade = CascadeEmotionClassifier(
        lstm_model=lstm_model,
        bert_model=bert_model,
        vocab=vocab,
        tokenizer=tokenizer,
        label_encoder=label_encoder,
        ontology_model=ontology_model,
        threshold=model_info.get('threshold', 0.95),
        device=device,
        max_length_lstm=model_info.get('max_length_lstm', 100),
        max_length_bert=model_info.get('max_length_bert', 128),
    )
    print("✅ Модель успешно загружена!")
    return cascade, model_info

# ============================================================
# FASTAPI APPLICATION
# ============================================================

app = FastAPI(title="Emotion Analysis with BERT and Ontology")

# Jinja2 templates served from ./templates.
templates = Jinja2Templates(directory="templates")

# Globals populated once at startup by load_model().
classifier = None
model_info = None


# NOTE(review): @app.on_event is deprecated in recent FastAPI releases in
# favor of lifespan handlers; kept as-is to preserve runtime behavior.
@app.on_event("startup")
async def startup_event():
    """Load the cascade classifier once when the server starts."""
    global classifier, model_info
    classifier, model_info = load_model()


@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Render the main page with the list of emotion classes."""
    # FIX: explicit `is not None` instead of truthiness on a model object.
    classes = classifier.label_encoder.classes_.tolist() if classifier is not None else []
    return templates.TemplateResponse(
        "index.html",
        {"request": request, "classes": classes},
    )


@app.post("/predict")
async def predict(text: str = Form(...)):
    """Classify *text* and return a JSON payload for the UI.

    503 when the model is not loaded yet, 400 for too-short input,
    500 on any prediction failure.
    """
    if classifier is None:
        raise HTTPException(status_code=503, detail="Модель еще не загружена")

    if not text or len(text.strip()) < 3:
        return JSONResponse(
            {"error": "Текст слишком короткий. Введите хотя бы 3 символа."},
            status_code=400,
        )

    try:
        result = classifier.predict(text)

        # Format the fired ontology rules for display (max 10).
        rules_display = []
        for rule in result['rules_applied'][:10]:
            if ':' in rule:
                cat, val = rule.split(':', 1)
                rules_display.append(f"{cat}: {val.strip()}")
            else:
                rules_display.append(f"{rule}")

        # Format per-class probabilities.
        # NOTE(review): this f-string looks like it originally carried HTML
        # markup that was stripped from the source — verify against the
        # index.html template before deploying.
        probs_display = []
        for emotion, prob in result['class_probabilities'].items():
            percentage = prob * 100
            probs_display.append(f"""
{emotion}
{percentage:.1f}%
""")

        return JSONResponse({
            "success": True,
            "text": result['text'][:200] + "..." if len(result['text']) > 200 else result['text'],
            "emotion": result['predicted_emotion'],
            "confidence": f"{result['confidence']*100:.1f}%",
            "used_model": result['used_model'],
            "rules": "".join(rules_display) if rules_display else "Нет примененных правил",
            "probabilities": "".join(probs_display),
        })
    except Exception as e:
        # Boundary handler: surface the error to the UI instead of a bare 500 page.
        return JSONResponse(
            {"error": f"Ошибка при анализе: {str(e)}"},
            status_code=500,
        )


@app.get("/stats")
async def get_stats():
    """Expose cascade routing statistics (LSTM vs BERT usage)."""
    if classifier is None:
        raise HTTPException(status_code=503, detail="Модель еще не загружена")
    stats = classifier.stats
    total = max(stats['total'], 1)  # guard against division by zero
    return JSONResponse({
        "total_predictions": stats['total'],
        "lstm_used": stats['lstm'],
        "bert_used": stats['bert'],
        "lstm_percentage": f"{(stats['lstm'] / total) * 100:.1f}%",
        "bert_percentage": f"{(stats['bert'] / total) * 100:.1f}%",
    })


@app.get("/health")
async def health_check():
    """Liveness probe: reports whether the model finished loading."""
    return {"status": "healthy", "model_loaded": classifier is not None}


# Entry point for local / container runs.
if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=port)