Spaces:

sentimentanalyzer01
/

sentiment_analyzer

Runtime error

App Files Files Community

sentimentanalyzer01 committed on Mar 21

Commit

9ce56d0

verified ·

1 Parent(s): 78ff76b

Update app.py

Browse files

Files changed (1) hide show

app.py +166 -215

app.py CHANGED Viewed

@@ -19,13 +19,9 @@ from transformers import BertTokenizer, BertModel
 import warnings
 warnings.filterwarnings('ignore')
-# Определяем устройство
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 print(f"Используется устройство: {device}")
-# ============================================================
-# ВСПОМОГАТЕЛЬНЫЕ ФУНКЦИИ
-# ============================================================
 def clean_russian_text(text):
     if not isinstance(text, str):
         return ""
@@ -44,11 +40,11 @@ def clean_russian_text(text):
     return text
 # ============================================================
-# ПОЛНАЯ ОНТОЛОГИЧЕСКАЯ МОДЕЛЬ (как в вашем Colab коде)
 # ============================================================
 class OntologyEmotionModel:
-    def __init__(self, emotions: List[str]):
         self.emotions = emotions
         self.morph = pymorphy3.MorphAnalyzer()
         self.ontology_graph = nx.DiGraph()
@@ -57,50 +53,74 @@ class OntologyEmotionModel:
         self.verified_hypotheses = defaultdict(list)
         self.sentiment_lexicon = {}
         self.rule_stats = {}
         self._load_rusentilex()
         self.init_ontology_level1()
         self.init_ontology_level2()
     def _load_rusentilex(self):
-        # Создаем небольшой лексикон вручную (без загрузки из интернета)
-        self.sentiment_lexicon = {
-            'хороший': 'радость', 'отличный': 'радость', 'прекрасный': 'радость',
-            'плохой': 'грусть', 'ужасный': 'грусть', 'отвратительный': 'грусть',
-            'злой': 'злость', 'бесит': 'злость', 'ненавижу': 'злость',
-            'страшно': 'страх', 'боюсь': 'страх', 'опасно': 'страх'
-        }
-        print(f"Лексикон загружен, слов: {len(self.sentiment_lexicon)}")
     def init_ontology_level1(self):
         self.emotion_definitions = {
             'радость': {
-                'valence': 'positive',
-                'arousal': 'high',
                 'definition': 'Позитивное эмоциональное состояние',
                 'opposite': ['грусть', 'злость']
             },
             'грусть': {
-                'valence': 'negative',
-                'arousal': 'low',
                 'definition': 'Негативное эмоциональное состояние',
                 'opposite': ['радость']
             },
             'злость': {
-                'valence': 'negative',
-                'arousal': 'high',
                 'definition': 'Негативное эмоциональное состояние',
                 'opposite': ['радость']
             },
             'страх': {
-                'valence': 'negative',
-                'arousal': 'high',
                 'definition': 'Эмоциональная реакция на угрозу',
                 'opposite': ['уверенность', 'спокойствие']
             },
             'сарказм': {
-                'valence': 'negative',
-                'arousal': 'high',
                 'definition': 'Язвительная насмешка',
                 'opposite': ['радость']
             }
@@ -110,7 +130,6 @@ class OntologyEmotionModel:
                 self.ontology_graph.add_node(emotion, **self.emotion_definitions[emotion])
             else:
                 self.ontology_graph.add_node(emotion, valence='neutral', arousal='neutral')
         for emotion, data in self.emotion_definitions.items():
             if 'opposite' in data:
                 for opposite in data['opposite']:
@@ -119,61 +138,62 @@ class OntologyEmotionModel:
     def init_ontology_level2(self):
         self.linguistic_rules = {
-            'усилители': {
-                'words': ['очень', 'сильно', 'крайне', 'чрезвычайно', 'невероятно', 'абсолютно'],
-                'effect': 'increase_arousal',
-                'weight': 0.3
-            },
-            'ослабители': {
-                'words': ['слегка', 'немного', 'чуть-чуть', 'отчасти', 'несколько'],
-                'effect': 'decrease_arousal',
-                'weight': -0.2
-            },
-            'отрицания': {
-                'words': ['не', 'ни', 'нет', 'нельзя', 'невозможно'],
-                'effect': 'negation',
-                'weight': -0.5
-            },
-            'восклицания': {
-                'patterns': [r'!+', r'\?+', r'\.{3,}'],
-                'effect': 'increase_arousal',
-                'weight': 0.4
-            },
-            'вопросительные': {
-                'patterns': [r'\?+'],
-                'effect': 'uncertainty',
-                'weight': 0.2
-            },
-            'сарказм_маркеры': {
-                'words': ['какой', 'такой', 'прям', 'ага', 'ну да'],
-                'effect': 'sarcasm',
-                'weight': 0.3
-            }
         }
     def add_empirical_knowledge(self, text: str, emotion: str, confidence: float):
-        self.empirical_base[emotion].append({
-            'text': text,
-            'confidence': confidence,
-            'timestamp': pd.Timestamp.now()
-        })
         if len(self.empirical_base[emotion]) > 1000:
             self.empirical_base[emotion] = self.empirical_base[emotion][-1000:]
     def apply_linguistic_rules(self, text: str) -> Dict:
         rules_applied = []
         adjustments = {'valence': 0, 'arousal': 0, 'uncertainty': 0, 'sarcasm': 0}
         words = text.lower().split()
         parsed = [self.morph.parse(w)[0] for w in words]
         lemmas = [p.normal_form for p in parsed]
         for category, rule in self.linguistic_rules.items():
             if 'words' in rule:
                 for word in rule['words']:
                     if word in lemmas:
                         rules_applied.append(f"{category}: {word}")
-                        effect = rule['effect']
-                        weight = rule['weight']
                         if effect == 'increase_arousal':
                             adjustments['arousal'] += weight
                         elif effect == 'decrease_arousal':
@@ -192,7 +212,6 @@ class OntologyEmotionModel:
                         elif rule['effect'] == 'uncertainty':
                             adjustments['uncertainty'] += weight
-        # Обработка отрицания
         if 'не' in lemmas:
             idx = lemmas.index('не')
             if idx + 1 < len(lemmas) and lemmas[idx+1] == 'очень':
@@ -201,101 +220,105 @@ class OntologyEmotionModel:
                 rules_applied.append("сочетание: не очень")
             else:
                 for j in range(idx+1, min(idx+4, len(lemmas))):
-                    sentiment = self.sentiment_lexicon.get(lemmas[j], 'neutral')
-                    if sentiment in ('грусть', 'злость', 'страх'):
-                        adjustments['valence'] += 1.0
-                        rules_applied.append(f"инверсия негатива: не {lemmas[j]}")
-                    elif sentiment == 'радость':
-                        adjustments['valence'] -= 1.0
-                        rules_applied.append(f"инверсия позитива: не {lemmas[j]}")
-                    break
-        return {
-            'rules_applied': rules_applied,
-            'adjustments': adjustments,
-            'lemmas': lemmas
-        }
     def adjust_prediction_with_rules(self, prediction: Dict, rule_analysis: Dict) -> Dict:
         original_emotion = prediction['emotion']
         original_confidence = prediction['confidence']
         adj = rule_analysis['adjustments']
         rules = rule_analysis['rules_applied']
         conf_mult = 1.0 + adj['arousal'] * 0.2 + adj['uncertainty'] * 0.1 - abs(adj['valence']) * 0.1
         conf_mult = np.clip(conf_mult, 0.5, 1.5)
         new_confidence = original_confidence * conf_mult
         new_emotion = original_emotion
         for rule in rules:
             if rule.startswith("инверсия негатива:"):
                 new_emotion = 'радость'
                 break
             elif rule.startswith("инверсия позитива:"):
-                if adj['arousal'] > 0.3:
-                    new_emotion = 'злость'
-                else:
-                    new_emotion = 'грусть'
                 break
-        sarcasm_flag = adj['sarcasm'] > 0.5
-        if sarcasm_flag and original_emotion == 'радость':
             new_emotion = 'сарказм'
             new_confidence *= 0.8
         if any('восклицание' in r for r in rules):
             new_confidence = min(new_confidence * 1.2, 1.0)
-        return {
-            'emotion': new_emotion,
-            'confidence': new_confidence,
-            'rules_applied': rules
-        }
     def get_ontology_analysis(self, text: str, model_prediction: Dict) -> Dict:
         rule_analysis = self.apply_linguistic_rules(text)
         adjusted = self.adjust_prediction_with_rules(model_prediction, rule_analysis)
         return {
             'rule_analysis': rule_analysis,
-            'adjusted_prediction': adjusted
         }
     def get_statistics(self) -> Dict:
         return {
             'ontology_nodes': len(self.ontology_graph.nodes),
             'ontology_edges': len(self.ontology_graph.edges),
             'linguistic_rules': len(self.linguistic_rules),
             'emotions_covered': len(self.emotions),
-            'lexicon_size': len(self.sentiment_lexicon)
         }
 # ============================================================
 # КЛАССЫ МОДЕЛЕЙ LSTM и BERT
 # ============================================================
 class EmotionLSTM(nn.Module):
-    def __init__(self, vocab_size, embed_dim=128, hidden_dim=256,
-                 num_classes=3, dropout=0.3, num_layers=2):
         super().__init__()
         self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
-        self.lstm = nn.LSTM(
-            embed_dim,
-            hidden_dim,
-            num_layers=num_layers,
-            batch_first=True,
-            bidirectional=True,
-            dropout=dropout if num_layers > 1 else 0
-        )
         self.dropout = nn.Dropout(dropout)
         self.classifier = nn.Sequential(
-            nn.Linear(hidden_dim * 2, 128),
-            nn.ReLU(),
-            nn.Dropout(dropout),
-            nn.Linear(128, 64),
-            nn.ReLU(),
-            nn.Linear(64, num_classes)
         )
     def forward(self, x, return_confidence=False):
         embedded = self.embedding(x)
         lstm_out, (hidden, cell) = self.lstm(embedded)
@@ -312,17 +335,12 @@ class EmotionBERT(nn.Module):
     def __init__(self, bert_model_name, num_classes, dropout=0.3):
         super().__init__()
         self.bert = BertModel.from_pretrained(bert_model_name)
-        for p in list(self.bert.parameters())[:-50]:
-            p.requires_grad = False
         hidden = self.bert.config.hidden_size
         self.classifier = nn.Sequential(
-            nn.Dropout(dropout),
-            nn.Linear(hidden, 256), nn.ReLU(),
-            nn.Dropout(dropout),
-            nn.Linear(256, 128), nn.ReLU(),
             nn.Linear(128, num_classes)
         )
     def forward(self, input_ids, attention_mask, return_confidence=False):
         out = self.bert(input_ids, attention_mask, return_dict=True)
         cls = out.last_hidden_state[:, 0, :]
@@ -333,13 +351,8 @@ class EmotionBERT(nn.Module):
             return logits, conf
         return logits
-# ============================================================
-# КАСКАДНЫЙ КЛАССИФИКАТОР
-# ============================================================
 class CascadeEmotionClassifier:
-    def __init__(self, lstm_model, bert_model, vocab, tokenizer,
-                 label_encoder, ontology_model, threshold=0.95, device='cpu',
-                 max_length_lstm=100, max_length_bert=128):
         self.lstm_model = lstm_model
         self.bert_model = bert_model
         self.vocab = vocab
@@ -350,83 +363,59 @@ class CascadeEmotionClassifier:
         self.device = device
         self.max_length_lstm = max_length_lstm
         self.max_length_bert = max_length_bert
         self.lstm_model.eval()
         self.bert_model.eval()
         self.lstm_model.to(device)
         self.bert_model.to(device)
-        self.stats = {'total': 0, 'lstm': 0, 'bert': 0}
     def text_to_sequence(self, text):
         words = str(text).split()[:self.max_length_lstm]
         sequence = [self.vocab.get(word, self.vocab.get('<UNK>', 1)) for word in words]
         if len(sequence) < self.max_length_lstm:
             sequence += [self.vocab.get('<PAD>', 0)] * (self.max_length_lstm - len(sequence))
         return sequence[:self.max_length_lstm]
     def predict(self, text):
         self.stats['total'] += 1
         text_clean = clean_russian_text(text)
         seq = torch.LongTensor([self.text_to_sequence(text_clean)]).to(self.device)
         with torch.no_grad():
             lstm_logits, lstm_conf = self.lstm_model(seq, return_confidence=True)
             lstm_probs = torch.softmax(lstm_logits, dim=1)
             lstm_pred = lstm_probs.argmax().item()
         lstm_emo = self.label_encoder.inverse_transform([lstm_pred])[0]
-        lstm_pred_dict = {
-            'emotion': lstm_emo,
-            'confidence': lstm_conf.item(),
-            'probabilities': lstm_probs[0].cpu().numpy().tolist()
-        }
         lstm_onto = self.ontology_model.get_ontology_analysis(text_clean, lstm_pred_dict)
-        lstm_adjusted = lstm_onto['adjusted_prediction']
-        if lstm_adjusted['confidence'] >= self.threshold:
             self.stats['lstm'] += 1
-            final = lstm_adjusted
-            used_model = "LSTM + онтология"
         else:
             self.stats['bert'] += 1
-            enc = self.tokenizer(text_clean, truncation=True, padding=True,
-                                 max_length=self.max_length_bert, return_tensors='pt').to(self.device)
             with torch.no_grad():
                 bert_logits, bert_conf = self.bert_model(enc['input_ids'], enc['attention_mask'], return_confidence=True)
                 bert_probs = torch.softmax(bert_logits, dim=1)
                 bert_pred = bert_probs.argmax().item()
             bert_emo = self.label_encoder.inverse_transform([bert_pred])[0]
-            bert_pred_dict = {
-                'emotion': bert_emo,
-                'confidence': bert_conf.item(),
-                'probabilities': bert_probs[0].cpu().numpy().tolist()
-            }
             bert_onto = self.ontology_model.get_ontology_analysis(text_clean, bert_pred_dict)
-            bert_adjusted = bert_onto['adjusted_prediction']
-            final = bert_adjusted
-            used_model = "BERT + онтология"
-            lstm_onto = bert_onto
         return {
             'text': text,
             'predicted_emotion': final['emotion'],
             'confidence': float(final['confidence']),
-            'used_model': used_model,
-            'rules_applied': lstm_onto['rule_analysis']['rules_applied'],
-            'class_probabilities': {
-                emo: float(prob)
-                for emo, prob in zip(self.label_encoder.classes_, final.get('probabilities', lstm_pred_dict['probabilities']))
-            },
-            'was_corrected': len(lstm_onto['rule_analysis']['rules_applied']) > 0
         }
 # ============================================================
-# ЗАГРУЗКА МОДЕЛИ
 # ============================================================
 def load_model():
     print("Загрузка модели...")
     model_dir = 'model'
@@ -440,10 +429,11 @@ def load_model():
     with open(f'{model_dir}/label_encoder.pkl', 'rb') as f:
         label_encoder = pickle.load(f)
-    # СОЗДАЁМ ОНТОЛОГИЮ ЗАНОВО (сохраняем все функции, но без загрузки из файла)
-    print("📂 Создание онтологии...")
-    ontology_model = OntologyEmotionModel(emotions=list(label_encoder.classes_))
-    print(f"✅ Онтология создана, классов: {len(label_encoder.classes_)}")
     # LSTM
     lstm_model = EmotionLSTM(
@@ -488,6 +478,7 @@ def load_model():
 # ============================================================
 # FASTAPI ПРИЛОЖЕНИЕ
 # ============================================================
 app = FastAPI(title="Emotion Analysis with BERT and Ontology")
 templates = Jinja2Templates(directory="templates")
@@ -501,25 +492,16 @@ async def startup_event():
 @app.get("/", response_class=HTMLResponse)
 async def home(request: Request):
-    return templates.TemplateResponse(
-        "index.html",
-        {
-            "request": request,
-            "classes": classifier.label_encoder.classes_.tolist() if classifier else []
-        }
-    )
 @app.post("/predict")
 async def predict(text: str = Form(...)):
     if not classifier:
-        raise HTTPException(status_code=503, detail="Модель еще не загружена")
     if not text or len(text.strip()) < 3:
         return JSONResponse({"error": "Введите хотя бы 3 символа."}, status_code=400)
     try:
         result = classifier.predict(text)
         rules_display = []
         for rule in result['rules_applied'][:10]:
             if ':' in rule:
@@ -527,48 +509,17 @@ async def predict(text: str = Form(...)):
                 rules_display.append(f"<span class='rule-tag'>{cat}: {val}</span>")
             else:
                 rules_display.append(f"<span class='rule-tag'>{rule}</span>")
-        probs_display = []
-        for emotion, prob in result['class_probabilities'].items():
-            percentage = prob * 100
-            probs_display.append(f"""
-                <div class="prob-item">
-                    <span class="prob-label">{emotion}</span>
-                    <div class="prob-bar-container">
-                        <div class="prob-bar" style="width: {percentage}%"></div>
-                    </div>
-                    <span class="prob-value">{percentage:.1f}%</span>
-                </div>
-            """)
         return JSONResponse({
             "success": True,
-            "text": result['text'][:200] + "..." if len(result['text']) > 200 else result['text'],
             "emotion": result['predicted_emotion'],
             "confidence": f"{result['confidence']*100:.1f}%",
             "used_model": result['used_model'],
             "rules": "".join(rules_display) if rules_display else "Нет правил",
-            "probabilities": "".join(probs_display),
-            "was_corrected": result['was_corrected']
         })
     except Exception as e:
         return JSONResponse({"error": str(e)}, status_code=500)
-@app.get("/stats")
-async def get_stats():
-    if not classifier:
-        raise HTTPException(status_code=503, detail="Модель не загружена")
-    stats = classifier.stats
-    onto_stats = classifier.ontology_model.get_statistics()
-    return JSONResponse({
-        "total_predictions": stats['total'],
-        "lstm_used": stats['lstm'],
-        "bert_used": stats['bert'],
-        "ontology_stats": onto_stats
-    })
 @app.get("/health")
 async def health_check():
     return {"status": "healthy", "model_loaded": classifier is not None}

 import warnings
 warnings.filterwarnings('ignore')
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 print(f"Используется устройство: {device}")
 def clean_russian_text(text):
     if not isinstance(text, str):
         return ""
     return text
 # ============================================================
+# ПОЛНЫЙ КЛАСС ОНТОЛОГИИ (как в Colab)
 # ============================================================
 class OntologyEmotionModel:
+    def __init__(self, emotions: List[str], train_texts: List[str] = None, train_labels: List[int] = None):
         self.emotions = emotions
         self.morph = pymorphy3.MorphAnalyzer()
         self.ontology_graph = nx.DiGraph()
         self.verified_hypotheses = defaultdict(list)
         self.sentiment_lexicon = {}
         self.rule_stats = {}
+        if train_texts is not None and train_labels is not None:
+            self._build_sentiment_lexicon(train_texts, train_labels)
         self._load_rusentilex()
         self.init_ontology_level1()
         self.init_ontology_level2()
+    def _build_sentiment_lexicon(self, texts: List[str], labels: List[int]):
+        word_class_counts = defaultdict(lambda: np.zeros(len(self.emotions)))
+        for text, label in zip(texts, labels):
+            words = set(clean_russian_text(text).split())
+            for word in words:
+                lemma = self.morph.parse(word)[0].normal_form
+                word_class_counts[lemma][label] += 1
+        for lemma, counts in word_class_counts.items():
+            prob = counts / (counts.sum() + 1e-10)
+            if prob.max() > 0.6 and counts.sum() > 5:
+                dominant_class = self.emotions[np.argmax(prob)]
+                self.sentiment_lexicon[lemma] = dominant_class
+    def _parse_rusentilex(self, content):
+        lines = content.splitlines()
+        for line in lines[1:]:
+            parts = line.strip().split(',')
+            if len(parts) >= 3:
+                word = parts[0].strip().lower()
+                sentiment = parts[2].strip().lower()
+                lemma = self.morph.parse(word)[0].normal_form
+                if sentiment == 'positive':
+                    self.sentiment_lexicon[lemma] = 'радость'
+                elif sentiment == 'negative':
+                    self.sentiment_lexicon[lemma] = 'грусть'
     def _load_rusentilex(self):
+        url = "https://raw.githubusercontent.com/nicolay-r/sentiment-relation-classifiers/master/data/rusentilex.csv"
+        try:
+            r = requests.get(url, timeout=10)
+            if r.status_code == 200:
+                self._parse_rusentilex(r.text)
+                print("RuSentiLex загружен")
+        except Exception as e:
+            print(f"RuSentiLex не загружен: {e}")
     def init_ontology_level1(self):
         self.emotion_definitions = {
             'радость': {
+                'valence': 'positive', 'arousal': 'high',
                 'definition': 'Позитивное эмоциональное состояние',
                 'opposite': ['грусть', 'злость']
             },
             'грусть': {
+                'valence': 'negative', 'arousal': 'low',
                 'definition': 'Негативное эмоциональное состояние',
                 'opposite': ['радость']
             },
             'злость': {
+                'valence': 'negative', 'arousal': 'high',
                 'definition': 'Негативное эмоциональное состояние',
                 'opposite': ['радость']
             },
             'страх': {
+                'valence': 'negative', 'arousal': 'high',
                 'definition': 'Эмоциональная реакция на угрозу',
                 'opposite': ['уверенность', 'спокойствие']
             },
             'сарказм': {
+                'valence': 'negative', 'arousal': 'high',
                 'definition': 'Язвительная насмешка',
                 'opposite': ['радость']
             }
                 self.ontology_graph.add_node(emotion, **self.emotion_definitions[emotion])
             else:
                 self.ontology_graph.add_node(emotion, valence='neutral', arousal='neutral')
         for emotion, data in self.emotion_definitions.items():
             if 'opposite' in data:
                 for opposite in data['opposite']:
     def init_ontology_level2(self):
         self.linguistic_rules = {
+            'усилители': {'words': ['очень', 'сильно', 'крайне', 'чрезвычайно', 'невероятно', 'абсолютно'], 'effect': 'increase_arousal', 'weight': 0.3, 'learnable': True},
+            'ослабители': {'words': ['слегка', 'немного', 'чуть-чуть', 'отчасти', 'несколько'], 'effect': 'decrease_arousal', 'weight': -0.2, 'learnable': True},
+            'отрицания': {'words': ['не', 'ни', 'нет', 'нельзя', 'невозможно'], 'effect': 'negation', 'weight': -0.5, 'learnable': True},
+            'восклицания': {'patterns': [r'!+', r'\?+', r'\.{3,}'], 'effect': 'increase_arousal', 'weight': 0.4, 'learnable': True},
+            'вопросительные': {'patterns': [r'\?+'], 'effect': 'uncertainty', 'weight': 0.2, 'learnable': True},
+            'сарказм_маркеры': {'words': ['какой', 'такой', 'прям', 'ага', 'ну да'], 'effect': 'sarcasm', 'weight': 0.3, 'learnable': True}
         }
     def add_empirical_knowledge(self, text: str, emotion: str, confidence: float):
+        self.empirical_base[emotion].append({'text': text, 'confidence': confidence})
         if len(self.empirical_base[emotion]) > 1000:
             self.empirical_base[emotion] = self.empirical_base[emotion][-1000:]
+    def formulate_hypothesis(self, text: str, model_prediction: Dict, rule_based_prediction: Dict) -> Dict:
+        hypothesis_id = f"hyp_{len(self.hypotheses_db) + 1:06d}"
+        hypothesis = {
+            'id': hypothesis_id, 'text': text,
+            'model_prediction': model_prediction,
+            'rule_based_prediction': rule_based_prediction,
+            'disagreement': self.calculate_disagreement(model_prediction, rule_based_prediction),
+            'status': 'pending'
+        }
+        self.hypotheses_db[hypothesis_id] = hypothesis
+        return hypothesis
+    def verify_hypothesis(self, hypothesis_id: str, actual_emotion: str = None) -> Dict:
+        if hypothesis_id not in self.hypotheses_db:
+            return None
+        hypothesis = self.hypotheses_db[hypothesis_id]
+        if actual_emotion:
+            model_correct = hypothesis['model_prediction']['emotion'] == actual_emotion
+            rule_correct = hypothesis['rule_based_prediction']['emotion'] == actual_emotion
+            if model_correct and not rule_correct:
+                hypothesis['status'] = 'model_superior'
+            elif rule_correct and not model_correct:
+                hypothesis['status'] = 'rule_superior'
+            elif model_correct and rule_correct:
+                hypothesis['status'] = 'both_correct'
+            else:
+                hypothesis['status'] = 'both_incorrect'
+        return hypothesis
     def apply_linguistic_rules(self, text: str) -> Dict:
         rules_applied = []
         adjustments = {'valence': 0, 'arousal': 0, 'uncertainty': 0, 'sarcasm': 0}
         words = text.lower().split()
         parsed = [self.morph.parse(w)[0] for w in words]
         lemmas = [p.normal_form for p in parsed]
+        pos_tags = [p.tag.POS for p in parsed]
         for category, rule in self.linguistic_rules.items():
             if 'words' in rule:
                 for word in rule['words']:
                     if word in lemmas:
                         rules_applied.append(f"{category}: {word}")
+                        effect = rule['effect']; weight = rule['weight']
                         if effect == 'increase_arousal':
                             adjustments['arousal'] += weight
                         elif effect == 'decrease_arousal':
                         elif rule['effect'] == 'uncertainty':
                             adjustments['uncertainty'] += weight
         if 'не' in lemmas:
             idx = lemmas.index('не')
             if idx + 1 < len(lemmas) and lemmas[idx+1] == 'очень':
                 rules_applied.append("сочетание: не очень")
             else:
                 for j in range(idx+1, min(idx+4, len(lemmas))):
+                    if pos_tags[j] in ('ADJF', 'ADJS', 'ADVB'):
+                        target_word = lemmas[j]
+                        sentiment = self.sentiment_lexicon.get(target_word, 'neutral')
+                        if sentiment in ('грусть', 'злость', 'страх'):
+                            adjustments['valence'] += 1.0
+                            rules_applied.append(f"инверсия негатива: не {target_word}")
+                        elif sentiment == 'радость':
+                            adjustments['valence'] -= 1.0
+                            rules_applied.append(f"инверсия позитива: не {target_word}")
+                        break
+        pos_words = [w for w in lemmas if self.sentiment_lexicon.get(w) == 'радость']
+        neg_words = [w for w in lemmas if self.sentiment_lexicon.get(w) in ('грусть', 'злость', 'страх')]
+        if pos_words and neg_words:
+            adjustments['sarcasm'] += 0.5
+            rules_applied.append(f"контраст тональности: позитив {pos_words[:2]} vs негатив {neg_words[:2]}")
+        return {'rules_applied': rules_applied, 'adjustments': adjustments, 'lemmas': lemmas}
+    def calculate_disagreement(self, pred1: Dict, pred2: Dict) -> float:
+        if pred1['emotion'] == pred2['emotion']:
+            return 0.0
+        emotions = list(self.emotion_definitions.keys())
+        idx1 = emotions.index(pred1['emotion']) if pred1['emotion'] in emotions else -1
+        idx2 = emotions.index(pred2['emotion']) if pred2['emotion'] in emotions else -1
+        if idx1 == -1 or idx2 == -1:
+            return 0.5
+        distance = abs(idx1 - idx2) / len(emotions)
+        return 0.7 * distance
+    def explain_transition(self, from_emotion: str, to_emotion: str) -> List[str]:
+        try:
+            return nx.shortest_path(self.ontology_graph, source=from_emotion, target=to_emotion)
+        except:
+            return []
     def adjust_prediction_with_rules(self, prediction: Dict, rule_analysis: Dict) -> Dict:
         original_emotion = prediction['emotion']
         original_confidence = prediction['confidence']
         adj = rule_analysis['adjustments']
         rules = rule_analysis['rules_applied']
         conf_mult = 1.0 + adj['arousal'] * 0.2 + adj['uncertainty'] * 0.1 - abs(adj['valence']) * 0.1
         conf_mult = np.clip(conf_mult, 0.5, 1.5)
         new_confidence = original_confidence * conf_mult
         new_emotion = original_emotion
         for rule in rules:
             if rule.startswith("инверсия негатива:"):
                 new_emotion = 'радость'
                 break
             elif rule.startswith("инверсия позитива:"):
+                new_emotion = 'грусть' if adj['arousal'] <= 0.3 else 'злость'
                 break
+        if adj['sarcasm'] > 0.5 and original_emotion == 'радость':
             new_emotion = 'сарказм'
             new_confidence *= 0.8
         if any('восклицание' in r for r in rules):
             new_confidence = min(new_confidence * 1.2, 1.0)
+        return {'emotion': new_emotion, 'confidence': new_confidence, 'rules_applied': rules}
     def get_ontology_analysis(self, text: str, model_prediction: Dict) -> Dict:
         rule_analysis = self.apply_linguistic_rules(text)
         adjusted = self.adjust_prediction_with_rules(model_prediction, rule_analysis)
+        disagreement = self.calculate_disagreement(model_prediction, adjusted)
+        hypothesis = self.formulate_hypothesis(text, model_prediction, adjusted) if disagreement > 0.2 else None
         return {
             'rule_analysis': rule_analysis,
+            'adjusted_prediction': adjusted,
+            'disagreement': disagreement,
+            'hypothesis': hypothesis
         }
     def get_statistics(self) -> Dict:
         return {
             'ontology_nodes': len(self.ontology_graph.nodes),
             'ontology_edges': len(self.ontology_graph.edges),
             'linguistic_rules': len(self.linguistic_rules),
             'emotions_covered': len(self.emotions),
+            'pending_hypotheses': len([h for h in self.hypotheses_db.values() if h['status'] == 'pending'])
         }
 # ============================================================
 # КЛАССЫ МОДЕЛЕЙ LSTM и BERT
 # ============================================================
 class EmotionLSTM(nn.Module):
+    def __init__(self, vocab_size, embed_dim=128, hidden_dim=256, num_classes=3, dropout=0.3, num_layers=2):
         """Build the embedding, bidirectional LSTM and classification head.

         Args:
             vocab_size: Number of rows in the embedding table.
             embed_dim: Size of each token embedding.
             hidden_dim: Hidden size of the LSTM, per direction.
             num_classes: Number of output units of the classifier head.
             dropout: Dropout probability shared by the LSTM, the standalone
                 dropout layer and the classifier head.
             num_layers: Number of stacked LSTM layers.
         """
         super().__init__()
         # Index 0 is the padding index, so its embedding receives no gradient updates.
         self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
         # NOTE(review): nn.LSTM applies `dropout` only between stacked layers, so it
         # has no effect (and PyTorch warns) when num_layers == 1.
+        self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers, batch_first=True, bidirectional=True, dropout=dropout)
         self.dropout = nn.Dropout(dropout)
         # The head takes hidden_dim * 2 features because the LSTM is bidirectional;
         # presumably forward() feeds it the concatenated directions — confirm there.
         self.classifier = nn.Sequential(
+            nn.Linear(hidden_dim * 2, 128), nn.ReLU(), nn.Dropout(dropout),
+            nn.Linear(128, 64), nn.ReLU(), nn.Linear(64, num_classes)
         )
     def forward(self, x, return_confidence=False):
         embedded = self.embedding(x)
         lstm_out, (hidden, cell) = self.lstm(embedded)
     def __init__(self, bert_model_name, num_classes, dropout=0.3):
         """Load a pretrained BERT encoder and attach a classification head.

         Args:
             bert_model_name: Identifier passed to ``BertModel.from_pretrained``.
             num_classes: Number of output units of the classifier head.
             dropout: Dropout probability used before each of the first two
                 linear layers of the head.
         """
         super().__init__()
         self.bert = BertModel.from_pretrained(bert_model_name)
         # The head's input width follows the encoder's configured hidden size.
         hidden = self.bert.config.hidden_size
         # Head: dropout -> hidden->256 -> ReLU -> dropout -> 256->128 -> ReLU -> 128->num_classes.
         self.classifier = nn.Sequential(
+            nn.Dropout(dropout), nn.Linear(hidden, 256), nn.ReLU(),
+            nn.Dropout(dropout), nn.Linear(256, 128), nn.ReLU(),
             nn.Linear(128, num_classes)
         )
     def forward(self, input_ids, attention_mask, return_confidence=False):
         out = self.bert(input_ids, attention_mask, return_dict=True)
         cls = out.last_hidden_state[:, 0, :]
             return logits, conf
         return logits
 class CascadeEmotionClassifier:
     """Two-stage emotion classifier: LSTM first, BERT as a fallback.

     Every prediction is post-processed by the ontology model. The LSTM result
     is kept when its ontology-adjusted confidence reaches ``threshold``;
     otherwise the text is re-classified with BERT and that ontology-adjusted
     result is returned instead.
     """

     def __init__(self, lstm_model, bert_model, vocab, tokenizer, label_encoder, ontology_model, threshold=0.95, device='cpu', max_length_lstm=100, max_length_bert=128):
         """Store the components, switch both models to eval mode and move them to *device*.

         Args:
             lstm_model: First-stage model, called as ``model(seq, return_confidence=True)``.
             bert_model: Fallback model, called as
                 ``model(input_ids, attention_mask, return_confidence=True)``.
             vocab: Mapping from word to integer id used by ``text_to_sequence``.
             tokenizer: Callable tokenizer used to encode text for the BERT model.
             label_encoder: Object whose ``inverse_transform`` maps class
                 indices back to emotion labels.
             ontology_model: Object providing ``get_ontology_analysis``.
             threshold: Minimum ontology-adjusted LSTM confidence required to
                 skip the BERT stage.
             device: Device the models and input tensors are moved to.
             max_length_lstm: Fixed sequence length for the LSTM input.
             max_length_bert: ``max_length`` passed to the tokenizer.
         """
         self.lstm_model = lstm_model
         self.bert_model = bert_model
         self.vocab = vocab
         # predict() reads these four attributes, so they must be stored here.
         self.tokenizer = tokenizer
         self.label_encoder = label_encoder
         self.ontology_model = ontology_model
         self.threshold = threshold
         self.device = device
         self.max_length_lstm = max_length_lstm
         self.max_length_bert = max_length_bert
         self.lstm_model.eval()
         self.bert_model.eval()
         self.lstm_model.to(device)
         self.bert_model.to(device)
         # NOTE(review): 'corrections' is initialised but never incremented in this class.
         self.stats = {'total': 0, 'lstm': 0, 'bert': 0, 'corrections': 0}

     def text_to_sequence(self, text):
         """Convert *text* to a list of exactly ``max_length_lstm`` vocabulary ids.

         The text is split on whitespace and truncated to ``max_length_lstm``
         words. Unknown words map to the ``'<UNK>'`` id (default 1) and the
         tail is padded with the ``'<PAD>'`` id (default 0).
         """
         unk_id = self.vocab.get('<UNK>', 1)
         pad_id = self.vocab.get('<PAD>', 0)
         words = str(text).split()[:self.max_length_lstm]
         sequence = [self.vocab.get(word, unk_id) for word in words]
         sequence += [pad_id] * (self.max_length_lstm - len(sequence))
         return sequence

     def predict(self, text):
         """Classify *text* and return the ontology-adjusted result.

         Returns:
             A dict with the original ``text``, ``predicted_emotion``,
             ``confidence`` (float), ``used_model`` (which stage produced the
             answer), ``rules_applied`` and ``was_corrected_by_ontology``
             (``True`` when at least one rule was applied).
         """
         self.stats['total'] += 1
         text_clean = clean_russian_text(text)

         # Stage 1: LSTM.
         seq = torch.LongTensor([self.text_to_sequence(text_clean)]).to(self.device)
         with torch.no_grad():
             lstm_logits, lstm_conf = self.lstm_model(seq, return_confidence=True)
             lstm_probs = torch.softmax(lstm_logits, dim=1)
             lstm_pred = lstm_probs.argmax().item()
         lstm_emo = self.label_encoder.inverse_transform([lstm_pred])[0]
         lstm_pred_dict = {'emotion': lstm_emo, 'confidence': lstm_conf.item(), 'probabilities': lstm_probs[0].cpu().numpy().tolist()}
         # `onto` always holds the analysis of whichever stage is finally used,
         # so the return value never depends on a variable bound in one branch only.
         onto = self.ontology_model.get_ontology_analysis(text_clean, lstm_pred_dict)

         if onto['adjusted_prediction']['confidence'] >= self.threshold:
             self.stats['lstm'] += 1
             used = "LSTM + онтология"
         else:
             # Stage 2: the LSTM was not confident enough, fall back to BERT.
             self.stats['bert'] += 1
             enc = self.tokenizer(text_clean, truncation=True, padding=True, max_length=self.max_length_bert, return_tensors='pt').to(self.device)
             with torch.no_grad():
                 bert_logits, bert_conf = self.bert_model(enc['input_ids'], enc['attention_mask'], return_confidence=True)
                 bert_probs = torch.softmax(bert_logits, dim=1)
                 bert_pred = bert_probs.argmax().item()
             bert_emo = self.label_encoder.inverse_transform([bert_pred])[0]
             bert_pred_dict = {'emotion': bert_emo, 'confidence': bert_conf.item(), 'probabilities': bert_probs[0].cpu().numpy().tolist()}
             onto = self.ontology_model.get_ontology_analysis(text_clean, bert_pred_dict)
             used = "BERT + онтология"

         final = onto['adjusted_prediction']
         rules_applied = onto['rule_analysis']['rules_applied']
         return {
             'text': text,
             'predicted_emotion': final['emotion'],
             'confidence': float(final['confidence']),
             'used_model': used,
             'rules_applied': rules_applied,
             'was_corrected_by_ontology': len(rules_applied) > 0,
         }
 # ============================================================
+# ЗАГРУЗКА МОДЕЛИ (с загрузкой сохранённой онтологии)
 # ============================================================
 def load_model():
     print("Загрузка модели...")
     model_dir = 'model'
     with open(f'{model_dir}/label_encoder.pkl', 'rb') as f:
         label_encoder = pickle.load(f)
+    # Загружаем сохранённую онтологию
+    print("📂 Загрузка сохранённой онтологии...")
+    with open(f'{model_dir}/ontology_model.pkl', 'rb') as f:
+        ontology_model = pickle.load(f)
+    print("✅ Онтология загружена")
     # LSTM
     lstm_model = EmotionLSTM(
 # ============================================================
 # FASTAPI ПРИЛОЖЕНИЕ
 # ============================================================
 # ASGI application object; the route decorators below register handlers on it.
 app = FastAPI(title="Emotion Analysis with BERT and Ontology")
 # Jinja2 templates are loaded from the local "templates" directory.
 templates = Jinja2Templates(directory="templates")
 @app.get("/", response_class=HTMLResponse)
 async def home(request: Request):
     """Serve the landing page by rendering the ``index.html`` template.

     The incoming request is passed to the template context under the
     ``"request"`` key.
     """
     return templates.TemplateResponse("index.html", {"request": request})
 @app.post("/predict")
 async def predict(text: str = Form(...)):
     """Classify the submitted text and return the result as JSON.

     Args:
         text: Form field with the text to analyse.

     Returns:
         A JSON response with ``success``, ``emotion``, ``confidence``
         (formatted as a percentage string), ``used_model``, ``rules`` (an HTML
         fragment of rule tags, or a placeholder string when no rule fired)
         and ``was_corrected``. Input shorter than 3 characters after
         stripping yields a 400 response; any exception raised while
         predicting yields a 500 response carrying the error message.

     Raises:
         HTTPException: With status 503 when the classifier is not available.
     """
     # Local import keeps this handler self-contained; html.escape is stdlib.
     import html

     if not classifier:
         raise HTTPException(status_code=503, detail="Модель не загружена")
     if not text or len(text.strip()) < 3:
         return JSONResponse({"error": "Введите хотя бы 3 символа."}, status_code=400)
     try:
         result = classifier.predict(text)
         rules_display = []
         # Only the first 10 rules are shown.
         for rule in result['rules_applied'][:10]:
             if ':' in rule:
                 # Split "category: value" once so colons inside the value survive.
                 cat, val = (part.strip() for part in rule.split(':', 1))
                 label = f"{cat}: {val}"
             else:
                 label = rule
             # Rule text is derived from user input, so escape it before
             # embedding it into the HTML fragment.
             rules_display.append(f"<span class='rule-tag'>{html.escape(label)}</span>")
         return JSONResponse({
             "success": True,
             "emotion": result['predicted_emotion'],
             "confidence": f"{result['confidence']*100:.1f}%",
             "used_model": result['used_model'],
             "rules": "".join(rules_display) if rules_display else "Нет правил",
             "was_corrected": result['was_corrected_by_ontology']
         })
     except Exception as e:
         # Top-level boundary of the endpoint: report the failure to the client
         # instead of letting the request crash.
         return JSONResponse({"error": str(e)}, status_code=500)
 @app.get("/health")
 async def health_check():
     """Report service liveness and whether the classifier object is available."""
     model_loaded = classifier is not None
     return {"status": "healthy", "model_loaded": model_loaded}