sentimentanalyzer01 committed on
Commit
9ce56d0
·
verified ·
1 Parent(s): 78ff76b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +166 -215
app.py CHANGED
@@ -19,13 +19,9 @@ from transformers import BertTokenizer, BertModel
19
  import warnings
20
  warnings.filterwarnings('ignore')
21
 
22
- # Определяем устройство
23
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
24
  print(f"Используется устройство: {device}")
25
 
26
- # ============================================================
27
- # ВСПОМОГАТЕЛЬНЫЕ ФУНКЦИИ
28
- # ============================================================
29
  def clean_russian_text(text):
30
  if not isinstance(text, str):
31
  return ""
@@ -44,11 +40,11 @@ def clean_russian_text(text):
44
  return text
45
 
46
  # ============================================================
47
- # ПОЛНАЯ ОНТОЛОГИЧЕСКАЯ МОДЕЛЬ (как в вашем Colab коде)
48
  # ============================================================
49
 
50
  class OntologyEmotionModel:
51
- def __init__(self, emotions: List[str]):
52
  self.emotions = emotions
53
  self.morph = pymorphy3.MorphAnalyzer()
54
  self.ontology_graph = nx.DiGraph()
@@ -57,50 +53,74 @@ class OntologyEmotionModel:
57
  self.verified_hypotheses = defaultdict(list)
58
  self.sentiment_lexicon = {}
59
  self.rule_stats = {}
60
-
 
 
 
61
  self._load_rusentilex()
62
  self.init_ontology_level1()
63
  self.init_ontology_level2()
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  def _load_rusentilex(self):
66
- # Создаем небольшой лексикон вручную (без загрузки из интернета)
67
- self.sentiment_lexicon = {
68
- 'хороший': 'радость', 'отличный': 'радость', 'прекрасный': 'радость',
69
- 'плохой': 'грусть', 'ужасный': 'грусть', 'отвратительный': 'грусть',
70
- 'злой': 'злость', 'бесит': 'злость', 'ненавижу': 'злость',
71
- 'страшно': 'страх', 'боюсь': 'страх', 'опасно': 'страх'
72
- }
73
- print(f"Лексикон загружен, слов: {len(self.sentiment_lexicon)}")
74
 
75
  def init_ontology_level1(self):
76
  self.emotion_definitions = {
77
  'радость': {
78
- 'valence': 'positive',
79
- 'arousal': 'high',
80
  'definition': 'Позитивное эмоциональное состояние',
81
  'opposite': ['грусть', 'злость']
82
  },
83
  'грусть': {
84
- 'valence': 'negative',
85
- 'arousal': 'low',
86
  'definition': 'Негативное эмоциональное состояние',
87
  'opposite': ['радость']
88
  },
89
  'злость': {
90
- 'valence': 'negative',
91
- 'arousal': 'high',
92
  'definition': 'Негативное эмоциональное состояние',
93
  'opposite': ['радость']
94
  },
95
  'страх': {
96
- 'valence': 'negative',
97
- 'arousal': 'high',
98
  'definition': 'Эмоциональная реакция на угрозу',
99
  'opposite': ['уверенность', 'спокойствие']
100
  },
101
  'сарказм': {
102
- 'valence': 'negative',
103
- 'arousal': 'high',
104
  'definition': 'Язвительная насмешка',
105
  'opposite': ['радость']
106
  }
@@ -110,7 +130,6 @@ class OntologyEmotionModel:
110
  self.ontology_graph.add_node(emotion, **self.emotion_definitions[emotion])
111
  else:
112
  self.ontology_graph.add_node(emotion, valence='neutral', arousal='neutral')
113
-
114
  for emotion, data in self.emotion_definitions.items():
115
  if 'opposite' in data:
116
  for opposite in data['opposite']:
@@ -119,61 +138,62 @@ class OntologyEmotionModel:
119
 
120
  def init_ontology_level2(self):
121
  self.linguistic_rules = {
122
- 'усилители': {
123
- 'words': ['очень', 'сильно', 'крайне', 'чрезвычайно', 'невероятно', 'абсолютно'],
124
- 'effect': 'increase_arousal',
125
- 'weight': 0.3
126
- },
127
- 'ослабители': {
128
- 'words': ['слегка', 'немного', 'чуть-чуть', 'отчасти', 'несколько'],
129
- 'effect': 'decrease_arousal',
130
- 'weight': -0.2
131
- },
132
- 'отрицания': {
133
- 'words': ['не', 'ни', 'нет', 'нельзя', 'невозможно'],
134
- 'effect': 'negation',
135
- 'weight': -0.5
136
- },
137
- 'восклицания': {
138
- 'patterns': [r'!+', r'\?+', r'\.{3,}'],
139
- 'effect': 'increase_arousal',
140
- 'weight': 0.4
141
- },
142
- 'вопросительные': {
143
- 'patterns': [r'\?+'],
144
- 'effect': 'uncertainty',
145
- 'weight': 0.2
146
- },
147
- 'сарказм_маркеры': {
148
- 'words': ['какой', 'такой', 'прям', 'ага', 'ну да'],
149
- 'effect': 'sarcasm',
150
- 'weight': 0.3
151
- }
152
  }
153
 
154
  def add_empirical_knowledge(self, text: str, emotion: str, confidence: float):
155
- self.empirical_base[emotion].append({
156
- 'text': text,
157
- 'confidence': confidence,
158
- 'timestamp': pd.Timestamp.now()
159
- })
160
  if len(self.empirical_base[emotion]) > 1000:
161
  self.empirical_base[emotion] = self.empirical_base[emotion][-1000:]
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  def apply_linguistic_rules(self, text: str) -> Dict:
164
  rules_applied = []
165
  adjustments = {'valence': 0, 'arousal': 0, 'uncertainty': 0, 'sarcasm': 0}
166
  words = text.lower().split()
167
  parsed = [self.morph.parse(w)[0] for w in words]
168
  lemmas = [p.normal_form for p in parsed]
 
169
 
170
  for category, rule in self.linguistic_rules.items():
171
  if 'words' in rule:
172
  for word in rule['words']:
173
  if word in lemmas:
174
  rules_applied.append(f"{category}: {word}")
175
- effect = rule['effect']
176
- weight = rule['weight']
177
  if effect == 'increase_arousal':
178
  adjustments['arousal'] += weight
179
  elif effect == 'decrease_arousal':
@@ -192,7 +212,6 @@ class OntologyEmotionModel:
192
  elif rule['effect'] == 'uncertainty':
193
  adjustments['uncertainty'] += weight
194
 
195
- # Обработка отрицания
196
  if 'не' in lemmas:
197
  idx = lemmas.index('не')
198
  if idx + 1 < len(lemmas) and lemmas[idx+1] == 'очень':
@@ -201,101 +220,105 @@ class OntologyEmotionModel:
201
  rules_applied.append("сочетание: не очень")
202
  else:
203
  for j in range(idx+1, min(idx+4, len(lemmas))):
204
- sentiment = self.sentiment_lexicon.get(lemmas[j], 'neutral')
205
- if sentiment in ('грусть', 'злость', 'страх'):
206
- adjustments['valence'] += 1.0
207
- rules_applied.append(f"инверсия негатива: не {lemmas[j]}")
208
- elif sentiment == 'радость':
209
- adjustments['valence'] -= 1.0
210
- rules_applied.append(f"инверсия позитива: не {lemmas[j]}")
211
- break
 
 
212
 
213
- return {
214
- 'rules_applied': rules_applied,
215
- 'adjustments': adjustments,
216
- 'lemmas': lemmas
217
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
  def adjust_prediction_with_rules(self, prediction: Dict, rule_analysis: Dict) -> Dict:
220
  original_emotion = prediction['emotion']
221
  original_confidence = prediction['confidence']
222
  adj = rule_analysis['adjustments']
223
  rules = rule_analysis['rules_applied']
224
-
225
  conf_mult = 1.0 + adj['arousal'] * 0.2 + adj['uncertainty'] * 0.1 - abs(adj['valence']) * 0.1
226
  conf_mult = np.clip(conf_mult, 0.5, 1.5)
227
  new_confidence = original_confidence * conf_mult
228
  new_emotion = original_emotion
229
-
230
  for rule in rules:
231
  if rule.startswith("инверсия негатива:"):
232
  new_emotion = 'радость'
233
  break
234
  elif rule.startswith("инверсия позитива:"):
235
- if adj['arousal'] > 0.3:
236
- new_emotion = 'злость'
237
- else:
238
- new_emotion = 'грусть'
239
  break
240
-
241
- sarcasm_flag = adj['sarcasm'] > 0.5
242
- if sarcasm_flag and original_emotion == 'радость':
243
  new_emotion = 'сарказм'
244
  new_confidence *= 0.8
245
-
246
  if any('восклицание' in r for r in rules):
247
  new_confidence = min(new_confidence * 1.2, 1.0)
248
-
249
- return {
250
- 'emotion': new_emotion,
251
- 'confidence': new_confidence,
252
- 'rules_applied': rules
253
- }
254
-
255
  def get_ontology_analysis(self, text: str, model_prediction: Dict) -> Dict:
256
  rule_analysis = self.apply_linguistic_rules(text)
257
  adjusted = self.adjust_prediction_with_rules(model_prediction, rule_analysis)
258
-
 
259
  return {
260
  'rule_analysis': rule_analysis,
261
- 'adjusted_prediction': adjusted
 
 
262
  }
263
-
264
  def get_statistics(self) -> Dict:
265
  return {
266
  'ontology_nodes': len(self.ontology_graph.nodes),
267
  'ontology_edges': len(self.ontology_graph.edges),
268
  'linguistic_rules': len(self.linguistic_rules),
269
  'emotions_covered': len(self.emotions),
270
- 'lexicon_size': len(self.sentiment_lexicon)
271
  }
272
 
273
  # ============================================================
274
  # КЛАССЫ МОДЕЛЕЙ LSTM и BERT
275
  # ============================================================
 
276
  class EmotionLSTM(nn.Module):
277
- def __init__(self, vocab_size, embed_dim=128, hidden_dim=256,
278
- num_classes=3, dropout=0.3, num_layers=2):
279
  super().__init__()
280
  self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
281
- self.lstm = nn.LSTM(
282
- embed_dim,
283
- hidden_dim,
284
- num_layers=num_layers,
285
- batch_first=True,
286
- bidirectional=True,
287
- dropout=dropout if num_layers > 1 else 0
288
- )
289
  self.dropout = nn.Dropout(dropout)
290
  self.classifier = nn.Sequential(
291
- nn.Linear(hidden_dim * 2, 128),
292
- nn.ReLU(),
293
- nn.Dropout(dropout),
294
- nn.Linear(128, 64),
295
- nn.ReLU(),
296
- nn.Linear(64, num_classes)
297
  )
298
-
299
  def forward(self, x, return_confidence=False):
300
  embedded = self.embedding(x)
301
  lstm_out, (hidden, cell) = self.lstm(embedded)
@@ -312,17 +335,12 @@ class EmotionBERT(nn.Module):
312
  def __init__(self, bert_model_name, num_classes, dropout=0.3):
313
  super().__init__()
314
  self.bert = BertModel.from_pretrained(bert_model_name)
315
- for p in list(self.bert.parameters())[:-50]:
316
- p.requires_grad = False
317
  hidden = self.bert.config.hidden_size
318
  self.classifier = nn.Sequential(
319
- nn.Dropout(dropout),
320
- nn.Linear(hidden, 256), nn.ReLU(),
321
- nn.Dropout(dropout),
322
- nn.Linear(256, 128), nn.ReLU(),
323
  nn.Linear(128, num_classes)
324
  )
325
-
326
  def forward(self, input_ids, attention_mask, return_confidence=False):
327
  out = self.bert(input_ids, attention_mask, return_dict=True)
328
  cls = out.last_hidden_state[:, 0, :]
@@ -333,13 +351,8 @@ class EmotionBERT(nn.Module):
333
  return logits, conf
334
  return logits
335
 
336
- # ============================================================
337
- # КАСКАДНЫЙ КЛАССИФИКАТОР
338
- # ============================================================
339
  class CascadeEmotionClassifier:
340
- def __init__(self, lstm_model, bert_model, vocab, tokenizer,
341
- label_encoder, ontology_model, threshold=0.95, device='cpu',
342
- max_length_lstm=100, max_length_bert=128):
343
  self.lstm_model = lstm_model
344
  self.bert_model = bert_model
345
  self.vocab = vocab
@@ -350,83 +363,59 @@ class CascadeEmotionClassifier:
350
  self.device = device
351
  self.max_length_lstm = max_length_lstm
352
  self.max_length_bert = max_length_bert
353
-
354
  self.lstm_model.eval()
355
  self.bert_model.eval()
356
  self.lstm_model.to(device)
357
  self.bert_model.to(device)
358
-
359
- self.stats = {'total': 0, 'lstm': 0, 'bert': 0}
360
-
361
  def text_to_sequence(self, text):
362
  words = str(text).split()[:self.max_length_lstm]
363
  sequence = [self.vocab.get(word, self.vocab.get('<UNK>', 1)) for word in words]
364
  if len(sequence) < self.max_length_lstm:
365
  sequence += [self.vocab.get('<PAD>', 0)] * (self.max_length_lstm - len(sequence))
366
  return sequence[:self.max_length_lstm]
367
-
368
  def predict(self, text):
369
  self.stats['total'] += 1
370
  text_clean = clean_russian_text(text)
371
-
372
  seq = torch.LongTensor([self.text_to_sequence(text_clean)]).to(self.device)
373
  with torch.no_grad():
374
  lstm_logits, lstm_conf = self.lstm_model(seq, return_confidence=True)
375
  lstm_probs = torch.softmax(lstm_logits, dim=1)
376
  lstm_pred = lstm_probs.argmax().item()
377
-
378
  lstm_emo = self.label_encoder.inverse_transform([lstm_pred])[0]
379
- lstm_pred_dict = {
380
- 'emotion': lstm_emo,
381
- 'confidence': lstm_conf.item(),
382
- 'probabilities': lstm_probs[0].cpu().numpy().tolist()
383
- }
384
-
385
  lstm_onto = self.ontology_model.get_ontology_analysis(text_clean, lstm_pred_dict)
386
- lstm_adjusted = lstm_onto['adjusted_prediction']
387
-
388
- if lstm_adjusted['confidence'] >= self.threshold:
389
  self.stats['lstm'] += 1
390
- final = lstm_adjusted
391
- used_model = "LSTM + онтология"
392
  else:
393
  self.stats['bert'] += 1
394
- enc = self.tokenizer(text_clean, truncation=True, padding=True,
395
- max_length=self.max_length_bert, return_tensors='pt').to(self.device)
396
  with torch.no_grad():
397
  bert_logits, bert_conf = self.bert_model(enc['input_ids'], enc['attention_mask'], return_confidence=True)
398
  bert_probs = torch.softmax(bert_logits, dim=1)
399
  bert_pred = bert_probs.argmax().item()
400
-
401
  bert_emo = self.label_encoder.inverse_transform([bert_pred])[0]
402
- bert_pred_dict = {
403
- 'emotion': bert_emo,
404
- 'confidence': bert_conf.item(),
405
- 'probabilities': bert_probs[0].cpu().numpy().tolist()
406
- }
407
-
408
  bert_onto = self.ontology_model.get_ontology_analysis(text_clean, bert_pred_dict)
409
- bert_adjusted = bert_onto['adjusted_prediction']
410
- final = bert_adjusted
411
- used_model = "BERT + онтология"
412
- lstm_onto = bert_onto
413
-
414
  return {
415
  'text': text,
416
  'predicted_emotion': final['emotion'],
417
  'confidence': float(final['confidence']),
418
- 'used_model': used_model,
419
- 'rules_applied': lstm_onto['rule_analysis']['rules_applied'],
420
- 'class_probabilities': {
421
- emo: float(prob)
422
- for emo, prob in zip(self.label_encoder.classes_, final.get('probabilities', lstm_pred_dict['probabilities']))
423
- },
424
- 'was_corrected': len(lstm_onto['rule_analysis']['rules_applied']) > 0
425
  }
426
 
427
  # ============================================================
428
- # ЗАГРУЗКА МОДЕЛИ
429
  # ============================================================
 
430
  def load_model():
431
  print("Загрузка модели...")
432
  model_dir = 'model'
@@ -440,10 +429,11 @@ def load_model():
440
  with open(f'{model_dir}/label_encoder.pkl', 'rb') as f:
441
  label_encoder = pickle.load(f)
442
 
443
- # СОЗДАЁМ ОНТОЛОГИЮ ЗАНОВО (сохраняем все функции, но без загрузки из файла)
444
- print("📂 Создание онтологии...")
445
- ontology_model = OntologyEmotionModel(emotions=list(label_encoder.classes_))
446
- print(f"✅ Онтология создана, классов: {len(label_encoder.classes_)}")
 
447
 
448
  # LSTM
449
  lstm_model = EmotionLSTM(
@@ -488,6 +478,7 @@ def load_model():
488
  # ============================================================
489
  # FASTAPI ПРИЛОЖЕНИЕ
490
  # ============================================================
 
491
  app = FastAPI(title="Emotion Analysis with BERT and Ontology")
492
  templates = Jinja2Templates(directory="templates")
493
 
@@ -501,25 +492,16 @@ async def startup_event():
501
 
502
  @app.get("/", response_class=HTMLResponse)
503
  async def home(request: Request):
504
- return templates.TemplateResponse(
505
- "index.html",
506
- {
507
- "request": request,
508
- "classes": classifier.label_encoder.classes_.tolist() if classifier else []
509
- }
510
- )
511
 
512
  @app.post("/predict")
513
  async def predict(text: str = Form(...)):
514
  if not classifier:
515
- raise HTTPException(status_code=503, detail="Модель еще не загружена")
516
-
517
  if not text or len(text.strip()) < 3:
518
  return JSONResponse({"error": "Введите хотя бы 3 символа."}, status_code=400)
519
-
520
  try:
521
  result = classifier.predict(text)
522
-
523
  rules_display = []
524
  for rule in result['rules_applied'][:10]:
525
  if ':' in rule:
@@ -527,48 +509,17 @@ async def predict(text: str = Form(...)):
527
  rules_display.append(f"<span class='rule-tag'>{cat}: {val}</span>")
528
  else:
529
  rules_display.append(f"<span class='rule-tag'>{rule}</span>")
530
-
531
- probs_display = []
532
- for emotion, prob in result['class_probabilities'].items():
533
- percentage = prob * 100
534
- probs_display.append(f"""
535
- <div class="prob-item">
536
- <span class="prob-label">{emotion}</span>
537
- <div class="prob-bar-container">
538
- <div class="prob-bar" style="width: {percentage}%"></div>
539
- </div>
540
- <span class="prob-value">{percentage:.1f}%</span>
541
- </div>
542
- """)
543
-
544
  return JSONResponse({
545
  "success": True,
546
- "text": result['text'][:200] + "..." if len(result['text']) > 200 else result['text'],
547
  "emotion": result['predicted_emotion'],
548
  "confidence": f"{result['confidence']*100:.1f}%",
549
  "used_model": result['used_model'],
550
  "rules": "".join(rules_display) if rules_display else "Нет правил",
551
- "probabilities": "".join(probs_display),
552
- "was_corrected": result['was_corrected']
553
  })
554
  except Exception as e:
555
  return JSONResponse({"error": str(e)}, status_code=500)
556
 
557
- @app.get("/stats")
558
- async def get_stats():
559
- if not classifier:
560
- raise HTTPException(status_code=503, detail="Модель не загружена")
561
-
562
- stats = classifier.stats
563
- onto_stats = classifier.ontology_model.get_statistics()
564
-
565
- return JSONResponse({
566
- "total_predictions": stats['total'],
567
- "lstm_used": stats['lstm'],
568
- "bert_used": stats['bert'],
569
- "ontology_stats": onto_stats
570
- })
571
-
572
  @app.get("/health")
573
  async def health_check():
574
  return {"status": "healthy", "model_loaded": classifier is not None}
 
19
  import warnings
20
  warnings.filterwarnings('ignore')
21
 
 
22
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
23
  print(f"Используется устройство: {device}")
24
 
 
 
 
25
  def clean_russian_text(text):
26
  if not isinstance(text, str):
27
  return ""
 
40
  return text
41
 
42
  # ============================================================
43
+ # ПОЛНЫЙ КЛАСС ОНТОЛОГИИ (как в Colab)
44
  # ============================================================
45
 
46
  class OntologyEmotionModel:
47
+ def __init__(self, emotions: List[str], train_texts: List[str] = None, train_labels: List[int] = None):
48
  self.emotions = emotions
49
  self.morph = pymorphy3.MorphAnalyzer()
50
  self.ontology_graph = nx.DiGraph()
 
53
  self.verified_hypotheses = defaultdict(list)
54
  self.sentiment_lexicon = {}
55
  self.rule_stats = {}
56
+
57
+ if train_texts is not None and train_labels is not None:
58
+ self._build_sentiment_lexicon(train_texts, train_labels)
59
+
60
  self._load_rusentilex()
61
  self.init_ontology_level1()
62
  self.init_ontology_level2()
63
 
64
+ def _build_sentiment_lexicon(self, texts: List[str], labels: List[int]):
65
+ word_class_counts = defaultdict(lambda: np.zeros(len(self.emotions)))
66
+ for text, label in zip(texts, labels):
67
+ words = set(clean_russian_text(text).split())
68
+ for word in words:
69
+ lemma = self.morph.parse(word)[0].normal_form
70
+ word_class_counts[lemma][label] += 1
71
+ for lemma, counts in word_class_counts.items():
72
+ prob = counts / (counts.sum() + 1e-10)
73
+ if prob.max() > 0.6 and counts.sum() > 5:
74
+ dominant_class = self.emotions[np.argmax(prob)]
75
+ self.sentiment_lexicon[lemma] = dominant_class
76
+
77
+ def _parse_rusentilex(self, content):
78
+ lines = content.splitlines()
79
+ for line in lines[1:]:
80
+ parts = line.strip().split(',')
81
+ if len(parts) >= 3:
82
+ word = parts[0].strip().lower()
83
+ sentiment = parts[2].strip().lower()
84
+ lemma = self.morph.parse(word)[0].normal_form
85
+ if sentiment == 'positive':
86
+ self.sentiment_lexicon[lemma] = 'радость'
87
+ elif sentiment == 'negative':
88
+ self.sentiment_lexicon[lemma] = 'грусть'
89
+
90
  def _load_rusentilex(self):
91
+ url = "https://raw.githubusercontent.com/nicolay-r/sentiment-relation-classifiers/master/data/rusentilex.csv"
92
+ try:
93
+ r = requests.get(url, timeout=10)
94
+ if r.status_code == 200:
95
+ self._parse_rusentilex(r.text)
96
+ print("RuSentiLex загружен")
97
+ except Exception as e:
98
+ print(f"RuSentiLex не загружен: {e}")
99
 
100
  def init_ontology_level1(self):
101
  self.emotion_definitions = {
102
  'радость': {
103
+ 'valence': 'positive', 'arousal': 'high',
 
104
  'definition': 'Позитивное эмоциональное состояние',
105
  'opposite': ['грусть', 'злость']
106
  },
107
  'грусть': {
108
+ 'valence': 'negative', 'arousal': 'low',
 
109
  'definition': 'Негативное эмоциональное состояние',
110
  'opposite': ['радость']
111
  },
112
  'злость': {
113
+ 'valence': 'negative', 'arousal': 'high',
 
114
  'definition': 'Негативное эмоциональное состояние',
115
  'opposite': ['радость']
116
  },
117
  'страх': {
118
+ 'valence': 'negative', 'arousal': 'high',
 
119
  'definition': 'Эмоциональная реакция на угрозу',
120
  'opposite': ['уверенность', 'спокойствие']
121
  },
122
  'сарказм': {
123
+ 'valence': 'negative', 'arousal': 'high',
 
124
  'definition': 'Язвительная насмешка',
125
  'opposite': ['радость']
126
  }
 
130
  self.ontology_graph.add_node(emotion, **self.emotion_definitions[emotion])
131
  else:
132
  self.ontology_graph.add_node(emotion, valence='neutral', arousal='neutral')
 
133
  for emotion, data in self.emotion_definitions.items():
134
  if 'opposite' in data:
135
  for opposite in data['opposite']:
 
138
 
139
  def init_ontology_level2(self):
140
  self.linguistic_rules = {
141
+ 'усилители': {'words': ['очень', 'сильно', 'крайне', 'чрезвычайно', 'невероятно', 'абсолютно'], 'effect': 'increase_arousal', 'weight': 0.3, 'learnable': True},
142
+ 'ослабители': {'words': ['слегка', 'немного', 'чуть-чуть', 'отчасти', 'несколько'], 'effect': 'decrease_arousal', 'weight': -0.2, 'learnable': True},
143
+ 'отрицания': {'words': ['не', 'ни', 'нет', 'нельзя', 'невозможно'], 'effect': 'negation', 'weight': -0.5, 'learnable': True},
144
+ 'восклицания': {'patterns': [r'!+', r'\?+', r'\.{3,}'], 'effect': 'increase_arousal', 'weight': 0.4, 'learnable': True},
145
+ 'вопросительные': {'patterns': [r'\?+'], 'effect': 'uncertainty', 'weight': 0.2, 'learnable': True},
146
+ 'сарказм_маркеры': {'words': ['какой', 'такой', 'прям', 'ага', 'ну да'], 'effect': 'sarcasm', 'weight': 0.3, 'learnable': True}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
 
149
  def add_empirical_knowledge(self, text: str, emotion: str, confidence: float):
150
+ self.empirical_base[emotion].append({'text': text, 'confidence': confidence})
 
 
 
 
151
  if len(self.empirical_base[emotion]) > 1000:
152
  self.empirical_base[emotion] = self.empirical_base[emotion][-1000:]
153
 
154
+ def formulate_hypothesis(self, text: str, model_prediction: Dict, rule_based_prediction: Dict) -> Dict:
155
+ hypothesis_id = f"hyp_{len(self.hypotheses_db) + 1:06d}"
156
+ hypothesis = {
157
+ 'id': hypothesis_id, 'text': text,
158
+ 'model_prediction': model_prediction,
159
+ 'rule_based_prediction': rule_based_prediction,
160
+ 'disagreement': self.calculate_disagreement(model_prediction, rule_based_prediction),
161
+ 'status': 'pending'
162
+ }
163
+ self.hypotheses_db[hypothesis_id] = hypothesis
164
+ return hypothesis
165
+
166
+ def verify_hypothesis(self, hypothesis_id: str, actual_emotion: str = None) -> Dict:
167
+ if hypothesis_id not in self.hypotheses_db:
168
+ return None
169
+ hypothesis = self.hypotheses_db[hypothesis_id]
170
+ if actual_emotion:
171
+ model_correct = hypothesis['model_prediction']['emotion'] == actual_emotion
172
+ rule_correct = hypothesis['rule_based_prediction']['emotion'] == actual_emotion
173
+ if model_correct and not rule_correct:
174
+ hypothesis['status'] = 'model_superior'
175
+ elif rule_correct and not model_correct:
176
+ hypothesis['status'] = 'rule_superior'
177
+ elif model_correct and rule_correct:
178
+ hypothesis['status'] = 'both_correct'
179
+ else:
180
+ hypothesis['status'] = 'both_incorrect'
181
+ return hypothesis
182
+
183
  def apply_linguistic_rules(self, text: str) -> Dict:
184
  rules_applied = []
185
  adjustments = {'valence': 0, 'arousal': 0, 'uncertainty': 0, 'sarcasm': 0}
186
  words = text.lower().split()
187
  parsed = [self.morph.parse(w)[0] for w in words]
188
  lemmas = [p.normal_form for p in parsed]
189
+ pos_tags = [p.tag.POS for p in parsed]
190
 
191
  for category, rule in self.linguistic_rules.items():
192
  if 'words' in rule:
193
  for word in rule['words']:
194
  if word in lemmas:
195
  rules_applied.append(f"{category}: {word}")
196
+ effect = rule['effect']; weight = rule['weight']
 
197
  if effect == 'increase_arousal':
198
  adjustments['arousal'] += weight
199
  elif effect == 'decrease_arousal':
 
212
  elif rule['effect'] == 'uncertainty':
213
  adjustments['uncertainty'] += weight
214
 
 
215
  if 'не' in lemmas:
216
  idx = lemmas.index('не')
217
  if idx + 1 < len(lemmas) and lemmas[idx+1] == 'очень':
 
220
  rules_applied.append("сочетание: не очень")
221
  else:
222
  for j in range(idx+1, min(idx+4, len(lemmas))):
223
+ if pos_tags[j] in ('ADJF', 'ADJS', 'ADVB'):
224
+ target_word = lemmas[j]
225
+ sentiment = self.sentiment_lexicon.get(target_word, 'neutral')
226
+ if sentiment in ('грусть', 'злость', 'страх'):
227
+ adjustments['valence'] += 1.0
228
+ rules_applied.append(f"инверсия негатива: не {target_word}")
229
+ elif sentiment == 'радость':
230
+ adjustments['valence'] -= 1.0
231
+ rules_applied.append(f"инверсия позитива: не {target_word}")
232
+ break
233
 
234
+ pos_words = [w for w in lemmas if self.sentiment_lexicon.get(w) == 'радость']
235
+ neg_words = [w for w in lemmas if self.sentiment_lexicon.get(w) in ('грусть', 'злость', 'страх')]
236
+ if pos_words and neg_words:
237
+ adjustments['sarcasm'] += 0.5
238
+ rules_applied.append(f"контраст тональности: позитив {pos_words[:2]} vs негатив {neg_words[:2]}")
239
+
240
+ return {'rules_applied': rules_applied, 'adjustments': adjustments, 'lemmas': lemmas}
241
+
242
+ def calculate_disagreement(self, pred1: Dict, pred2: Dict) -> float:
243
+ if pred1['emotion'] == pred2['emotion']:
244
+ return 0.0
245
+ emotions = list(self.emotion_definitions.keys())
246
+ idx1 = emotions.index(pred1['emotion']) if pred1['emotion'] in emotions else -1
247
+ idx2 = emotions.index(pred2['emotion']) if pred2['emotion'] in emotions else -1
248
+ if idx1 == -1 or idx2 == -1:
249
+ return 0.5
250
+ distance = abs(idx1 - idx2) / len(emotions)
251
+ return 0.7 * distance
252
+
253
+ def explain_transition(self, from_emotion: str, to_emotion: str) -> List[str]:
254
+ try:
255
+ return nx.shortest_path(self.ontology_graph, source=from_emotion, target=to_emotion)
256
+ except:
257
+ return []
258
 
259
  def adjust_prediction_with_rules(self, prediction: Dict, rule_analysis: Dict) -> Dict:
260
  original_emotion = prediction['emotion']
261
  original_confidence = prediction['confidence']
262
  adj = rule_analysis['adjustments']
263
  rules = rule_analysis['rules_applied']
264
+
265
  conf_mult = 1.0 + adj['arousal'] * 0.2 + adj['uncertainty'] * 0.1 - abs(adj['valence']) * 0.1
266
  conf_mult = np.clip(conf_mult, 0.5, 1.5)
267
  new_confidence = original_confidence * conf_mult
268
  new_emotion = original_emotion
269
+
270
  for rule in rules:
271
  if rule.startswith("инверсия негатива:"):
272
  new_emotion = 'радость'
273
  break
274
  elif rule.startswith("инверсия позитива:"):
275
+ new_emotion = 'грусть' if adj['arousal'] <= 0.3 else 'злость'
 
 
 
276
  break
277
+
278
+ if adj['sarcasm'] > 0.5 and original_emotion == 'радость':
 
279
  new_emotion = 'сарказм'
280
  new_confidence *= 0.8
281
+
282
  if any('восклицание' in r for r in rules):
283
  new_confidence = min(new_confidence * 1.2, 1.0)
284
+
285
+ return {'emotion': new_emotion, 'confidence': new_confidence, 'rules_applied': rules}
286
+
 
 
 
 
287
  def get_ontology_analysis(self, text: str, model_prediction: Dict) -> Dict:
288
  rule_analysis = self.apply_linguistic_rules(text)
289
  adjusted = self.adjust_prediction_with_rules(model_prediction, rule_analysis)
290
+ disagreement = self.calculate_disagreement(model_prediction, adjusted)
291
+ hypothesis = self.formulate_hypothesis(text, model_prediction, adjusted) if disagreement > 0.2 else None
292
  return {
293
  'rule_analysis': rule_analysis,
294
+ 'adjusted_prediction': adjusted,
295
+ 'disagreement': disagreement,
296
+ 'hypothesis': hypothesis
297
  }
298
+
299
  def get_statistics(self) -> Dict:
300
  return {
301
  'ontology_nodes': len(self.ontology_graph.nodes),
302
  'ontology_edges': len(self.ontology_graph.edges),
303
  'linguistic_rules': len(self.linguistic_rules),
304
  'emotions_covered': len(self.emotions),
305
+ 'pending_hypotheses': len([h for h in self.hypotheses_db.values() if h['status'] == 'pending'])
306
  }
307
 
308
  # ============================================================
309
  # КЛАССЫ МОДЕЛЕЙ LSTM и BERT
310
  # ============================================================
311
+
312
  class EmotionLSTM(nn.Module):
313
+ def __init__(self, vocab_size, embed_dim=128, hidden_dim=256, num_classes=3, dropout=0.3, num_layers=2):
 
314
  super().__init__()
315
  self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
316
+ self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers, batch_first=True, bidirectional=True, dropout=dropout)
 
 
 
 
 
 
 
317
  self.dropout = nn.Dropout(dropout)
318
  self.classifier = nn.Sequential(
319
+ nn.Linear(hidden_dim * 2, 128), nn.ReLU(), nn.Dropout(dropout),
320
+ nn.Linear(128, 64), nn.ReLU(), nn.Linear(64, num_classes)
 
 
 
 
321
  )
 
322
  def forward(self, x, return_confidence=False):
323
  embedded = self.embedding(x)
324
  lstm_out, (hidden, cell) = self.lstm(embedded)
 
335
  def __init__(self, bert_model_name, num_classes, dropout=0.3):
336
  super().__init__()
337
  self.bert = BertModel.from_pretrained(bert_model_name)
 
 
338
  hidden = self.bert.config.hidden_size
339
  self.classifier = nn.Sequential(
340
+ nn.Dropout(dropout), nn.Linear(hidden, 256), nn.ReLU(),
341
+ nn.Dropout(dropout), nn.Linear(256, 128), nn.ReLU(),
 
 
342
  nn.Linear(128, num_classes)
343
  )
 
344
  def forward(self, input_ids, attention_mask, return_confidence=False):
345
  out = self.bert(input_ids, attention_mask, return_dict=True)
346
  cls = out.last_hidden_state[:, 0, :]
 
351
  return logits, conf
352
  return logits
353
 
 
 
 
354
  class CascadeEmotionClassifier:
355
+ def __init__(self, lstm_model, bert_model, vocab, tokenizer, label_encoder, ontology_model, threshold=0.95, device='cpu', max_length_lstm=100, max_length_bert=128):
 
 
356
  self.lstm_model = lstm_model
357
  self.bert_model = bert_model
358
  self.vocab = vocab
 
363
  self.device = device
364
  self.max_length_lstm = max_length_lstm
365
  self.max_length_bert = max_length_bert
 
366
  self.lstm_model.eval()
367
  self.bert_model.eval()
368
  self.lstm_model.to(device)
369
  self.bert_model.to(device)
370
+ self.stats = {'total': 0, 'lstm': 0, 'bert': 0, 'corrections': 0}
371
+
 
372
  def text_to_sequence(self, text):
373
  words = str(text).split()[:self.max_length_lstm]
374
  sequence = [self.vocab.get(word, self.vocab.get('<UNK>', 1)) for word in words]
375
  if len(sequence) < self.max_length_lstm:
376
  sequence += [self.vocab.get('<PAD>', 0)] * (self.max_length_lstm - len(sequence))
377
  return sequence[:self.max_length_lstm]
378
+
379
  def predict(self, text):
380
  self.stats['total'] += 1
381
  text_clean = clean_russian_text(text)
 
382
  seq = torch.LongTensor([self.text_to_sequence(text_clean)]).to(self.device)
383
  with torch.no_grad():
384
  lstm_logits, lstm_conf = self.lstm_model(seq, return_confidence=True)
385
  lstm_probs = torch.softmax(lstm_logits, dim=1)
386
  lstm_pred = lstm_probs.argmax().item()
 
387
  lstm_emo = self.label_encoder.inverse_transform([lstm_pred])[0]
388
+ lstm_pred_dict = {'emotion': lstm_emo, 'confidence': lstm_conf.item(), 'probabilities': lstm_probs[0].cpu().numpy().tolist()}
 
 
 
 
 
389
  lstm_onto = self.ontology_model.get_ontology_analysis(text_clean, lstm_pred_dict)
390
+ if lstm_onto['adjusted_prediction']['confidence'] >= self.threshold:
 
 
391
  self.stats['lstm'] += 1
392
+ final = lstm_onto['adjusted_prediction']
393
+ used = "LSTM + онтология"
394
  else:
395
  self.stats['bert'] += 1
396
+ enc = self.tokenizer(text_clean, truncation=True, padding=True, max_length=self.max_length_bert, return_tensors='pt').to(self.device)
 
397
  with torch.no_grad():
398
  bert_logits, bert_conf = self.bert_model(enc['input_ids'], enc['attention_mask'], return_confidence=True)
399
  bert_probs = torch.softmax(bert_logits, dim=1)
400
  bert_pred = bert_probs.argmax().item()
 
401
  bert_emo = self.label_encoder.inverse_transform([bert_pred])[0]
402
+ bert_pred_dict = {'emotion': bert_emo, 'confidence': bert_conf.item(), 'probabilities': bert_probs[0].cpu().numpy().tolist()}
 
 
 
 
 
403
  bert_onto = self.ontology_model.get_ontology_analysis(text_clean, bert_pred_dict)
404
+ final = bert_onto['adjusted_prediction']
405
+ used = "BERT + онтология"
 
 
 
406
  return {
407
  'text': text,
408
  'predicted_emotion': final['emotion'],
409
  'confidence': float(final['confidence']),
410
+ 'used_model': used,
411
+ 'rules_applied': bert_onto['rule_analysis']['rules_applied'],
412
+ 'was_corrected_by_ontology': len(bert_onto['rule_analysis']['rules_applied']) > 0
 
 
 
 
413
  }
414
 
415
  # ============================================================
416
+ # ЗАГРУЗКА МОДЕЛИ (с загрузкой сохранённой онтологии)
417
  # ============================================================
418
+
419
  def load_model():
420
  print("Загрузка модели...")
421
  model_dir = 'model'
 
429
  with open(f'{model_dir}/label_encoder.pkl', 'rb') as f:
430
  label_encoder = pickle.load(f)
431
 
432
+ # Загружаем сохранённую онтологию
433
+ print("📂 Загрузка сохранённой онтологии...")
434
+ with open(f'{model_dir}/ontology_model.pkl', 'rb') as f:
435
+ ontology_model = pickle.load(f)
436
+ print("✅ Онтология загружена")
437
 
438
  # LSTM
439
  lstm_model = EmotionLSTM(
 
478
  # ============================================================
479
  # FASTAPI ПРИЛОЖЕНИЕ
480
  # ============================================================
481
+
482
  app = FastAPI(title="Emotion Analysis with BERT and Ontology")
483
  templates = Jinja2Templates(directory="templates")
484
 
 
492
 
493
  @app.get("/", response_class=HTMLResponse)
494
  async def home(request: Request):
495
+ return templates.TemplateResponse("index.html", {"request": request})
 
 
 
 
 
 
496
 
497
  @app.post("/predict")
498
  async def predict(text: str = Form(...)):
499
  if not classifier:
500
+ raise HTTPException(status_code=503, detail="Модель не загружена")
 
501
  if not text or len(text.strip()) < 3:
502
  return JSONResponse({"error": "Введите хотя бы 3 символа."}, status_code=400)
 
503
  try:
504
  result = classifier.predict(text)
 
505
  rules_display = []
506
  for rule in result['rules_applied'][:10]:
507
  if ':' in rule:
 
509
  rules_display.append(f"<span class='rule-tag'>{cat}: {val}</span>")
510
  else:
511
  rules_display.append(f"<span class='rule-tag'>{rule}</span>")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
512
  return JSONResponse({
513
  "success": True,
 
514
  "emotion": result['predicted_emotion'],
515
  "confidence": f"{result['confidence']*100:.1f}%",
516
  "used_model": result['used_model'],
517
  "rules": "".join(rules_display) if rules_display else "Нет правил",
518
+ "was_corrected": result['was_corrected_by_ontology']
 
519
  })
520
  except Exception as e:
521
  return JSONResponse({"error": str(e)}, status_code=500)
522
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523
  @app.get("/health")
524
  async def health_check():
525
  return {"status": "healthy", "model_loaded": classifier is not None}