Spaces:

sentimentanalyzer01
/

sentiment_analyzer

Runtime error

App Files Files Community

sentimentanalyzer01 committed on Mar 25

Commit

dd2fc2e

verified ·

1 Parent(s): 62aa224

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -99

app.py CHANGED Viewed

@@ -6,14 +6,12 @@ import torch
 import torch.nn as nn
 import numpy as np
 import re
-import pandas as pd
-import io
 from typing import Dict, List, Any, Optional
 from collections import defaultdict, Counter
 import networkx as nx
 import pymorphy3
 import requests
-from fastapi import FastAPI, Request, Form, HTTPException, UploadFile, File
 from fastapi.responses import HTMLResponse, JSONResponse
 from fastapi.templating import Jinja2Templates
 import uvicorn
@@ -93,20 +91,27 @@ class OntologyEmotionModel:
                     elif sentiment == 'negative':
                         self.sentiment_lexicon[lemma] = 'грусть'
                         added += 1
-            except Exception:
                 continue
         print(f"  Добавлено слов из RuSentiLex: {added}")
     def _load_rusentilex(self):
         """Загружает RuSentiLex из локального файла в папке model"""
         possible_paths = [
             'model/rusentilex.csv',
             'rusentilex.csv',
             '/app/model/rusentilex.csv',
             os.path.join(os.path.dirname(__file__), 'model', 'rusentilex.csv')
         ]
         loaded = False
         print("📂 Поиск RuSentiLex...")
         for path in possible_paths:
             if os.path.exists(path):
                 try:
@@ -118,6 +123,8 @@ class OntologyEmotionModel:
                     break
                 except Exception as e:
                     print(f"⚠️ Ошибка при загрузке {path}: {e}")
         if not loaded:
             print("⚠️ Локальный файл RuSentiLex не найден, пробуем скачать...")
             url = "https://raw.githubusercontent.com/nicolay-r/sentiment-relation-classifiers/master/data/rusentilex.csv"
@@ -129,8 +136,11 @@ class OntologyEmotionModel:
                     loaded = True
             except Exception as e:
                 print(f"⚠️ Не удалось загрузить RuSentiLex из репозитория: {e}")
         if not loaded:
             print("⚠️ RuSentiLex не загружен. Используется только статистический лексикон.")
         print(f"📊 Всего слов в лексиконе: {len(self.sentiment_lexicon)}")
     def init_ontology_level1(self):
@@ -349,6 +359,7 @@ class OntologyEmotionModel:
         adj = rule_analysis['adjustments']
         rules = rule_analysis['rules_applied']
         original_confidence_value = original_confidence
         was_corrected = len(rules) > 0
@@ -357,6 +368,7 @@ class OntologyEmotionModel:
         new_confidence = original_confidence * conf_mult
         new_emotion = original_emotion
         has_negative = any('негативное слово' in r for r in rules)
         has_positive = any('позитивное слово' in r for r in rules)
@@ -372,6 +384,7 @@ class OntologyEmotionModel:
             new_emotion = 'радость'
             rules.append("коррекция: позитивные слова")
         for rule in rules:
             if rule.startswith("инверсия негатива:"):
                 new_emotion = 'радость'
@@ -383,6 +396,7 @@ class OntologyEmotionModel:
                     new_emotion = 'грусть'
                 break
         sarcasm_flag = adj['sarcasm'] > 0.5
         if sarcasm_flag:
             new_emotion = 'сарказм'
@@ -390,12 +404,16 @@ class OntologyEmotionModel:
             if "саркастическая фраза" in str(rules):
                 new_confidence = min(new_confidence * 1.1, 0.95)
         if any('восклицание' in r for r in rules):
             new_confidence = min(new_confidence * 1.2, 1.0)
         if not was_corrected and original_confidence_value < 0.9:
             new_confidence = min(new_confidence * 1.10, 1.0)
         new_confidence = min(new_confidence, 1.0)
         return {
@@ -511,6 +529,7 @@ class CascadeEmotionClassifier:
         lstm_emo = self.label_encoder.inverse_transform([lstm_pred])[0]
         lstm_pred_dict = {'emotion': lstm_emo, 'confidence': lstm_conf.item(), 'probabilities': lstm_probs[0].cpu().numpy().tolist()}
         lstm_onto = self.ontology_model.get_ontology_analysis(text_clean, lstm_pred_dict)
         if lstm_onto['adjusted_prediction']['confidence'] >= self.threshold:
@@ -518,7 +537,6 @@ class CascadeEmotionClassifier:
             final = lstm_onto['adjusted_prediction']
             used = "LSTM + онтология"
             rules_applied = lstm_onto['rule_analysis']['rules_applied']
-            class_probs = {emo: float(prob) for emo, prob in zip(self.label_encoder.classes_, final.get('probabilities', lstm_pred_dict['probabilities']))}
         else:
             self.stats['bert'] += 1
             enc = self.tokenizer(text_clean, truncation=True, padding=True, max_length=self.max_length_bert, return_tensors='pt').to(self.device)
@@ -529,11 +547,11 @@ class CascadeEmotionClassifier:
             bert_emo = self.label_encoder.inverse_transform([bert_pred])[0]
             bert_pred_dict = {'emotion': bert_emo, 'confidence': bert_conf.item(), 'probabilities': bert_probs[0].cpu().numpy().tolist()}
             bert_onto = self.ontology_model.get_ontology_analysis(text_clean, bert_pred_dict)
             final = bert_onto['adjusted_prediction']
             used = "BERT + онтология"
             rules_applied = bert_onto['rule_analysis']['rules_applied']
-            class_probs = {emo: float(prob) for emo, prob in zip(self.label_encoder.classes_, final.get('probabilities', bert_pred_dict['probabilities']))}
         return {
             'text': text,
@@ -541,8 +559,7 @@ class CascadeEmotionClassifier:
             'confidence': float(final['confidence']),
             'used_model': used,
             'rules_applied': rules_applied,
-            'was_corrected_by_ontology': len(rules_applied) > 0,
-            'class_probabilities': class_probs
         }
 # ============================================================
@@ -553,17 +570,21 @@ def load_model():
     print("Загрузка модели...")
     model_dir = 'model'
     with open(f'{model_dir}/model_info.json', 'r', encoding='utf-8') as f:
         model_info = json.load(f)
     with open(f'{model_dir}/vocab.json', 'r', encoding='utf-8') as f:
         vocab = json.load(f)
     print("📂 Создание label_encoder...")
     label_encoder = LabelEncoder()
     label_encoder.classes_ = np.array(model_info['classes'])
     print(f"✅ label_encoder создан, классы: {list(label_encoder.classes_)}")
     print("📂 Создание онтологии...")
     ontology_model = OntologyEmotionModel(
         emotions=list(label_encoder.classes_),
@@ -572,6 +593,7 @@ def load_model():
     )
     print("✅ Онтология создана")
     print("📂 Загрузка LSTM...")
     lstm_model = EmotionLSTM(
         vocab_size=len(vocab),
@@ -585,6 +607,7 @@ def load_model():
     lstm_model.load_state_dict(checkpoint['model_state_dict'])
     print("✅ LSTM загружена")
     print("📂 Загрузка BERT...")
     bert_model = EmotionBERT(
         bert_model_name=model_info['bert_model_name'],
@@ -594,6 +617,7 @@ def load_model():
     bert_model.load_state_dict(torch.load(f'{model_dir}/bert_model.pth', map_location=device, weights_only=False))
     print("✅ BERT загружена")
     print("📂 Загрузка токенизатора...")
     try:
         tokenizer = BertTokenizer.from_pretrained(model_dir)
@@ -604,6 +628,7 @@ def load_model():
         tokenizer = BertTokenizer.from_pretrained('DeepPavlov/rubert-base-cased')
         print("✅ Токенизатор загружен из Hugging Face")
     print("📂 Создание каскадного классификатора...")
     cascade = CascadeEmotionClassifier(
         lstm_model=lstm_model,
@@ -649,6 +674,7 @@ async def predict(text: str = Form(...)):
     try:
         result = classifier.predict(text)
         rules_display = []
         for rule in result['rules_applied'][:10]:
             if ':' in rule:
@@ -657,107 +683,17 @@ async def predict(text: str = Form(...)):
             else:
                 rules_display.append(f"<span class='rule-tag'>{rule}</span>")
-        # Формируем вероятности для отображения
-        probs_display = []
-        for emo, prob in result['class_probabilities'].items():
-            percentage = prob * 100
-            probs_display.append(f"""
-                <div class="prob-item">
-                    <span class="prob-label">{emo}</span>
-                    <div class="prob-bar-container">
-                        <div class="prob-bar" style="width: {percentage}%"></div>
-                    </div>
-                    <span class="prob-value">{percentage:.1f}%</span>
-                </div>
-            """)
         return JSONResponse({
             "success": True,
             "emotion": result['predicted_emotion'],
             "confidence": f"{result['confidence']*100:.1f}%",
             "used_model": result['used_model'],
             "rules": "".join(rules_display) if rules_display else "Нет правил",
-            "was_corrected": str(result['was_corrected_by_ontology']),
-            "probabilities": "".join(probs_display)
         })
     except Exception as e:
         return JSONResponse({"error": str(e)}, status_code=500)
-@app.post("/upload")
-async def upload_csv(
-    file: UploadFile = File(...),
-    text_column: str = Form("text")
-):
-    if not classifier:
-        raise HTTPException(status_code=503, detail="Модель не загружена")
-    # 1. Проверка расширения
-    if not file.filename.endswith('.csv'):
-        raise HTTPException(400, "Поддерживаются только CSV файлы")
-    # 2. Ограничение размера (100 МБ)
-    contents = await file.read()
-    if len(contents) > 100 * 1024 * 1024:
-        raise HTTPException(400, "Файл слишком большой (максимум 100 МБ)")
-    # 3. Чтение CSV (пробуем utf-8, потом cp1251)
-    try:
-        df = pd.read_csv(io.BytesIO(contents), encoding='utf-8')
-    except UnicodeDecodeError:
-        try:
-            df = pd.read_csv(io.BytesIO(contents), encoding='cp1251')
-        except Exception as e:
-            raise HTTPException(400, f"Ошибка чтения CSV: {str(e)}")
-    # 4. Проверка наличия столбца
-    if text_column not in df.columns:
-        raise HTTPException(400, f"Столбец '{text_column}' не найден в файле")
-    # 5. Ограничение количества строк (максимум 1000, чтобы не превысить лимиты времени)
-    MAX_ROWS = 1000
-    if len(df) > MAX_ROWS:
-        raise HTTPException(400, f"Файл содержит более {MAX_ROWS} строк, что превышает лимит")
-    # 6. Предобработка: берём только непустые тексты
-    texts = df[text_column].fillna('').astype(str).tolist()
-    texts = [t for t in texts if t.strip()]
-    if not texts:
-        raise HTTPException(400, "Нет валидных текстов для анализа")
-    # 7. Обработка каждой строки
-    import time
-    start = time.time()
-    results = []
-    for text in texts:
-        pred = classifier.predict(text)
-        results.append({
-            "text": text[:200],  # обрезаем для экономии места
-            "emotion": pred['predicted_emotion'],
-            "confidence": pred['confidence'],
-            "probabilities": pred['class_probabilities']
-        })
-    elapsed = time.time() - start
-    # 8. Агрегированная статистика
-    emotion_counts = {}
-    for r in results:
-        emo = r['emotion']
-        emotion_counts[emo] = emotion_counts.get(emo, 0) + 1
-    avg_confidence = sum(r['confidence'] for r in results) / len(results)
-    # 9. Ответ
-    return JSONResponse({
-        "total_processed": len(results),
-        "processing_time": round(elapsed, 2),
-        "average_confidence": round(avg_confidence, 2),
-        "emotion_counts": emotion_counts,
-        "details": results
-    })
 @app.get("/health")
 async def health_check():
     """Handle GET /health: return a fixed "healthy" status and whether `classifier` is set.

     `model_loaded` is True when the `classifier` name (defined outside this
     view) is not None; the "status" field is always "healthy".
     """
     return {"status": "healthy", "model_loaded": classifier is not None}

 import torch.nn as nn
 import numpy as np
 import re
 from typing import Dict, List, Any, Optional
 from collections import defaultdict, Counter
 import networkx as nx
 import pymorphy3
 import requests
+from fastapi import FastAPI, Request, Form, HTTPException
 from fastapi.responses import HTMLResponse, JSONResponse
 from fastapi.templating import Jinja2Templates
 import uvicorn
                     elif sentiment == 'negative':
                         self.sentiment_lexicon[lemma] = 'грусть'
                         added += 1
+            except Exception as e:
                 continue
         print(f"  Добавлено слов из RuSentiLex: {added}")
     def _load_rusentilex(self):
         """Загружает RuSentiLex из локального файла в папке model"""
+        import os
+        # Пути для поиска файла RuSentiLex
         possible_paths = [
             'model/rusentilex.csv',
             'rusentilex.csv',
             '/app/model/rusentilex.csv',
             os.path.join(os.path.dirname(__file__), 'model', 'rusentilex.csv')
         ]
         loaded = False
         print("📂 Поиск RuSentiLex...")
+        # Пробуем загрузить из локального файла
         for path in possible_paths:
             if os.path.exists(path):
                 try:
                     break
                 except Exception as e:
                     print(f"⚠️ Ошибка при загрузке {path}: {e}")
+        # Если локально не нашли, пробуем скачать из интернета
         if not loaded:
             print("⚠️ Локальный файл RuSentiLex не найден, пробуем скачать...")
             url = "https://raw.githubusercontent.com/nicolay-r/sentiment-relation-classifiers/master/data/rusentilex.csv"
                     loaded = True
             except Exception as e:
                 print(f"⚠️ Не удалось загрузить RuSentiLex из репозитория: {e}")
         if not loaded:
             print("⚠️ RuSentiLex не загружен. Используется только статистический лексикон.")
+        # Выводим статистику
         print(f"📊 Всего слов в лексиконе: {len(self.sentiment_lexicon)}")
     def init_ontology_level1(self):
         adj = rule_analysis['adjustments']
         rules = rule_analysis['rules_applied']
+        # Сохраняем исходную уверенность для проверки коррекции
         original_confidence_value = original_confidence
         was_corrected = len(rules) > 0
         new_confidence = original_confidence * conf_mult
         new_emotion = original_emotion
+        # Если есть негативные слова и нет позитивных, корректируем эмоцию
         has_negative = any('негативное слово' in r for r in rules)
         has_positive = any('позитивное слово' in r for r in rules)
             new_emotion = 'радость'
             rules.append("коррекция: позитивные слова")
+        # Инверсия на основе правил
         for rule in rules:
             if rule.startswith("инверсия негатива:"):
                 new_emotion = 'радость'
                     new_emotion = 'грусть'
                 break
+        # Сарказм (контраст + маркеры)
         sarcasm_flag = adj['sarcasm'] > 0.5
         if sarcasm_flag:
             new_emotion = 'сарказм'
             if "саркастическая фраза" in str(rules):
                 new_confidence = min(new_confidence * 1.1, 0.95)
+        # Восклицания
         if any('восклицание' in r for r in rules):
             new_confidence = min(new_confidence * 1.2, 1.0)
+        # Если онтология не применила коррекции, а уверенность была менее 90%,
+        # то повышаем уверенность на 10% (но не более 100%)
         if not was_corrected and original_confidence_value < 0.9:
             new_confidence = min(new_confidence * 1.10, 1.0)
+        # Ограничиваем максимум 1.0 (100%)
         new_confidence = min(new_confidence, 1.0)
         return {
         lstm_emo = self.label_encoder.inverse_transform([lstm_pred])[0]
         lstm_pred_dict = {'emotion': lstm_emo, 'confidence': lstm_conf.item(), 'probabilities': lstm_probs[0].cpu().numpy().tolist()}
+        # Применяем онтологию к LSTM
         lstm_onto = self.ontology_model.get_ontology_analysis(text_clean, lstm_pred_dict)
         if lstm_onto['adjusted_prediction']['confidence'] >= self.threshold:
             final = lstm_onto['adjusted_prediction']
             used = "LSTM + онтология"
             rules_applied = lstm_onto['rule_analysis']['rules_applied']
         else:
             self.stats['bert'] += 1
             enc = self.tokenizer(text_clean, truncation=True, padding=True, max_length=self.max_length_bert, return_tensors='pt').to(self.device)
             bert_emo = self.label_encoder.inverse_transform([bert_pred])[0]
             bert_pred_dict = {'emotion': bert_emo, 'confidence': bert_conf.item(), 'probabilities': bert_probs[0].cpu().numpy().tolist()}
+            # Применяем онтологию к BERT
             bert_onto = self.ontology_model.get_ontology_analysis(text_clean, bert_pred_dict)
             final = bert_onto['adjusted_prediction']
             used = "BERT + онтология"
             rules_applied = bert_onto['rule_analysis']['rules_applied']
         return {
             'text': text,
             'confidence': float(final['confidence']),
             'used_model': used,
             'rules_applied': rules_applied,
+            'was_corrected_by_ontology': len(rules_applied) > 0
         }
 # ============================================================
     print("Загрузка модели...")
     model_dir = 'model'
+    # Загружаем информацию о модели
     with open(f'{model_dir}/model_info.json', 'r', encoding='utf-8') as f:
         model_info = json.load(f)
+    # Загружаем vocab
     with open(f'{model_dir}/vocab.json', 'r', encoding='utf-8') as f:
         vocab = json.load(f)
+    # СОЗДАЁМ label_encoder из model_info
     print("📂 Создание label_encoder...")
     label_encoder = LabelEncoder()
     label_encoder.classes_ = np.array(model_info['classes'])
     print(f"✅ label_encoder создан, классы: {list(label_encoder.classes_)}")
+    # СОЗДАЁМ онтологию
     print("📂 Создание онтологии...")
     ontology_model = OntologyEmotionModel(
         emotions=list(label_encoder.classes_),
     )
     print("✅ Онтология создана")
+    # LSTM
     print("📂 Загрузка LSTM...")
     lstm_model = EmotionLSTM(
         vocab_size=len(vocab),
     lstm_model.load_state_dict(checkpoint['model_state_dict'])
     print("✅ LSTM загружена")
+    # BERT
     print("📂 Загрузка BERT...")
     bert_model = EmotionBERT(
         bert_model_name=model_info['bert_model_name'],
     bert_model.load_state_dict(torch.load(f'{model_dir}/bert_model.pth', map_location=device, weights_only=False))
     print("✅ BERT загружена")
+    # Токенизатор
     print("📂 Загрузка токенизатора...")
     try:
         tokenizer = BertTokenizer.from_pretrained(model_dir)
         tokenizer = BertTokenizer.from_pretrained('DeepPavlov/rubert-base-cased')
         print("✅ Токенизатор загружен из Hugging Face")
+    # Каскад
     print("📂 Создание каскадного классификатора...")
     cascade = CascadeEmotionClassifier(
         lstm_model=lstm_model,
     try:
         result = classifier.predict(text)
+        # Форматируем правила для отображения
         rules_display = []
         for rule in result['rules_applied'][:10]:
             if ':' in rule:
             else:
                 rules_display.append(f"<span class='rule-tag'>{rule}</span>")
         return JSONResponse({
             "success": True,
             "emotion": result['predicted_emotion'],
             "confidence": f"{result['confidence']*100:.1f}%",
             "used_model": result['used_model'],
             "rules": "".join(rules_display) if rules_display else "Нет правил",
+            "was_corrected": str(result['was_corrected_by_ontology'])
         })
     except Exception as e:
         return JSONResponse({"error": str(e)}, status_code=500)
 @app.get("/health")
 async def health_check():
     """Handle GET /health: return a fixed "healthy" status and whether `classifier` is set.

     `model_loaded` is True when the `classifier` name (defined outside this
     view) is not None; the "status" field is always "healthy".
     """
     return {"status": "healthy", "model_loaded": classifier is not None}