import os
import sys
import json
import pickle
import re
import warnings
from collections import defaultdict
from typing import Dict, List, Any, Optional

import numpy as np
import torch
import torch.nn as nn
import networkx as nx
import pymorphy3
import uvicorn
from fastapi import FastAPI, Request, Form, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.templating import Jinja2Templates
from transformers import BertTokenizer, BertModel

warnings.filterwarnings('ignore')

# Pick the compute device once at import time; everything below follows it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Используется устройство: {device}")

# ============================================================
# HELPER FUNCTIONS
# ============================================================


def clean_russian_text(text):
    """Normalize a Russian text snippet for model input.

    Lower-cases the text, strips URLs and e-mail addresses, maps common
    emoticons to marker words, removes unsupported characters and collapses
    whitespace. Non-string input yields "".
    """
    if not isinstance(text, str):
        return ""
    text = text.lower()
    text = re.sub(r'http\S+|www\S+|https\S+', '', text)  # URLs
    text = re.sub(r'\S+@\S+', '', text)                  # e-mail addresses
    # BUG FIX (two issues in the original mapping):
    #   1. The text is already lower-cased, so the ':D' key could never
    #      match — the key must be ':d'.
    #   2. The bare ')' entry was replaced before ';)' was checked, so
    #      ';)' could never match. Multi-character smileys now come first.
    # NOTE(review): the bare ')' / '(' entries also rewrite ordinary
    # parentheses; kept for compatibility with the trained models.
    smileys = {
        ':)': ' смайлик_радость ',
        ':(': ' смайлик_грусть ',
        ':d': ' смайлик_смех ',
        ';)': ' смайлик_подмигивание ',
        ')': ' смайлик_радость ',
        '(': ' смайлик_грусть ',
    }
    for smiley, replacement in smileys.items():
        text = text.replace(smiley, replacement)
    text = re.sub(r'[^\w\sа-яё.,!?;:)(-]', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

# ============================================================
# MODEL CLASSES (simplified for inference)
# ============================================================


class EmotionLSTM(nn.Module):
    """Bidirectional LSTM sentence classifier over a word-index vocabulary."""

    def __init__(self, vocab_size, embed_dim=100, hidden_dim=256,
                 num_classes=5, dropout=0.3, num_layers=2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(
            embed_dim, hidden_dim, num_layers=num_layers,
            batch_first=True, bidirectional=True,
            dropout=dropout if num_layers > 1 else 0,
        )
        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim * 2, 128), nn.ReLU(), nn.Dropout(dropout),
            nn.Linear(128, 64), nn.ReLU(),
            nn.Linear(64, num_classes),
        )

    def forward(self, x, return_confidence=False):
        """Return logits for a batch of index sequences.

        With return_confidence=True also returns the max softmax
        probability per sample.
        """
        embedded = self.embedding(x)
        lstm_out, (hidden, cell) = self.lstm(embedded)
        # Concatenate last forward (-2) and last backward (-1) hidden states.
        lstm_last = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        features = self.dropout(lstm_last)
        logits = self.classifier(features)
        if return_confidence:
            probs = torch.softmax(logits, dim=1)
            conf, _ = torch.max(probs, dim=1)
            return logits, conf
        return logits


class EmotionBERT(nn.Module):
    """BERT encoder with an MLP head over the [CLS] token."""

    def __init__(self, bert_model_name, num_classes, dropout=0.3):
        super().__init__()
        self.bert = BertModel.from_pretrained(bert_model_name)
        hidden = self.bert.config.hidden_size
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(hidden, 256), nn.ReLU(), nn.Dropout(dropout),
            nn.Linear(256, 128), nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, input_ids, attention_mask, return_confidence=False):
        """Return logits (and optionally max softmax confidence per sample)."""
        out = self.bert(input_ids, attention_mask, return_dict=True)
        cls = out.last_hidden_state[:, 0, :]  # [CLS] embedding
        logits = self.classifier(cls)
        if return_confidence:
            probs = torch.softmax(logits, dim=1)
            conf, _ = torch.max(probs, dim=1)
            return logits, conf
        return logits


class OntologyEmotionModel:
    """Linguistic-rule / ontology layer applied on top of neural predictions."""

    def __init__(self, emotions: List[str]):
        self.emotions = emotions
        self.morph = pymorphy3.MorphAnalyzer()
        self.ontology_graph = nx.DiGraph()
        self.sentiment_lexicon = {}
        # Cue-word rule classes. Keys and words are runtime data used in the
        # lemma matching below and therefore stay in Russian.
        self.linguistic_rules = {
            'усилители': {'words': ['очень', 'сильно', 'крайне'], 'weight': 0.3},
            'ослабители': {'words': ['слегка', 'немного', 'чуть-чуть'], 'weight': -0.2},
            'отрицания': {'words': ['не', 'ни', 'нет'], 'weight': -0.5},
        }
        self.init_ontology_level1()

    def init_ontology_level1(self):
        """Populate the graph with valence/arousal nodes for known emotions."""
        self.emotion_definitions = {
            'радость': {'valence': 'positive', 'arousal': 'high'},
            'грусть': {'valence': 'negative', 'arousal': 'low'},
            'злость': {'valence': 'negative', 'arousal': 'high'},
            'страх': {'valence': 'negative', 'arousal': 'high'},
            'сарказм': {'valence': 'negative', 'arousal': 'high'},
        }
        for emotion in self.emotions:
            if emotion in self.emotion_definitions:
                self.ontology_graph.add_node(emotion, **self.emotion_definitions[emotion])

    def apply_linguistic_rules(self, text: str) -> Dict:
        """Lemmatize *text* and report which cue-word rules fire."""
        rules_applied = []
        words = text.lower().split()
        lemmas = [self.morph.parse(w)[0].normal_form for w in words]
        for category, rule in self.linguistic_rules.items():
            for word in rule['words']:
                if word in lemmas:
                    rules_applied.append(f"{category}: {word}")
        return {'rules_applied': rules_applied, 'lemmas': lemmas}

    def adjust_prediction_with_rules(self, prediction: Dict, rule_analysis: Dict) -> Dict:
        """Attach the fired rules to a prediction.

        Currently a pass-through: the rule weights are not applied to the
        confidence value.
        """
        return {
            'emotion': prediction['emotion'],
            'confidence': prediction['confidence'],
            'rules_applied': rule_analysis['rules_applied'],
        }

    def get_ontology_analysis(self, text: str, model_prediction: Dict) -> Dict:
        """Run the rule analysis and return it with the adjusted prediction."""
        rule_analysis = self.apply_linguistic_rules(text)
        adjusted = self.adjust_prediction_with_rules(model_prediction, rule_analysis)
        return {'rule_analysis': rule_analysis, 'adjusted_prediction': adjusted}

    def get_statistics(self) -> Dict:
        """Return basic counts about the ontology and rule set."""
        return {
            'ontology_nodes': len(self.ontology_graph.nodes),
            'linguistic_rules': len(self.linguistic_rules),
        }


class CascadeEmotionClassifier:
    """Two-stage classifier: fast LSTM first, BERT fallback on low confidence.

    If the ontology-adjusted LSTM confidence is >= *threshold*, its answer is
    final; otherwise the (slower) BERT model decides.
    """

    def __init__(self, lstm_model, bert_model, vocab, tokenizer, label_encoder,
                 ontology_model, threshold=0.95, device='cpu',
                 max_length_lstm=100, max_length_bert=128):
        self.lstm_model = lstm_model
        self.bert_model = bert_model
        self.vocab = vocab
        self.tokenizer = tokenizer
        self.label_encoder = label_encoder
        self.ontology_model = ontology_model
        self.threshold = threshold
        self.device = device
        self.max_length_lstm = max_length_lstm
        self.max_length_bert = max_length_bert
        self.lstm_model.eval()
        self.bert_model.eval()
        self.lstm_model.to(device)
        self.bert_model.to(device)
        # Routing counters exposed through the /stats endpoint.
        self.stats = {'total': 0, 'lstm': 0, 'bert': 0}

    def text_to_sequence(self, text):
        """Convert text to a fixed-length list of vocab indices (pad/truncate)."""
        words = str(text).split()[:self.max_length_lstm]
        # NOTE(review): the '' vocab keys look like mangled '<UNK>' / '<PAD>'
        # token names — verify against the saved vocab.json before changing.
        sequence = [self.vocab.get(word, self.vocab.get('', 1)) for word in words]
        if len(sequence) < self.max_length_lstm:
            sequence += [self.vocab.get('', 0)] * (self.max_length_lstm - len(sequence))
        return sequence[:self.max_length_lstm]

    def predict(self, text):
        """Classify *text* and return a result dict for the web layer.

        Result keys: text, predicted_emotion, confidence, used_model,
        rules_applied, class_probabilities.
        """
        self.stats['total'] += 1
        text_clean = clean_russian_text(text)

        # --- Stage 1: LSTM ---
        seq = torch.LongTensor([self.text_to_sequence(text_clean)]).to(self.device)
        with torch.no_grad():
            lstm_logits, lstm_conf = self.lstm_model(seq, return_confidence=True)
            lstm_probs = torch.softmax(lstm_logits, dim=1)
            lstm_pred = lstm_probs.argmax().item()
        lstm_emo = self.label_encoder.inverse_transform([lstm_pred])[0]
        lstm_pred_dict = {
            'emotion': lstm_emo,
            'confidence': lstm_conf.item(),
            'probabilities': lstm_probs[0].cpu().numpy().tolist(),
        }

        # Apply the ontology rule layer to the LSTM answer.
        lstm_onto = self.ontology_model.get_ontology_analysis(text_clean, lstm_pred_dict)
        lstm_adjusted = lstm_onto['adjusted_prediction']

        if lstm_adjusted['confidence'] >= self.threshold:
            self.stats['lstm'] += 1
            final = lstm_adjusted
            final_onto = lstm_onto
            final_probs = lstm_pred_dict['probabilities']
            used_model = "LSTM с онтологией"
        else:
            # --- Stage 2: BERT fallback ---
            self.stats['bert'] += 1
            enc = self.tokenizer(
                text_clean, truncation=True, padding=True,
                max_length=self.max_length_bert, return_tensors='pt',
            ).to(self.device)
            with torch.no_grad():
                bert_logits, bert_conf = self.bert_model(
                    enc['input_ids'], enc['attention_mask'], return_confidence=True
                )
                bert_probs = torch.softmax(bert_logits, dim=1)
                bert_pred = bert_probs.argmax().item()
            bert_emo = self.label_encoder.inverse_transform([bert_pred])[0]
            bert_pred_dict = {
                'emotion': bert_emo,
                'confidence': bert_conf.item(),
                'probabilities': bert_probs[0].cpu().numpy().tolist(),
            }
            bert_onto = self.ontology_model.get_ontology_analysis(text_clean, bert_pred_dict)
            final = bert_onto['adjusted_prediction']
            final_onto = bert_onto
            # BUG FIX: the adjusted prediction dict never carries a
            # 'probabilities' key, so the original
            # `final.get('probabilities', lstm_pred_dict['probabilities'])`
            # always fell back to the LSTM probabilities even when BERT made
            # the final decision. Track the winning model's probabilities
            # explicitly instead.
            final_probs = bert_pred_dict['probabilities']
            used_model = "BERT с онтологией"

        return {
            'text': text,
            'predicted_emotion': final['emotion'],
            'confidence': float(final['confidence']),
            'used_model': used_model,
            'rules_applied': final_onto['rule_analysis']['rules_applied'],
            'class_probabilities': {
                emo: float(prob)
                for emo, prob in zip(self.label_encoder.classes_, final_probs)
            },
        }

# ============================================================
# MODEL LOADING
# ============================================================


def load_model():
    """Load all artifacts from ./model and assemble the cascade classifier.

    Returns (CascadeEmotionClassifier, model_info dict).
    """
    print("Загрузка модели...")
    model_dir = 'model'

    # Hyper-parameters / metadata saved at training time.
    with open(f'{model_dir}/model_info.json', 'r', encoding='utf-8') as f:
        model_info = json.load(f)

    # Word -> index vocabulary for the LSTM branch.
    with open(f'{model_dir}/vocab.json', 'r', encoding='utf-8') as f:
        vocab = json.load(f)

    # Label encoder and ontology are pickled objects.
    # NOTE(review): pickle.load is unsafe on untrusted files; these must be
    # trusted local artifacts shipped with the application.
    with open(f'{model_dir}/label_encoder.pkl', 'rb') as f:
        label_encoder = pickle.load(f)
    with open(f'{model_dir}/ontology_model.pkl', 'rb') as f:
        ontology_model = pickle.load(f)

    # Build and load the LSTM.
    lstm_model = EmotionLSTM(
        vocab_size=len(vocab),
        embed_dim=model_info.get('embed_dim', 100),
        hidden_dim=256,
        num_classes=model_info['num_classes'],
        dropout=0.3,
        num_layers=2,
    )
    checkpoint = torch.load(f'{model_dir}/lstm_model.pth',
                            map_location=device, weights_only=True)
    lstm_model.load_state_dict(checkpoint['model_state_dict'])

    # Build and load BERT.
    bert_model = EmotionBERT(
        bert_model_name=model_info['bert_model_name'],
        num_classes=model_info['num_classes'],
        dropout=0.3,
    )
    bert_model.load_state_dict(
        torch.load(f'{model_dir}/bert_model.pth',
                   map_location=device, weights_only=True)
    )

    # Tokenizer saved alongside the models.
    tokenizer = BertTokenizer.from_pretrained(model_dir)

    # Assemble the cascade.
    cascade = CascadeEmotionClassifier(
        lstm_model=lstm_model,
        bert_model=bert_model,
        vocab=vocab,
        tokenizer=tokenizer,
        label_encoder=label_encoder,
        ontology_model=ontology_model,
        threshold=model_info.get('threshold', 0.95),
        device=device,
        max_length_lstm=model_info.get('max_length_lstm', 100),
        max_length_bert=model_info.get('max_length_bert', 128),
    )
    print("✅ Модель успешно загружена!")
    return cascade, model_info

# ============================================================
# FASTAPI APPLICATION
# ============================================================

app = FastAPI(title="Emotion Analysis with BERT and Ontology")

# Jinja2 templates served from ./templates.
templates = Jinja2Templates(directory="templates")

# Globals populated once at startup by load_model().
classifier = None
model_info = None


# NOTE(review): @app.on_event is deprecated in recent FastAPI releases in
# favor of lifespan handlers; kept as-is to preserve runtime behavior.
@app.on_event("startup")
async def startup_event():
    """Load the cascade classifier once when the server starts."""
    global classifier, model_info
    classifier, model_info = load_model()


@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Render the main page with the list of emotion classes."""
    # FIX: explicit `is not None` instead of truthiness on a model object.
    classes = classifier.label_encoder.classes_.tolist() if classifier is not None else []
    return templates.TemplateResponse(
        "index.html",
        {"request": request, "classes": classes},
    )


@app.post("/predict")
async def predict(text: str = Form(...)):
    """Classify *text* and return a JSON payload for the UI.

    503 when the model is not loaded yet, 400 for too-short input,
    500 on any prediction failure.
    """
    if classifier is None:
        raise HTTPException(status_code=503, detail="Модель еще не загружена")

    if not text or len(text.strip()) < 3:
        return JSONResponse(
            {"error": "Текст слишком короткий. Введите хотя бы 3 символа."},
            status_code=400,
        )

    try:
        result = classifier.predict(text)

        # Format the fired ontology rules for display (max 10).
        rules_display = []
        for rule in result['rules_applied'][:10]:
            if ':' in rule:
                cat, val = rule.split(':', 1)
                rules_display.append(f"{cat}: {val.strip()}")
            else:
                rules_display.append(f"{rule}")

        # Format per-class probabilities.
        # NOTE(review): this f-string looks like it originally carried HTML
        # markup that was stripped from the source — verify against the
        # index.html template before deploying.
        probs_display = []
        for emotion, prob in result['class_probabilities'].items():
            percentage = prob * 100
            probs_display.append(f"""
{emotion}
{percentage:.1f}%
""")

        return JSONResponse({
            "success": True,
            "text": result['text'][:200] + "..." if len(result['text']) > 200 else result['text'],
            "emotion": result['predicted_emotion'],
            "confidence": f"{result['confidence']*100:.1f}%",
            "used_model": result['used_model'],
            "rules": "".join(rules_display) if rules_display else "Нет примененных правил",
            "probabilities": "".join(probs_display),
        })
    except Exception as e:
        # Boundary handler: surface the error to the UI instead of a bare 500 page.
        return JSONResponse(
            {"error": f"Ошибка при анализе: {str(e)}"},
            status_code=500,
        )


@app.get("/stats")
async def get_stats():
    """Expose cascade routing statistics (LSTM vs BERT usage)."""
    if classifier is None:
        raise HTTPException(status_code=503, detail="Модель еще не загружена")
    stats = classifier.stats
    total = max(stats['total'], 1)  # guard against division by zero
    return JSONResponse({
        "total_predictions": stats['total'],
        "lstm_used": stats['lstm'],
        "bert_used": stats['bert'],
        "lstm_percentage": f"{(stats['lstm'] / total) * 100:.1f}%",
        "bert_percentage": f"{(stats['bert'] / total) * 100:.1f}%",
    })


@app.get("/health")
async def health_check():
    """Liveness probe: reports whether the model finished loading."""
    return {"status": "healthy", "model_loaded": classifier is not None}


# Entry point for local / container runs.
if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=port)