import pickle

import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import (
    Bidirectional, Dense, Dropout, Embedding, Input, Layer, LSTM, SpatialDropout1D,
)
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split


# Custom additive attention layer: learns a scalar relevance score per
# timestep and returns the attention-weighted sum of the LSTM outputs.
class Attention(Layer):
    def __init__(self, **kwargs):
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        # One weight per feature dimension, one bias per timestep.
        self.W = self.add_weight(name='attention_weight',
                                 shape=(input_shape[-1], 1),
                                 initializer='random_normal',
                                 trainable=True)
        self.b = self.add_weight(name='attention_bias',
                                 shape=(input_shape[1], 1),
                                 initializer='zeros',
                                 trainable=True)
        super(Attention, self).build(input_shape)

    def call(self, x):
        # e: (batch, timesteps, 1) unnormalised scores; a: softmax over timesteps.
        e = K.tanh(K.dot(x, self.W) + self.b)
        a = K.softmax(e, axis=1)
        output = x * a
        # Collapse the timestep axis into a single context vector per sample.
        return K.sum(output, axis=1)


def train_advanced_model(file_path):
    print("Loading data for advanced model...")
    df = pd.read_csv(file_path)

    # Fill missing facts
    df['related_facts'] = df['related_facts'].fillna("No context provided.")

    # Advanced preprocessing: combine facts, question, and response into one
    # string with marker tokens:
    # [FACTS] facts [QUERY] question [RES] response
    df['text'] = "[FACTS] " + df['related_facts'].astype(str) + \
                 " [QUERY] " + df['question'].astype(str) + \
                 " [RES] " + df['engine_response'].astype(str)

    y = df['best'].astype(int).values
    X_text = df['text'].astype(str).str.lower().values

    max_words = 15000
    max_len = 300

    tokenizer = Tokenizer(num_words=max_words, lower=True, split=' ')
    tokenizer.fit_on_texts(X_text)
    X_seq = tokenizer.texts_to_sequences(X_text)
    X_pad = pad_sequences(X_seq, maxlen=max_len)

    X_train, X_test, y_train, y_test = train_test_split(
        X_pad, y, test_size=0.15, random_state=42, stratify=y)

    # Save the tokenizer immediately so it is available as soon as the model
    # starts saving checkpoints.
    with open('tokenizer_advanced.pickle', 'wb') as handle:
        pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print("Tokenizer saved.")

    # Advanced architecture: Bi-LSTM + attention.
    inputs = Input(shape=(max_len,))
    embed = Embedding(max_words, 128)(inputs)
    drop1 = SpatialDropout1D(0.3)(embed)
    lstm = Bidirectional(LSTM(64, return_sequences=True))(drop1)
    attn = Attention()(lstm)
    dense1 = Dense(64, activation='relu')(attn)
    drop2 = Dropout(0.4)(dense1)
    outputs = Dense(1, activation='sigmoid')(drop2)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    model.summary()

    # Training with checkpointing. Weight the positive class by the inverse
    # class ratio to compensate for label imbalance.
    batch_size = 128
    epochs = 2
    class_weight = {0: 1.0, 1: len(y[y == 0]) / len(y[y == 1])}

    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        'chatbot_performance_advanced.h5',
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=1
    )

    print("Training advanced model with attention...")
    model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.1,
        class_weight=class_weight,
        callbacks=[checkpoint],
        verbose=1
    )
    print("Training complete.")


if __name__ == "__main__":
    train_advanced_model('BP_MHS_V1.csv')
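

# --- Usage sketch (not part of the original script) ---
# A minimal, hedged example of reloading the checkpoint for inference. Because
# the saved model contains the custom Attention layer, load_model() must be
# given the class via custom_objects or deserialization fails. 'score_texts'
# is a hypothetical helper name; it assumes 'tokenizer_advanced.pickle' and
# 'chatbot_performance_advanced.h5' already exist on disk.
def score_texts(texts, max_len=300):
    with open('tokenizer_advanced.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)
    model = tf.keras.models.load_model(
        'chatbot_performance_advanced.h5',
        custom_objects={'Attention': Attention}
    )
    seqs = pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=max_len)
    # One probability per input string: P(best == 1).
    return model.predict(seqs).ravel()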