import pickle

import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import (
    Bidirectional, Dense, Dropout, Embedding, Input, Layer, LSTM, SpatialDropout1D,
)
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split


# Custom additive attention layer: learns a scalar relevance score per
# timestep and returns the attention-weighted sum of the LSTM outputs.
class Attention(Layer):
    def __init__(self, **kwargs):
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        # One weight per feature dimension, one bias per timestep.
        self.W = self.add_weight(name='attention_weight',
                                 shape=(input_shape[-1], 1),
                                 initializer='random_normal',
                                 trainable=True)
        self.b = self.add_weight(name='attention_bias',
                                 shape=(input_shape[1], 1),
                                 initializer='zeros',
                                 trainable=True)
        super(Attention, self).build(input_shape)

    def call(self, x):
        # e: (batch, timesteps, 1) unnormalised scores; a: softmax over timesteps.
        e = K.tanh(K.dot(x, self.W) + self.b)
        a = K.softmax(e, axis=1)
        output = x * a
        # Collapse the timestep axis into a single context vector per sample.
        return K.sum(output, axis=1)


def train_advanced_model(file_path):
    print("Loading data for advanced model...")
    df = pd.read_csv(file_path)

    # Fill missing facts
    df['related_facts'] = df['related_facts'].fillna("No context provided.")

    # Advanced preprocessing: combine facts, question, and response into one
    # string with marker tokens:
    # [FACTS] facts [QUERY] question [RES] response
    df['text'] = "[FACTS] " + df['related_facts'].astype(str) + \
                 " [QUERY] " + df['question'].astype(str) + \
                 " [RES] " + df['engine_response'].astype(str)

    y = df['best'].astype(int).values
    X_text = df['text'].astype(str).str.lower().values

    max_words = 15000
    max_len = 300

    tokenizer = Tokenizer(num_words=max_words, lower=True, split=' ')
    tokenizer.fit_on_texts(X_text)
    X_seq = tokenizer.texts_to_sequences(X_text)
    X_pad = pad_sequences(X_seq, maxlen=max_len)

    X_train, X_test, y_train, y_test = train_test_split(
        X_pad, y, test_size=0.15, random_state=42, stratify=y)

    # Save the tokenizer immediately so it is available as soon as the model
    # starts saving checkpoints.
    with open('tokenizer_advanced.pickle', 'wb') as handle:
        pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print("Tokenizer saved.")

    # Advanced architecture: Bi-LSTM + attention.
    inputs = Input(shape=(max_len,))
    embed = Embedding(max_words, 128)(inputs)
    drop1 = SpatialDropout1D(0.3)(embed)
    lstm = Bidirectional(LSTM(64, return_sequences=True))(drop1)
    attn = Attention()(lstm)
    dense1 = Dense(64, activation='relu')(attn)
    drop2 = Dropout(0.4)(dense1)
    outputs = Dense(1, activation='sigmoid')(drop2)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    model.summary()

    # Training with checkpointing. Weight the positive class by the inverse
    # class ratio to compensate for label imbalance.
    batch_size = 128
    epochs = 2
    class_weight = {0: 1.0, 1: len(y[y == 0]) / len(y[y == 1])}

    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        'chatbot_performance_advanced.h5',
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=1
    )

    print("Training advanced model with attention...")
    model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.1,
        class_weight=class_weight,
        callbacks=[checkpoint],
        verbose=1
    )
    print("Training complete.")


if __name__ == "__main__":
    train_advanced_model('BP_MHS_V1.csv')
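

# --- Usage sketch (not part of the original script) ---
# A minimal, hedged example of reloading the checkpoint for inference. Because
# the saved model contains the custom Attention layer, load_model() must be
# given the class via custom_objects or deserialization fails. 'score_texts'
# is a hypothetical helper name; it assumes 'tokenizer_advanced.pickle' and
# 'chatbot_performance_advanced.h5' already exist on disk.
def score_texts(texts, max_len=300):
    with open('tokenizer_advanced.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)
    model = tf.keras.models.load_model(
        'chatbot_performance_advanced.h5',
        custom_objects={'Attention': Attention}
    )
    seqs = pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=max_len)
    # One probability per input string: P(best == 1).
    return model.predict(seqs).ravel()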