import streamlit as st
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, Dropout, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pickle

# Module-level text-processing settings read by the functions below
embedding_dim = 100        # Width of each word-embedding vector
max_sequence_length = 100  # Length, in tokens, that every input sequence is padded to

def create_model(vocab_size):
    """
    Builds and compiles the Bidirectional LSTM sentiment classifier

    The network takes sequences of max_sequence_length token ids, embeds them
    into embedding_dim-sized vectors, encodes them with a bidirectional LSTM
    and classifies the result into 3 classes with a softmax layer.

    Args:
        vocab_size: Size of the vocabulary (number of unique words + 1)

    Returns:
        Compiled Keras model
    """
    # Layers are instantiated in the same order they are stacked
    token_input = Input(shape=(max_sequence_length,))
    word_embedding = Embedding(input_dim=vocab_size, output_dim=embedding_dim)
    # return_sequences=False: only the final LSTM output is passed on
    sequence_encoder = Bidirectional(LSTM(128, return_sequences=False))
    encoder_dropout = Dropout(0.5)   # Regularization after the encoder
    hidden = Dense(64, activation='relu')
    hidden_dropout = Dropout(0.5)    # Regularization after the hidden layer
    class_scores = Dense(3, activation='softmax')  # One probability per class

    network = Sequential([
        token_input,
        word_embedding,
        sequence_encoder,
        encoder_dropout,
        hidden,
        hidden_dropout,
        class_scores,
    ])
    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

@st.cache_resource
def load_model_and_tokenizer():
    """
    Restores the fitted tokenizer and the trained model from disk

    The tokenizer is unpickled from 'tokenizer.pickle'. The model architecture
    is rebuilt with create_model() and its weights are read from 'lstm.keras'.
    Both paths are relative to the current working directory.

    Returns:
        tuple: (model, tokenizer)
    """
    with open('tokenizer.pickle', 'rb') as tokenizer_file:
        tokenizer = pickle.load(tokenizer_file)

    # create_model() expects the number of indexed words plus one
    model = create_model(len(tokenizer.word_index) + 1)
    model.load_weights('lstm.keras')

    return model, tokenizer

def preprocess_text(text, tokenizer):
    """
    Converts a raw string into the padded sequence the model consumes

    Args:
        text: Input text string
        tokenizer: Keras tokenizer object

    Returns:
        Result of pad_sequences for the single input text, using
        maxlen=max_sequence_length
    """
    # The tokenizer works on a list of texts, so wrap the single string
    token_ids = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(token_ids, maxlen=max_sequence_length)
    return padded

def main():
    """Main function for the Streamlit app

    Renders the page title, loads the model and tokenizer, and, when the
    "Analyze" button is pressed, classifies the entered text and shows the
    detected sentiment together with the per-class probabilities.

    Loading failures are reported on the page with st.error and end the run.
    Empty or whitespace-only input produces a warning instead of a prediction.
    """
    st.title("Sentiment Analyzer")

    try:
        # Load model and tokenizer
        model, tokenizer = load_model_and_tokenizer()
    except Exception as e:
        # Top-level UI boundary: show the problem on the page instead of crashing
        st.error(f"Error loading model: {str(e)}")
        return

    # Text input area
    text = st.text_area("Enter text to analyze:", height=150)

    if not st.button("Analyze"):
        return

    # Whitespace-only input yields no tokens; predicting on it would report a
    # sentiment for a sequence that is entirely padding.
    if not text or not text.strip():
        st.warning("Please enter text to analyze.")
        return

    # Process input and make prediction
    processed_text = preprocess_text(text, tokenizer)
    # verbose=0 keeps Keras from printing a progress bar to the console per click
    prediction = model.predict(processed_text, verbose=0)

    # A single text was submitted, so the first row holds its class probabilities
    probabilities = prediction[0]

    # NOTE(review): assumes the model was trained with this class-index order — confirm
    sentiments = ['Negative', 'Neutral', 'Positive']
    result = sentiments[int(np.argmax(probabilities))]

    # Display results
    st.write(f"Detected sentiment: **{result}**")

    # Show probability distribution
    for sent, prob in zip(sentiments, probabilities):
        st.progress(float(prob))
        st.write(f"{sent}: {prob:.2%}")

# Start the app only when this file is executed as the main script, not on import
if __name__ == "__main__":
    main()