"""Streamlit web app: sentiment analysis with a Bidirectional LSTM."""
import streamlit as st
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, Dropout, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pickle
# Global configuration for text processing.
# Both values must match the settings used when the model was trained,
# since the saved weights depend on them.
max_sequence_length = 100 # Maximum length of input sequences # sequences are padded/truncated to this many tokens
embedding_dim = 100 # Dimension of word embeddings
def create_model(vocab_size):
    """
    Build and compile the sentiment-classification network.

    Architecture: embedding -> bidirectional LSTM -> dropout ->
    dense ReLU -> dropout -> 3-way softmax.

    Args:
        vocab_size: Size of the vocabulary (number of unique words + 1).

    Returns:
        A compiled Keras Sequential model.
    """
    net = Sequential()
    net.add(Input(shape=(max_sequence_length,)))
    # Map token ids to dense vectors.
    net.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim))
    # Read the sequence in both directions; keep only the final state.
    net.add(Bidirectional(LSTM(128, return_sequences=False)))
    net.add(Dropout(0.5))
    net.add(Dense(64, activation='relu'))
    net.add(Dropout(0.5))
    # Three output classes: negative / neutral / positive.
    net.add(Dense(3, activation='softmax'))
    net.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    return net
@st.cache_resource
def load_model_and_tokenizer():
    """
    Load the fitted tokenizer and the trained model weights from disk.

    Cached by Streamlit so the artifacts are read only once per
    server process.

    Returns:
        tuple: (model, tokenizer)
    """
    # NOTE(review): pickle.load is only safe because this is a local,
    # trusted artifact — never load pickles from untrusted sources.
    with open('tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)
    # Rebuild the architecture, then restore the trained weights.
    model = create_model(len(tokenizer.word_index) + 1)
    model.load_weights('lstm.keras')
    return model, tokenizer
def preprocess_text(text, tokenizer):
    """
    Convert raw text into a fixed-length integer sequence for the model.

    Args:
        text: Input text string.
        tokenizer: Fitted Keras tokenizer object.

    Returns:
        A (1, max_sequence_length) padded array of token ids.
    """
    encoded = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(encoded, maxlen=max_sequence_length)
    return padded
def main():
    """Render the Streamlit sentiment-analysis UI and handle one request."""
    st.title("Sentiment Analyzer")

    try:
        model, tokenizer = load_model_and_tokenizer()
    except Exception as e:
        # Surface the load failure in the UI instead of crashing the app.
        st.error(f"Error loading model: {str(e)}")
        return

    text = st.text_area("Enter text to analyze:", height=150)

    # Guard clauses: do nothing until the button is pressed,
    # and warn when the input box is empty.
    if not st.button("Analyze"):
        return
    if not text:
        st.warning("Please enter text to analyze.")
        return

    # Encode the input and run it through the network.
    model_input = preprocess_text(text, tokenizer)
    prediction = model.predict(model_input)

    sentiments = ['Negative', 'Neutral', 'Positive']
    result = sentiments[np.argmax(prediction)]
    st.write(f"Detected sentiment: **{result}**")

    # Show the per-class probability distribution.
    for sent, prob in zip(sentiments, prediction[0]):
        st.progress(float(prob))
        st.write(f"{sent}: {prob:.2%}")


if __name__ == "__main__":
    main()