"""Streamlit web app: sentiment analysis with a Bidirectional LSTM."""
import streamlit as st
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, Dropout, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pickle
# Global configuration for text processing.
# Both values must match the settings used when the model was trained,
# since the saved weights depend on them.
max_sequence_length = 100 # Maximum length of input sequences # sequences are padded/truncated to this many tokens
embedding_dim = 100 # Dimension of word embeddings
def create_model(vocab_size):
    """
    Build and compile the sentiment-classification network.

    Architecture: embedding -> bidirectional LSTM -> dropout ->
    dense ReLU -> dropout -> 3-way softmax.

    Args:
        vocab_size: Size of the vocabulary (number of unique words + 1).

    Returns:
        A compiled Keras Sequential model.
    """
    net = Sequential()
    net.add(Input(shape=(max_sequence_length,)))
    # Map token ids to dense vectors.
    net.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim))
    # Read the sequence in both directions; keep only the final state.
    net.add(Bidirectional(LSTM(128, return_sequences=False)))
    net.add(Dropout(0.5))
    net.add(Dense(64, activation='relu'))
    net.add(Dropout(0.5))
    # Three output classes: negative / neutral / positive.
    net.add(Dense(3, activation='softmax'))
    net.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    return net
@st.cache_resource
def load_model_and_tokenizer():
    """
    Load the fitted tokenizer and the trained model weights from disk.

    Cached by Streamlit so the artifacts are read only once per
    server process.

    Returns:
        tuple: (model, tokenizer)
    """
    # NOTE(review): pickle.load is only safe because this is a local,
    # trusted artifact — never load pickles from untrusted sources.
    with open('tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)
    # Rebuild the architecture, then restore the trained weights.
    model = create_model(len(tokenizer.word_index) + 1)
    model.load_weights('lstm.keras')
    return model, tokenizer
def preprocess_text(text, tokenizer):
    """
    Convert raw text into a fixed-length integer sequence for the model.

    Args:
        text: Input text string.
        tokenizer: Fitted Keras tokenizer object.

    Returns:
        A (1, max_sequence_length) padded array of token ids.
    """
    encoded = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(encoded, maxlen=max_sequence_length)
    return padded
def main():
    """Render the Streamlit sentiment-analysis UI and handle one request."""
    st.title("Sentiment Analyzer")

    try:
        model, tokenizer = load_model_and_tokenizer()
    except Exception as e:
        # Surface the load failure in the UI instead of crashing the app.
        st.error(f"Error loading model: {str(e)}")
        return

    text = st.text_area("Enter text to analyze:", height=150)

    # Guard clauses: do nothing until the button is pressed,
    # and warn when the input box is empty.
    if not st.button("Analyze"):
        return
    if not text:
        st.warning("Please enter text to analyze.")
        return

    # Encode the input and run it through the network.
    model_input = preprocess_text(text, tokenizer)
    prediction = model.predict(model_input)

    sentiments = ['Negative', 'Neutral', 'Positive']
    result = sentiments[np.argmax(prediction)]
    st.write(f"Detected sentiment: **{result}**")

    # Show the per-class probability distribution.
    for sent, prob in zip(sentiments, prediction[0]):
        st.progress(float(prob))
        st.write(f"{sent}: {prob:.2%}")


if __name__ == "__main__":
    main()