VicGerardoPR committed on
Commit
c0b8b15
·
verified ·
1 Parent(s): 4012d0a

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. app.py +102 -0
  3. lstm.keras +3 -0
  4. requirements.txt +3 -0
  5. tokenizer.pickle +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ lstm.keras filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import tensorflow as tf
3
+ from tensorflow.keras.models import Sequential
4
+ from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, Dropout, Dense
5
+ from tensorflow.keras.preprocessing.text import Tokenizer
6
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
7
+ import numpy as np
8
+ import pickle
9
+
# Text-processing settings shared by the model definition and the preprocessing step
max_sequence_length = 100  # Every input is padded/truncated to this many tokens
embedding_dim = 100  # Length of each word-embedding vector
13
+
def create_model(vocab_size):
    """
    Build and compile the bidirectional LSTM sentiment classifier.

    Args:
        vocab_size: Number of rows in the embedding table
            (number of unique words + 1)

    Returns:
        Compiled Keras Sequential model with a 3-way softmax output
    """
    # Layers are listed in the order they are applied.
    layer_stack = [
        Input(shape=(max_sequence_length,)),  # Fixed-length sequence of token ids
        Embedding(input_dim=vocab_size, output_dim=embedding_dim),  # Token id -> dense vector
        Bidirectional(LSTM(128, return_sequences=False)),  # Only the final state is kept
        Dropout(0.5),  # Regularization after the recurrent layer
        Dense(64, activation='relu'),  # Hidden projection
        Dropout(0.5),  # Regularization before the classifier head
        Dense(3, activation='softmax'),  # One probability per class
    ]
    classifier = Sequential(layer_stack)
    classifier.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier
35
+
@st.cache_resource
def load_model_and_tokenizer():
    """
    Restore the fitted tokenizer and the trained network from disk.

    The tokenizer is read from 'tokenizer.pickle'; the network is rebuilt
    with create_model() and its weights are read from 'lstm.keras'. Both
    paths are relative to the current working directory.

    Returns:
        tuple: (model, tokenizer)
    """
    # Unpickling can execute arbitrary code, so this file must only ever
    # come from a trusted source (here: shipped alongside the app).
    with open('tokenizer.pickle', 'rb') as pickle_file:
        fitted_tokenizer = pickle.load(pickle_file)

    # Vocabulary size is the number of known words plus one, matching what
    # create_model() documents for its argument.
    network = create_model(len(fitted_tokenizer.word_index) + 1)
    network.load_weights('lstm.keras')
    return network, fitted_tokenizer
53
+
def preprocess_text(text, tokenizer):
    """
    Turn one raw text string into a fixed-length model input.

    Args:
        text: Input text string
        tokenizer: Keras tokenizer object used to map words to integer ids

    Returns:
        Result of pad_sequences for a batch containing this single text,
        with maxlen set to max_sequence_length
    """
    # The tokenizer works on batches, so the single text is wrapped in a list.
    token_ids = tokenizer.texts_to_sequences([text])
    padded_batch = pad_sequences(token_ids, maxlen=max_sequence_length)
    return padded_batch
67
+
def main():
    """
    Render the Streamlit sentiment-analysis page.

    Loads the model and tokenizer, reads text from a text area and, when
    the "Analyze" button is pressed, shows the predicted sentiment together
    with the probability of each class. If loading fails, the error is shown
    on the page and nothing else is rendered. Empty or whitespace-only input
    produces a warning instead of a prediction.
    """
    st.title("Sentiment Analyzer")

    try:
        # Load model and tokenizer
        model, tokenizer = load_model_and_tokenizer()
    except Exception as e:
        # Top-level UI boundary: report the failure on the page instead of
        # letting the script crash with a traceback.
        st.error(f"Error loading model: {str(e)}")
        return

    # Text input area
    text = st.text_area("Enter text to analyze:", height=150)

    # Nothing to do until the user asks for an analysis.
    if not st.button("Analyze"):
        return

    # Whitespace-only input contains no words; without this check the model
    # would be run on an all-padding sequence and report a meaningless result.
    if not text or not text.strip():
        st.warning("Please enter text to analyze.")
        return

    # Process input and make prediction
    processed_text = preprocess_text(text, tokenizer)
    prediction = model.predict(processed_text)

    # NOTE(review): assumes the model's output indices follow this label
    # order — confirm against the training label encoding.
    sentiments = ['Negative', 'Neutral', 'Positive']

    # The batch holds exactly one text, so row 0 carries its class probabilities.
    probabilities = prediction[0]
    result = sentiments[int(np.argmax(probabilities))]

    # Display results
    st.write(f"Detected sentiment: **{result}**")

    # Show probability distribution
    for sent, prob in zip(sentiments, probabilities):
        st.progress(float(prob))
        st.write(f"{sent}: {prob:.2%}")
100
+
# Start the app only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()
lstm.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:220e8b73da07021e2f69a0f30b727203550f516d22fec5b4a6dc77fb23eb00f2
3
+ size 134187206
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ streamlit
2
+ tensorflow
3
+ numpy
tokenizer.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d5fbd6c4709795b7258e837f7c96bb5265e03e747f5b741c178c213ebf2175b
3
+ size 4803481