larrysim committed on
Commit
e0916e6
·
verified ·
1 Parent(s): 3dcce98

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -0
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import tensorflow as tf
3
+ from tensorflow.keras.models import load_model
4
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
5
+ import numpy as np
6
+ import pickle
7
+ import re
8
+
9
# Browser-tab title and icon, plus the "centered" page layout.
st.set_page_config(page_title="Next Word Predictor", page_icon="🔮", layout="centered")

# Stylesheet injected as raw HTML: page background, text-input colours, and
# the ".prediction-box" card styling.
_PAGE_CSS = """
<style>
.main {
background-color: #f5f5f5;
}
.stTextInput>div>div>input {
background-color: #ffffff;
color: #000000;
}
.prediction-box {
background-color: #e6f7ff;
padding: 15px;
border-radius: 10px;
border-left: 5px solid #1890ff;
margin-top: 20px;
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
35
+
36
@st.cache_resource
def _load_artifacts():
    """Load the Keras model and the pickled tokenizer; cached on success.

    Any exception from ``load_model``, ``open`` or ``pickle.load`` propagates
    to the caller. Keeping the ``try/except`` out of the cached function
    matters: ``st.cache_resource`` stores whatever the function *returns*, so
    returning a ``(None, None)`` failure marker from here would be cached and
    the app would never retry the load.
    """
    model = load_model('nextword_lstm_model.h5')
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only deploy a tokenizer.pkl that comes from a trusted source.
    with open('tokenizer.pkl', 'rb') as f:
        tokenizer = pickle.load(f)
    return model, tokenizer


def load_models():
    """Return ``(model, tokenizer)``, or ``(None, None)`` if loading fails.

    Successful loads are served from the ``st.cache_resource`` cache of
    ``_load_artifacts``. On failure the error is shown with ``st.error`` and
    nothing is cached, so the next script rerun attempts the load again.
    """
    try:
        return _load_artifacts()
    except Exception as e:  # UI boundary: surface any load failure to the user
        st.error(f"Error loading model: {str(e)}")
        return None, None
47
+
48
def predict_next_word(model, tokenizer, seed_text, max_seq_len):
    """Return the model's top-scoring next word for ``seed_text``.

    The text is lower-cased, stripped of every character that is neither a
    word character nor whitespace, and trimmed. It is then converted with
    ``tokenizer.texts_to_sequences``, pre-padded to ``max_seq_len - 1`` and
    passed to ``model.predict``. The arg-max index of the output is mapped
    back to a word through ``tokenizer.word_index``.

    Returns:
        The predicted word, capitalised; or one of these messages:
        "Please enter more meaningful text" when the tokenizer yields no
        tokens, "No prediction available" when no vocabulary entry has the
        predicted index, or "Error in prediction: ..." when any exception
        is raised along the way.
    """
    try:
        cleaned = re.sub(r'[^\w\s]', '', seed_text.lower()).strip()
        sequences = tokenizer.texts_to_sequences([cleaned])

        # Guard: the tokenizer produced nothing usable for this input.
        if not (sequences and sequences[0]):
            return "Please enter more meaningful text"

        model_input = pad_sequences(
            [sequences[0]], maxlen=max_seq_len-1, padding='pre'
        )
        scores = model.predict(model_input, verbose=0)
        best_index = np.argmax(scores, axis=-1)[0]

        # Reverse lookup: first vocabulary entry whose index matches.
        match = next(
            (
                word
                for word, index in tokenizer.word_index.items()
                if index == best_index
            ),
            None,
        )
        if match is None:
            return "No prediction available"
        return match.capitalize()
    except Exception as e:
        return f"Error in prediction: {str(e)}"
75
+
76
def main():
    """Render the Streamlit page: title, text input, prediction card, About.

    Loads the model and tokenizer through ``load_models`` and stops with an
    error message if either is missing. When the "Predict Next Word" button
    is pressed with non-blank input, the text is passed to
    ``predict_next_word`` and the result is shown in a ``.prediction-box``
    card. The About section is rendered on every run that gets past loading.
    """
    # Function-local import: only this block needs it, for escaping user text.
    import html

    st.title("🔮 Next Word Predictor")
    st.markdown("Enter some text and I'll predict the next word using an LSTM model trained on a large corpus.")

    # Load model and tokenizer
    model, tokenizer = load_models()
    if model is None or tokenizer is None:
        st.error("Failed to load the model. Please check if model files are available.")
        return

    # Hard-coded sequence length; adjust to the value used when the model
    # was trained (predict_next_word pads inputs to max_seq_len - 1).
    max_seq_len = 20

    # Input section
    st.subheader("Enter your text")
    seed_text = st.text_input(
        "Start typing...",
        placeholder="Type something like 'I am going to'",
        key="text_input",
    )

    # Prediction button
    if st.button("Predict Next Word", type="primary"):
        if seed_text.strip():
            with st.spinner("Predicting..."):
                next_word = predict_next_word(model, tokenizer, seed_text, max_seq_len)

            # The card is rendered with unsafe_allow_html=True, so anything
            # interpolated into it must be escaped. seed_text is typed by the
            # user, and next_word may carry an exception message.
            safe_seed = html.escape(seed_text)
            safe_word = html.escape(next_word)
            st.markdown(
                "\n"
                '<div class="prediction-box">\n'
                "<h3>Prediction</h3>\n"
                f'<p style="font-size: 20px; margin-bottom: 0;"><strong>{safe_seed} <span style="color: #1890ff;">{safe_word}</span></strong></p>\n'
                "</div>\n",
                unsafe_allow_html=True,
            )
        else:
            st.warning("Please enter some text first!")

    # Information section
    st.markdown("---")
    st.subheader("About")
    about_lines = (
        "",
        "This app uses an LSTM neural network trained on a large text corpus to predict the next word in a sequence.",
        "",
        "**How it works:**",
        "- The model was trained on 20,000 text samples",
        "- Uses word embeddings and LSTM layers",
        "- Predicts the most likely next word based on context",
        "",
        "**Try phrases like:**",
        '- "I am going to"',
        '- "The weather is"',
        '- "Machine learning is"',
        "",
    )
    st.markdown("\n".join(about_lines))


if __name__ == "__main__":
    main()