MogulojuSai commited on
Commit
2eaa97d
·
verified ·
1 Parent(s): 9ae9103
Files changed (6) hide show
  1. app.py +86 -0
  2. hamlet.txt +0 -0
  3. lstm.ipynb +0 -0
  4. next_word_lstm.h5 +3 -0
  5. requirements.txt +12 -0
  6. tokenizer.pickle +3 -0
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pickle
4
+ from tensorflow.keras.models import load_model
5
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
6
+ from tensorflow.keras.preprocessing.text import Tokenizer
7
+ from tensorflow.keras.utils import get_custom_objects
8
+ from tensorflow.keras.initializers import Orthogonal
9
+
10
# Register the Orthogonal initializer for compatibility: models saved by
# other Keras/TF versions may serialize it under the bare name "Orthogonal",
# and load_model() needs this entry to deserialize the .h5 file.
get_custom_objects()["Orthogonal"] = Orthogonal
12
+
13
# Load the LSTM model
@st.cache_resource
def load_lstm_model():
    """Load and cache the pretrained next-word LSTM model.

    Returns:
        The Keras model on success, or None if loading fails (the error
        is shown in the Streamlit UI instead of being raised).
    """
    # cache_resource keeps the ~14 MB model in memory across Streamlit
    # reruns, instead of re-reading the .h5 file on every widget event.
    # NOTE(review): a failed load caches None until the app restarts.
    try:
        model = load_model('next_word_lstm.h5')
        st.success("LSTM model loaded successfully!")
        return model
    except Exception as e:
        # Best-effort: surface the problem in the UI; caller handles None.
        st.error(f"Error loading the model: {e}")
        return None
22
+
23
# Load the tokenizer
@st.cache_resource
def load_tokenizer():
    """Load and cache the fitted Keras tokenizer from its pickle file.

    Returns:
        The tokenizer on success, or None if loading fails (the error
        is shown in the Streamlit UI instead of being raised).
    """
    # cache_resource avoids unpickling the tokenizer on every rerun.
    # NOTE(review): a failed load caches None until the app restarts.
    try:
        with open('tokenizer.pickle', 'rb') as handle:
            tokenizer = pickle.load(handle)
        st.success("Tokenizer loaded successfully!")
        return tokenizer
    except Exception as e:
        # Best-effort: surface the problem in the UI; caller handles None.
        st.error(f"Error loading the tokenizer: {e}")
        return None
33
+
34
# Function to predict the next word
def predict_next_word(model, tokenizer, text, max_sequence_len):
    """Predict the next word for *text* using the trained LSTM.

    Args:
        model: trained Keras next-word model.
        tokenizer: Keras Tokenizer fitted on the training corpus.
        text: seed sentence typed by the user.
        max_sequence_len: model input length + 1 (computed by the caller).

    Returns:
        The predicted word, or None if the prediction maps to no word
        (e.g. the padding index) or an error occurred (shown in the UI).
    """
    try:
        # Convert the input text into a sequence of tokens
        token_list = tokenizer.texts_to_sequences([text])[0]

        # Keep only the last (max_sequence_len - 1) tokens so the
        # sequence matches the model's expected input length.
        if len(token_list) >= max_sequence_len:
            token_list = token_list[-(max_sequence_len - 1):]

        # Left-pad so short inputs also reach the required length.
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')

        # Predict the next word
        predicted = model.predict(token_list, verbose=0)
        predicted_index = int(np.argmax(predicted, axis=1)[0])

        # O(1) reverse lookup via index_word instead of scanning the whole
        # word_index dict; index 0 (padding) has no word and yields None.
        return tokenizer.index_word.get(predicted_index)
    except Exception as e:
        st.error(f"Error during prediction: {e}")
        return None
59
+
60
# Streamlit App
def main():
    """Render the Streamlit UI and wire it to the model and tokenizer."""
    st.title("Next Word Prediction with LSTM")
    st.write("This app predicts the next word in a sentence using an LSTM model trained on text data.")

    # Load model and tokenizer; either may be None on failure.
    model = load_lstm_model()
    tokenizer = load_tokenizer()

    # Input text box
    input_text = st.text_input("Enter a sentence:")

    # Predict button
    if st.button("Predict Next Word"):
        # Guard first: bail out with an error when loading failed.
        if model is None or tokenizer is None:
            st.error("Model or tokenizer not loaded properly.")
        else:
            # The model consumes input_shape[1] tokens; +1 accounts for
            # the target word, matching how predict_next_word trims.
            max_sequence_len = model.input_shape[1] + 1
            next_word = predict_next_word(model, tokenizer, input_text, max_sequence_len)

            if next_word:
                st.write(f"Predicted next word: **{next_word}**")
            else:
                st.warning("Could not predict the next word. Please try a different input.")


if __name__ == "__main__":
    main()
hamlet.txt ADDED
The diff for this file is too large to render. See raw diff
 
lstm.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
next_word_lstm.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f9cfd56e804db2db801c99c7e3c3bd2c0c6f8be661e31ed4464bff70a88dc34
3
+ size 14675224
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ipykernel
2
+ numpy
3
+ pandas
4
+ matplotlib
5
+ seaborn
6
+ flask
7
+ scikit-learn
8
+ bs4
9
+ requests
10
+ tensorflow
11
+ tensorboard
12
+ nltk
tokenizer.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01ad43d31bbf9bd1f266d61ba781e5073f27c97b8900aa4400ad16c7d7ee57c8
3
+ size 187422