Pulkit996 committed on
Commit
8ec6f13
·
verified ·
1 Parent(s): b057b3a

Upload 3 files

Browse files
Files changed (3) hide show
  1. best_model.h5 +3 -0
  2. main.py +109 -0
  3. tokenizer.pkl +3 -0
best_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a55ea2b508446c771d4a931edc4e75858a8fda08dcbd222711ea7712ec34b3cb
3
+ size 71972196
main.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import numpy as np
4
+ import pickle
5
+ import tensorflow
6
+ from tensorflow.keras.models import load_model
7
+ from tensorflow.keras.preprocessing.image import load_img, img_to_array
8
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
9
+ from tensorflow.keras.applications.vgg16 import preprocess_input
10
+ from tensorflow.keras.applications.vgg16 import VGG16
11
+ from tensorflow.keras.models import Model
12
# Locations of the saved artifacts; replace with the actual paths if the
# files live somewhere else.
model_path = "best_model.h5"
tokenizer_path = "tokenizer.pkl"

# Maximum caption length used when padding sequences and as the cap on the
# number of decoding steps; replace with the actual max length if it differs.
max_length = 35

# Load the pre-trained model.
model = load_model(model_path)

# Load the pickled tokenizer.
with open(tokenizer_path, mode="rb") as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)
23
+
24
+ # Function to generate captions
25
def idx_to_word(integer, tokenizer):
    """Return the word that ``tokenizer`` maps to the index ``integer``.

    Args:
        integer: Vocabulary index to look up.
        tokenizer: Object exposing a ``word_index`` mapping of word -> index.
            If it also exposes an ``index_word`` mapping (index -> word),
            that mapping is consulted first.

    Returns:
        The matching word, or ``None`` when no word has that index.
    """
    # Fast path: a ready-made index -> word table avoids scanning the whole
    # vocabulary once per generated token.
    reverse_index = getattr(tokenizer, "index_word", None)
    if reverse_index:
        found = reverse_index.get(integer)
        if found is not None:
            return found

    # Fallback: linear scan of word -> index, returning the first match.
    return next(
        (word for word, index in tokenizer.word_index.items() if index == integer),
        None,
    )
30
+
31
def predict_caption(model, image, tokenizer, max_length):
    """Greedily build a caption, one predicted word per step.

    Args:
        model: Model whose ``predict`` is called with ``[image, sequence]``.
        image: First model input, passed through unchanged on every step.
        tokenizer: Provides ``texts_to_sequences`` and the vocabulary used
            by ``idx_to_word``.
        max_length: Padding length for the text input and the maximum
            number of decoding steps.

    Returns:
        The space-joined caption. It always begins with ``'startseq'`` and
        ends with ``'endseq'`` if that word was generated.
    """
    # The caption so far, seeded with the start tag.
    words = ['startseq']

    for _ in range(max_length):
        # Encode the text generated so far and pad it to the fixed length.
        encoded = tokenizer.texts_to_sequences([' '.join(words)])[0]
        padded = pad_sequences([encoded], max_length)

        # Take the highest-scoring index as the next word.
        scores = model.predict([image, padded], verbose=0)
        next_word = idx_to_word(np.argmax(scores), tokenizer)

        # An index with no vocabulary entry ends generation.
        if next_word is None:
            break

        words.append(next_word)

        # The end tag is kept in the output, then generation stops.
        if next_word == 'endseq':
            break

    return ' '.join(words)
64
+
65
+ # Streamlit app
66
# Build the image feature extractor: take a full VGG16 network and expose
# the output of its second-to-last layer instead of its final layer.
_full_vgg = VGG16()
vgg_model = Model(
    inputs=_full_vgg.inputs,
    outputs=_full_vgg.layers[-2].output,
)
70
+
71
+
72
+ # ...
73
+
74
def main():
    """Render the Streamlit page: upload an image, then show its caption.

    The uploaded image is displayed, converted to a 224x224 RGB array,
    passed through ``vgg_model`` to get a feature vector, and that vector
    is fed to ``predict_caption``. The caption is shown without the
    ``startseq`` / ``endseq`` markers used during generation.
    """
    st.title("Image Caption Generator")
    uploaded_file = st.file_uploader("Choose an image...", type="jpg")

    # Nothing to do until the user has picked a file.
    if uploaded_file is None:
        return

    image = Image.open(uploaded_file)

    # Display the uploaded image with reduced width.
    st.image(image, caption="Uploaded Image.", use_column_width=True)
    st.markdown(
        f'<style>img{{max-width: 300px; max-height: 300px;margin: auto;}}</style>',
        unsafe_allow_html=True
    )

    # Preprocess the image for model prediction. Converting to RGB first
    # guarantees three channels: grayscale or CMYK JPEGs would otherwise
    # produce a 1- or 4-channel array that the VGG network cannot accept.
    model_input = image.convert("RGB").resize((224, 224))
    image_array = np.expand_dims(img_to_array(model_input), axis=0)
    image_array = preprocess_input(image_array)

    # Generate feature vector using the VGG model.
    feature = vgg_model.predict(image_array, verbose=0)

    # Generate caption.
    caption = predict_caption(model, feature, tokenizer, max_length)

    # The start/end tags are decoding markers, not part of the caption the
    # user should read, so drop them before display.
    visible_words = [
        word for word in caption.split() if word not in ("startseq", "endseq")
    ]

    # Display the generated caption.
    st.subheader("Generated Caption:")
    st.write(" ".join(visible_words))
103
+
104
+
105
+ # ...
106
+
107
+
108
+ if __name__ == "__main__":
109
+ main()
tokenizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ce6e9f33f50ed0710e5b22f5a2114ba9538b0ff7a859d54b8611126ab192d32
3
+ size 393484