Spaces:

lamelight07
/

ImageCaptioning_streamlit

Sleeping

App Files Files Community

sameerr007 committed on May 15, 2023

Commit

5e2bbb8

1 Parent(s): 1ce9e8d

Create app.py

Browse files

Files changed (1) hide show

app.py +69 -0

app.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import streamlit as st
+import pickle
+st.header("Image Captioner")
+st.markdown("Building the framework may take upto a minute. Please be patient. Thank you!")
+features=pickle.load(open("features.pkl","rb"))
+all_captions=pickle.load(open("all_captions.pkl","rb"))
+from tensorflow.keras.preprocessing.text import Tokenizer
+tokenizer = Tokenizer()
+tokenizer.fit_on_texts(all_captions)
+vocab_size = len(tokenizer.word_index) + 1
+max_length = max(len(caption.split()) for caption in all_captions)
+from tensorflow import keras
+model = keras.models.load_model("best_model.h5")
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+def idx_to_word(integer, tokenizer):
+    for word, index in tokenizer.word_index.items():
+        if index == integer:
+            return word
+    return None
+# generate caption for an image
+import numpy as np
+def predict_caption(model, image, tokenizer, max_length):
+    # add start tag for generation process
+    in_text = 'startseq'
+    # iterate over the max length of sequence
+    for i in range(max_length):
+        # encode input sequence
+        sequence = tokenizer.texts_to_sequences([in_text])[0]
+        # pad the sequence
+        sequence = pad_sequences([sequence], max_length)
+        # predict next word
+        yhat = model.predict([image, sequence], verbose=0)
+        # get index with high probability
+        yhat = np.argmax(yhat)
+        # convert index to word
+        word = idx_to_word(yhat, tokenizer)
+        # stop if word not found
+        if word is None:
+            break
+        in_text += " " + word
+        if word == 'endseq':
+            break
+    return in_text
+from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
+from tensorflow.keras.models import Model
+import pyttsx3
+engine=pyttsx3.init()
+vgg_model = VGG16()
+vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-2].output)
+from tensorflow.keras.preprocessing.image import img_to_array
+from PIL import Image
+uploaded_image=st.file_uploader("Upload image to be captioned",type=["jpg","png","jpeg",])
+image_path="bushman.jpeg"
+if(uploaded_image!=None):
+    display_image=Image.open(uploaded_image)
+    st.image(display_image)
+    if st.button("Caption"):
+        display_image=display_image.resize((224,224))
+        image = img_to_array(display_image)
+        image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
+        image = preprocess_input(image)
+        feature = vgg_model.predict(image, verbose=0)
+        final=predict_caption(model, feature, tokenizer, max_length)
+        final_output=((" ").join(final.split(" ")[1:len(final.split(" "))-1]))
+        engine.say(final_output)
+        engine.runAndWait()