Spaces:

priyesh17
/

ImageCaptionGenerator

Sleeping

+import streamlit as st
+import tensorflow as tf
+from PIL import Image
+import numpy as np
+import io
+from util import generate_caption
+# Function to load the model
+# Streamlit app
+st.title("Image Caption Generator")
+uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg"])
+if uploaded_file is not None:
+    image = Image.open(uploaded_file)
+    image = image.resize((224, 224))
+    st.image(image, caption='Uploaded Image', use_column_width=True)
+    st.write("")
+    st.write("Generating caption...")
+    caption = generate_caption(image)
+    st.write(f"Caption: {caption}")
+# Add some information about the app
+st.sidebar.header("About")
+st.sidebar.info("This app uses a Deep Learning model(RNN model) along with VGG16 model(feature extractor) to generate captions for uploaded images.")
+st.sidebar.info("Upload an image to get started!")
+st.sidebar.info("The model is trained on Flickr8k dataset.")
+st.sidebar.info("By Priyesh Gawali")
+st.sidebar.markdown("[Github repository](https://github.com/Roronoa-17/Image_Caption_Generator.git)")

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+tensorflow
+numpy
+streamlit==1.35.0
+scikit-learn
+pickle-mixin
+Pillow
+gdown

tokenizer.pickle ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e21b2723c91942491147ae3d21fc27cb9afac743712c76497f6ddc376b24d8bf
+size 334824

util.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import tensorflow as tf
+from tensorflow.keras.applications.vgg16 import preprocess_input
+from tensorflow.keras.preprocessing.image import img_to_array
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+import numpy as np
+import pickle
+CapGenerator = tf.keras.models.load_model('models/CapGen.h5')  # captioning model, loaded once at import; passed to predict_caption by generate_caption
+VGGMod = tf.keras.models.load_model('models/VGGModel.h5')  # model that feature_extractor runs on the preprocessed image
+max_length = 35  # padding length for input sequences and upper bound on decoding steps in predict_caption
+with open('models/tokenizer.pickle', 'rb') as handle:  # NOTE(review): pickle.load can execute arbitrary code; only ship a trusted tokenizer file
+    tokenizer = pickle.load(handle)  # object providing word_index and texts_to_sequences (used below)
+vocab_size = len(tokenizer.word_index) + 1  # word count + 1; presumably index 0 is reserved for padding - TODO confirm
+def idx_to_word(integer, tokenizer):
+    for word, index in tokenizer.word_index.items():
+        if index == integer:
+            return word
+    return None
+def predict_caption(model, image, tokenizer, max_length=max_length):
+    # add start tag for generation process
+    in_text = 'startseq'
+    # iterate over the max length of sequence
+    for i in range(max_length):
+        # encode input sequence
+        sequence = tokenizer.texts_to_sequences([in_text])[0]
+        # pad the sequence
+        sequence = pad_sequences([sequence], max_length)
+        # predict next word
+        yhat = model.predict([image, sequence], verbose=0)
+        # get index with high probability
+        yhat = np.argmax(yhat)
+        # convert index to word
+        word = idx_to_word(yhat, tokenizer)
+        # stop if word not found
+        if word is None:
+            break
+        # append word as input for generating next word
+        in_text += " " + word
+        # stop if we reach end tag
+        if word == 'endseq':
+            break
+    return in_text
+def feature_extractor(image):
+    # Img to np array
+    image = img_to_array(image)
+    # Reshaping
+    image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
+    # Preprocessing for passing through VGG16
+    image = preprocess_input(image)
+    feature = VGGMod.predict(image, verbose=0)
+    return feature
+def generate_caption(image_name):
+    y_pred = predict_caption(CapGenerator, feature_extractor(image_name), tokenizer, max_length)
+    y_pred = y_pred[8:-7].upper()
+    return y_pred