UtkarshShivhare
/

image_captioning

Model card Files Files and versions

UtkarshShivhare committed on Jan 31, 2024

Commit

6e7fbce

·

verified ·

1 Parent(s): e58d43a

Upload 2 files

Files changed (2) hide show

app.py +89 -0
image_caption.h5 +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import streamlit as st
+import tensorflow as tf
+from PIL import Image
+import numpy as np
+import json
+from tensorflow.keras.applications.vgg16 import VGG16,preprocess_input
+from tensorflow.keras.preprocessing.image import img_to_array
+from tensorflow.keras.preprocessing.text import Tokenizer,tokenizer_from_json
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from tensorflow.keras.models import Model
+from keras.models import load_model
+# Load the captioning model from the .h5 file (relative path, resolved against the working directory)
+model = load_model('image_caption.h5')
+with open('tokenizer_config.json', 'r') as f:
+    tokenizer_config = json.load(f)  # parsed JSON content of the saved tokenizer configuration
+tokenizer = tokenizer_from_json(tokenizer_config)  # NOTE(review): presumably the file stores the tokenizer JSON as a string -- confirm
+# Disabled alternative (uses eval): tokenizer.word_index = eval(tokenizer_config)['word_index']
+max_length=35  # passed to predict_caption as both the padding length and the maximum number of generated words
+# Build the image feature extractor from VGG16 constructed with its default arguments
+vgg_model = VGG16()
+vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-2].output)  # re-wrap so the output is that of the second-to-last layer
+# Set the browser page title and use the wide page layout
+st.set_page_config(page_title="Image Captioning App", layout="wide")
+# Function to preprocess the input image
+def preprocess_image(image):
+    image = image.convert("RGB")
+    image = image.resize((224, 224))
+    image = img_to_array(image)
+    image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
+    image = preprocess_input(image)
+    return image
+# Function to make predictions on the input image
+def predict(image):
+    image = preprocess_image(image)
+    feature = vgg_model.predict(image, verbose=0)
+    preds = predict_caption(model, feature, tokenizer, max_length)
+    preds=preds[8:-7]
+    return preds
+def idx_word(integer,tok):
+    for word,index in tok.word_index.items():
+        if index== integer:
+            return word
+    return None
+def predict_caption(model,image,tok,max_len):
+    in_text="startseq"
+    for i in range(max_len):
+        seq=tok.texts_to_sequences([in_text])[0]
+        seq=pad_sequences([seq],max_len)
+        yhat = model.predict([image, seq], verbose=0)
+        yhat = np.argmax(yhat)
+        word = idx_word(yhat, tok)
+        if word is None:
+            break
+        in_text += " " + word
+        if word == 'endseq':
+            break
+    return in_text
+# Streamlit app
+def main():
+    st.title("Image Captioning App")
+    st.write("Upload an image and the app will predict its class.")
+    uploaded_image = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png"])
+    if uploaded_image is not None:
+        image = Image.open(uploaded_image)
+        st.image(image, caption='Uploaded Image', use_column_width=True)
+        st.write("")
+        if st.button("Generate Caption"):
+            with st.spinner("Generating..."):
+                predictions = predict(image)
+            st.write(f"Top Caption:{predictions}")
+# Call main() only when this file is run as the main module, not when it is imported
+if __name__ == "__main__":
+    main()

image_caption.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28aea777059c891f1509787ea99576b58194a89905fd941f82de82695bd0b27e
+size 71970004