Spaces:

Pulkit996
/

Image_Caption_Generator

Build error

App Files Files Community

Pulkit996 commited on Feb 12, 2024

Commit

8ec6f13

verified ·

1 Parent(s): b057b3a

Upload 3 files

Browse files

Files changed (3) hide show

best_model.h5 +3 -0
main.py +109 -0
tokenizer.pkl +3 -0

best_model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a55ea2b508446c771d4a931edc4e75858a8fda08dcbd222711ea7712ec34b3cb
+size 71972196

main.py ADDED Viewed

	@@ -0,0 +1,109 @@

+import streamlit as st
+from PIL import Image
+import numpy as np
+import pickle
+import tensorflow
+from tensorflow.keras.models import load_model
+from tensorflow.keras.preprocessing.image import load_img, img_to_array
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from tensorflow.keras.applications.vgg16 import preprocess_input
+from tensorflow.keras.applications.vgg16 import VGG16
+from tensorflow.keras.models import Model
+# Load the pre-trained model
+model_path = "best_model.h5"  # Replace with the actual path
+model = load_model(model_path)
+# Load the tokenizer
+tokenizer_path = "tokenizer.pkl"  # Replace with the actual path
+with open(tokenizer_path, 'rb') as f:
+    tokenizer = pickle.load(f)
+# Set the maximum length for captions
+max_length = 35  # Replace with the actual max length
+# Function to generate captions
+def idx_to_word(integer, tokenizer):
+    for word, index in tokenizer.word_index.items():
+        if index == integer:
+            return word
+    return None
+def predict_caption(model, image, tokenizer, max_length):
+    # Add start tag for generation process
+    in_text = 'startseq'
+    # Iterate over the max length of sequence
+    for i in range(max_length):
+        # Encode input sequence
+        sequence = tokenizer.texts_to_sequences([in_text])[0]
+        # Pad the sequence
+        sequence = pad_sequences([sequence], max_length)
+        # Predict next word
+        yhat = model.predict([image, sequence], verbose=0)
+        # Get index with high probability
+        yhat = np.argmax(yhat)
+        # Convert index to word
+        word = idx_to_word(yhat, tokenizer)
+        # Stop if word not found
+        if word is None:
+            break
+        # Append word as input for generating the next word
+        in_text += " " + word
+        # Stop if we reach end tag
+        if word == 'endseq':
+            break
+    return in_text
+# Streamlit app
+vgg_model = VGG16()
+# restructure the model
+vgg_model = Model(inputs=vgg_model.inputs,
+                  outputs=vgg_model.layers[-2].output)
+# ...
+def main():
+    st.title("Image Caption Generator")
+    uploaded_file = st.file_uploader("Choose an image...", type="jpg")
+    if uploaded_file is not None:
+        image = Image.open(uploaded_file)
+        # Display the uploaded image with reduced width
+        st.image(image, caption="Uploaded Image.", use_column_width=True)
+        st.markdown(
+            f'<style>img{{max-width: 300px; max-height: 300px;margin: auto;}}</style>',
+            unsafe_allow_html=True
+        )
+        # Preprocess the image for model prediction
+        image = Image.open(uploaded_file)
+        image = image.resize((224, 224))
+        image_array = img_to_array(image)
+        image_array = image_array.reshape((1, image_array.shape[0], image_array.shape[1], image_array.shape[2]))
+        image_array = preprocess_input(image_array)
+        # Generate feature vector using the VGG model
+        feature = vgg_model.predict(image_array, verbose=0)
+        # Generate caption
+        caption = predict_caption(model, feature, tokenizer, max_length)
+        # Display the generated caption
+        st.subheader("Generated Caption:")
+        st.write(caption)
+# ...
+if __name__ == "__main__":
+    main()

tokenizer.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ce6e9f33f50ed0710e5b22f5a2114ba9538b0ff7a859d54b8611126ab192d32
+size 393484