import pickle

import numpy as np
import streamlit as st
from PIL import Image
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the pre-trained captioning model
model_path = "best_model.h5"  # Replace with the actual path
model = load_model(model_path)

# Load the tokenizer fitted on the training captions
tokenizer_path = "tokenizer.pkl"  # Replace with the actual path
with open(tokenizer_path, "rb") as f:
    tokenizer = pickle.load(f)

# Set the maximum caption length used during training
max_length = 35  # Replace with the actual max length


def idx_to_word(integer, tokenizer):
    """Map a predicted token index back to its word."""
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None


def predict_caption(model, image, tokenizer, max_length):
    """Greedily generate a caption for a precomputed image feature vector."""
    # Add start tag for the generation process
    in_text = "startseq"
    # Iterate up to the maximum sequence length
    for _ in range(max_length):
        # Encode the input sequence
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        # Pad the sequence
        sequence = pad_sequences([sequence], maxlen=max_length)
        # Predict the next word
        yhat = model.predict([image, sequence], verbose=0)
        # Take the index with the highest probability
        yhat = np.argmax(yhat)
        # Convert the index back to a word
        word = idx_to_word(yhat, tokenizer)
        # Stop if the word is not found
        if word is None:
            break
        # Append the word as input for generating the next word
        in_text += " " + word
        # Stop when we reach the end tag
        if word == "endseq":
            break
    return in_text


# Feature extractor: VGG16 restructured to output the penultimate (fc2) layer
vgg_model = VGG16()
vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-2].output)

# ...


def main():
    st.title("Image Caption Generator")

    uploaded_file = st.file_uploader("Choose an image...", type="jpg")

    if uploaded_file is not None:
        image = Image.open(uploaded_file)

        # Display the uploaded image
        st.image(image, caption="Uploaded Image.", use_column_width=True)

        # Preprocess the image for model prediction
        image = image.convert("RGB")  # guard against grayscale uploads
        image = image.resize((224, 224))
        image_array = img_to_array(image)
        image_array = np.expand_dims(image_array, axis=0)  # add batch dimension
        image_array = preprocess_input(image_array)

        # Generate the feature vector using the truncated VGG16 model
        feature = vgg_model.predict(image_array, verbose=0)

        # Generate the caption
        caption = predict_caption(model, feature, tokenizer, max_length)

        # Display the generated caption
        st.subheader("Generated Caption:")
        st.write(caption)

# ...

if __name__ == "__main__":
    main()
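# Usage note (an assumption, not stated in the source): if this script is
# saved as app.py with best_model.h5 and tokenizer.pkl alongside it, the app
# can be launched from the shell with:
#   streamlit run app.py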