"""Inference helpers that caption an image with a saved Keras model."""

import functools
import os
import tempfile

import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Paths
token_path = 'saved_models/Flickr8k.token.txt'
train_images_path = 'saved_models/Flickr_8k.trainImages.txt'
test_images_path = 'saved_models/Flickr_8k.testImages.txt'
model_path = 'saved_models/Final_Image_Captioning.h5'


@functools.lru_cache(maxsize=1)
def _load_caption_model(path):
    """Load the Keras model stored at *path*, memoising the last one loaded."""
    return load_model(path)


@functools.lru_cache(maxsize=1)
def _load_vocabulary():
    """Return the cached ``(wordtoix, ixtoword)`` mappings read from disk."""
    # NOTE(security): allow_pickle=True unpickles the file contents, which can
    # execute arbitrary code. Only point this at vocabulary files you trust.
    wordtoix = np.load("wordtoix.npy", allow_pickle=True).item()
    ixtoword = np.load("ixtoword.npy", allow_pickle=True).item()
    return wordtoix, ixtoword


def _strip_boundary_tokens(words, start_token, end_token):
    """Drop a leading *start_token* and a trailing *end_token*, if present."""
    if words and words[0] == start_token:
        words = words[1:]
    if words and words[-1] == end_token:
        words = words[:-1]
    return words


def preprocess_image(image_file):
    """Turn an image object into a preprocessed, batched array.

    Args:
        image_file: Any object exposing ``save(path)`` that writes the image
            to that path (presumably an uploaded-file object -- TODO confirm
            against the caller).

    Returns:
        The image resized to 299x299, converted to an array, given a leading
        batch axis and passed through InceptionV3's ``preprocess_input``.
    """
    # Save into a private temporary directory rather than re-opening a
    # NamedTemporaryFile by name: a still-open NamedTemporaryFile cannot be
    # opened a second time on Windows.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = os.path.join(tmp_dir, "upload.jpg")
        image_file.save(tmp_path)
        img = image.load_img(tmp_path, target_size=(299, 299))
        # Materialise the pixels before the directory is removed.
        img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    return preprocess_input(img)


def generate_captions(image_file, sequence):
    """Generate a caption for *image_file* with the model at ``model_path``.

    Args:
        image_file: Image object accepted by :func:`preprocess_image`.
        sequence: Iterable of token indices appended to the model's text
            input on every decoding step.

    Returns:
        The generated caption as a space-separated string.
    """
    # Loading the .h5 file is expensive, so the model is loaded once and
    # reused by later calls.
    model = _load_caption_model(model_path)

    # Preprocess the image
    img = preprocess_image(image_file)

    # Generate the caption
    return generate_caption_from_image(model, img, sequence)


def generate_caption_from_image(model, img, sequence, max_length=34,
                                start_token="", end_token=""):
    """Decode a caption greedily, one word per ``model.predict`` call.

    Args:
        model: Object whose ``predict([img, seq, h, c])`` returns
            ``(y_pred, h, c)``.
        img: Preprocessed image batch, e.g. from :func:`preprocess_image`.
        sequence: Iterable of token indices appended to the indices of the
            words generated so far, before padding.
        max_length: Maximum number of decoding steps; also the length the
            text input is padded to.
        start_token: Word that seeds the caption.
        end_token: Word that stops decoding once it is predicted.

    Returns:
        The caption as a space-separated string, without the start/end
        tokens.
    """
    # NOTE(review): both token defaults are empty strings, so decoding never
    # stops early and no boundary word is removed. They may originally have
    # been markers such as "<start>"/"<end>" -- confirm against the
    # vocabulary and pass the real tokens in.
    wordtoix, ixtoword = _load_vocabulary()

    # Two zero arrays of shape (1, 256) seed the model's extra state inputs
    # (presumably LSTM hidden/cell states -- TODO confirm).
    initial_state = [np.zeros((1, 256)), np.zeros((1, 256))]

    # Generate caption using greedy search
    caption = start_token
    for _ in range(max_length):
        # Words missing from the vocabulary are skipped, not mapped to a
        # placeholder index.
        seq = [wordtoix[word] for word in caption.split() if word in wordtoix]
        seq.extend(sequence)  # Append the given sequence
        seq = pad_sequences([seq], maxlen=max_length)

        y_pred, h, c = model.predict([img, seq] + initial_state)
        y_pred = np.argmax(y_pred, axis=-1)
        # Assumes the argmax result is at least 2-D so [0][0] selects a single
        # index -- TODO confirm against the model's output shape.
        word = ixtoword[y_pred[0][0]]
        caption += " " + word

        if word == end_token:
            break

        # Feed the returned states back in on the next step.
        initial_state = [h, c]

    # Remove the start/end tokens only when they are really there. Slicing
    # [1:-1] unconditionally would discard real caption words whenever the
    # start token is empty or the end token was never produced.
    words = _strip_boundary_tokens(caption.split(), start_token, end_token)
    return " ".join(words)