# Scraped page header: "Spaces: Runtime error"; file size: 2,163 bytes.
import tensorflow as tf
import numpy as np
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tempfile
# Paths to data and model artifacts, resolved relative to the working directory.
# NOTE(review): the three Flickr8k text files below are not referenced by any
# code visible in this file — presumably the caption tokens and the train/test
# image lists used during training; confirm before removing.
token_path = 'saved_models/Flickr8k.token.txt'
train_images_path = 'saved_models/Flickr_8k.trainImages.txt'
test_images_path = 'saved_models/Flickr_8k.testImages.txt'
# Saved Keras model (.h5) that generate_captions() loads for inference.
model_path = 'saved_models/Final_Image_Captioning.h5'
def preprocess_image(image_file):
    """Convert an uploaded image into a batch of one InceptionV3-ready array.

    The upload is written to a temporary ``.jpg`` file, read back resized to
    299x299, converted to an array, given a leading batch axis and passed
    through ``inception_v3.preprocess_input``.

    Args:
        image_file: Object exposing ``save(path)`` — presumably a PIL image
            or a web-framework upload object; confirm against the caller.

    Returns:
        The array returned by ``preprocess_input``; its first axis has
        length 1 (added by ``np.expand_dims``).
    """
    import os  # local import: only this function needs it

    # A private temporary directory is used instead of NamedTemporaryFile:
    # an open NamedTemporaryFile cannot be reopened by name on Windows, and
    # the path must remain valid until the image has been read back.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = os.path.join(tmp_dir, "upload.jpg")
        image_file.save(tmp_path)
        img = image.load_img(tmp_path, target_size=(299, 299))
        # Materialize the pixels before the directory is cleaned up.
        img = image.img_to_array(img)

    img = np.expand_dims(img, axis=0)
    return preprocess_input(img)
# Loaded models keyed by file path, so repeated caption requests do not
# re-read and re-build the network from disk each time.
_MODEL_CACHE = {}


def _get_caption_model(path):
    """Return the Keras model stored at *path*, loading it at most once."""
    try:
        return _MODEL_CACHE[path]
    except KeyError:
        model = _MODEL_CACHE[path] = load_model(path)
        return model


def generate_captions(image_file, sequence):
    """Generate a caption for an uploaded image.

    Loads (or reuses) the model at ``model_path``, preprocesses the image
    with ``preprocess_image`` and decodes a caption with
    ``generate_caption_from_image``.

    Args:
        image_file: Upload object accepted by ``preprocess_image``.
        sequence: Token ids forwarded unchanged to
            ``generate_caption_from_image``.

    Returns:
        The caption returned by ``generate_caption_from_image``.
    """
    model = _get_caption_model(model_path)
    img = preprocess_image(image_file)
    return generate_caption_from_image(model, img, sequence)
def generate_caption_from_image(model, img, sequence, max_length=34):
    """Greedily decode a caption for *img* using *model*.

    Starting from ``"<start>"``, each step feeds the model the image, the
    padded ids of the words produced so far followed by *sequence*, and the
    two state arrays returned by the previous step. The highest-scoring
    word is appended, and decoding stops at ``"<end>"`` or after
    *max_length* steps.

    Args:
        model: Object whose ``predict([img, seq, h, c])`` returns a
            ``(y_pred, h, c)`` triple.
        img: Preprocessed image batch passed straight to ``model.predict``.
        sequence: Iterable of token ids appended after the caption's own
            ids on every step.
        max_length: Maximum number of decoding steps and the padded input
            length. Defaults to 34, the value previously hard-coded.

    Returns:
        The generated words joined by single spaces, without the start
        token and without the end token when one was produced.

    Raises:
        KeyError: If the predicted index is missing from ``ixtoword``.
    """
    start_token = "<start>"
    end_token = "<end>"

    # NOTE(security): allow_pickle=True unpickles arbitrary objects; these
    # vocabulary files must only ever come from a trusted source.
    wordtoix = np.load("wordtoix.npy", allow_pickle=True).item()
    ixtoword = np.load("ixtoword.npy", allow_pickle=True).item()

    # Zero state for the first step. The width 256 must match the model's
    # state inputs — presumably its recurrent layer size; TODO confirm.
    state = [np.zeros((1, 256)), np.zeros((1, 256))]
    extra_ids = list(sequence)  # loop-invariant, so converted once

    # Track words and their ids as lists instead of re-splitting and
    # re-indexing a growing caption string on every step.
    words = [start_token]
    word_ids = [wordtoix[start_token]] if start_token in wordtoix else []

    for _ in range(max_length):
        seq = pad_sequences([word_ids + extra_ids], maxlen=max_length)
        y_pred, h, c = model.predict([img, seq] + state)
        next_id = np.argmax(y_pred, axis=-1)[0][0]
        word = ixtoword[next_id]
        words.append(word)
        if word == end_token:
            break
        # Words missing from the vocabulary are skipped when building the
        # model input, but are still kept in the caption text.
        if word in wordtoix:
            word_ids.append(wordtoix[word])
        state = [h, c]

    generated = words[1:]  # drop the start token
    # Only strip the final token when it really is the end marker; if the
    # loop ran out of steps, the last token is a genuine caption word.
    if generated and generated[-1] == end_token:
        generated.pop()
    return " ".join(generated)
|