Spaces:

harshrajsaxena
/

img

Runtime error

App Files Files Community

img / image_captioning.py

harshrajsaxena

Update image_captioning.py

cac8567 over 2 years ago

raw

history blame contribute delete

2.16 kB

	import tensorflow as tf
	import numpy as np
	from tensorflow.keras.applications.inception_v3 import preprocess_input
	from tensorflow.keras.models import Model, load_model
	from tensorflow.keras.preprocessing import image
	from tensorflow.keras.preprocessing.sequence import pad_sequences
	import tempfile

	# Paths (all relative to the process working directory).
	# NOTE(review): token_path, train_images_path and test_images_path are not
	# referenced by any function visible in this file; presumably they are the
	# Flickr8k caption/split files used at training time -- confirm before removing.
	token_path = 'saved_models/Flickr8k.token.txt'
	train_images_path = 'saved_models/Flickr_8k.trainImages.txt'
	test_images_path = 'saved_models/Flickr_8k.testImages.txt'
	# Saved Keras model file that is passed to load_model() when captioning.
	model_path = 'saved_models/Final_Image_Captioning.h5'

	def preprocess_image(image_file):
	    """Turn an uploaded image into a preprocessed batch of one.

	    Args:
	        image_file: Any object exposing ``save(path)`` that writes the image
	            to the given filesystem path.

	    Returns:
	        The output of InceptionV3's ``preprocess_input`` applied to the
	        image, loaded at 299x299 and given a leading batch axis of size 1.
	    """
	    import os

	    # Write into a private temporary directory rather than an open
	    # NamedTemporaryFile: re-opening such a file by name while it is still
	    # open works on Unix but not on Windows.
	    with tempfile.TemporaryDirectory() as tmp_dir:
	        tmp_path = os.path.join(tmp_dir, "upload.jpg")
	        image_file.save(tmp_path)
	        img = image.load_img(tmp_path, target_size=(299, 299))
	        # Materialize the pixels before the directory is removed so that no
	        # lazily-opened file handle outlives the temporary file.
	        img = image.img_to_array(img)

	    img = np.expand_dims(img, axis=0)
	    return preprocess_input(img)


	# Loaded models keyed by file path, so the (slow) disk load happens once per
	# process instead of once per captioning request.
	_MODEL_CACHE = {}


	def _get_caption_model(path):
	    """Return the Keras model stored at ``path``, loading it at most once."""
	    if path not in _MODEL_CACHE:
	        # Only a successful load is cached; a failure propagates to the
	        # caller and the next call retries.
	        _MODEL_CACHE[path] = load_model(path)
	    return _MODEL_CACHE[path]


	def generate_captions(image_file, sequence):
	    """Generate a caption for an uploaded image.

	    Args:
	        image_file: Uploaded image object; forwarded to ``preprocess_image``.
	        sequence: Token ids forwarded to ``generate_caption_from_image``.

	    Returns:
	        The caption string produced by ``generate_caption_from_image``.
	    """
	    model = _get_caption_model(model_path)
	    img = preprocess_image(image_file)
	    return generate_caption_from_image(model, img, sequence)

	def generate_caption_from_image(model, img, sequence):
	    """Greedily decode a caption for ``img``, one word per model call.

	    Args:
	        model: Object whose ``predict([img, seq, h, c])`` returns a
	            ``(y_pred, h, c)`` triple.
	        img: Preprocessed image batch, passed to the model unchanged.
	        sequence: Iterable of token ids appended after the caption's own
	            token ids on every decoding step.

	    Returns:
	        The generated words joined by single spaces, without the leading
	        ``<start>`` marker and without a trailing ``<end>`` marker. At most
	        34 words are generated.
	    """
	    max_length = 34
	    start_token = "<start>"
	    end_token = "<end>"
	    # NOTE(review): allow_pickle=True unpickles arbitrary objects, so these
	    # vocabulary files must come from a trusted source.
	    wordtoix = np.load("wordtoix.npy", allow_pickle=True).item()
	    ixtoword = np.load("ixtoword.npy", allow_pickle=True).item()

	    # Zero state for the first step; 256 presumably matches the model's
	    # recurrent state width -- TODO confirm against the saved model.
	    initial_state = [np.zeros((1, 256)), np.zeros((1, 256))]

	    # Generate caption using greedy search
	    caption = start_token
	    for _ in range(max_length):
	        # Words missing from the vocabulary are skipped instead of raising.
	        seq = [wordtoix[word] for word in caption.split() if word in wordtoix]
	        seq.extend(sequence)  # Append the given sequence
	        seq = pad_sequences([seq], maxlen=max_length)

	        y_pred, h, c = model.predict([img, seq] + initial_state)
	        y_pred = np.argmax(y_pred, axis=-1)
	        word = ixtoword[y_pred[0][0]]
	        caption += " " + word
	        if word == end_token:
	            break

	        # Carry the returned state into the next step.
	        initial_state = [h, c]

	    # Drop the start marker, then drop the last token only when it really is
	    # the end marker: if decoding stopped because max_length was reached,
	    # the last token is a genuine word and must be kept.
	    words = caption.split()[1:]
	    if words and words[-1] == end_token:
	        words.pop()

	    return " ".join(words)