Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pickle | |
| import numpy as np | |
| import tensorflow as tf | |
| from tensorflow.keras.applications import EfficientNetB7 | |
| from tensorflow.keras.applications.efficientnet import preprocess_input | |
| from tensorflow.keras.preprocessing.image import load_img, img_to_array | |
| from tensorflow.keras.preprocessing.text import Tokenizer | |
| from tensorflow.keras.preprocessing.sequence import pad_sequences | |
| from tensorflow.keras.models import Model | |
| from tensorflow.keras.models import model_from_json | |
| from keras.optimizers import Adam | |
| from PIL import Image | |
# Build the image feature extractor: EfficientNetB7 pretrained on ImageNet.
# (The original comment said "vgg16", but the code loads EfficientNetB7.)
pre_trained = EfficientNetB7(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# Freeze the base model so its pretrained weights are never updated.
pre_trained.trainable = False
# Collapse the spatial feature map into a single feature vector per image.
x = tf.keras.layers.GlobalAveragePooling2D()(pre_trained.output)
# Restructure into a standalone feature-extraction model used at inference.
pre_trained_model = Model(inputs=pre_trained.input, outputs=x)
###########################################################################
# Load the trained captioning model from serialized architecture + weights
# (an alternative to loading a single .h5 with load_model, kept below).
# model = tf.keras.models.load_model("image_captioning_30k_model.h5")
# Load model architecture
with open("30k_model_architecture.json", "r") as json_file:
    loaded_model_json = json_file.read()
# Create the optimizer without specifying the learning rate.
# NOTE(review): Adam is imported from bare `keras.optimizers` while the rest
# of the file uses `tensorflow.keras` — mixing the two packages can fail
# depending on installed versions; verify and prefer tensorflow.keras.
optimizer = Adam()
# Set the learning rate separately (0.001 is also Adam's default).
optimizer.learning_rate.assign(0.001)
# Rebuild the model object from the JSON architecture, then load weights.
model = model_from_json(loaded_model_json)
model.load_weights("30k_model_weights.h5")
# Compile so the model is ready for inference with this optimizer.
model.compile(optimizer=optimizer, loss='categorical_crossentropy')
###########################################################################
###########################################################################
# Load the fitted tokenizer (word <-> integer index mapping) saved at
# training time. The original code first created a throwaway `Tokenizer()`
# that was immediately overwritten by the unpickled object; that dead
# statement is removed here.
with open("Image_Captioner_tokenizer_30k.pkl", "rb") as f:
    tokenizer = pickle.load(f)
def idx_to_word(integer, tokenizer):
    """Return the vocabulary word mapped to *integer*, or None if unknown.

    Performs a reverse lookup in the tokenizer's word->index table.
    """
    return next(
        (word for word, idx in tokenizer.word_index.items() if idx == integer),
        None,
    )
# generate caption for an image
def predict_caption(model, image, tokenizer, max_length):
    """Greedily decode a caption for a pre-extracted image feature vector.

    Args:
        model: trained captioning model; model.predict([image, sequence])
            returns a probability distribution over the vocabulary.
        image: image feature batch (shape (1, features)) from the extractor.
        tokenizer: fitted Keras tokenizer used during training.
        max_length: fixed sequence length used for padding/truncation.

    Returns:
        The generated caption string with the 'startseq'/'endseq' markers
        stripped.
    """
    # seed the generation process with the start tag
    in_text = 'startseq'
    for _ in range(max_length):
        # encode the text generated so far
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        # pad to the fixed length the model was trained with
        sequence = pad_sequences([sequence], max_length)
        # predict the distribution over the next word
        yhat = model.predict([image, sequence], verbose=0)
        # greedy choice: index with the highest probability
        yhat = np.argmax(yhat)
        # convert index back to a word
        word = idx_to_word(yhat, tokenizer)
        # stop if the index is unknown to the tokenizer
        if word is None:
            break
        # append word as input for generating the next word
        in_text += " " + word
        # stop once the end tag has been generated
        if word == 'endseq':
            break
    # Strip the start/end tags only when actually present. The original
    # unconditionally dropped the last token ([1:-1]), which silently lost a
    # real word whenever decoding stopped at max_length or on an unknown
    # index rather than on 'endseq'.
    words = in_text.split()
    if words and words[0] == 'startseq':
        words = words[1:]
    if words and words[-1] == 'endseq':
        words = words[:-1]
    return ' '.join(words)
def google_image_testing(inp):
    """Gradio handler: generate a caption for an uploaded image.

    Args:
        inp: image as a numpy array (H, W, C) supplied by the Gradio
            'image' input component.

    Returns:
        The predicted caption string.
    """
    # Resize in memory. The original saved the array to "input_image.jpg"
    # and reloaded it, adding disk I/O and lossy JPEG compression artifacts.
    input_image = Image.fromarray(inp).convert("RGB").resize((224, 224))
    # convert image pixels to a numpy array
    image = img_to_array(input_image)
    # reshape into a batch of one: (1, 224, 224, 3)
    image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
    # EfficientNet preprocessing (the original comment said "vgg")
    image = preprocess_input(image)
    # extract the global feature vector
    img_feature = pre_trained_model.predict(image, verbose=0)
    # decode the caption; max_length=74 matches the training configuration
    predicted = predict_caption(model, img_feature, tokenizer, max_length=74)
    return predicted
# Build and launch the Gradio UI: image in, caption text out.
demo = gr.Interface(fn=google_image_testing, inputs='image',outputs='text',title='Image Captioner')
# share=True exposes a public link; debug=True surfaces errors in the console.
demo.launch(debug=True,share=True)