# NOTE(review): removed non-Python page-scrape residue that preceded this line
# (Hugging Face Space header, commit hashes, and a line-number gutter) — it
# made the file unparseable as Python.
import pickle

import gradio as gr
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.models import Model, model_from_json
# Use tf.keras's Adam, not the standalone `keras` package's — mixing the two
# families can yield optimizer objects incompatible with tf.keras models.
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
# Load EfficientNetB7 (ImageNet weights, classifier head removed) as the
# image encoder; 224x224 RGB input.
pre_trained = EfficientNetB7(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# Freeze the base model — it is used only for feature extraction.
pre_trained.trainable = False
# Pool the spatial feature map down to a single feature vector per image.
x = tf.keras.layers.GlobalAveragePooling2D()(pre_trained.output)
# restructure the model: image -> pooled feature vector
pre_trained_model = Model(inputs=pre_trained.input, outputs=x)
###########################################################################################################
# Rebuild the captioning model from its serialized architecture and weights.
with open("30k_model_architecture.json", "r") as json_file:
    loaded_model_json = json_file.read()
model = model_from_json(loaded_model_json)
model.load_weights("30k_model_weights.h5")
# Compile with Adam at lr=0.001 (passed directly — equivalent to constructing
# Adam() and assigning the learning rate afterwards). Compilation is only
# strictly needed for further training; predict() works regardless.
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy')
###########################################################################################################
# Load the fitted tokenizer. The pickle fully replaces the binding, so the
# original placeholder `tokenizer = Tokenizer()` was dead code and is removed.
# NOTE(review): pickle.load on an untrusted file can execute arbitrary code;
# acceptable here only because this is the app's own bundled artifact.
with open("Image_Captioner_tokenizer_30k.pkl", "rb") as f:
    tokenizer = pickle.load(f)
def idx_to_word(integer, tokenizer):
    """Return the vocabulary word whose index equals `integer`, or None.

    Performs a reverse lookup over the tokenizer's word->index mapping.
    """
    return next(
        (word for word, index in tokenizer.word_index.items() if index == integer),
        None,
    )
# generate caption for an image
def predict_caption(model, image, tokenizer, max_length):
    """Greedily decode a caption for a precomputed image feature vector.

    Args:
        model: compiled captioning model taking [image_features, padded_sequence].
        image: encoder output for one image — presumably shape (1, feature_dim);
            TODO confirm against the saved architecture.
        tokenizer: fitted Keras Tokenizer used at training time.
        max_length: sequence/padding length the model was trained with.

    Returns:
        The generated caption string with the startseq/endseq tags removed.
    """
    # add start tag for generation process
    in_text = 'startseq'
    for _ in range(max_length):
        # Encode and pad the caption generated so far.
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        sequence = pad_sequences([sequence], maxlen=max_length)
        # Greedy search: take the single highest-probability next word.
        yhat = np.argmax(model.predict([image, sequence], verbose=0))
        word = idx_to_word(yhat, tokenizer)
        # stop if the predicted index is not in the vocabulary
        if word is None:
            break
        in_text += " " + word
        # stop if we reach the end tag
        if word == 'endseq':
            break
    # Strip the start tag, and the end tag only if it was actually generated.
    # (The previous `split()[1:-1]` always dropped the last token, truncating
    # captions that hit max_length without ever producing 'endseq'.)
    tokens = in_text.split()[1:]
    if tokens and tokens[-1] == 'endseq':
        tokens = tokens[:-1]
    return ' '.join(tokens)
def google_image_testing(inp):
    """Generate a caption for a Gradio-supplied image.

    Args:
        inp: numpy array image as delivered by the Gradio 'image' input
            (H, W, 3) RGB — TODO confirm channel order with the Gradio version.

    Returns:
        The predicted caption string.
    """
    # Resize entirely in memory. The previous implementation saved the array
    # to a shared "input_image.jpg" and reloaded it, which (a) added lossy
    # JPEG artifacts and (b) raced when concurrent requests overwrote the
    # same file. convert("RGB") also guards against RGBA/grayscale input.
    input_image = Image.fromarray(inp).convert("RGB").resize((224, 224))
    # convert image pixels to numpy array
    image = img_to_array(input_image)
    # add the batch dimension: (1, 224, 224, 3)
    image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
    # EfficientNet-specific preprocessing
    image = preprocess_input(image)
    # extract pooled CNN features for the captioning model
    img_feature = pre_trained_model.predict(image, verbose=0)
    # max_length=74 must match the padding length used at training time
    return predict_caption(model, img_feature, tokenizer, max_length=74)
# Gradio UI: image in, caption text out. (A stray trailing "|" scrape artifact
# on the launch line was removed — it was a syntax error.)
demo = gr.Interface(
    fn=google_image_testing,
    inputs='image',
    outputs='text',
    title='Image Captioner',
)
demo.launch(debug=True, share=True)