Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pickle | |
| import numpy as np | |
| import tensorflow as tf | |
| from tensorflow.keras.applications import EfficientNetB7 | |
| from tensorflow.keras.applications.efficientnet import preprocess_input | |
| from tensorflow.keras.preprocessing.image import load_img, img_to_array | |
| from tensorflow.keras.preprocessing.text import Tokenizer | |
| from tensorflow.keras.preprocessing.sequence import pad_sequences | |
| from tensorflow.keras.models import Model | |
| from tensorflow.keras.models import model_from_json | |
| from keras.optimizers import Adam | |
| from PIL import Image | |
# Build the image feature extractor: EfficientNetB7 pretrained on ImageNet.
# (The original comment said "vgg16", but the code loads EfficientNetB7.)
pre_trained = EfficientNetB7(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# Freeze the base model so its pretrained weights are never updated.
pre_trained.trainable = False
# Collapse the spatial feature map into a single feature vector per image.
x = tf.keras.layers.GlobalAveragePooling2D()(pre_trained.output)
# Restructure into a standalone feature-extraction model used at inference.
pre_trained_model = Model(inputs=pre_trained.input, outputs=x)
###########################################################################
# Load the trained captioning model from serialized architecture + weights
# (an alternative to loading a single .h5 with load_model, kept below).
# model = tf.keras.models.load_model("image_captioning_30k_model.h5")
# Load model architecture
with open("30k_model_architecture.json", "r") as json_file:
    loaded_model_json = json_file.read()
# Create the optimizer without specifying the learning rate.
# NOTE(review): Adam is imported from bare `keras.optimizers` while the rest
# of the file uses `tensorflow.keras` — mixing the two packages can fail
# depending on installed versions; verify and prefer tensorflow.keras.
optimizer = Adam()
# Set the learning rate separately (0.001 is also Adam's default).
optimizer.learning_rate.assign(0.001)
# Rebuild the model object from the JSON architecture, then load weights.
model = model_from_json(loaded_model_json)
model.load_weights("30k_model_weights.h5")
# Compile so the model is ready for inference with this optimizer.
model.compile(optimizer=optimizer, loss='categorical_crossentropy')
###########################################################################
###########################################################################
# Load the fitted tokenizer (word <-> integer index mapping) saved at
# training time. The original code first created a throwaway `Tokenizer()`
# that was immediately overwritten by the unpickled object; that dead
# statement is removed here.
with open("Image_Captioner_tokenizer_30k.pkl", "rb") as f:
    tokenizer = pickle.load(f)
def idx_to_word(integer, tokenizer):
    """Return the vocabulary word mapped to *integer*, or None if unknown.

    Performs a reverse lookup in the tokenizer's word->index table.
    """
    return next(
        (word for word, idx in tokenizer.word_index.items() if idx == integer),
        None,
    )
# generate caption for an image
def predict_caption(model, image, tokenizer, max_length):
    """Greedily decode a caption for a pre-extracted image feature vector.

    Args:
        model: trained captioning model; model.predict([image, sequence])
            returns a probability distribution over the vocabulary.
        image: image feature batch (shape (1, features)) from the extractor.
        tokenizer: fitted Keras tokenizer used during training.
        max_length: fixed sequence length used for padding/truncation.

    Returns:
        The generated caption string with the 'startseq'/'endseq' markers
        stripped.
    """
    # seed the generation process with the start tag
    in_text = 'startseq'
    for _ in range(max_length):
        # encode the text generated so far
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        # pad to the fixed length the model was trained with
        sequence = pad_sequences([sequence], max_length)
        # predict the distribution over the next word
        yhat = model.predict([image, sequence], verbose=0)
        # greedy choice: index with the highest probability
        yhat = np.argmax(yhat)
        # convert index back to a word
        word = idx_to_word(yhat, tokenizer)
        # stop if the index is unknown to the tokenizer
        if word is None:
            break
        # append word as input for generating the next word
        in_text += " " + word
        # stop once the end tag has been generated
        if word == 'endseq':
            break
    # Strip the start/end tags only when actually present. The original
    # unconditionally dropped the last token ([1:-1]), which silently lost a
    # real word whenever decoding stopped at max_length or on an unknown
    # index rather than on 'endseq'.
    words = in_text.split()
    if words and words[0] == 'startseq':
        words = words[1:]
    if words and words[-1] == 'endseq':
        words = words[:-1]
    return ' '.join(words)
def google_image_testing(inp):
    """Gradio handler: generate a caption for an uploaded image.

    Args:
        inp: image as a numpy array (H, W, C) supplied by the Gradio
            'image' input component.

    Returns:
        The predicted caption string.
    """
    # Resize in memory. The original saved the array to "input_image.jpg"
    # and reloaded it, adding disk I/O and lossy JPEG compression artifacts.
    input_image = Image.fromarray(inp).convert("RGB").resize((224, 224))
    # convert image pixels to a numpy array
    image = img_to_array(input_image)
    # reshape into a batch of one: (1, 224, 224, 3)
    image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
    # EfficientNet preprocessing (the original comment said "vgg")
    image = preprocess_input(image)
    # extract the global feature vector
    img_feature = pre_trained_model.predict(image, verbose=0)
    # decode the caption; max_length=74 matches the training configuration
    predicted = predict_caption(model, img_feature, tokenizer, max_length=74)
    return predicted
# Build and launch the Gradio UI: image in, caption text out.
demo = gr.Interface(fn=google_image_testing, inputs='image',outputs='text',title='Image Captioner')
# share=True exposes a public link; debug=True surfaces errors in the console.
demo.launch(debug=True,share=True)