Spaces:
Sleeping
Sleeping
| import tensorflow as tf | |
| from tensorflow.keras.applications.vgg16 import preprocess_input | |
| from tensorflow.keras.preprocessing.image import img_to_array | |
| from tensorflow.keras.preprocessing.sequence import pad_sequences | |
| import numpy as np | |
| import pickle | |
# Both Keras models are loaded once, at import time, from files resolved
# relative to the current working directory.
# CapGenerator is the model generate_caption hands to predict_caption;
# VGGMod is the model feature_extractor runs images through.
CapGenerator = tf.keras.models.load_model('CapGen.h5')
VGGMod = tf.keras.models.load_model('VGGModel.h5')

# Length every token sequence is padded to before prediction; also the
# upper bound on the number of generation steps in predict_caption.
max_length = 35

# NOTE(review): pickle.load can execute arbitrary code while unpickling --
# only ship a tokenizer file that comes from a trusted source.
with open('models/tokenizer.pickle', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

# One more than the number of known words (presumably to account for a
# reserved index 0 used for padding -- TODO confirm).
vocab_size = len(tokenizer.word_index) + 1
def idx_to_word(integer, tokenizer):
    """Return the vocabulary word mapped to ``integer``, or ``None``.

    Args:
        integer: Token index to look up.
        tokenizer: Object exposing a ``word_index`` mapping of word -> index.
            If it also exposes a non-empty ``index_word`` mapping of
            index -> word, that mapping is used for a direct lookup.

    Returns:
        The word whose index equals ``integer``, or ``None`` when no word
        has that index.
    """
    # Prefer the reverse mapping when the tokenizer provides one: a single
    # dict lookup instead of scanning the whole vocabulary on every call.
    reverse_lookup = getattr(tokenizer, 'index_word', None)
    if reverse_lookup:
        return reverse_lookup.get(integer)

    # Fallback for tokenizer objects that only carry ``word_index``.
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None
def predict_caption(model, image, tokenizer, max_length=max_length):
    """Generate a caption one word at a time, always taking the top-scoring word.

    Args:
        model: Model whose ``predict`` is called with ``[image, sequence]``
            and whose output is reduced with ``np.argmax`` to a word index.
        image: Image input passed to the model unchanged at every step
            (presumably the feature array from ``feature_extractor`` --
            TODO confirm expected shape).
        tokenizer: Tokenizer providing ``texts_to_sequences`` and the
            vocabulary used by ``idx_to_word``.
        max_length: Length the token sequence is padded to, and the maximum
            number of words generated.

    Returns:
        The generated text. It always begins with ``'startseq'`` and ends
        with ``'endseq'`` only if the model produced that word before
        generation stopped.
    """
    caption = 'startseq'  # start tag that seeds the generation loop

    for _ in range(max_length):
        # Encode the text generated so far and pad it for the model.
        token_ids = tokenizer.texts_to_sequences([caption])[0]
        padded_ids = pad_sequences([token_ids], max_length)

        # Take the index with the highest predicted score as the next word.
        scores = model.predict([image, padded_ids], verbose=0)
        next_word = idx_to_word(np.argmax(scores), tokenizer)

        # An index with no vocabulary entry ends generation early.
        if next_word is None:
            return caption

        caption = caption + " " + next_word

        # The end tag is kept in the text and terminates generation.
        if next_word == 'endseq':
            return caption

    return caption
def feature_extractor(image):
    """Run an image through the loaded ``VGGMod`` model and return its output.

    Args:
        image: Image accepted by ``img_to_array`` (presumably a PIL image
            already sized for the model -- TODO confirm; no resizing is
            done here).

    Returns:
        Whatever ``VGGMod.predict`` returns for the single-image batch.
    """
    pixels = img_to_array(image)

    # Add a leading batch axis of size 1: (d0, d1, d2) -> (1, d0, d1, d2).
    dims = pixels.shape
    batch = pixels.reshape((1, dims[0], dims[1], dims[2]))

    # Apply the VGG16 input preprocessing before prediction.
    prepared = preprocess_input(batch)
    return VGGMod.predict(prepared, verbose=0)
def generate_caption(image_name):
    """Produce an upper-cased caption for an image.

    Args:
        image_name: Passed straight to ``feature_extractor``; despite the
            name this appears to be an image object, not a file name --
            TODO confirm against callers.

    Returns:
        The generated caption in upper case with the ``startseq`` tag and,
        when present, the trailing `` endseq`` tag removed. The space that
        followed the start tag is kept, matching the previous output format.
    """
    start_tag = 'startseq'
    end_suffix = ' endseq'

    features = feature_extractor(image_name)
    caption = predict_caption(CapGenerator, features, tokenizer, max_length)

    if caption.startswith(start_tag):
        caption = caption[len(start_tag):]

    # Generation can stop without an end tag (length limit reached or an
    # unknown index). A fixed ``[:-7]`` slice would then cut off real
    # caption text, so only strip the tag when it is actually there.
    if caption.endswith(end_suffix):
        caption = caption[:-len(end_suffix)]

    return caption.upper()