Spaces:

suriya7
/

Image-Captioning

Sleeping

App Files Files Community

suriya7 committed on Mar 15, 2024

Commit

e78a9a0

verified ·

1 Parent(s): 85cad48

Create app.py

Browse files

Files changed (1) hide show

app.py +135 -0

app.py ADDED Viewed

	@@ -0,0 +1,135 @@

+from tensorflow.keras.preprocessing.image import load_img, img_to_array
+from tensorflow.keras.preprocessing.text import Tokenizer
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from tensorflow.keras.models import Model
+from tensorflow.keras.applications.xception import Xception, preprocess_input
+import pickle
+import os
+from PIL import Image
+import numpy as np
+import gradio as gr
# Instantiate Xception with its default arguments; extract_features() below
# uses the derived `model` to turn an image into a feature array.
base_network = Xception()
# Re-wire the network so that it emits the output of its second-to-last layer
# instead of the final layer's output.
# NOTE(review): presumably this drops the classification head — confirm.
model = Model(
    inputs=base_network.inputs,
    outputs=base_network.layers[-2].output,
)
# Read the caption file. Every data line is expected to look like
# "<image file name>,<caption text>"; the first line is skipped as a header.
with open('captions.txt', 'r', encoding='utf-8') as f:
    # The default argument keeps an empty file from raising StopIteration.
    next(f, None)
    captions_doc = f.read()

# Map image ID (file name up to its first dot) -> list of raw captions.
mapping = {}
# Plain iteration: the original wrapped this in tqdm(), which is never
# imported in this file and therefore raised NameError at start-up.
for line in captions_doc.split('\n'):
    # skip blank / degenerate lines
    if len(line) < 2:
        continue
    # the first comma-separated field is the image file name, the rest is the
    # caption (which may itself contain commas)
    image_name, *caption_parts = line.split(',')
    # remove extension from image ID
    image_id = image_name.split('.')[0]
    # commas inside the caption are replaced by single spaces
    caption = " ".join(caption_parts)
    # create the list on first sight of this image, then store the caption
    mapping.setdefault(image_id, []).append(caption)
def clean(mapping):
    """Normalise every caption in ``mapping`` in place.

    Each caption is lower-cased, stripped of every character that is not an
    ASCII letter or whitespace, split into words, filtered to words longer
    than one character, and wrapped in the ``startseq`` / ``endseq`` markers.

    Args:
        mapping: dict whose values are lists of caption strings. The lists
            are modified in place.

    Returns:
        None.
    """
    # Local import: the file's import header does not bring in ``re``.
    import re

    # Whitespace is kept so that word boundaries survive; the original used
    # str.replace() with regex patterns, which matched nothing, so digits and
    # punctuation were never actually removed.
    non_letters = re.compile(r'[^a-z\s]')

    for captions in mapping.values():
        for i, caption in enumerate(captions):
            # convert to lowercase
            caption = caption.lower()
            # delete digits, special chars, etc.
            caption = non_letters.sub('', caption)
            # split() also collapses runs of whitespace
            words = [word for word in caption.split() if len(word) > 1]
            # add start and end tags to the caption
            captions[i] = 'startseq ' + " ".join(words) + ' endseq'
# Normalise the captions before fitting the tokenizer. clean() adds the
# 'startseq' / 'endseq' markers that predict_caption() feeds in and looks for;
# the original never called it, so those tokens were missing from the
# vocabulary.
clean(mapping)
# flatten the per-image caption lists into one list
all_captions = [caption for captions in mapping.values() for caption in captions]
# tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_captions)
# +1 on top of the number of distinct words in the tokenizer's word index
vocab_size = len(tokenizer.word_index) + 1
# get maximum length (in whitespace-separated words) of the captions available
max_length = max(len(caption.split()) for caption in all_captions)
def extract_features(image):
    """Run the module-level ``model`` on one image and return its prediction.

    Args:
        image: value accepted by ``load_img`` (called here with the path
            returned by ``save_image``).

    Returns:
        Whatever ``model.predict`` returns for the single-image batch.
    """
    # load at 299x299 and convert the pixels to an array
    pixels = img_to_array(load_img(image, target_size=(299, 299)))
    # add a leading axis so the model receives a batch of one, then apply the
    # Xception input preprocessing
    batch = preprocess_input(np.expand_dims(pixels, axis=0))
    return model.predict(batch, verbose=0)
def idx_to_word(integer, tokenizer):
    """Return the word whose index in ``tokenizer.word_index`` equals ``integer``.

    Args:
        integer: index to look up.
        tokenizer: object exposing a ``word_index`` mapping of word -> index.

    Returns:
        The first matching word in iteration order, or ``None`` if no word
        has that index.
    """
    matches = (
        word
        for word, index in tokenizer.word_index.items()
        if index == integer
    )
    return next(matches, None)
def save_image(img, save_dir="saved_images"):
    """Write ``img`` to ``<save_dir>/uploaded_image.png`` and return that path.

    Args:
        img: object exposing ``save(path)`` (called here with a PIL image).
        save_dir: target directory; created if it does not exist yet.

    Returns:
        The path the image was saved to. The file name is fixed, so the same
        path is reused on every call.
    """
    # make sure the target directory is there
    os.makedirs(save_dir, exist_ok=True)
    destination = os.path.join(save_dir, "uploaded_image.png")
    img.save(destination)
    return destination
# generate caption for an image
def predict_caption(model, image, tokenizer, max_length=35):
    """Build a caption word by word, always taking the highest-scoring word.

    Args:
        model: object whose ``predict([image, sequence], verbose=0)`` returns
            scores for the next word.
        image: image features passed to ``model.predict`` unchanged.
        tokenizer: tokenizer used to encode the text generated so far and to
            map the predicted index back to a word.
        max_length: maximum number of words to generate; also the length the
            encoded sequence is padded to.

    Returns:
        The generated text. It starts with ``'startseq'`` and ends with
        ``'endseq'`` only if that word was predicted before the loop stopped.
    """
    # the generation process is seeded with the start tag
    words = ['startseq']
    for _ in range(max_length):
        # encode everything generated so far and pad it to max_length
        encoded = tokenizer.texts_to_sequences([" ".join(words)])[0]
        padded = pad_sequences([encoded], max_length)
        # predict the next word and keep the index with the highest score
        scores = model.predict([image, padded], verbose=0)
        next_word = idx_to_word(np.argmax(scores), tokenizer)
        # stop if the index has no word
        if next_word is None:
            break
        # the new word becomes part of the input for the next step
        words.append(next_word)
        # stop once the end tag has been produced
        if next_word == 'endseq':
            break
    return " ".join(words)
def caption_prediction(img):
    """Gradio callback: return a caption for the uploaded image.

    Args:
        img: image array handed over by the Gradio ``'image'`` input, or
            ``None`` when nothing was uploaded.

    Returns:
        The generated caption without the ``startseq`` / ``endseq`` markers,
        or an empty string when ``img`` is ``None``.
    """
    # nothing uploaded: return an empty caption instead of crashing in
    # Image.fromarray
    if img is None:
        return ""
    image = Image.fromarray(img)
    # extract_features() is given a file path, so write the upload to disk first
    img_path = save_image(image)
    features = extract_features(img_path)
    # NOTE(review): caption_model is not defined anywhere in the visible
    # source — it has to be loaded before this callback can run.
    raw_caption = predict_caption(caption_model, features, tokenizer)
    # Remove the markers by token, not by a fixed character slice: the
    # original `[8:][:-6]` cut off six characters of real caption whenever
    # generation stopped without producing 'endseq'.
    words = raw_caption.split()
    if words and words[0] == 'startseq':
        words = words[1:]
    if words and words[-1] == 'endseq':
        words = words[:-1]
    return " ".join(words)
# Gradio UI: one image input, one text output, served by caption_prediction.
demo = gr.Interface(
    fn=caption_prediction,
    inputs='image',
    outputs='text',
    title='caption generator',
)
# start the app with debug mode and a share link enabled
demo.launch(debug=True, share=True)