suriya7 commited on
Commit
e78a9a0
·
verified ·
1 Parent(s): 85cad48

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -0
app.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import pickle
import re

import gradio as gr
import numpy as np
from PIL import Image
from tensorflow.keras.applications.xception import Xception, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
# Feature extractor: ImageNet-pretrained Xception with the final softmax
# classification layer removed, so predict() returns the penultimate
# 2048-d feature vector instead of class probabilities.
model = Xception()
model = Model(inputs=model.inputs, outputs=model.layers[-2].output)
def _build_caption_mapping(path):
    """Parse a Flickr-style captions file into {image_id: [captions, ...]}.

    Each line after the one-line header is "<image_file>,<caption...>".
    The image extension is stripped so ids match feature-store keys, and
    captions containing commas are re-joined into a single string.
    """
    id_to_captions = {}
    with open(path, 'r') as f:
        next(f)  # skip the header line
        for line in f.read().split('\n'):
            # skip blank / degenerate lines
            if len(line) < 2:
                continue
            tokens = line.split(',')
            image_id, caption_parts = tokens[0], tokens[1:]
            image_id = image_id.split('.')[0]
            id_to_captions.setdefault(image_id, []).append(" ".join(caption_parts))
    return id_to_captions


# NOTE(review): the original wrapped this loop in tqdm() without ever
# importing tqdm, which raised NameError at startup; the progress bar
# was dropped since tqdm is not a dependency of this file.
mapping = _build_caption_mapping('captions.txt')
def clean(mapping):
    """Normalise every caption in *mapping* in place.

    Lowercases, removes non-alphabetic characters, collapses whitespace,
    drops one-letter words, and wraps each caption in the 'startseq' /
    'endseq' markers that predict_caption() relies on.
    """
    for captions in mapping.values():
        for i, caption in enumerate(captions):
            caption = caption.lower()
            # BUG FIX: the original called str.replace('[^A-Za-z]', '') and
            # str.replace('\s+', ' '), which match those patterns literally
            # and therefore did nothing; re.sub applies them as regexes.
            caption = re.sub(r'[^a-z ]', '', caption)
            caption = re.sub(r'\s+', ' ', caption)
            # keep words longer than one character, add start/end tags
            words = [word for word in caption.split() if len(word) > 1]
            captions[i] = 'startseq ' + " ".join(words) + ' endseq'
# BUG FIX: clean() was defined but never invoked, so the tokenizer was fit
# on the raw captions and the 'startseq'/'endseq' markers that
# predict_caption() seeds generation with were absent from the vocabulary.
clean(mapping)

# Flatten every caption into one list for tokenizer fitting.
all_captions = [caption for captions in mapping.values() for caption in captions]

# Fit the tokenizer; +1 because Keras reserves index 0 for padding.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_captions)
vocab_size = len(tokenizer.word_index) + 1

# Longest caption (in words) — upper bound for the generation loop.
max_length = max(len(caption.split()) for caption in all_captions)
def extract_features(image):
    """Return the Xception feature vector for the image at path *image*."""
    # load and resize to Xception's expected 299x299 input
    pixels = load_img(image, target_size=(299, 299))
    # PIL image -> float array with a leading batch axis
    batch = np.expand_dims(img_to_array(pixels), axis=0)
    # scale pixel values the way Xception was trained, then extract
    return model.predict(preprocess_input(batch), verbose=0)
def idx_to_word(integer, tokenizer):
    """Return the vocabulary word mapped to *integer*, or None if absent."""
    return next(
        (word for word, index in tokenizer.word_index.items() if index == integer),
        None,
    )
def save_image(img, save_dir="saved_images"):
    """Write *img* to <save_dir>/uploaded_image.png and return that path."""
    # idempotent directory creation
    os.makedirs(save_dir, exist_ok=True)
    target = os.path.join(save_dir, "uploaded_image.png")
    img.save(target)
    return target
# generate a caption for a pre-extracted image feature vector
def predict_caption(model, image, tokenizer, max_length=35):
    """Greedily decode a caption word by word from an image feature.

    Starts from the 'startseq' marker and stops at 'endseq', at an
    unknown index, or after max_length steps; returns the full generated
    string with the markers still attached.
    """
    caption = 'startseq'
    for _ in range(max_length):
        # encode the caption so far and pad to the model's input length
        encoded = tokenizer.texts_to_sequences([caption])[0]
        padded = pad_sequences([encoded], max_length)
        # greedy search: take the single most probable next token
        probs = model.predict([image, padded], verbose=0)
        word = idx_to_word(np.argmax(probs), tokenizer)
        # an index outside the vocabulary ends generation
        if word is None:
            break
        caption = caption + " " + word
        # the end marker (kept in the output) also ends generation
        if word == 'endseq':
            break

    return caption
def caption_prediction(img):
    """Gradio handler: numpy image array -> generated caption string."""
    image = Image.fromarray(img)
    img_path = save_image(image)
    features = extract_features(img_path)
    # NOTE(review): `caption_model` is never defined anywhere in this file —
    # the trained captioning model is never loaded, so this line raises
    # NameError at runtime. Load it (e.g. tensorflow.keras.models.load_model)
    # at module level before launching the app.
    raw = predict_caption(caption_model, features, tokenizer)
    # BUG FIX: the original stripped markers with [8:][:-6], which left
    # stray spaces around the caption; remove the markers explicitly.
    return raw.replace('startseq', '').replace('endseq', '').strip()
demo = gr.Interface(
    fn=caption_prediction,
    inputs='image',
    outputs='text',
    title='caption generator',
)

# Guard the launch so importing this module doesn't start the server.
if __name__ == "__main__":
    demo.launch(debug=True, share=True)