bishalbose294 committed on
Commit
dbd1524
·
1 Parent(s): 6c768db

Initial Commit

Browse files
DockerFile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the image-captioning Flask app.
# Serves on port 7860 (the HuggingFace Spaces default).
FROM python:3.10.11

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

# NOTE(review): packages.txt is copied but never installed in this Dockerfile —
# presumably it is the HuggingFace Spaces convention for apt packages; confirm
# it is actually consumed by the platform, otherwise this COPY is dead weight.
COPY ./packages.txt /code/packages.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

# HACK: world-writable tree so the (possibly non-root) runtime user can write
# uploaded files into ./uploads; a narrower chmod on the uploads directory
# alone would be safer.
RUN chmod -R 777 .

EXPOSE 7860

# NOTE(review): app.py does not parse argv (host/port are hard-coded there),
# so the --host/--port arguments below are inert.
CMD ["python", "app.py", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, io, base64
2
+ from imageCaptionGPT import predictCustomModel, predictHFModel
3
+ from flask import Flask, render_template, request
4
+ from flask_cors import CORS
5
+ from PIL import Image
6
+ from gevent.pywsgi import WSGIServer
7
+
8
# Flask application setup: CORS-enabled app with a size-capped upload folder.
app = Flask(__name__)
CORS(app)

cwd = os.getcwd()

# Only plain JPEG/PNG uploads are accepted (enforced by the /predict handler).
app.config["ALLOWED_EXTENSIONS"] = [".jpg", ".png"]
# 1 MiB request-body cap; Flask aborts larger uploads with 413.
app.config["MAX_CONTENT_LENGTH"] = 1024 * 1024
app.config["UPLOAD_FOLDER"] = os.path.join(cwd, "uploads")
# FIX: file.save() in /predict fails if the uploads directory is missing
# (e.g. a fresh checkout where empty dirs were not preserved) — create it.
os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)
16
+
17
+
18
@app.route("/")
def home():
    """Render the landing page with empty prediction and image placeholders."""
    empty_context = {
        "predictionCustomModel": "",
        "predictionHFModel": "",
        "img_data": "",
    }
    return render_template("home.html", **empty_context)
21
+
22
+
23
@app.route("/predict", methods=["POST"])
def predictCaption():
    """Caption an uploaded image with both models and render the results.

    Expects a multipart/form-data POST with a "file" field. The image is
    saved temporarily, captioned by the custom Keras model and by the
    HuggingFace ViT-GPT2 model, embedded back into the page as a base64
    JPEG, and the temporary file is removed.
    """
    file = request.files["file"]
    # FIX: basename() strips directory components an attacker could smuggle
    # into the filename (path traversal out of UPLOAD_FOLDER).
    filename = os.path.basename(file.filename)
    ext = os.path.splitext(filename)[1].lower()
    # FIX: ALLOWED_EXTENSIONS was configured but never enforced.
    if not filename or ext not in app.config["ALLOWED_EXTENSIONS"]:
        return render_template(
            "home.html",
            predictionCustomModel="Unsupported file. Please upload a .jpg or .png image.",
            predictionHFModel="",
            img_data="",
        )
    imagePath = os.path.join(app.config["UPLOAD_FOLDER"], filename)
    file.save(imagePath)
    try:
        with Image.open(imagePath) as img:
            img.load()  # read fully so the OS file handle is released before os.remove
            with io.BytesIO() as buf:
                # FIX: JPEG cannot store alpha; normalise to RGB so PNG
                # uploads with transparency do not crash the re-encode.
                img.convert("RGB").save(buf, "jpeg")
                encoded_string = base64.b64encode(buf.getvalue()).decode()
            predictionCustomModel = predictCustomModel(img)
            predictionHFModel = predictHFModel(img)
    finally:
        # FIX: always clean up the temp file, even if captioning raises.
        os.remove(imagePath)
    predictionCustomModel = "Custom Model:- '" + str(predictionCustomModel) + "'"
    predictionHFModel = "HuggingFace Model:- '" + str(predictionHFModel) + "'"
    return render_template(
        "home.html",
        predictionCustomModel=predictionCustomModel,
        predictionHFModel=predictionHFModel,
        img_data=encoded_string,
    )
39
+
40
+
41
if __name__ == '__main__':
    # Serve with gevent's production WSGI server instead of Flask's
    # development server (the commented-out app.run call below).
    bind_host = '0.0.0.0'
    bind_port = 7860
    banner = "#" * 50
    print(banner, "--Application Serving Now--", banner)
    # app.run(host=bind_host,port=bind_port)
    WSGIServer((bind_host, bind_port), app).serve_forever()
imageCaptionGPT.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle, os, io, re, gc
2
+ from keras.models import Model, load_model
3
+ from keras.applications.inception_v3 import InceptionV3
4
+ import numpy as np
5
+ from keras.applications.inception_v3 import preprocess_input
6
+ from keras.preprocessing.sequence import pad_sequences
7
+ from PIL import Image
8
+ from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
9
+
10
+
11
# Module-level setup: loads both captioning models once at import time.
cwd = os.getcwd()

############################### Custom Model #############################
# Artifacts and hyperparameters for the custom InceptionV3 + decoder captioner.
embedding_dim = 300
count = 0
max_caption_length = 80  # maximum token length of a generated caption
models = os.path.join(cwd, "model")
# word -> index and index -> word vocabulary mappings pickled at training time.
# NOTE(review): pickle.load on repo-local files only — do not point these at
# untrusted paths.
word_index_Mapping = pickle.load(
    open(os.path.join(models, "word_index_Mapping.pkl"), "rb")
)
index_word_Mapping = pickle.load(
    open(os.path.join(models, "index_word_Mapping.pkl"), "rb")
)
vocab_size = len(word_index_Mapping) + 1
# InceptionV3 with its classification head removed: the penultimate layer's
# output serves as the image feature vector fed to the caption decoder.
incpmodel = InceptionV3(weights="imagenet")
inceptionModel = Model(incpmodel.input, incpmodel.layers[-2].output)
model_weights_save_path = os.path.join(models, "model.h5")
predictionModel = load_model(model_weights_save_path)


############################### Hugging Face Model #################################
# Pretrained ViT encoder + GPT-2 decoder captioning model, run on CPU.
device='cpu'
encoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
decoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
model_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint).to(device)
39
+
40
+
41
def predictCustomModel(img):
    """Caption *img* with the custom InceptionV3 + decoder model (greedy decoding).

    Args:
        img: PIL.Image to caption.

    Returns:
        The generated caption string with the startSeq/endSeq markers stripped
        and immediate word repetitions collapsed ("a a cat" -> "a cat").
    """
    # FIX: InceptionV3 expects a 3-channel 299x299 input; convert first so
    # RGBA or palette PNGs do not produce a 4-/1-channel array that breaks
    # the encoder. convert("RGB") is a no-op copy for already-RGB images.
    img = img.convert("RGB").resize((299, 299), Image.LANCZOS)
    batch = preprocess_input(np.expand_dims(img, axis=0))
    vectorImg = inceptionModel.predict(batch)

    # Greedy decoding: feed the caption-so-far back in until endSeq is
    # produced or the maximum caption length is reached.
    in_text = "startSeq"
    for _ in range(1, max_caption_length):
        seq = [
            word_index_Mapping[w] for w in in_text.split() if w in word_index_Mapping
        ]
        in_seq = pad_sequences([seq], maxlen=max_caption_length)
        yhat = predictionModel.predict(x=[vectorImg, in_seq], verbose=0)
        word = index_word_Mapping[np.argmax(yhat)]
        in_text += " " + word
        if word == "endSeq":
            break

    # Drop the leading startSeq and the trailing token.
    # NOTE(review): if the loop exits without producing endSeq, [1:-1] also
    # drops the last real word — preserved from the original implementation.
    caption = " ".join(in_text.split()[1:-1])
    caption = re.sub(r"\b(\w+)( \1\b)+", r"\1", caption)
    # Release the large intermediate feature vector eagerly; this endpoint is
    # memory-constrained on CPU-only hosts.
    del vectorImg
    gc.collect()
    return caption
71
+
72
+
73
+
74
def predictHFModel(image, max_length=256, num_beams=4):
    """Caption *image* with the pretrained ViT-GPT2 model.

    Args:
        image: PIL.Image to caption.
        max_length: maximum number of generated tokens.
        num_beams: beam width for beam search.

    Returns:
        The caption string with the end-of-text marker removed, truncated at
        the first newline, and immediate word repetitions collapsed.
    """
    image = image.convert('RGB')
    pixel_values = feature_extractor(image, return_tensors="pt").pixel_values.to(device)
    # FIX: num_beams was accepted but never forwarded to generate(), so the
    # declared beam search was silently disabled (greedy decoding instead).
    caption_ids = model.generate(pixel_values, max_length=max_length, num_beams=num_beams)[0]
    caption_text = tokenizer.decode(caption_ids).replace('<|endoftext|>', '').split('\n')[0]
    prediction = re.sub(r"\b(\w+)( \1\b)+", r"\1", caption_text.strip())
    del pixel_values
    gc.collect()
    return prediction
model/.nomedia ADDED
File without changes
model/index_word_Mapping.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89f7df635c73f93f4517742824bfa2725dca3b8eb427f7f62f22e3e57ae9b3d4
3
+ size 104193
model/model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67b3be602761d77d995c495393eb3c65e566ef168cd4edcf0933b3e4a377af6b
3
+ size 37183580
model/word_index_Mapping.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10fd0e4a9f1e156d7e9c30e2cd3cae9bd590f22e61c1038345b7877145b567f1
3
+ size 104193
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
pillow
pickleshare
numpy
tensorflow-cpu
Flask
flask_cors
gevent
transformers
torch
templates/.nomedia ADDED
File without changes
templates/home.html ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html>

<head>
    <title>Caption IT</title>
    <style>
        body {
            background-image: url("https://i.pinimg.com/originals/cb/53/c1/cb53c15e1f01254fbdcd9cde60825dba.jpg");
            font-family: Georgia, Times, serif;
            color: white;
            background-color: powderblue;
            background-repeat: no-repeat;
        }

        img {
            width: 300px;
            height: 300px;
            background-image: url("https://png.pngtree.com/png-vector/20210604/ourmid/pngtree-gray-network-placeholder-png-image_3416659.jpg");
            background-position: center;
            background-size: contain;
        }

        h1 {
            font-family: Helvetica, Arial;
        }

        .content {
            width: 800px;
            height: 800px;
            position: absolute;
            /*Can also be `fixed`*/
            left: 10%;
            right: 0;
            top: 10%;
            bottom: 0;
            margin: auto;
            /*Solves a problem in which the content is being cut when the div is smaller than its' wrapper:*/
            max-width: 100%;
            max-height: 100%;
            overflow: auto;
        }

        .btncls {
            background-color: #B80C4D;
            border: #2e6da4;
            font-family: Arial, Geneva, Arial, Helvetica, sans-serif;
            font-size: 15px;
            color: #fff;
            letter-spacing: 1px;
            padding: 8px 12px;
            font-size: 14px;
            font-weight: normal;
            border-radius: 4px;
            line-height: 1.5;
            text-decoration: none
        }

        textarea {
            resize: none;
        }
    </style>
    <script>
        // Submit the upload form only when a file has actually been chosen.
        function uploadCheck() {
            var nme = document.getElementById("file");
            if (nme.value.length < 1) {
                alert("Please select image to upload..");
                nme.focus();
                return false;
            }
            else {
                document.getElementById("image").submit();
            }
        }
        // "Clear" button: reload the landing page to reset form and results.
        function refreshPage() {
            window.location = "/"
        }
    </script>
</head>

<body>
    <div class='content'>
        <h3>Upload Image Here:
        </h3>
        <!-- FIX: the url_for expression must be quoted to form a valid HTML attribute value. -->
        <form name="image" id="image" method="POST" action="{{ url_for('predictCaption') }}"
            enctype="multipart/form-data">
            <input type="file" class="btn" id="file" name="file" autocomplete="off"> &nbsp; &nbsp; &nbsp; &nbsp;
            <input type="button" class="btncls" value="Upload" onClick="uploadCheck();">
            <input type="button" class="btncls" value="Clear" onclick="refreshPage();">
        </form>
        <br><br>
        <label style='font-family: "Comic Sans MS", cursive, sans-serif;'>Image : </label>
        <br><br>
        <!-- FIX: <image> is not a standard HTML element; use <img> with quoted attributes. -->
        <img src="data:image/jpeg;base64,{{ img_data }}" height="300" width="300" alt="uploaded image preview">
        <br><br>
        <h3>{{ predictionCustomModel }}</h3>
        <h3>{{ predictionHFModel }}</h3>
        <br>
    </div>
</body>

</html>
uploads/.nomedia ADDED
File without changes