Spaces:
Sleeping
Sleeping
Commit ·
dbd1524
1
Parent(s): 6c768db
Initial Commit
Browse files- DockerFile +17 -0
- app.py +47 -0
- imageCaptionGPT.py +86 -0
- model/.nomedia +0 -0
- model/index_word_Mapping.pkl +3 -0
- model/model.h5 +3 -0
- model/word_index_Mapping.pkl +3 -0
- requirements.txt +7 -0
- templates/.nomedia +0 -0
- templates/home.html +101 -0
- uploads/.nomedia +0 -0
DockerFile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Container image for the Flask image-captioning app; app.py listens on port 7860.
FROM python:3.10.11

WORKDIR /code

# Copy the dependency list on its own first so the pip layer below stays cached
# when only application code changes.
COPY ./requirements.txt /code/requirements.txt

# NOTE: a former `COPY ./packages.txt /code/packages.txt` step was removed here.
# The repository contains no packages.txt, and COPY aborts the build when its
# source path does not exist.

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

# Presumably opened up so the app can write into uploads/ when the container
# runs as a non-root user -- confirm before tightening.
RUN chmod -R 777 .

EXPOSE 7860

# app.py hard-codes host 0.0.0.0 / port 7860 and never reads argv, so the flags
# below are informational only.
CMD ["python", "app.py", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, io, base64
|
| 2 |
+
from imageCaptionGPT import predictCustomModel, predictHFModel
|
| 3 |
+
from flask import Flask, render_template, request
|
| 4 |
+
from flask_cors import CORS
|
| 5 |
+
from PIL import Image
|
| 6 |
+
from gevent.pywsgi import WSGIServer
|
| 7 |
+
|
| 8 |
+
# Flask application object with flask_cors enabled on it.
app = Flask(__name__)
CORS(app)

cwd = os.getcwd()

# Upload-related settings:
#   ALLOWED_EXTENSIONS - declared here; not consulted by the route code in this module.
#   MAX_CONTENT_LENGTH - request bodies are capped at 1 MiB.
#   UPLOAD_FOLDER      - scratch directory under the current working directory.
app.config.update(
    ALLOWED_EXTENSIONS=[".jpg", ".png"],
    MAX_CONTENT_LENGTH=1024 * 1024,
    UPLOAD_FOLDER=os.path.join(cwd, "uploads"),
)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@app.route("/")
def home():
    """Render the landing page with no captions and no image data."""
    blank_context = {
        "predictionCustomModel": "",
        "predictionHFModel": "",
        "img_data": "",
    }
    return render_template("home.html", **blank_context)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@app.route("/predict", methods=["POST"])
def predictCaption():
    """Caption the uploaded image with both models and render the result page.

    Expects a multipart upload under the form field ``file``. The upload is
    written to ``UPLOAD_FOLDER``, opened with Pillow, captioned by
    ``predictCustomModel`` and ``predictHFModel``, and echoed back to the
    template as a base64-encoded JPEG. The scratch file is removed before
    returning, including when captioning raises.
    """
    file = request.files["file"]

    # The filename is chosen by the client: keep only its last path component
    # so a name such as "../../app.py" cannot escape the upload folder.
    safeName = os.path.basename(file.filename or "")
    if safeName in ("", ".", ".."):
        # No usable file was submitted; show the empty page instead of
        # failing while trying to save onto a directory path.
        return render_template("home.html", predictionCustomModel="", predictionHFModel="", img_data="")

    imagePath = os.path.join(app.config["UPLOAD_FOLDER"], safeName)
    file.save(imagePath)
    try:
        with Image.open(imagePath) as uploaded:
            # JPEG cannot store alpha/palette modes (typical for PNG uploads),
            # so normalise to RGB before re-encoding and captioning.
            # convert() returns a fully loaded copy, so closing the file here is safe.
            img = uploaded.convert("RGB")

        with io.BytesIO() as buf:
            img.save(buf, "jpeg")
            encoded_string = base64.b64encode(buf.getvalue()).decode()

        customCaption = predictCustomModel(img)
        hfCaption = predictHFModel(img)
    finally:
        # Always clean up the scratch file, even if decoding or inference failed.
        os.remove(imagePath)

    predictionCustomModel = f"Custom Model:- '{customCaption}'"
    predictionHFModel = f"HuggingFace Model:- '{hfCaption}'"
    return render_template(
        "home.html",
        predictionCustomModel=predictionCustomModel,
        predictionHFModel=predictionHFModel,
        img_data=encoded_string,
    )
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
if __name__ == '__main__':
    # Serve the Flask app through gevent's WSGI server (instead of app.run)
    # on all interfaces, port 7860.
    host, port = '0.0.0.0', 7860
    rule = "#" * 50
    print(rule, "--Application Serving Now--", rule)
    WSGIServer((host, port), app).serve_forever()
|
imageCaptionGPT.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pickle, os, io, re, gc
|
| 2 |
+
from keras.models import Model, load_model
|
| 3 |
+
from keras.applications.inception_v3 import InceptionV3
|
| 4 |
+
import numpy as np
|
| 5 |
+
from keras.applications.inception_v3 import preprocess_input
|
| 6 |
+
from keras.preprocessing.sequence import pad_sequences
|
| 7 |
+
from PIL import Image
|
| 8 |
+
from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
cwd = os.getcwd()

############################### Custom Model #############################
# embedding_dim, count and vocab_size are not referenced by the functions in
# this module; they are kept as module attributes in case other code reads them.
embedding_dim = 300
count = 0
# Length the token sequence is padded to, and the bound on the decoding loop.
max_caption_length = 80
models = os.path.join(cwd, "model")

# Vocabulary lookups: word -> index and index -> word.
# NOTE: pickle can execute arbitrary code on load; these must only ever be the
# model files bundled with the application.
with open(os.path.join(models, "word_index_Mapping.pkl"), "rb") as mapping_file:
    word_index_Mapping = pickle.load(mapping_file)
with open(os.path.join(models, "index_word_Mapping.pkl"), "rb") as mapping_file:
    index_word_Mapping = pickle.load(mapping_file)
vocab_size = len(word_index_Mapping) + 1

# Image encoder: InceptionV3 with ImageNet weights, cut at its second-to-last
# layer so predict() returns that layer's output instead of class scores.
incpmodel = InceptionV3(weights="imagenet")
inceptionModel = Model(incpmodel.input, incpmodel.layers[-2].output)

# Caption decoder loaded from the bundled HDF5 file.
model_weights_save_path = os.path.join(models, "model.h5")
predictionModel = load_model(model_weights_save_path)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
############################### Hugging Face Model #################################
device = 'cpu'
# A single checkpoint id is used for the feature extractor, tokenizer and model.
encoder_checkpoint = decoder_checkpoint = model_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint)
model = model.to(device)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def predictCustomModel(img):
    """Generate a caption for ``img`` with the InceptionV3 encoder and custom decoder.

    Decoding is greedy: starting from ``startSeq``, the most probable next word
    is appended until ``endSeq`` is produced or ``max_caption_length - 1``
    words have been generated.

    Args:
        img: A PIL image in any mode; it is converted to RGB here.

    Returns:
        The caption as a space-joined string, without the start/end markers
        and with immediately repeated words collapsed into one.
    """
    # The encoder is fed a 299x299 image; forcing RGB keeps grayscale, palette
    # and RGBA inputs from reaching it with the wrong number of channels.
    img = img.convert("RGB").resize((299, 299), Image.LANCZOS)
    batch = preprocess_input(np.expand_dims(img, axis=0))
    vectorImg = inceptionModel.predict(batch)

    words = ["startSeq"]
    for _ in range(1, max_caption_length):
        seq = [word_index_Mapping[w] for w in words if w in word_index_Mapping]
        in_seq = pad_sequences([seq], maxlen=max_caption_length)
        yhat = predictionModel.predict(x=[vectorImg, in_seq], verbose=0)
        try:
            word = index_word_Mapping[np.argmax(yhat)]
        except (KeyError, IndexError):
            # The model chose an index with no vocabulary entry (e.g. the
            # padding slot); stop rather than crash the request.
            break
        words.append(word)
        if word == "endSeq":
            break

    # Drop the start marker always, but the end marker only if it was actually
    # generated; otherwise the last real word would be lost when the length
    # limit is hit.
    caption_words = words[1:]
    if caption_words and caption_words[-1] == "endSeq":
        caption_words.pop()

    # Collapse stutters such as "a a dog" into "a dog".
    predict = re.sub(r"\b(\w+)( \1\b)+", r"\1", " ".join(caption_words))
    # Locals are released on return; the explicit collection is retained from
    # the original, presumably to keep memory down between requests.
    gc.collect()
    return predict
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def predictHFModel(image, max_length=256, num_beams=4):
    """Generate a caption for ``image`` with the pretrained ViT-GPT2 model.

    Args:
        image: A PIL image; converted to RGB before feature extraction.
        max_length: Maximum generated length, forwarded to ``model.generate``.
        num_beams: Beam-search width, forwarded to ``model.generate``.

    Returns:
        The first line of the decoded caption, stripped, with the
        ``<|endoftext|>`` marker removed and immediately repeated words
        collapsed into one.
    """
    pixel_values = feature_extractor(
        image.convert('RGB'), return_tensors="pt"
    ).pixel_values.to(device)

    # num_beams used to be accepted but silently ignored; pass it through so
    # the parameter actually controls decoding.
    caption_ids = model.generate(
        pixel_values, max_length=max_length, num_beams=num_beams
    )[0]

    decoded = tokenizer.decode(caption_ids)
    caption_text = decoded.replace('<|endoftext|>', '').split('\n')[0]
    prediction = re.sub(r"\b(\w+)( \1\b)+", r"\1", caption_text.strip())
    # Locals are released on return; the explicit collection is retained from
    # the original, presumably to keep memory down between requests.
    gc.collect()
    return prediction
|
model/.nomedia
ADDED
|
File without changes
|
model/index_word_Mapping.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89f7df635c73f93f4517742824bfa2725dca3b8eb427f7f62f22e3e57ae9b3d4
|
| 3 |
+
size 104193
|
model/model.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67b3be602761d77d995c495393eb3c65e566ef168cd4edcf0933b3e4a377af6b
|
| 3 |
+
size 37183580
|
model/word_index_Mapping.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10fd0e4a9f1e156d7e9c30e2cd3cae9bd590f22e61c1038345b7877145b567f1
|
| 3 |
+
size 104193
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pillow
pickleshare
numpy
tensorflow-cpu
Flask
flask_cors
gevent
# imageCaptionGPT.py imports `transformers` and asks for PyTorch tensors
# (return_tensors="pt"), so both packages must be installed.
transformers
torch
|
templates/.nomedia
ADDED
|
File without changes
|
templates/home.html
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html>
|
| 3 |
+
|
| 4 |
+
<head>
|
| 5 |
+
<title>Caption IT</title>
|
| 6 |
+
<style>
    /* Page: white serif text over a remote background image, powderblue as fallback colour. */
    body {
        color: white;
        font-family: Georgia, Times, serif;
        background-color: powderblue;
        background-image: url("https://i.pinimg.com/originals/cb/53/c1/cb53c15e1f01254fbdcd9cde60825dba.jpg");
        background-repeat: no-repeat;
    }

    /* Preview: fixed 300x300 box; the background image acts as a placeholder. */
    img {
        height:300px;
        width:300px;
        background-image: url("https://png.pngtree.com/png-vector/20210604/ourmid/pngtree-gray-network-placeholder-png-image_3416659.jpg");
        background-size: contain;
        background-position: center;
    }

    h1 {
        font-family: Helvetica, Arial;
    }

    /* Main panel: absolutely positioned (could also be `fixed`), offset 10% from top and left. */
    .content {
        position: absolute;
        top: 10%;
        right: 0;
        bottom: 0;
        left: 10%;
        width: 800px;
        height: 800px;
        margin: auto;
        /* Keeps the content from being cut off when the div is smaller than its wrapper. */
        max-width: 100%;
        max-height: 100%;
        overflow: auto;
    }

    /* Upload / Clear buttons. */
    .btncls {
        color: #fff;
        background-color: #B80C4D;
        border: #2e6da4;
        border-radius: 4px;
        padding: 8px 12px;
        font-family: Arial, Geneva, Helvetica, sans-serif;
        font-size: 14px;
        font-weight: normal;
        letter-spacing: 1px;
        line-height: 1.5;
        text-decoration: none;
    }

    textarea {
        resize: none;
    }
</style>
|
| 62 |
+
<script>
|
| 63 |
+
// Submit the upload form when a file has been chosen; otherwise warn the
// user, focus the file input and return false.
function uploadCheck() {
    var fileInput = document.getElementById("file");
    if (fileInput.value.length >= 1) {
        document.getElementById("image").submit();
        return;
    }
    alert("Please select image to upload..");
    fileInput.focus();
    return false;
}
|
| 74 |
+
// Navigate back to the site root, which renders the page with no results.
function refreshPage() {
    var rootPath = "/";
    window.location = rootPath;
}
|
| 77 |
+
</script>
|
| 78 |
+
</head>
|
| 79 |
+
|
| 80 |
+
<body>
    <div class='content'>
        <h3>Upload Image Here:
        </h3>
        <!-- The form is posted by uploadCheck() once a file has been selected. -->
        <form name="image" id="image" method="POST" action="{{ url_for('predictCaption') }}"
            enctype="multipart/form-data">
            <input type="file" class="btn" id="file" name="file" autocomplete="off">
            <input type="button" class="btncls" value="Upload" onclick="uploadCheck();">
            <input type="button" class="btncls" value="Clear" onclick="refreshPage();">
        </form>
        <br><br>
        <label style='font-family: "Comic Sans MS", cursive, sans-serif;'>Image : </label>
        <br><br>
        <!-- img_data is the base64 JPEG supplied by the server; it is empty on the landing page. -->
        <img src="data:image/jpeg;base64,{{ img_data }}" alt="Uploaded image preview" height="300" width="300">
        <br><br>
        <h3>{{ predictionCustomModel }}</h3>
        <h3>{{ predictionHFModel }}</h3>
        <br>
    </div>
</body>
|
| 100 |
+
|
| 101 |
+
</html>
|
uploads/.nomedia
ADDED
|
File without changes
|