Spaces:

bishalbose294
/

captionGPT

Sleeping

App Files Files Community

bishalbose294 committed on Jul 22, 2023

Commit

dbd1524

1 Parent(s): 6c768db

Initial Commit

Browse files

Files changed (11) hide show

DockerFile +17 -0
app.py +47 -0
imageCaptionGPT.py +86 -0
model/.nomedia +0 -0
model/index_word_Mapping.pkl +3 -0
model/model.h5 +3 -0
model/word_index_Mapping.pkl +3 -0
requirements.txt +7 -0
templates/.nomedia +0 -0
templates/home.html +101 -0
uploads/.nomedia +0 -0

DockerFile ADDED Viewed

	@@ -0,0 +1,17 @@

+# Container image for the captionGPT Flask app.
+FROM python:3.10.11
+WORKDIR /code
+# Copy only the dependency manifest first so the pip layer is reused when just the code changes.
+# (The former separate COPY of packages.txt was dropped: the file is not part of this commit,
+# nothing read it before the full copy below, and `COPY . .` still brings it in if it exists.)
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+COPY . .
+# World-writable tree; presumably so a non-root runtime user can write to it -- TODO confirm.
+RUN chmod -R 777 .
+EXPOSE 7860
+# NOTE: app.py hard-codes host 0.0.0.0 and port 7860 and does not parse these flags.
+CMD ["python", "app.py", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import os, io, base64
+from imageCaptionGPT import predictCustomModel, predictHFModel
+from flask import Flask, render_template, request
+from flask_cors import CORS
+from PIL import Image
+from gevent.pywsgi import WSGIServer
+app = Flask(__name__)
+CORS(app)
+cwd = os.getcwd()
+app.config["ALLOWED_EXTENSIONS"] = [".jpg", ".png"]
+app.config["MAX_CONTENT_LENGTH"] = 1024 * 1024
+app.config["UPLOAD_FOLDER"] = os.path.join(cwd, "uploads")
+@app.route("/")
+def home():
+    return render_template("home.html", predictionCustomModel="", predictionHFModel="", img_data="")
+@app.route("/predict", methods=["POST"])
+def predictCaption():
+    file = request.files["file"]
+    imagePath = os.path.join(app.config["UPLOAD_FOLDER"], file.filename)
+    file.save(imagePath)
+    img = Image.open(imagePath)
+    with io.BytesIO() as buf:
+        img.save(buf, "jpeg")
+        image_bytes = buf.getvalue()
+    encoded_string = base64.b64encode(image_bytes).decode()
+    predictionCustomModel = predictCustomModel(img)
+    predictionHFModel = predictHFModel(img)
+    predictionCustomModel = "Custom Model:- \'"+str(predictionCustomModel)+"\'"
+    predictionHFModel = "HuggingFace Model:- \'"+str(predictionHFModel)+"\'"
+    os.remove(imagePath)
+    return render_template("home.html", predictionCustomModel=predictionCustomModel, predictionHFModel=predictionHFModel, img_data=encoded_string)
+if __name__ == '__main__':
+    host = '0.0.0.0'
+    port = 7860
+    print("#"*50,"--Application Serving Now--","#"*50)
+    # app.run(host=host,port=port)
+    app_serve = WSGIServer((host,port),app)
+    app_serve.serve_forever()

imageCaptionGPT.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import pickle, os, io, re, gc
+from keras.models import Model, load_model
+from keras.applications.inception_v3 import InceptionV3
+import numpy as np
+from keras.applications.inception_v3 import preprocess_input
+from keras.preprocessing.sequence import pad_sequences
+from PIL import Image
+from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
+cwd = os.getcwd()
+############################### Custom Model #############################
+embedding_dim = 300
+count = 0
+max_caption_length = 80
+models = os.path.join(cwd, "model")
+word_index_Mapping = pickle.load(
+    open(os.path.join(models, "word_index_Mapping.pkl"), "rb")
+)
+index_word_Mapping = pickle.load(
+    open(os.path.join(models, "index_word_Mapping.pkl"), "rb")
+)
+vocab_size = len(word_index_Mapping) + 1
+incpmodel = InceptionV3(weights="imagenet")
+inceptionModel = Model(incpmodel.input, incpmodel.layers[-2].output)
+model_weights_save_path = os.path.join(models, "model.h5")
+predictionModel = load_model(model_weights_save_path)
+############################### Hugging Face Model #################################
+device='cpu'
+encoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
+decoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
+model_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
+feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
+tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
+model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint).to(device)
+def predictCustomModel(img):
+    img = img.resize((299, 299), Image.LANCZOS)
+    img = np.expand_dims(img, axis=0)
+    img = preprocess_input(img)
+    vectorImg = inceptionModel.predict(img)
+    in_text = "startSeq"
+    for i in range(1, max_caption_length):
+        seq = [
+            word_index_Mapping[w] for w in in_text.split() if w in word_index_Mapping
+        ]
+        in_seq = pad_sequences([seq], maxlen=max_caption_length)
+        inputs = [vectorImg, in_seq]
+        yhat = predictionModel.predict(x=inputs, verbose=0)
+        yhat = np.argmax(yhat)
+        word = index_word_Mapping[yhat]
+        in_text += " " + word
+        if word == "endSeq":
+            break
+    final = in_text.split()
+    final = final[1:-1]
+    final = " ".join(final)
+    predict = re.sub(r"\b(\w+)( \1\b)+", r"\1", final)
+    del img
+    del vectorImg
+    del final
+    del in_text
+    del seq
+    del inputs
+    gc.collect()
+    return predict
+def predictHFModel(image, max_length=256, num_beams=4):
+   image = image.convert('RGB')
+   image = feature_extractor(image, return_tensors="pt").pixel_values.to(device)
+   clean_text = lambda x: x.replace('<|endoftext|>','').split('\n')[0]
+   caption_ids = model.generate(image, max_length = max_length)[0]
+   caption_text = clean_text(tokenizer.decode(caption_ids))
+   prediction = re.sub(r"\b(\w+)( \1\b)+", r"\1", caption_text.strip())
+   del image
+   del clean_text
+   del caption_ids
+   del caption_text
+   gc.collect()
+   return prediction

model/.nomedia ADDED Viewed

File without changes

model/index_word_Mapping.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89f7df635c73f93f4517742824bfa2725dca3b8eb427f7f62f22e3e57ae9b3d4
+size 104193

model/model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:67b3be602761d77d995c495393eb3c65e566ef168cd4edcf0933b3e4a377af6b
+size 37183580

model/word_index_Mapping.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:10fd0e4a9f1e156d7e9c30e2cd3cae9bd590f22e61c1038345b7877145b567f1
+size 104193

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+pillow
+pickleshare
+numpy
+tensorflow-cpu
+Flask
+flask_cors
+gevent
+# imageCaptionGPT.py imports transformers and requests PyTorch ("pt") tensors.
+transformers
+torch

templates/.nomedia ADDED Viewed

File without changes

templates/home.html ADDED Viewed

	@@ -0,0 +1,101 @@

+<!DOCTYPE html>
+<html>
+<head>
+   <title>Caption IT</title>
+   <style>
+      body {
+         background-image: url("https://i.pinimg.com/originals/cb/53/c1/cb53c15e1f01254fbdcd9cde60825dba.jpg");
+         font-family: Georgia, Times, serif;
+         color: white;
+         background-color: powderblue;
+         background-repeat: no-repeat;
+      }
+      img {
+         width:300px;
+         height:300px;
+         background-image: url("https://png.pngtree.com/png-vector/20210604/ourmid/pngtree-gray-network-placeholder-png-image_3416659.jpg");
+         background-position: center;
+         background-size: contain;
+      }
+      h1 {
+         font-family: Helvetica, Arial;
+      }
+      .content {
+         width: 800px;
+         height: 800px;
+         position: absolute;
+         /*Can also be `fixed`*/
+         left: 10%;
+         right: 0;
+         top: 10%;
+         bottom: 0;
+         margin: auto;
+         /*Solves a problem in which the content is being cut when the div is smaller than its' wrapper:*/
+         max-width: 100%;
+         max-height: 100%;
+         overflow: auto;
+      }
+      /* Shared look of the Upload and Clear buttons. */
+      .btncls {
+         background-color: #B80C4D;
+         border: #2e6da4;
+         font-family: Arial, Geneva, Helvetica, sans-serif;
+         color: #fff;
+         letter-spacing: 1px;
+         padding: 8px 12px;
+         /* The earlier 15px declaration was dead: this later one always won. */
+         font-size: 14px;
+         font-weight: normal;
+         border-radius: 4px;
+         line-height: 1.5;
+         text-decoration: none
+      }
+      textarea {
+         resize: none;
+      }
+   </style>
+   <script>
+      function uploadCheck() {
+         var nme = document.getElementById("file");
+         if (nme.value.length < 1) {
+            alert("Please select image to upload..");
+            nme.focus();
+            return false;
+         }
+         else {
+            document.getElementById("image").submit();
+         }
+      }
+      function refreshPage() {
+         window.location = "/"
+      }
+   </script>
+</head>
+<body>
+   <div class='content'>
+      <h3>Upload Image Here:
+      </h3>
+      <!-- Posts the chosen file as multipart form data to the predictCaption route. -->
+      <form name="image" id="image" method="POST" action="{{ url_for('predictCaption') }}"
+         enctype="multipart/form-data">
+         <input type="file" class="btn" id="file" name="file" autocomplete="off"> &nbsp; &nbsp; &nbsp; &nbsp;
+         <input type="button" class="btncls" value="Upload" onclick="uploadCheck();">
+         <input type="button" class="btncls" value="Clear" onclick="refreshPage();">
+      </form>
+      <br><br>
+      <label style='font-family: "Comic Sans MS", cursive, sans-serif;'>Image : </label>
+         <br><br>
+         <!-- img_data is the base64 JPEG supplied by the view; it is empty on the blank page. -->
+         <img src="data:image/jpeg;base64,{{ img_data }}" alt="Uploaded image preview" height="300" width="300">
+         <br><br>
+         <h3>{{ predictionCustomModel }}</h3>
+         <h3>{{ predictionHFModel }}</h3>
+      <br>
+   </div>
+</body>
+</html>

uploads/.nomedia ADDED Viewed

File without changes