Spaces:

sagar118
/

image-caption-api

Sleeping

App Files Files Community

sagar118 committed on Jan 7

Commit

8143e62

verified ·

1 Parent(s): 88079cc

Create app.py

Browse files

Files changed (1) hide show

app.py +66 -0

app.py ADDED Viewed

	@@ -0,0 +1,66 @@

+from flask import Flask, request, jsonify
+from flask_cors import CORS
+from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, AutoTokenizer
+from PIL import Image
+import torch
+import io
+app = Flask(__name__)
+CORS(app)
+print("Loading model...")
+model = VisionEncoderDecoderModel.from_pretrained(
+    "nlpconnect/vit-gpt2-image-captioning"
+)
+feature_extractor = ViTFeatureExtractor.from_pretrained(
+    "nlpconnect/vit-gpt2-image-captioning"
+)
+tokenizer = AutoTokenizer.from_pretrained(
+    "nlpconnect/vit-gpt2-image-captioning"
+)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+print("Model loaded successfully")
+def predict_caption(image: Image.Image):
+    if image.mode != "RGB":
+        image = image.convert("RGB")
+    pixel_values = feature_extractor(
+        images=[image], return_tensors="pt"
+    ).pixel_values.to(device)
+    output_ids = model.generate(
+        pixel_values,
+        max_length=16,
+        num_beams=4
+    )
+    preds = tokenizer.batch_decode(
+        output_ids, skip_special_tokens=True
+    )
+    return preds[0].strip()
+@app.route("/caption", methods=["POST"])
+def caption():
+    if "image" not in request.files:
+        return jsonify({"error": "No image provided"}), 400
+    image_file = request.files["image"]
+    image = Image.open(io.BytesIO(image_file.read()))
+    try:
+        caption = predict_caption(image)
+        return jsonify({"caption": caption})
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+@app.route("/")
+def health():
+    return "Image Caption API is running"
+# When executed as a script, start Flask's built-in server bound to all
+# interfaces on port 7860.
+# NOTE(review): 7860 is presumably the port the hosting Space expects — confirm.
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7860)