"""Flask service exposing a BLIP image-captioning endpoint.

POST an image as the multipart form field ``image`` to ``/caption`` and
receive ``{"caption": "..."}`` as JSON.
"""

import io
import os

# Must be assigned before ``transformers`` is imported so the library sees the
# cache location when it initialises.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf-cache"

import torch  # noqa: E402
from flask import Flask, jsonify, request  # noqa: E402
from PIL import Image  # noqa: E402
from transformers import BlipForConditionalGeneration, BlipProcessor  # noqa: E402

MODEL_NAME = "Salesforce/blip-image-captioning-base"

app = Flask(__name__)

# Load the processor and model once at import time so every request reuses them.
processor = BlipProcessor.from_pretrained(MODEL_NAME)
model = BlipForConditionalGeneration.from_pretrained(MODEL_NAME)
model.eval()

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)


class _InvalidImageError(ValueError):
    """Raised when the uploaded bytes cannot be decoded as an image."""


def _load_image(upload):
    """Decode an uploaded file into an RGB ``PIL.Image``.

    Raises:
        _InvalidImageError: If Pillow cannot identify or decode the data.
    """
    try:
        # ``convert`` forces the pixel data to be decoded, so truncated or
        # corrupt files fail here rather than later inside the model code.
        return Image.open(io.BytesIO(upload.read())).convert("RGB")
    except (OSError, Image.DecompressionBombError) as exc:
        raise _InvalidImageError(str(exc)) from exc


def _generate_caption(image):
    """Run the captioning model on ``image`` and return the decoded text."""
    pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
    with torch.no_grad():
        output = model.generate(pixel_values)
    return processor.decode(output[0], skip_special_tokens=True)


@app.route("/caption", methods=["POST"])
def caption_image():
    """Caption the image uploaded in the ``image`` form field.

    Returns:
        A ``(json, status)`` pair or a JSON response:
        * 200 ``{"caption": ...}`` on success,
        * 400 ``{"error": ...}`` when the field is missing or the upload is
          not a decodable image,
        * 500 ``{"error": ...}`` for any other failure.
    """
    if "image" not in request.files:
        return jsonify({"error": "No image file provided"}), 400

    try:
        image = _load_image(request.files["image"])
        caption = _generate_caption(image)
    except _InvalidImageError as exc:
        # A bad upload is the client's fault, not a server error.
        return jsonify({"error": f"Invalid image file: {exc}"}), 400
    except Exception as exc:
        # Top-level boundary: record the traceback, then report the failure.
        app.logger.exception("Caption generation failed")
        return jsonify({"error": str(exc)}), 500

    return jsonify({"caption": caption})


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)