"""QuickCare captioning endpoint: serves BLIP image captions over HTTP."""

import io

import torch
from fastapi import FastAPI, File, HTTPException, UploadFile
from PIL import Image, UnidentifiedImageError
from transformers import BlipForConditionalGeneration, BlipProcessor

app = FastAPI(title="QuickCare Captioning Endpoint")

# Single source of truth for the checkpoint name (was duplicated inline).
MODEL_NAME = "Salesforce/blip-image-captioning-large"

# Model and processor are loaded once at import time so every request
# reuses the same weights instead of reloading per call.
blip_model = BlipForConditionalGeneration.from_pretrained(MODEL_NAME)
blip_processor = BlipProcessor.from_pretrained(MODEL_NAME)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
blip_model.to(device)
# Inference-only service: disable dropout and other train-time behavior
# so captions are deterministic for a given input.
blip_model.eval()


@app.post("/caption")
async def generate_caption(image: UploadFile = File(...)):
    """Generate a caption for an uploaded image.

    Args:
        image: Multipart file upload; must be a decodable image format.

    Returns:
        JSON object ``{"caption": <str>}`` with the generated caption.

    Raises:
        HTTPException: 400 if the upload is empty or not a valid image.
    """
    image_bytes = await image.read()
    if not image_bytes:
        # Reject empty uploads explicitly instead of letting PIL raise a 500.
        raise HTTPException(status_code=400, detail="Empty upload: no image data received.")

    try:
        # convert("RGB") normalizes palette/grayscale/alpha inputs to the
        # 3-channel format the BLIP processor expects.
        img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    except UnidentifiedImageError:
        # A non-image payload is a client error, not a server failure.
        raise HTTPException(status_code=400, detail="Uploaded file is not a valid image.")

    inputs = blip_processor(images=img, return_tensors="pt").to(device)
    # no_grad: inference only — skip autograd bookkeeping to save memory.
    with torch.no_grad():
        caption_ids = blip_model.generate(**inputs, max_new_tokens=60)
    caption = blip_processor.decode(caption_ids[0], skip_special_tokens=True)
    return {"caption": caption}