image2text-faiss-demo / scripts /generate_blip_caption.py
Stephen Ebert
Add app, requirements and helper scripts
ce53f55
raw
history blame contribute delete
746 Bytes
import sys

from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
# Load the BLIP processor and captioning model from the same named checkpoint.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Image to caption: the first command-line argument when one is given,
# otherwise the original local COCO sample. The fallback is a machine-specific
# path, so pass your own file on any other machine:
#     python generate_blip_caption.py path/to/image.jpg
image_path = (
    sys.argv[1]
    if len(sys.argv) > 1
    else "/Users/steph/Library/CloudStorage/OneDrive-Personal/Desktop/Springboard/Springboard/Capstone/step2/data/coco/train2017/000000000009.jpg"
)

# Open inside a context manager so the underlying file handle is released
# deterministically; convert() returns a separate in-memory RGB image that
# remains usable after the `with` block exits.
with Image.open(image_path) as source_image:
    image = source_image.convert("RGB")

# Generate caption: preprocess the image into tensors, let the model produce
# token ids, then decode the first (only) sequence back into text.
inputs = processor(image, return_tensors="pt")
out_ids = model.generate(**inputs)
caption = processor.decode(out_ids[0], skip_special_tokens=True)
print("Generated caption:", caption)