Spaces:
Sleeping
Sleeping
| import requests | |
| from PIL import Image | |
| from transformers import BlipProcessor, BlipForConditionalGeneration | |
# Load one processor/model pair per BLIP captioning checkpoint at import time.
# The "base" pair is used for unconditional captioning and the "large" pair
# for text-conditioned captioning (see the two functions below).
processor_base, model_base = (
    BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base"),
    BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base"),
)
processor_large, model_large = (
    BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large"),
    BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large"),
)
| # Define the function for unconditional image captioning | |
def caption_image_unconditional(image) -> str:
    """Return a caption for ``image`` generated without any text prompt.

    Uses the module-level ``processor_base`` / ``model_base`` pair.

    Args:
        image: The image to describe; passed straight to ``processor_base``.
            Presumably a PIL image -- confirm against the caller.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens stripped.
    """
    encoded = processor_base(images=image, return_tensors="pt")
    generated_ids = model_base.generate(**encoded)
    # Only the first generated sequence is decoded.
    first_sequence = generated_ids[0]
    return processor_base.decode(first_sequence, skip_special_tokens=True)
| # Define the function for conditional image captioning | |
def caption_image_conditional(image, text) -> str:
    """Return a caption for ``image`` that is conditioned on ``text``.

    Uses the module-level ``processor_large`` / ``model_large`` pair.

    Args:
        image: The image to describe; passed straight to ``processor_large``.
            Presumably a PIL image -- confirm against the caller.
        text: Text handed to the processor together with the image.
            Presumably the prompt the caption should start from -- confirm.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens stripped.
    """
    encoded = processor_large(images=image, text=text, return_tensors="pt")
    generated_ids = model_large.generate(**encoded)
    # Only the first generated sequence is decoded.
    first_sequence = generated_ids[0]
    return processor_large.decode(first_sequence, skip_special_tokens=True)