| from transformers import BlipForConditionalGeneration | |
| from transformers import AutoProcessor | |
| from PIL import Image | |
| import requests | |
| import gradio as gr | |
# Import-time smoke test: caption one fixed sample image and print the result.
# The `processor` and `model` objects created here stay bound at module level
# after this section runs.
url = "http://images.cocodataset.org/val2017/000000039769.jpg"

# A timeout keeps a stalled server from hanging start-up forever, and
# raise_for_status() stops an HTTP error page from being handed to PIL.
with requests.get(url, stream=True, timeout=30) as response:
    response.raise_for_status()
    # convert() decodes the whole image, so it remains usable once the
    # connection is closed on leaving the `with` block.
    image = Image.open(response.raw).convert("RGB")

processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Keep the model inputs under their own name instead of overwriting `image`.
inputs = processor(image, return_tensors="pt")
generated_ids = model.generate(**inputs)
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
print(generated_text)
def launch(input):
    """Download the image at a URL and return a generated caption for it.

    Args:
        input: URL of the image to caption, as text. Surrounding whitespace
            is ignored. (The name shadows the builtin ``input``; it is kept
            so existing callers are unaffected.)

    Returns:
        The caption decoded from the model output, with special tokens
        skipped and leading/trailing whitespace stripped.

    Raises:
        requests.RequestException: if the download fails, times out, or the
            server responds with an HTTP error status.
    """
    url = input.strip()

    # Bound the wait and reject HTTP error responses up front, rather than
    # hanging the request or passing an error page to the image decoder.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        # convert() decodes the whole image, so it remains usable once the
        # connection is closed on leaving the `with` block.
        picture = Image.open(response.raw).convert("RGB")

    # Reuse the processor and model already loaded at module level; loading
    # them again on every call repeats the same expensive work each request.
    inputs = processor(picture, return_tensors="pt")
    generated_ids = model.generate(**inputs)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
# Build the web UI: one text input whose value is passed to `launch`, and one
# text output showing the value `launch` returns.
iface = gr.Interface(
    fn=launch,
    inputs="text",
    outputs="text",
)

# Start serving the interface.
iface.launch()