| import gradio as gr |
| from transformers import BlipProcessor, BlipForConditionalGeneration |
|
|
| |
# Load the BLIP large image-captioning checkpoint once at module import time
# so the Gradio request handler can reuse it across calls.
# NOTE(review): from_pretrained downloads the weights on first use if they are
# not already in the local Hugging Face cache — startup may be slow/offline-unsafe.
model_name = "Salesforce/blip-image-captioning-large"
processor = BlipProcessor.from_pretrained(model_name)
model = BlipForConditionalGeneration.from_pretrained(model_name)
|
|
def generate_caption(image):
    """Generate a natural-language caption for *image* with the BLIP model.

    Parameters
    ----------
    image : PIL.Image.Image | numpy.ndarray | None
        Image handed over by the Gradio component; ``None`` when the user
        submits without uploading anything.

    Returns
    -------
    str
        The decoded caption, or an empty string when no image was provided.
    """
    # Gradio passes None on an empty submission; bail out early instead of
    # letting the processor raise deep inside transformers.
    if image is None:
        return ""

    inputs = processor(images=image, return_tensors="pt")

    # generate() returns a batch of token-id tensors, not text — name it so.
    output_ids = model.generate(**inputs)

    # Take the single item of the batch and strip BOS/EOS/pad tokens.
    return processor.decode(output_ids[0], skip_special_tokens=True)
|
|
| |
# Gradio 3.x deprecated and 4.x removed the gr.inputs / gr.outputs namespaces;
# components are now constructed directly from the top-level gr module.
inputs = gr.Image(label="Upload an image")
outputs = gr.Textbox(label="Generated Caption")

# Wire the captioning function to the UI and start the local web server.
gr.Interface(fn=generate_caption, inputs=inputs, outputs=outputs).launch()