import gradio as gr from transformers import pipeline pipe = pipeline( "image-text-to-text", model="Salesforce/blip-image-captioning-base" ) def launch(input): out = pipe(input, text="") return out[0]['generated_text'] iface = gr.Interface( launch, inputs=gr.Image(type='pil'), outputs="text" ) iface.launch()