"""Mini image-captioning demo: a Gradio UI around a pretrained image-to-text pipeline."""

from typing import Optional

import gradio as gr
from transformers import pipeline

# Pretrained vision-to-text model. Built once at import time so that every
# request served by the UI reuses the same pipeline object.
captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")


def generate_caption(image: Optional[str]) -> str:
    """Return a natural-language caption for an uploaded image.

    Args:
        image: Path to the image file. The input component below is
            configured with ``type="filepath"``, so Gradio hands the upload
            over as a path; it is expected to be ``None`` when the user
            submits without selecting an image.

    Returns:
        The first caption produced by ``captioner``, with surrounding
        whitespace removed, or an empty string if the pipeline returned no
        predictions.

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable
            message instead of an opaque pipeline failure.
    """
    if not image:
        raise gr.Error("Please upload an image first.")

    predictions = captioner(image)
    if not predictions:
        # Nothing to index into; report "no caption" rather than IndexError.
        return ""

    return predictions[0]["generated_text"].strip()


# Gradio UI
demo = gr.Interface(
    fn=generate_caption,
    inputs=gr.Image(type="filepath"),
    outputs=gr.Textbox(label="Generated Caption"),
    title="Mini Image Captioner",
    description="Upload an image and get a natural language caption (Vision + LLM)",
)

if __name__ == "__main__":
    demo.launch()