"""Gradio demo: Visual Question Answering (VQA) with the BLIP model.

Loads the Salesforce BLIP VQA pipeline once at startup and serves a
simple two-column UI: an image and a question go in, the model's top
answer comes out.
"""
from transformers import pipeline
from PIL import Image  # noqa: F401 — documents that gr.Image(type="pil") yields PIL images
import gradio as gr

# Load the BLIP VQA model once at startup (downloads weights on first run).
vqa_pipeline = pipeline(
    "visual-question-answering",
    model="Salesforce/blip-vqa-capfilt-large",
)


def answer_question(image, question):
    """Return the model's single best answer for *question* about *image*.

    Parameters
    ----------
    image : PIL.Image.Image | None
        The uploaded image; ``None`` when the widget is empty/cleared.
    question : str | None
        The user's question; Gradio may deliver ``None`` before any typing.

    Returns
    -------
    str
        The top-ranked answer, or a prompt asking for the missing input.
    """
    # Guard both missing inputs. `question` can be None (not just ""),
    # so normalize before calling .strip() to avoid an AttributeError.
    if image is None or not (question or "").strip():
        return "Please provide an image and a question."
    outputs = vqa_pipeline(image, question, top_k=1)
    return outputs[0]["answer"]


with gr.Blocks() as demo:
    gr.Markdown("# 🖼️ Visual Question Answering with BLIP")
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload an Image")
            question_input = gr.Textbox(label="Enter your question")
            submit_btn = gr.Button("Get Answer")
        with gr.Column():
            output_text = gr.Textbox(label="Answer")
    submit_btn.click(
        fn=answer_question,
        inputs=[image_input, question_input],
        outputs=output_text,
    )

if __name__ == "__main__":
    # Launch the web server only when run as a script, not on import.
    demo.launch()