| import gradio as gr |
| from PIL import Image |
| from transformers import BlipProcessor, BlipForQuestionAnswering |
|
|
| |
# Display name (shown in the UI dropdown) -> Hugging Face checkpoint id.
AVAILABLE_MODELS = {
    "BLIP VQA Base": "Salesforce/blip-vqa-base",
    "BLIP VQA Large (CapFilt)": "Salesforce/blip-vqa-capfilt-large",
}

# The first entry of AVAILABLE_MODELS is the model loaded at import time.
current_model_name = next(iter(AVAILABLE_MODELS))
processor = BlipProcessor.from_pretrained(
    AVAILABLE_MODELS[current_model_name]
)
model = BlipForQuestionAnswering.from_pretrained(
    AVAILABLE_MODELS[current_model_name]
)
|
|
|
|
| |
def change_model(model_choice):
    """Load the selected checkpoint and make it the active model.

    The new processor and model are loaded into local variables first and
    the module-level ``processor``, ``model`` and ``current_model_name`` are
    only rebound once both loads have succeeded, so a failed lookup or
    download leaves the previously active model fully intact.

    Args:
        model_choice: A key of ``AVAILABLE_MODELS`` (the display name).

    Returns:
        A status string naming the model that is now active.

    Raises:
        KeyError: If ``model_choice`` is not a key of ``AVAILABLE_MODELS``.
    """
    global processor, model, current_model_name

    # Nothing to do when the requested model is already the active one;
    # avoids re-loading the same weights.
    if model_choice == current_model_name:
        return f"✅ Switched to: {model_choice}"

    model_id = AVAILABLE_MODELS[model_choice]

    # Load into locals so that an exception here cannot leave the globals
    # half-updated (e.g. new name paired with the old model).
    new_processor = BlipProcessor.from_pretrained(model_id)
    new_model = BlipForQuestionAnswering.from_pretrained(model_id)

    processor, model, current_model_name = (
        new_processor,
        new_model,
        model_choice,
    )
    return f"✅ Switched to: {model_choice}"
|
|
|
|
| |
def answer_question(history, image, question):
    """Answer a question about an image and append the exchange to the chat.

    Args:
        history: Existing chat history as a list of two-element entries;
            ``None`` is treated as an empty history.
        image: The image to query, or ``None`` when nothing was uploaded.
        question: The question text; ``None`` or whitespace-only text is
            treated as a missing question.

    Returns:
        A new history list with one entry appended. The input list is not
        mutated. When the image or question is missing, the appended entry
        is ``(prompt_message, None)``; otherwise it is ``(question, reply)``.
    """
    # Guard against a missing history so the concatenations below never
    # raise TypeError on ``None + list``.
    past = history or []

    if image is None:
        return past + [("Please upload an image first.", None)]
    # ``question`` may be None as well as blank; both mean "no question".
    if question is None or not question.strip():
        return past + [("Please enter a question.", None)]

    inputs = processor(image, question, return_tensors="pt")
    out = model.generate(**inputs, max_new_tokens=50)
    answer = processor.decode(out[0], skip_special_tokens=True)
    reply = f"🤖({current_model_name}) Answer: {answer}"
    return past + [(question, reply)]
|
|
|
|
| |
def reset_chat(_):
    """Return a fresh, empty chat history; the argument is ignored."""
    cleared = []
    return cleared
|
|
|
|
| |
def build_ui():
    """Assemble the Gradio Blocks app and wire up its event handlers.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    # Example (image path, question) pairs offered below the chat.
    example_rows = [
        ["sample_images/app.jpg", "How many apples are in the picture?"],
        ["sample_images/cat_dog.jpg", "What animals are in the image?"],
        ["sample_images/city.jpg", "What is the man doing?"],
    ]

    with gr.Blocks(title="Vision-Language Chatbot") as app:
        gr.Markdown("## 🤖 Vision-Language Chatbot")
        gr.Markdown("Upload an image and ask multiple questions about it!")

        # Model picker and a status line that reports the active model.
        model_dropdown = gr.Dropdown(
            choices=list(AVAILABLE_MODELS),
            value=current_model_name,
            label="Select Model",
        )
        status_line = gr.Markdown(f"✅ Current model: {current_model_name}")

        # Image on the left, question controls on the right.
        with gr.Row():
            with gr.Column(scale=1):
                image_box = gr.Image(type="pil", label="Upload Image")

            with gr.Column(scale=2):
                question_box = gr.Textbox(
                    placeholder="Ask something about the image...",
                    label="Question",
                )
                ask_button = gr.Button("Ask", variant="primary")
                clear_button = gr.Button("Clear Chat")

        chat_panel = gr.Chatbot(height=400, label="Chat History")

        # Event wiring.
        ask_button.click(
            fn=answer_question,
            inputs=[chat_panel, image_box, question_box],
            outputs=chat_panel,
        )

        clear_button.click(fn=lambda: [], outputs=chat_panel)
        # A new (or removed) image starts a new conversation.
        image_box.change(fn=reset_chat, inputs=image_box, outputs=chat_panel)

        model_dropdown.change(
            fn=change_model,
            inputs=model_dropdown,
            outputs=status_line,
        )

        gr.Examples(
            examples=example_rows,
            inputs=[image_box, question_box],
            label="🏞️ Example Inputs",
        )

    return app
|
|
|
|
# Script entry point: build the interface, then start the Gradio server.
if __name__ == "__main__":
    demo = build_ui()
    demo.launch()
|
|