import gradio as gr
from transformers import BlipProcessor, BlipForQuestionAnswering
from PIL import Image
import torch

# Load the BLIP VQA processor and model once at startup and put the model in
# inference mode.
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
model.eval()
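# Optional, and not part of the original script: move the model to a GPU when
# one is available. If you enable these two lines, also move the tensors built
# in answer_question onto the same device before calling generate(), e.g.
# inputs = {k: v.to(device) for k, v in inputs.items()}.
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model.to(device)
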
def resize_image(image):
    # PIL's thumbnail() resizes in place, preserves the aspect ratio, and
    # never upscales, so large uploads are capped at 512 px on the long side.
    if image is not None:
        max_size = 512
        image.thumbnail((max_size, max_size))
    return image

def answer_question(resized_image, question):
    if resized_image is None or question.strip() == "":
        return "Please upload an image and ask a question."

    # Preprocess the image and tokenize the question into model inputs.
    inputs = processor(resized_image, question, return_tensors="pt")
    with torch.no_grad():
        output = model.generate(**inputs)
    # Decode the generated token IDs into a plain-text answer.
    return processor.decode(output[0], skip_special_tokens=True)
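# Quick sanity check outside the UI (a sketch; "example.jpg" is a hypothetical
# local file, any RGB image will do):
# img = Image.open("example.jpg").convert("RGB")
# print(answer_question(resize_image(img), "What is in the image?"))
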
with gr.Blocks(title="BLIP VQA App (Salesforce/blip-vqa-base)") as demo:
    gr.Markdown("## 📷 Visual Question Answering with BLIP VQA\nUpload an image and ask a question about it.")

    image_input = gr.Image(type="pil", label="Upload Image")
    # Per-session holder for the resized image, so resizing happens once per
    # upload instead of on every question.
    resized_image = gr.State()

    question_input = gr.Textbox(label="Question", placeholder="What is in the image?")
    ask_button = gr.Button("Ask")
    answer_output = gr.Textbox(label="Answer")
    # Resize each new upload and cache the result in the State component.
    image_input.change(fn=resize_image, inputs=image_input, outputs=resized_image)

    # Answer using the cached resized image and the typed question.
    ask_button.click(fn=answer_question, inputs=[resized_image, question_input], outputs=answer_output)

demo.launch()
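# demo.launch() serves the app locally (Gradio defaults to http://127.0.0.1:7860).
# Pass share=True for a temporary public link, e.g. demo.launch(share=True).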