import gradio as gr
from transformers import BlipProcessor, BlipForQuestionAnswering
from PIL import Image
import torch
# Load the BLIP VQA processor and model once at import time so every
# request reuses the same weights.
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
# .eval() returns the module itself, so loading and switching to
# inference mode chain into one expression.
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").eval()
def resize_image(image, max_size=512):
    """Downscale an uploaded image so its longest side is at most ``max_size``.

    Args:
        image: A PIL-style image (anything exposing ``thumbnail``), or
            None when nothing has been uploaded yet.
        max_size: Upper bound for both width and height (default 512,
            matching the original hard-coded limit).

    Returns:
        The (possibly resized) image, or None when ``image`` is None.
    """
    if image is None:
        return None
    # PIL's thumbnail() resizes in place, preserves aspect ratio, and
    # never upscales — small images pass through unchanged.
    image.thumbnail((max_size, max_size))
    return image
def answer_question(resized_image, question):
    """Run BLIP visual question answering on an image/question pair.

    Args:
        resized_image: A PIL image (already downscaled), or None.
        question: Free-text question; may be None or blank.

    Returns:
        The model's decoded answer string, or a prompt message when
        either input is missing.
    """
    # Guard both missing inputs. The original called question.strip()
    # unconditionally and crashed with AttributeError when the textbox
    # delivered None instead of a string.
    if resized_image is None or not question or not question.strip():
        return "Please upload an image and ask a question."
    inputs = processor(resized_image, question, return_tensors="pt")
    # Inference only — disable autograd bookkeeping.
    with torch.no_grad():
        output = model.generate(**inputs)
    return processor.decode(output[0], skip_special_tokens=True)
# --- Gradio UI ---------------------------------------------------------
with gr.Blocks(title="BLIP VQA App (Salesforce/blip-vqa-base)") as demo:
    gr.Markdown("## 📷 Visual Question Answering with BLIP VQA\nUpload an image and ask a question about it.")

    uploaded = gr.Image(type="pil", label="Upload Image")
    cached_image = gr.State()
    question_box = gr.Textbox(label="Question", placeholder="What is in the image?")
    submit = gr.Button("Ask")
    answer_box = gr.Textbox(label="Answer")

    # Downscale once on upload and stash the result in session state so
    # the model never receives an oversized image.
    uploaded.change(fn=resize_image, inputs=uploaded, outputs=cached_image)
    # The button triggers VQA on the cached image + current question.
    submit.click(fn=answer_question, inputs=[cached_image, question_box], outputs=answer_box)

demo.launch()