mbwanaf committed on
Commit
75e38c4
·
verified ·
1 Parent(s): 60dffdb

update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -17
app.py CHANGED
@@ -8,36 +8,38 @@ processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
8
  model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
9
  model.eval()
10
 
11
# Resize uploaded image immediately
def resize_image(image):
    """Downscale *image* in place so neither side exceeds 512 px.

    Aspect ratio is preserved by PIL's thumbnail(). A value of None
    (nothing uploaded yet) is passed through unchanged.
    """
    # Explicit None check rather than truthiness: the guard's intent is
    # "was an image uploaded", not "is the image truthy".
    if image is not None:
        max_size = 512
        # thumbnail() mutates the image in place and never enlarges it.
        image.thumbnail((max_size, max_size))
    return image
17
 
18
# VQA answer function
def answer_question(image, question):
    """Run BLIP visual question answering and return the decoded answer.

    Returns a prompt message instead when either input is missing.
    """
    # Both an image and a non-blank question are required.
    if image is None or not question.strip():
        return "Please upload an image and ask a question."

    encoded = processor(image, question, return_tensors="pt")
    with torch.no_grad():  # inference only -- no gradients needed
        generated = model.generate(**encoded)
    return processor.decode(generated[0], skip_special_tokens=True)
28
 
29
# Gradio app layout
with gr.Blocks(title="BLIP VQA App (Salesforce/blip-vqa-base)") as demo:
    gr.Markdown("## 📷 Visual Question Answering with BLIP VQA\nUpload an image and ask a question about it.")

    with gr.Row():
        # BUGFIX: create the component first. Chaining .upload(...) onto the
        # constructor assigns the event-listener object (not the Image
        # component) to image_input, which breaks the click() wiring below.
        image_input = gr.Image(type="pil", label="Upload Image")
        question_input = gr.Textbox(label="Question", placeholder="What is in the image?")

    with gr.Row():
        ask_button = gr.Button("Ask")
        answer_output = gr.Textbox(label="Answer")

    # Downscale large uploads in place as soon as they arrive.
    image_input.upload(fn=resize_image, inputs=image_input, outputs=image_input)

    # Run VQA when the button is pressed.
    ask_button.click(fn=answer_question, inputs=[image_input, question_input], outputs=answer_output)

demo.launch()
 
8
  model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
9
  model.eval()
10
 
11
# Resize function
def resize_image(image):
    """Shrink *image* in place to fit within 512x512 and return it.

    None (no upload) is returned unchanged.
    """
    if image is None:
        return image
    # thumbnail() preserves aspect ratio and resizes in place.
    max_size = 512
    image.thumbnail((max_size, max_size))
    return image
17
 
18
# Answer question function
def answer_question(resized_image, question):
    """Decode and return the BLIP VQA answer for *question* about *resized_image*.

    Falls back to an instructional message when either input is missing.
    """
    # Guard: require an image and a non-blank question before inference.
    if resized_image is None or not question.strip():
        return "Please upload an image and ask a question."

    encoded = processor(resized_image, question, return_tensors="pt")
    with torch.no_grad():  # no gradients needed for generation
        generated = model.generate(**encoded)
    answer = processor.decode(generated[0], skip_special_tokens=True)
    return answer
 
27
 
28
# Gradio UI
with gr.Blocks(title="BLIP VQA App (Salesforce/blip-vqa-base)") as demo:
    gr.Markdown("## 📷 Visual Question Answering with BLIP VQA\nUpload an image and ask a question about it.")

    # Components, laid out top to bottom.
    image_input = gr.Image(type="pil", label="Upload Image")
    resized_image = gr.State()  # holds the downscaled copy used for inference
    question_input = gr.Textbox(label="Question", placeholder="What is in the image?")
    ask_button = gr.Button("Ask")
    answer_output = gr.Textbox(label="Answer")

    # Keep the stored image resized and in sync whenever the upload changes.
    image_input.change(fn=resize_image, inputs=image_input, outputs=resized_image)

    # Run VQA against the stored (resized) image on click.
    ask_button.click(fn=answer_question, inputs=[resized_image, question_input], outputs=answer_output)

demo.launch()