ProfRom committed on
Commit
3b69f5c
·
verified ·
1 Parent(s): 41e16fb

Davis - Final submission

Browse files
Files changed (1) hide show
  1. app.py +36 -18
app.py CHANGED
@@ -1,39 +1,57 @@
1
- import torch
2
- import gradio as gr
3
- from transformers import BlipProcessor, BlipForQuestionAnswering
4
 
5
# Hugging Face checkpoint used for both the processor and the model.
model_name = "Salesforce/blip-vqa-base"

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input processor and the question-answering model from the same
# checkpoint, then place the model on the selected device.
processor = BlipProcessor.from_pretrained(model_name)
model = BlipForQuestionAnswering.from_pretrained(model_name)
model.to(device)
 
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def answer_question(image, question):
    """Answer a free-text question about an uploaded image.

    Args:
        image: The uploaded picture (the interface is configured to pass a
            PIL image), or ``None`` when nothing has been uploaded.
        question: The question text typed by the user; may be empty or
            ``None``.

    Returns:
        The decoded answer string produced by the model, or a short prompt
        asking the user for whichever input is missing.
    """
    if image is None:
        return "Please upload an image."

    # Whitespace-only text carries no question, so treat it like empty input
    # instead of sending a blank prompt to the model.
    question = (question or "").strip()
    if not question:
        return "Please type a question about the image."

    inputs = processor(image, question, return_tensors="pt").to(device)

    # Inference only: no gradients are needed while generating the answer.
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=20)

    return processor.decode(output[0], skip_special_tokens=True)
26
 
 
# Widgets are created in the same order they appear in the UI: the image to
# inspect, the question about it, and the box that shows the answer.
_input_widgets = [
    gr.Image(type="pil", label="Upload an image"),
    gr.Textbox(label="Question", placeholder="e.g. What animal is this?"),
]
_answer_widget = gr.Textbox(label="Answer")

# Wire the widgets to answer_question in a single-function Gradio interface.
demo = gr.Interface(
    fn=answer_question,
    inputs=_input_widgets,
    outputs=_answer_widget,
    title="BLIP Visual Question Answering",
    description="Ask a question about an uploaded image using Salesforce/blip-vqa-base.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
 
1
+ # -*- coding: utf-8 -*-
2
+ """App_Travis_Davis.ipynb
 
3
 
4
+ Automatically generated by Colab.
5
 
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1TYz_SpHIzdYoqG_5OfIbIohXmcZTo77j
8
+ """
9
 
10
+ import torch
11
+ from transformers import pipeline
12
+ import gradio as gr
13
 
14
# Load the BLIP VQA pipeline once at import time. Both hardware paths share
# the same task, checkpoint and use_fast=False option; only the device and,
# on GPU, the half-precision dtype differ.
_pipeline_options = {
    "task": "visual-question-answering",
    "model": "Salesforce/blip-vqa-base",
    "use_fast": False,
}

if torch.cuda.is_available():
    # CUDA path: run on device 0 with float16 weights.
    _pipeline_options.update(torch_dtype=torch.float16, device=0)
else:
    # No CUDA device available: pass device=-1 (CPU in the pipeline API).
    _pipeline_options["device"] = -1

vqa = pipeline(**_pipeline_options)
28
+
29
# Function to answer questions about uploaded images
def answer_question(image, question):
    """Answer a free-text question about an uploaded image.

    Args:
        image: The uploaded picture (the interface is configured to pass a
            PIL image), or ``None`` when nothing has been uploaded.
        question: The question text typed by the user; may be empty or
            ``None``.

    Returns:
        The ``"answer"`` field of the first pipeline result, or a short
        prompt asking the user for whichever input is missing.
    """
    if image is None:
        return "Please upload an image."

    # Whitespace-only text carries no question, so treat it like empty input
    # instead of sending a blank prompt to the model.
    question = (question or "").strip()
    if not question:
        return "Please type a question about the image."

    # Run Visual Question Answering pipeline
    result = vqa(question=question, image=image)

    # Return generated answer
    return result[0]["answer"]
42
 
43
# Build Gradio interface.
# Widgets are created in the same order they appear in the UI: the image to
# inspect, the question about it, and the box that shows the answer.
_input_widgets = [
    gr.Image(type="pil", label="Upload an image"),
    gr.Textbox(
        label="Question",
        placeholder="Example: What is in this image?",
    ),
]
_answer_widget = gr.Textbox(label="Answer")

demo = gr.Interface(
    fn=answer_question,
    inputs=_input_widgets,
    outputs=_answer_widget,
    title="BLIP Visual Question Answering",
    description="Upload an image and ask a question about it using Salesforce/blip-vqa-base.",
)

# Launch application only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()