ProfRom committed on
Commit
e513ee9
·
verified ·
1 Parent(s): c85017a

Vangala - Final submission

Browse files
Files changed (1) hide show
  1. app.py +26 -39
app.py CHANGED
@@ -1,57 +1,44 @@
1
- # -*- coding: utf-8 -*-
2
- """App_Travis_Davis.ipynb
 
 
3
 
4
- Automatically generated by Colab.
5
 
6
- Original file is located at
7
- https://colab.research.google.com/drive/1TYz_SpHIzdYoqG_5OfIbIohXmcZTo77j
8
- """
9
 
10
- import torch
11
- from transformers import pipeline
12
- import gradio as gr
13
 
14
- # Load BLIP VQA pipeline
15
- if torch.cuda.is_available():
16
- vqa = pipeline(
17
- task="visual-question-answering",
18
- model="Salesforce/blip-vqa-base",
19
- torch_dtype=torch.float16,
20
- device=0,
21
- use_fast=False,)
22
- else:
23
- vqa = pipeline(
24
- task="visual-question-answering",
25
- model="Salesforce/blip-vqa-base",
26
- device=-1,
27
- use_fast=False,)
28
-
29
- # Function to answer questions about uploaded images
30
  def answer_question(image, question):
31
  if image is None:
32
  return "Please upload an image."
 
 
 
 
 
 
 
33
 
34
- if not question:
35
- return "Please type a question about the image."
36
 
37
- # Run Visual Question Answering pipeline
38
- result = vqa(question=question, image=image)
39
 
40
- # Return generated answer
41
- return result[0]["answer"]
42
 
43
- # Build Gradio interface
44
  demo = gr.Interface(
45
  fn=answer_question,
46
  inputs=[
47
- gr.Image(type="pil", label="Upload an image"),
48
- gr.Textbox(
49
- label="Question",
50
- placeholder="Example: What is in this image?"),],
51
- outputs=gr.Textbox(label="Answer"),
52
  title="BLIP Visual Question Answering",
53
- description="Upload an image and ask a question about it using Salesforce/blip-vqa-base.",)
 
54
 
55
- # Launch application
56
  if __name__ == "__main__":
57
  demo.launch()
 
import torch
import gradio as gr
from PIL import Image
from transformers import BlipProcessor, BlipForQuestionAnswering

# Hugging Face Hub checkpoint used for both the processor and the model.
MODEL_ID = "Salesforce/blip-vqa-base"

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Loaded once at import time so every call reuses the same objects.
processor = BlipProcessor.from_pretrained(MODEL_ID)
model = BlipForQuestionAnswering.from_pretrained(MODEL_ID).to(device)
 
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def answer_question(image, question):
    """Answer a natural-language question about an image with BLIP.

    Args:
        image: The uploaded picture, either a ``PIL.Image.Image`` or an
            array accepted by ``Image.fromarray``; ``None`` when nothing
            was uploaded.
        question: The question text; may be ``None``, empty, or blank.

    Returns:
        The decoded model answer, or a short prompt asking the user for
        whichever input is missing.
    """
    if image is None:
        return "Please upload an image."
    # Whitespace-only text is treated the same as an empty question.
    if not question or not question.strip():
        return "Please enter a question about the image."

    # Accept array input as well as PIL images.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # Normalise the colour mode (e.g. RGBA or greyscale uploads) to RGB.
    image = image.convert("RGB")

    inputs = processor(image, question, return_tensors="pt").to(device)

    # Inference only: gradients do not need to be tracked.
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=20)

    return processor.decode(output_ids[0], skip_special_tokens=True)
31
 
 
# Gradio UI: one image input and one text input are passed to
# answer_question, and its return value is shown in a text box.
demo = gr.Interface(
    fn=answer_question,
    inputs=[
        gr.Image(type="pil", label="Upload an Image"),
        gr.Textbox(
            label="Ask a Question",
            placeholder="Example: What animal is in this image?",
        ),
    ],
    outputs=gr.Textbox(label="Model Answer"),
    title="BLIP Visual Question Answering",
    description="Upload an image and ask a question. This app uses Salesforce/blip-vqa-base.",
)

# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()