gopalagra commited on
Commit
269fb75
·
verified ·
1 Parent(s): d90d12f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -116,11 +116,13 @@ processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
116
  model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").to("cuda" if torch.cuda.is_available() else "cpu")
117
 
118
  # Function
119
- def vqa_answer(image_path, question):
120
- image = Image.open(image_path).convert("RGB")
121
- inputs = processor(image, question, return_tensors="pt").to(model.device)
122
- out = model.generate(**inputs, max_new_tokens=30)
123
- return processor.decode(out[0], skip_special_tokens=True)
 
 
124
 
125
  # Example
126
  # print(vqa_answer("baby.jpg", "What is the baby eating?"))
 
116
  model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").to("cuda" if torch.cuda.is_available() else "cpu")
117
 
118
  # Function
119
+ def vqa_answer(image, question):
120
+ # image is already a PIL Image (no need to open again)
121
+ inputs = vqa_processor(image, question, return_tensors="pt").to(vqa_model.device)
122
+ out = vqa_model.generate(**inputs, max_new_tokens=50)
123
+ answer = vqa_processor.decode(out[0], skip_special_tokens=True)
124
+ return answer
125
+
126
 
127
  # Example
128
  # print(vqa_answer("baby.jpg", "What is the baby eating?"))