gopalagra commited on
Commit
269fb75
·
verified ·
1 Parent(s): d90d12f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -116,11 +116,13 @@ processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
116
  model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").to("cuda" if torch.cuda.is_available() else "cpu")
117
 
118
  # Function
119
- def vqa_answer(image_path, question):
120
- image = Image.open(image_path).convert("RGB")
121
- inputs = processor(image, question, return_tensors="pt").to(model.device)
122
- out = model.generate(**inputs, max_new_tokens=30)
123
- return processor.decode(out[0], skip_special_tokens=True)
 
 
124
 
125
  # Example
126
  # print(vqa_answer("baby.jpg", "What is the baby eating?"))
 
116
  model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").to("cuda" if torch.cuda.is_available() else "cpu")
117
 
118
  # Function
119
+ def vqa_answer(image, question):
120
+ # image is already a PIL Image (no need to open again)
121
+ inputs = vqa_processor(image, question, return_tensors="pt").to(vqa_model.device)
122
+ out = vqa_model.generate(**inputs, max_new_tokens=50)
123
+ answer = vqa_processor.decode(out[0], skip_special_tokens=True)
124
+ return answer
125
+
126
 
127
  # Example
128
  # print(vqa_answer("baby.jpg", "What is the baby eating?"))