Spaces:

ProfRom
/

TestSpace

Sleeping

App Files Files Community

ProfRom committed 16 days ago

Commit

e513ee9

verified ·

1 Parent(s): c85017a

Vangala - Final submission

Browse files

Files changed (1) hide show

app.py +26 -39

app.py CHANGED Viewed

@@ -1,57 +1,44 @@
-# -*- coding: utf-8 -*-
-"""App_Travis_Davis.ipynb
-Automatically generated by Colab.
-Original file is located at
-    https://colab.research.google.com/drive/1TYz_SpHIzdYoqG_5OfIbIohXmcZTo77j
-"""
-import torch
-from transformers import pipeline
-import gradio as gr
-# Load BLIP VQA pipeline
-if torch.cuda.is_available():
-    vqa = pipeline(
-        task="visual-question-answering",
-        model="Salesforce/blip-vqa-base",
-        torch_dtype=torch.float16,
-        device=0,
-        use_fast=False,)
-else:
-    vqa = pipeline(
-        task="visual-question-answering",
-        model="Salesforce/blip-vqa-base",
-        device=-1,
-        use_fast=False,)
-# Function to answer questions about uploaded images
 def answer_question(image, question):
     if image is None:
         return "Please upload an image."
-    if not question:
-        return "Please type a question about the image."
-    # Run Visual Question Answering pipeline
-    result = vqa(question=question, image=image)
-    # Return generated answer
-    return result[0]["answer"]
-# Build Gradio interface
 demo = gr.Interface(
     fn=answer_question,
     inputs=[
-        gr.Image(type="pil", label="Upload an image"),
-        gr.Textbox(
-            label="Question",
-            placeholder="Example: What is in this image?"),],
-    outputs=gr.Textbox(label="Answer"),
     title="BLIP Visual Question Answering",
-    description="Upload an image and ask a question about it using Salesforce/blip-vqa-base.",)
-# Launch application
 if __name__ == "__main__":
     demo.launch()

+import torch
+import gradio as gr
+from PIL import Image
+from transformers import BlipProcessor, BlipForQuestionAnswering
+MODEL_ID = "Salesforce/blip-vqa-base"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+processor = BlipProcessor.from_pretrained(MODEL_ID)
+model = BlipForQuestionAnswering.from_pretrained(MODEL_ID).to(device)
 def answer_question(image, question):
     if image is None:
         return "Please upload an image."
+    if not question or question.strip() == "":
+        return "Please enter a question about the image."
+    if not isinstance(image, Image.Image):
+        image = Image.fromarray(image)
+    image = image.convert("RGB")
+    inputs = processor(image, question, return_tensors="pt").to(device)
+    with torch.no_grad():
+        output_ids = model.generate(**inputs, max_new_tokens=20)
+    answer = processor.decode(output_ids[0], skip_special_tokens=True)
+    return answer
 demo = gr.Interface(
     fn=answer_question,
     inputs=[
+        gr.Image(type="pil", label="Upload an Image"),
+        gr.Textbox(label="Ask a Question", placeholder="Example: What animal is in this image?")
+    ],
+    outputs=gr.Textbox(label="Model Answer"),
     title="BLIP Visual Question Answering",
+    description="Upload an image and ask a question. This app uses Salesforce/blip-vqa-base."
+)
 if __name__ == "__main__":
     demo.launch()