# -*- coding: utf-8 -*-
# Source: Hugging Face Spaces file view (Space status: Sleeping; file size: 1,619 bytes; revision 7719da2).
"""app.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1_HQHDuRl3mgto6slVIJGSlZ5DZeSs4El
"""
import torch
from transformers import pipeline
import gradio as gr
def _build_vqa_pipeline():
    """Create the BLIP visual-question-answering pipeline for the available device.

    Uses CUDA device 0 with float16 weights when ``torch.cuda.is_available()``
    is true; otherwise runs on CPU (device -1) with the default dtype.
    Per the original author's note, a Hugging Face Space is CPU-only unless a
    GPU runtime is explicitly selected.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``visual-question-answering`` task.
    """
    # Settings common to both the GPU and the CPU configuration.
    options = {
        "task": "visual-question-answering",
        "model": "Salesforce/blip-vqa-base",
        "use_fast": False,
    }
    if torch.cuda.is_available():
        options["device"] = 0  # GPU
        # NOTE(review): the original author reported that the Space needed
        # `torch_dtype` while `dtype` worked in Colab -- presumably a
        # transformers version difference; confirm against the pinned version.
        options["torch_dtype"] = torch.float16
    else:
        options["device"] = -1  # CPU
    return pipeline(**options)


# Built once at import time and shared by every request.
vqa = _build_vqa_pipeline()
def answer_question(image, question):
    """Answer a natural-language question about an image using the VQA pipeline.

    Args:
        image: The image to ask about (the UI in this file supplies a PIL
            image), or ``None`` when no image has been provided.
        question: The question text; may be empty, whitespace-only or ``None``.

    Returns:
        The ``"answer"`` field of the first pipeline result, or a short
        prompt string when the question or the image is missing.
    """
    # Treat None and whitespace-only text the same as an empty question.
    cleaned = (question or "").strip()
    if not cleaned:
        return "Please type a question about the image."
    # Without an image the pipeline call would fail; return a hint instead.
    if image is None:
        return "Please upload an image."
    # vqa returns a list of dicts, each carrying an 'answer' key; use the first.
    result = vqa(question=cleaned, image=image)
    return result[0]["answer"]
# Input widgets, listed in the positional order answer_question expects:
# the image first, then the question text.
_image_input = gr.Image(type="pil", label="Upload an image")
_question_input = gr.Textbox(
    label="Question",
    placeholder="e.g. What is the weather in this image?",
)
# Single text field that displays the returned answer string.
_answer_output = gr.Textbox(label="Answer")

# Gradio UI that wires the widgets above to answer_question.
demo = gr.Interface(
    title="BLIP Visual Question Answering",
    description="Ask a question about the uploaded image using Salesforce/blip-vqa-base.",
    fn=answer_question,
    inputs=[_image_input, _question_input],
    outputs=_answer_output,
)
if __name__ == "__main__":
    # Start the Gradio app only when this file is executed as a script,
    # not when it is imported as a module.
    demo.launch()