# TestSpace2 / app.py
# ProfRom's picture
# Create app.py
# 7719da2 verified
# -*- coding: utf-8 -*-
"""app.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1_HQHDuRl3mgto6slVIJGSlZ5DZeSs4El
"""
import torch
from transformers import pipeline
import gradio as gr
# Pick the inference device once, at import time. On Hugging Face Spaces the
# runtime is CPU-only unless a GPU runtime has been explicitly selected.
_use_gpu = torch.cuda.is_available()

# Arguments common to the GPU and CPU configurations.
_pipeline_kwargs = {
    "task": "visual-question-answering",
    "model": "Salesforce/blip-vqa-base",
    "device": 0 if _use_gpu else -1,  # 0 selects the GPU, -1 the CPU
    "use_fast": False,
}

if _use_gpu:
    # Half precision is requested only when running on the GPU.
    # NOTE(review): the keyword used here is `torch_dtype`; the original
    # author observed that `dtype` also worked in Colab. Which spelling is
    # accepted depends on the installed transformers version -- confirm
    # against the version pinned for this Space.
    _pipeline_kwargs["torch_dtype"] = torch.float16

# Module-level VQA pipeline shared by every request handled by the app.
vqa = pipeline(**_pipeline_kwargs)
def answer_question(image, question: str) -> str:
    """Answer a free-text question about an image with the VQA pipeline.

    Args:
        image: The image to inspect, as delivered by the Gradio image input
            (configured elsewhere in this file with ``type="pil"``). May be
            ``None`` when the user has not uploaded anything.
        question: The user's question. ``None``, empty, and whitespace-only
            values are all treated as "no question".

    Returns:
        The answer text of the first result returned by the pipeline, or a
        short instruction for the user when the question or image is missing.
    """
    # Treat None / "" / "   " uniformly so blank input never reaches the model.
    cleaned_question = (question or "").strip()
    if not cleaned_question:
        return "Please type a question about the image."

    # Guard against submitting without an image: passing None to the pipeline
    # would raise instead of giving the user actionable feedback.
    if image is None:
        return "Please upload an image before asking a question."

    # The pipeline returns a list of dicts; the first entry's "answer" field
    # holds the text shown to the user.
    result = vqa(question=cleaned_question, image=image)
    return result[0]["answer"]
# UI components, created in the positional order that
# answer_question(image, question) expects its arguments.
_image_input = gr.Image(type="pil", label="Upload an image")
_question_input = gr.Textbox(
    label="Question",
    placeholder="e.g. What is the weather in this image?",
)
_answer_output = gr.Textbox(label="Answer")

# Gradio interface wiring the two inputs to answer_question and showing the
# returned string in a single text box.
demo = gr.Interface(
    fn=answer_question,
    inputs=[_image_input, _question_input],
    outputs=_answer_output,
    title="BLIP Visual Question Answering",
    description="Ask a question about the uploaded image using Salesforce/blip-vqa-base.",
)

# Start the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()