# -*- coding: utf-8 -*-
"""app.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1_HQHDuRl3mgto6slVIJGSlZ5DZeSs4El

Gradio demo that answers free-text questions about an uploaded image using
the Salesforce/blip-vqa-base visual-question-answering pipeline.
"""

import torch
from transformers import pipeline
import gradio as gr

# Arguments shared by the GPU and CPU pipelines.
_PIPELINE_KWARGS = {
    "task": "visual-question-answering",
    "model": "Salesforce/blip-vqa-base",
    "use_fast": False,
}

# Choose device: GPU if available, otherwise CPU. On Hugging Face Spaces,
# unless you explicitly pick a GPU runtime, you're on CPU only.
if torch.cuda.is_available():
    # Original author's note: on Hugging Face Spaces the keyword had to be
    # `torch_dtype`, while `dtype` also worked in Google Colab -- presumably a
    # difference in installed transformers versions; confirm against the
    # version pinned for the deployment before renaming it.
    _PIPELINE_KWARGS["torch_dtype"] = torch.float16
    _PIPELINE_KWARGS["device"] = 0  # GPU
else:
    _PIPELINE_KWARGS["device"] = -1  # CPU

vqa = pipeline(**_PIPELINE_KWARGS)


def answer_question(image, question) -> str:
    """Answer a question about an image with the module-level VQA pipeline.

    Args:
        image: The uploaded image (the interface below requests PIL images),
            or ``None`` when nothing has been uploaded.
        question: The user's question text; may be empty or ``None``.

    Returns:
        The answer of the first pipeline result, or a short prompt asking the
        user to supply whichever input is missing.
    """
    # Guard against blank input, including whitespace-only questions, so the
    # model is never queried with nothing to answer.
    if not question or not question.strip():
        return "Please type a question about the image."
    # Submitting without an upload hands us None; return a friendly message
    # instead of letting the pipeline raise on a missing image.
    if image is None:
        return "Please upload an image."

    # vqa returns a list of dicts like [{'score':..., 'answer':...}]
    result = vqa(question=question, image=image)
    return result[0]["answer"]


demo = gr.Interface(
    fn=answer_question,
    inputs=[
        gr.Image(type="pil", label="Upload an image"),
        gr.Textbox(
            label="Question",
            placeholder="e.g. What is the weather in this image?",
        ),
    ],
    outputs=gr.Textbox(label="Answer"),
    title="BLIP Visual Question Answering",
    description="Ask a question about the uploaded image using Salesforce/blip-vqa-base.",
)

if __name__ == "__main__":
    demo.launch()