Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| """app.ipynb | |
| Automatically generated by Colab. | |
| Original file is located at | |
| https://colab.research.google.com/drive/1_HQHDuRl3mgto6slVIJGSlZ5DZeSs4El | |
| """ | |
| import torch | |
| from transformers import pipeline | |
| import gradio as gr | |
# Build the visual-question-answering pipeline once at import time.
# CUDA is used when torch reports it as available; otherwise inference runs
# on the CPU. On Hugging Face Spaces you are on CPU only unless a GPU runtime
# has been explicitly selected.
#
# NOTE: per the original author, the transformers version on Hugging Face
# Spaces expects the keyword `torch_dtype` (not `dtype`), while `dtype` still
# worked in Google Colab. Which spelling is accepted depends on the installed
# transformers version.
vqa = pipeline(
    task="visual-question-answering",
    model="Salesforce/blip-vqa-base",
    use_fast=False,
    # Half precision is requested only on the GPU path (device 0);
    # the CPU path (device -1) keeps the pipeline's default dtype.
    **(
        {"torch_dtype": torch.float16, "device": 0}
        if torch.cuda.is_available()
        else {"device": -1}
    ),
)
def answer_question(image, question) -> str:
    """Answer a free-text question about an image with the VQA pipeline.

    Args:
        image: Image to inspect. The Gradio UI in this file supplies a PIL
            image; ``None`` means nothing was uploaded.
        question: Question text typed by the user.

    Returns:
        The answer from the first result of the pipeline, or a short prompt
        telling the user which input is still missing.
    """
    # Treat a missing or whitespace-only question the same as an empty one.
    if not question or not question.strip():
        return "Please type a question about the image."
    # Guard against submitting without an image: the pipeline cannot process
    # ``None`` and would otherwise surface an error in the UI.
    if image is None:
        return "Please upload an image before asking a question."
    # vqa returns a list of dicts like [{'score':..., 'answer':...}];
    # take the answer from the first entry.
    result = vqa(question=question, image=image)
    return result[0]["answer"]
# Gradio front end: one image upload plus one question box in, one answer
# text box out, all routed through answer_question.
demo = gr.Interface(
    title="BLIP Visual Question Answering",
    description="Ask a question about the uploaded image using Salesforce/blip-vqa-base.",
    fn=answer_question,
    inputs=[
        # type="pil" makes Gradio hand the upload to the callback as a PIL image.
        gr.Image(label="Upload an image", type="pil"),
        gr.Textbox(
            placeholder="e.g. What is the weather in this image?",
            label="Question",
        ),
    ],
    outputs=gr.Textbox(label="Answer"),
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()