Spaces:

josephtran04
/

VQA

Running

VQA / app.py

Upload 3 files

91efc56 verified 5 months ago

1.48 kB

	# This script builds a simple Gradio web application that answers questions about an image (Visual Question Answering, VQA) with the BLIP model from Hugging Face's Transformers library.
	# Import necessary libraries
	import gradio as gr
	import numpy as np
	from PIL import Image
	from transformers import BlipProcessor, BlipForQuestionAnswering

	# Load the pretrained BLIP VQA processor and model at import time; both are
	# built from the same checkpoint id.
	BLIP_VQA_CHECKPOINT = "Salesforce/blip-vqa-base"
	processor = BlipProcessor.from_pretrained(BLIP_VQA_CHECKPOINT)
	model = BlipForQuestionAnswering.from_pretrained(BLIP_VQA_CHECKPOINT)

	# Define the function for Visual Question Answering
	def VQA(input_image: np.ndarray, question: str) -> str:
	    """Answer a natural-language question about an image with BLIP.

	    Args:
	        input_image: The image as a NumPy array. Presumably ``None`` when
	            the user submits without uploading an image (Gradio's behaviour
	            for an empty image input -- confirm against the Gradio version
	            in use).
	        question: The question text about the image.

	    Returns:
	        The answer decoded from the model's generated tokens, with special
	        tokens removed.

	    Raises:
	        gr.Error: If no image was supplied or the question is blank, so the
	            UI shows a readable message instead of an opaque traceback.
	    """
	    # Guard clauses: fail early with a user-facing message rather than
	    # letting Image.fromarray() or the model choke on missing input.
	    if input_image is None:
	        raise gr.Error("Please upload an image before asking a question.")
	    if not question or not question.strip():
	        raise gr.Error("Please type a question about the image.")

	    # Convert numpy array to PIL Image and force 3-channel RGB
	    raw_image = Image.fromarray(input_image).convert("RGB")

	    # Prepare the inputs for the model as PyTorch tensors
	    inputs = processor(raw_image, question, return_tensors="pt")

	    # Generate the answer tokens (capped at 100 tokens total)
	    outputs = model.generate(**inputs, max_length=100)

	    # Decode the first generated sequence to text
	    return processor.decode(outputs[0], skip_special_tokens=True)

	# Assemble the web UI: an image input and a question textbox are passed to
	# VQA, and the value it returns is shown as text.
	iface = gr.Interface(
	    title="Visual Question Answering",
	    description="This is a simple web app for VQA using BLIP model from Salesforce.\nUpload the image file:",
	    fn=VQA,
	    inputs=[
	        gr.Image(label="Input image:"),
	        gr.Textbox(label="Question:", placeholder="Type your question here..."),
	    ],
	    outputs="text",
	)

	# Start serving the app
	iface.launch()