| | import gradio as gr |
| | from transformers import AutoProcessor, AutoModelForVisualQuestionAnswering, AutoModelForCausalLM, AutoTokenizer |
| | from PIL import Image |
| | import torch |
| |
|
# Hugging Face checkpoint used for both the processor and the VQA model.
model_path = "microsoft/git-base-vqav2"
# Dataset the example questions below were taken from.
dataset_name = "Multimodal-Fatima/OK-VQA_train"
# NOTE(review): the original also loaded this checkpoint via AutoModelForCausalLM
# into `model` here, but that object was discarded when the same checkpoint is
# reloaded with AutoModelForVisualQuestionAnswering further down — the duplicate
# (slow, memory-hungry) load is removed.
tokenizer = AutoTokenizer.from_pretrained(model_path)
| |
|
| | questions = ["What can happen the objects shown are thrown on the ground?", |
| | "What was the machine beside the bowl used for?", |
| | "What kind of cars are in the photo?", |
| | "What is the hairstyle of the blond called?", |
| | "How old do you have to be in canada to do this?", |
| | "Can you guess the place where the man is playing?", |
| | "What loony tune character is in this photo?", |
| | "Whose birthday is being celebrated?", |
| | "Where can that toilet seat be bought?", |
| | "What do you call the kind of pants that the man on the right is wearing?"] |
| |
|
# Processor prepares (image, question) pairs into model-ready tensors for this checkpoint.
processor = AutoProcessor.from_pretrained(model_path)
# VQA model; its config.id2label maps predicted class indices to answer strings (used in main()).
model = AutoModelForVisualQuestionAnswering.from_pretrained(model_path)
| |
|
| |
|
def main(select_exemple_num):
    """Run visual question answering on one bundled example.

    Parameters
    ----------
    select_exemple_num : int
        1-based index selecting the example pair: image file
        ``image{n}.jpg`` and ``questions[n - 1]``.

    Returns
    -------
    tuple
        ``(image_path, question, output_str)`` where ``output_str`` lists
        the top-5 (probability, label) predictions and the best answer.
    """
    # Gradio sliders may deliver a float; coerce so indexing and the
    # filename f-string behave as expected.
    selectednum = int(select_exemple_num)
    exemple_img = f"image{selectednum}.jpg"
    img = Image.open(exemple_img)
    question = questions[selectednum - 1]

    encoding = processor(img, question, return_tensors='pt')

    # Inference only — no_grad avoids building the autograd graph.
    with torch.no_grad():
        outputs = model(**encoding)
    logits = outputs.logits

    # Per-class sigmoid scores (multi-label VQA head), then the 5 best.
    predicted_classes = torch.sigmoid(logits)
    probs, classes = torch.topk(predicted_classes, 5)

    ans = ''
    lines = []
    for prob, class_idx in zip(probs.squeeze().tolist(), classes.squeeze().tolist()):
        label = model.config.id2label[class_idx]
        print(prob, label)
        lines.append(f"{prob} {label}")
        # First (highest-probability) entry is the chosen answer.
        if not ans:
            ans = label

    print(ans)

    # Typos fixed vs. original ("pridicted", "it's answer is").
    output_str = 'predicted : \n' + "\n".join(lines) + "\n"
    output_str += f"\nso I think the answer is : \n{ans}"

    return exemple_img, question, output_str
| |
|
| |
|
# Slider selects which bundled example (1..len(questions)) to run through the model.
example_selector = gr.Slider(1, len(questions), step=1)

demo = gr.Interface(fn=main, inputs=[example_selector], outputs=["image", "text", "text"])

demo.launch(share=True)  # share=True exposes a temporary public Gradio URL
| |
|