Spaces:

hubsnippetai
/

medapp

Build error

App Files Files Community

medapp / app.py

hubsnippetai

Update app.py

c6c3ebd verified over 1 year ago

raw

history blame contribute delete

1.64 kB

	import requests
	import asyncio

	from PIL import Image
	from transformers import AutoProcessor, AutoModelForVision2Seq


	# Hub checkpoint shared by the model weights and their matching processor.
	KOSMOS2_CHECKPOINT = "microsoft/kosmos-2-patch14-224"

	# Both objects are loaded once at import time and reused for every request.
	model = AutoModelForVision2Seq.from_pretrained(KOSMOS2_CHECKPOINT)
	processor = AutoProcessor.from_pretrained(KOSMOS2_CHECKPOINT)

	# The original Kosmos-2 demo saves the image first and then reloads it. For some images, this gives a slightly different image input and changes the generation outputs.

	#prompt = "{question}"

	def describe_image(image_path, question: str) -> str:
	    """Answer a question about an image using the module-level model.

	    Args:
	        image_path: The image to describe. Despite the name, the Gradio
	            interface in this file is configured with ``type='pil'``, so
	            the value received here is an image object rather than a path.
	        question: Text prompt passed to the processor alongside the image.

	    Returns:
	        The cleaned-up text produced by the processor's post-processing
	        of the first generated sequence.

	    Raises:
	        ValueError: If no image was supplied.
	    """
	    # Gradio passes None when the user submits without uploading an image;
	    # fail with a readable message instead of an error deep in the processor.
	    if image_path is None:
	        raise ValueError("Please upload an image before asking a question.")

	    inputs = processor(text=question, images=image_path, return_tensors="pt")

	    # `generate` is an ordinary blocking call. The previous `await` here was
	    # a SyntaxError (this is not an `async def`) and stopped the app loading.
	    generated_ids = model.generate(
	        pixel_values=inputs["pixel_values"],
	        input_ids=inputs["input_ids"],
	        attention_mask=inputs["attention_mask"],
	        image_embeds=None,
	        image_embeds_position_mask=inputs["image_embeds_position_mask"],
	        use_cache=True,
	        max_new_tokens=128,
	    )
	    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

	    # Pass `cleanup_and_extract=False` instead to inspect the raw model
	    # generation. The extracted entities are not shown in the UI.
	    processed_text, _entities = processor.post_process_generation(generated_text)

	    return processed_text

	import gradio as gr

	# Input widgets, in the positional order expected by `describe_image`.
	image_input = gr.Image(label="Upload an image for description", type='pil')
	question_input = gr.Textbox(label="Ask a question about the image")

	# Single text box that shows the function's return value.
	description_output = gr.Textbox(label="Image description")

	gr_app = gr.Interface(
	    fn=describe_image,
	    inputs=[image_input, question_input],
	    outputs=[description_output],
	    title="App for image description",
	)

	# Start the web UI only when this file is executed as a script.
	if __name__ == "__main__":
	    gr_app.launch(show_error=True)