Spaces:
Build error
Build error
| import requests | |
| import asyncio | |
| from PIL import Image | |
| from transformers import AutoProcessor, AutoModelForVision2Seq | |
# Single source of truth for the checkpoint, so the model and its processor
# can never be loaded from two different repositories by accident.
MODEL_ID = "microsoft/kosmos-2-patch14-224"

model = AutoModelForVision2Seq.from_pretrained(MODEL_ID)
processor = AutoProcessor.from_pretrained(MODEL_ID)

# NOTE: The original Kosmos-2 demo saves the image to disk first and then
# reloads it. For some images this yields a slightly different image input
# and therefore changes the generation outputs.
# The prompt handed to the processor is the user's question verbatim,
# i.e. prompt = "{question}".
def describe_image(image_path, question: str) -> str:
    """Generate a Kosmos-2 answer to ``question`` for the given image.

    Args:
        image_path: The image to describe. Despite the name, the value is
            forwarded unchanged to the processor as ``images=``; the Gradio
            interface in this file is configured with ``type='pil'``.
        question: Text prompt forwarded verbatim to the processor.

    Returns:
        The post-processed text produced by the model.

    Raises:
        ValueError: If ``image_path`` is ``None`` (no image was supplied).
    """
    if image_path is None:
        # Fail early with a readable message instead of an obscure error
        # from deep inside the processor/model when nothing was uploaded.
        raise ValueError("Please upload an image before asking a question.")

    inputs = processor(text=question, images=image_path, return_tensors="pt")

    # `generate` is an ordinary blocking call; awaiting it inside a
    # non-async function is a SyntaxError, so it is called directly.
    generated_ids = model.generate(
        pixel_values=inputs["pixel_values"],
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        image_embeds=None,
        image_embeds_position_mask=inputs["image_embeds_position_mask"],
        use_cache=True,
        max_new_tokens=128,
    )
    generated_text = processor.batch_decode(
        generated_ids, skip_special_tokens=True
    )[0]

    # Pass `cleanup_and_extract=False` to `post_process_generation` instead
    # in order to see the raw model generation. The extracted entities are
    # not used by this app, only the processed text is returned.
    processed_text, _entities = processor.post_process_generation(generated_text)
    return processed_text
| import gradio as gr | |
# Gradio UI: an uploaded image plus a free-text question go in, and the
# text returned by `describe_image` is shown in a single text box.
gr_app = gr.Interface(
    fn=describe_image,
    inputs=[
        gr.Image(label="Upload an image for description", type="pil"),
        gr.Textbox(label="Ask a question about the image"),
    ],
    outputs=[gr.Textbox(label="Image description")],
    title="App for image description",
)
# Start the web app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    gr_app.launch(show_error=True)