Spaces:
Runtime error
Runtime error
ivelin
committed on
Commit
·
ba6d9e2
1
Parent(s):
e0dd23e
fix:example formatting
Browse files
Signed-off-by: ivelin <ivelin.eth@gmail.com>
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import re
|
| 2 |
import gradio as gr
|
|
|
|
| 3 |
|
| 4 |
import torch
|
| 5 |
from transformers import DonutProcessor, VisionEncoderDecoderModel
|
|
@@ -13,7 +14,10 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
| 13 |
model.to(device)
|
| 14 |
|
| 15 |
|
| 16 |
-
def process_document(image, prompt):
|
|
|
|
|
|
|
|
|
|
| 17 |
# prepare encoder inputs
|
| 18 |
pixel_values = processor(image, return_tensors="pt").pixel_values
|
| 19 |
|
|
@@ -68,20 +72,13 @@ def process_document(image, prompt):
|
|
| 68 |
|
| 69 |
description = "Gradio Demo for Donut RefExp task, an instance of `VisionEncoderDecoderModel` fine-tuned on UIBert RefExp Dataset (UI Referring Expression). To use it, simply upload your image and type a question and click 'submit', or click one of the examples to load them. Read more at the links below."
|
| 70 |
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
["example_1.jpg", "select the setting icon from top right corner"],
|
| 82 |
-
["example_2.jpg", "enter the text field next to the name"]
|
| 83 |
-
],
|
| 84 |
-
cache_examples=False)
|
| 85 |
-
|
| 86 |
-
demo.launch()
|
| 87 |
-
#
|
|
|
|
| 1 |
import re
|
| 2 |
import gradio as gr
|
| 3 |
+
from PIL import Image
|
| 4 |
|
| 5 |
import torch
|
| 6 |
from transformers import DonutProcessor, VisionEncoderDecoderModel
|
|
|
|
| 14 |
model.to(device)
|
| 15 |
|
| 16 |
|
| 17 |
+
def process_document(image: Image, prompt: str):
|
| 18 |
+
# trim prompt to 80 characters and normalize to lowercase
|
| 19 |
+
prompt = prompt[:80].lower()
|
| 20 |
+
|
| 21 |
# prepare encoder inputs
|
| 22 |
pixel_values = processor(image, return_tensors="pt").pixel_values
|
| 23 |
|
|
|
|
| 72 |
|
| 73 |
description = "Gradio Demo for Donut RefExp task, an instance of `VisionEncoderDecoderModel` fine-tuned on UIBert RefExp Dataset (UI Referring Expression). To use it, simply upload your image and type a question and click 'submit', or click one of the examples to load them. Read more at the links below."
|
| 74 |
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
|
| 75 |
+
examples = [
|
| 76 |
+
["example_1.jpg", "select the setting icon from top right corner"],
|
| 77 |
+
["example_2.jpg", "enter the text field next to the name"]
|
| 78 |
+
],
|
| 79 |
+
|
| 80 |
+
demo = gr.Interface(fn=visual_grounding, inputs=[gr.inputs.Image(type='pil'), "textbox"],
|
| 81 |
+
outputs=[gr.inputs.Image(type='pil'), "textbox"],
|
| 82 |
+
title=title, description=description, article=article, examples=examples,
|
| 83 |
+
allow_flagging=False, allow_screenshot=False)
|
| 84 |
+
demo.launch(cache_examples=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|