Spaces:

ivelin
/

ui-refexp

Runtime error

ivelin committed on Jan 9, 2023

Commit

ba6d9e2

1 Parent(s): e0dd23e

fix:example formatting

Signed-off-by: ivelin <ivelin.eth@gmail.com>

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import re
 import gradio as gr
 import torch
 from transformers import DonutProcessor, VisionEncoderDecoderModel
@@ -13,7 +14,10 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
-def process_document(image, prompt):
     # prepare encoder inputs
     pixel_values = processor(image, return_tensors="pt").pixel_values
@@ -68,20 +72,13 @@ def process_document(image, prompt):
 description = "Gradio Demo for Donut RefExp task, an instance of `VisionEncoderDecoderModel` fine-tuned on UIBert RefExp Dataset (UI Referring Expression). To use it, simply upload your image and type a question and click 'submit', or click one of the examples to load them. Read more at the links below."
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
-demo = gr.Interface(
-    fn=process_document,
-    inputs=["image", "text"],
-    outputs="json",
-    title="Demo: Donut 🍩 for DocVQA",
-    description=description,
-    article=article,
-    enable_queue=True,
-    examples=[
-        ["example_1.jpg", "select the setting icon from top right corner"],
-        ["example_2.jpg", "enter the text field next to the name"]
-    ],
-    cache_examples=False)
-demo.launch()
-#

 import re
 import gradio as gr
+from PIL import Image
 import torch
 from transformers import DonutProcessor, VisionEncoderDecoderModel
 model.to(device)
+def process_document(image: Image, prompt: str):
+    # trim prompt to 80 characters and normalize to lowercase
+    prompt = prompt[:80].lower()
     # prepare encoder inputs
     pixel_values = processor(image, return_tensors="pt").pixel_values
 description = "Gradio Demo for Donut RefExp task, an instance of `VisionEncoderDecoderModel` fine-tuned on UIBert RefExp Dataset (UI Referring Expression). To use it, simply upload your image and type a question and click 'submit', or click one of the examples to load them. Read more at the links below."
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
+examples = [
+    ["example_1.jpg", "select the setting icon from top right corner"],
+    ["example_2.jpg", "enter the text field next to the name"]
+],
+demo = gr.Interface(fn=visual_grounding, inputs=[gr.inputs.Image(type='pil'), "textbox"],
+                    outputs=[gr.inputs.Image(type='pil'), "textbox"],
+                    title=title, description=description, article=article, examples=examples,
+                    allow_flagging=False, allow_screenshot=False)
+demo.launch(cache_examples=True)