Spaces:
Runtime error
Runtime error
ai42
committed on
Commit
·
7163e1b
1
Parent(s):
44f8cdc
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,17 +1,15 @@
|
|
| 1 |
import os
|
| 2 |
-
import io
|
| 3 |
|
| 4 |
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 5 |
|
| 6 |
from PIL import Image, ImageDraw
|
| 7 |
import traceback
|
| 8 |
|
| 9 |
-
|
| 10 |
import gradio as gr
|
| 11 |
|
| 12 |
import torch
|
| 13 |
from docquery import pipeline
|
| 14 |
-
from docquery.document import load_document,
|
| 15 |
from docquery.ocr_reader import get_ocr_reader
|
| 16 |
|
| 17 |
|
|
@@ -87,11 +85,8 @@ examples = [
|
|
| 87 |
"statement.png",
|
| 88 |
"What are net sales for 2020?",
|
| 89 |
],
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
"What is the highest sale amount of televsion in east region?",
|
| 93 |
-
|
| 94 |
-
]
|
| 95 |
# [
|
| 96 |
# "docquery.png",
|
| 97 |
# "How many likes does the space have?",
|
|
@@ -106,8 +101,6 @@ question_files = {
|
|
| 106 |
"What are net sales for 2020?": "statement.pdf",
|
| 107 |
"How many likes does the space have?": "https://huggingface.co/spaces/impira/docquery",
|
| 108 |
"What is the title of post number 5?": "https://news.ycombinator.com",
|
| 109 |
-
"What is the highest sale amount of television in east region?": "SaleData.pdf", # Add the PDF file here
|
| 110 |
-
|
| 111 |
}
|
| 112 |
|
| 113 |
|
|
@@ -150,10 +143,6 @@ def process_upload(file):
|
|
| 150 |
gr.update(visible=False, value=None),
|
| 151 |
None,
|
| 152 |
)
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
|
| 158 |
|
| 159 |
colors = ["#64A087", "green", "black"]
|
|
@@ -236,7 +225,7 @@ CSS = """
|
|
| 236 |
padding-bottom: 2px !important;
|
| 237 |
padding-left: 8px !important;
|
| 238 |
padding-right: 8px !important;
|
| 239 |
-
|
| 240 |
}
|
| 241 |
.gradio-container .gr-button-primary {
|
| 242 |
background: linear-gradient(180deg, #CDF9BE 0%, #AFF497 100%);
|
|
@@ -302,15 +291,19 @@ gradio-app h2, .gradio-app h2 {
|
|
| 302 |
"""
|
| 303 |
|
| 304 |
with gr.Blocks(css=CSS) as demo:
|
| 305 |
-
gr.Markdown("# Document Query Engine")
|
| 306 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
|
| 308 |
document = gr.Variable()
|
| 309 |
example_question = gr.Textbox(visible=False)
|
| 310 |
example_image = gr.Image(visible=False)
|
| 311 |
-
excel_upload = gr.File(label="Upload Excel", type="xlsx", elem_id="excel-upload-box")
|
| 312 |
-
|
| 313 |
-
excel_process_button = gr.Button("Process Excel", variant="primary", elem_id="excel-process-button")
|
| 314 |
|
| 315 |
with gr.Row(equal_height=True):
|
| 316 |
with gr.Column():
|
|
@@ -434,6 +427,5 @@ with gr.Blocks(css=CSS) as demo:
|
|
| 434 |
outputs=[document, question, image, img_clear_button, output, output_text],
|
| 435 |
)
|
| 436 |
|
| 437 |
-
|
| 438 |
if __name__ == "__main__":
|
| 439 |
demo.launch(enable_queue=False)
|
|
|
|
| 1 |
import os
|
|
|
|
| 2 |
|
| 3 |
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 4 |
|
| 5 |
from PIL import Image, ImageDraw
|
| 6 |
import traceback
|
| 7 |
|
|
|
|
| 8 |
import gradio as gr
|
| 9 |
|
| 10 |
import torch
|
| 11 |
from docquery import pipeline
|
| 12 |
+
from docquery.document import load_document, ImageDocument
|
| 13 |
from docquery.ocr_reader import get_ocr_reader
|
| 14 |
|
| 15 |
|
|
|
|
| 85 |
"statement.png",
|
| 86 |
"What are net sales for 2020?",
|
| 87 |
],
|
| 88 |
+
|
| 89 |
+
|
|
|
|
|
|
|
|
|
|
| 90 |
# [
|
| 91 |
# "docquery.png",
|
| 92 |
# "How many likes does the space have?",
|
|
|
|
| 101 |
"What are net sales for 2020?": "statement.pdf",
|
| 102 |
"How many likes does the space have?": "https://huggingface.co/spaces/impira/docquery",
|
| 103 |
"What is the title of post number 5?": "https://news.ycombinator.com",
|
|
|
|
|
|
|
| 104 |
}
|
| 105 |
|
| 106 |
|
|
|
|
| 143 |
gr.update(visible=False, value=None),
|
| 144 |
None,
|
| 145 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
|
| 148 |
colors = ["#64A087", "green", "black"]
|
|
|
|
| 225 |
padding-bottom: 2px !important;
|
| 226 |
padding-left: 8px !important;
|
| 227 |
padding-right: 8px !important;
|
| 228 |
+
margin-top: 10px;
|
| 229 |
}
|
| 230 |
.gradio-container .gr-button-primary {
|
| 231 |
background: linear-gradient(180deg, #CDF9BE 0%, #AFF497 100%);
|
|
|
|
| 291 |
"""
|
| 292 |
|
| 293 |
with gr.Blocks(css=CSS) as demo:
|
| 294 |
+
gr.Markdown("# DocQuery: Document Query Engine")
|
| 295 |
+
gr.Markdown(
|
| 296 |
+
"DocQuery (created by [Impira](https://impira.com?utm_source=huggingface&utm_medium=referral&utm_campaign=docquery_space))"
|
| 297 |
+
" uses LayoutLMv1 fine-tuned on DocVQA, a document visual question"
|
| 298 |
+
" answering dataset, as well as SQuAD, which boosts its English-language comprehension."
|
| 299 |
+
" To use it, simply upload an image or PDF, type a question, and click 'submit', or "
|
| 300 |
+
" click one of the examples to load them."
|
| 301 |
+
" DocQuery is MIT-licensed and available on [Github](https://github.com/impira/docquery)."
|
| 302 |
+
)
|
| 303 |
|
| 304 |
document = gr.Variable()
|
| 305 |
example_question = gr.Textbox(visible=False)
|
| 306 |
example_image = gr.Image(visible=False)
|
|
|
|
|
|
|
|
|
|
| 307 |
|
| 308 |
with gr.Row(equal_height=True):
|
| 309 |
with gr.Column():
|
|
|
|
| 427 |
outputs=[document, question, image, img_clear_button, output, output_text],
|
| 428 |
)
|
| 429 |
|
|
|
|
| 430 |
if __name__ == "__main__":
|
| 431 |
demo.launch(enable_queue=False)
|