root committed on
Commit
9142812
·
1 Parent(s): 03a4122
Files changed (1) hide show
  1. app.py +20 -12
app.py CHANGED
@@ -157,14 +157,15 @@ def _file_to_b64_image_only(path_or_url: str) -> Tuple[str, int]:
157
  return base64.b64encode(content).decode("utf-8"), 1 # 1 = image 类型
158
 
159
  def _call_api(api_url: str, path_or_url: str, use_layout_detection: bool,
160
- prompt_label: Optional[str], use_chart_recognition: bool = False) -> Dict[str, Any]:
 
161
  b64, file_type = _file_to_b64_image_only(path_or_url)
162
  payload = {
163
  "file": b64,
164
  "useLayoutDetection": bool(use_layout_detection),
165
  "fileType": file_type,
166
- "useDocUnwarping":False,
167
- "useDocOrientationClassify":False
168
  }
169
  if not use_layout_detection:
170
  if not prompt_label:
@@ -227,11 +228,12 @@ def _process_api_response_page(result: Dict[str, Any]) -> Tuple[str, str, str]:
227
  md_text = _escape_inequalities_in_math(md_text)
228
  return md_text or "(Empty result)", output_html, md_text
229
 
230
- def handle_complex_doc(path_or_url: str, use_chart_recognition: bool) -> Tuple[str, str, str]:
231
  if not path_or_url:
232
  raise gr.Error("Please upload an image first.")
233
  data = _call_api(DEFAULT_API_URL, path_or_url, use_layout_detection=True,
234
- prompt_label=None, use_chart_recognition=use_chart_recognition)
 
235
  result = data.get("result", {})
236
  return _process_api_response_page(result)
237
 
@@ -245,7 +247,7 @@ def handle_targeted_recognition(path_or_url: str, prompt_choice: str) -> Tuple[s
245
  "Chart Recognition": "chart",
246
  }
247
  label = mapping.get(prompt_choice, "ocr")
248
- data = _call_api(DEFAULT_API_URL, path_or_url, use_layout_detection=False, prompt_label=label)
249
  result = data.get("result", {})
250
  md_preview, _, md_raw = _process_api_response_page(result)
251
  return md_preview, md_raw
@@ -273,6 +275,7 @@ body, .gradio-container { font-family: "Noto Sans SC", "Microsoft YaHei", "PingF
273
  .notice strong { font-weight: 700; }
274
  .notice a { color: #3b82f6; text-decoration: none; }
275
  .notice a:hover { text-decoration: underline; }
 
276
  """
277
 
278
  with gr.Blocks(head=GOOGLE_FONTS_URL, css=custom_css, theme=gr.themes.Soft()) as demo:
@@ -280,7 +283,7 @@ with gr.Blocks(head=GOOGLE_FONTS_URL, css=custom_css, theme=gr.themes.Soft()) as
280
  gr.HTML(f"""<div class="app-header"><img src="{logo_data_url}" alt="App Logo" style="max-height:10%; width: auto; margin: 10px auto; display: block;"></div>""")
281
  gr.HTML("""<div class="notice"><strong>Heads up:</strong> The Hugging Face demo can be slow at times. For a faster experience, please try <a href="https://aistudio.baidu.com/application/detail/98365" target="_blank" rel="noopener noreferrer">Baidu AI Studio</a> or <a href="https://modelscope.cn/studios/PaddlePaddle/PaddleOCR-VL_Online_Demo/summary" target="_blank" rel="noopener noreferrer">ModelScope</a>.</div>""")
282
  gr.HTML("""<div class="quick-links"><a href="https://github.com/PaddlePaddle/PaddleOCR" target="_blank">GitHub</a> | <a href="https://ernie.baidu.com/blog/publication/PaddleOCR-VL_Technical_Report.pdf" target="_blank">Technical Report</a> | <a href="https://huggingface.co/PaddlePaddle/PaddleOCR-VL" target="_blank">Model</a></div>""")
283
-
284
  with gr.Tabs():
285
  # ===================== Document Parsing =====================
286
  with gr.Tab("Document Parsing"):
@@ -294,8 +297,13 @@ with gr.Blocks(head=GOOGLE_FONTS_URL, css=custom_css, theme=gr.themes.Soft()) as
294
  example_url_doc = gr.State(value=None)
295
 
296
  with gr.Row(variant="panel"):
297
- chart_parsing_switch = gr.Checkbox(label="Enable chart parsing", value=False, scale=1)
298
- btn_parse = gr.Button("Parse Document", variant="primary", scale=2)
 
 
 
 
 
299
 
300
  if complex_document_examples:
301
  complex_paths = [e[0] for e in complex_document_examples] # 这里是 List[str]
@@ -343,13 +351,13 @@ with gr.Blocks(head=GOOGLE_FONTS_URL, css=custom_css, theme=gr.themes.Soft()) as
343
 
344
  file_doc.change(fn=on_file_doc_change, inputs=[file_doc], outputs=[example_url_doc, preview_doc_html])
345
 
346
- def parse_doc_router(fp, example_url, use_chart):
347
  src = fp if fp else example_url
348
  if not src:
349
  raise gr.Error("Please upload an image or pick an example first.")
350
- return handle_complex_doc(src, use_chart)
351
 
352
- btn_parse.click(fn=parse_doc_router, inputs=[file_doc, example_url_doc, chart_parsing_switch],
353
  outputs=[md_preview_doc, vis_image_doc, md_raw_doc])
354
 
355
  # ===================== Element-level Recognition =====================
 
157
  return base64.b64encode(content).decode("utf-8"), 1 # 1 = image 类型
158
 
159
  def _call_api(api_url: str, path_or_url: str, use_layout_detection: bool,
160
+ prompt_label: Optional[str], use_chart_recognition: bool = False,
161
+ use_doc_unwarping: bool = True, use_doc_orientation_classify: bool = True) -> Dict[str, Any]:
162
  b64, file_type = _file_to_b64_image_only(path_or_url)
163
  payload = {
164
  "file": b64,
165
  "useLayoutDetection": bool(use_layout_detection),
166
  "fileType": file_type,
167
+ "useDocUnwarping": use_doc_unwarping,
168
+ "useDocOrientationClassify": use_doc_orientation_classify
169
  }
170
  if not use_layout_detection:
171
  if not prompt_label:
 
228
  md_text = _escape_inequalities_in_math(md_text)
229
  return md_text or "(Empty result)", output_html, md_text
230
 
231
+ def handle_complex_doc(path_or_url: str, use_chart_recognition: bool, use_doc_unwarping: bool, use_doc_orientation_classify: bool) -> Tuple[str, str, str]:
232
  if not path_or_url:
233
  raise gr.Error("Please upload an image first.")
234
  data = _call_api(DEFAULT_API_URL, path_or_url, use_layout_detection=True,
235
+ prompt_label=None, use_chart_recognition=use_chart_recognition,
236
+ use_doc_unwarping=use_doc_unwarping, use_doc_orientation_classify=use_doc_orientation_classify)
237
  result = data.get("result", {})
238
  return _process_api_response_page(result)
239
 
 
247
  "Chart Recognition": "chart",
248
  }
249
  label = mapping.get(prompt_choice, "ocr")
250
+ data = _call_api(DEFAULT_API_URL, path_or_url, use_layout_detection=False, prompt_label=label, use_doc_unwarping=False, use_doc_orientation_classify=False)
251
  result = data.get("result", {})
252
  md_preview, _, md_raw = _process_api_response_page(result)
253
  return md_preview, md_raw
 
275
  .notice strong { font-weight: 700; }
276
  .notice a { color: #3b82f6; text-decoration: none; }
277
  .notice a:hover { text-decoration: underline; }
278
+ .checkbox-row .gradio-checkbox { flex-grow: 1; text-align: center; }
279
  """
280
 
281
  with gr.Blocks(head=GOOGLE_FONTS_URL, css=custom_css, theme=gr.themes.Soft()) as demo:
 
283
  gr.HTML(f"""<div class="app-header"><img src="{logo_data_url}" alt="App Logo" style="max-height:10%; width: auto; margin: 10px auto; display: block;"></div>""")
284
  gr.HTML("""<div class="notice"><strong>Heads up:</strong> The Hugging Face demo can be slow at times. For a faster experience, please try <a href="https://aistudio.baidu.com/application/detail/98365" target="_blank" rel="noopener noreferrer">Baidu AI Studio</a> or <a href="https://modelscope.cn/studios/PaddlePaddle/PaddleOCR-VL_Online_Demo/summary" target="_blank" rel="noopener noreferrer">ModelScope</a>.</div>""")
285
  gr.HTML("""<div class="quick-links"><a href="https://github.com/PaddlePaddle/PaddleOCR" target="_blank">GitHub</a> | <a href="https://ernie.baidu.com/blog/publication/PaddleOCR-VL_Technical_Report.pdf" target="_blank">Technical Report</a> | <a href="https://huggingface.co/PaddlePaddle/PaddleOCR-VL" target="_blank">Model</a></div>""")
286
+
287
  with gr.Tabs():
288
  # ===================== Document Parsing =====================
289
  with gr.Tab("Document Parsing"):
 
297
  example_url_doc = gr.State(value=None)
298
 
299
  with gr.Row(variant="panel"):
300
+ with gr.Column(scale=2):
301
+ btn_parse = gr.Button("Parse Document", variant="primary")
302
+ with gr.Column(scale=3):
303
+ with gr.Row(elem_classes=["checkbox-row"]):
304
+ chart_parsing_switch = gr.Checkbox(label="Enable chart parsing", value=False, min_width=10)
305
+ doc_unwarping_switch = gr.Checkbox(label="Enable document unwarping", value=True, min_width=10)
306
+ doc_orientation_switch = gr.Checkbox(label="Enable orientation classification", value=True, min_width=10)
307
 
308
  if complex_document_examples:
309
  complex_paths = [e[0] for e in complex_document_examples] # 这里是 List[str]
 
351
 
352
  file_doc.change(fn=on_file_doc_change, inputs=[file_doc], outputs=[example_url_doc, preview_doc_html])
353
 
354
+ def parse_doc_router(fp, example_url, use_chart, use_unwarping, use_orientation):
355
  src = fp if fp else example_url
356
  if not src:
357
  raise gr.Error("Please upload an image or pick an example first.")
358
+ return handle_complex_doc(src, use_chart, use_unwarping, use_orientation)
359
 
360
+ btn_parse.click(fn=parse_doc_router, inputs=[file_doc, example_url_doc, chart_parsing_switch, doc_unwarping_switch, doc_orientation_switch],
361
  outputs=[md_preview_doc, vis_image_doc, md_raw_doc])
362
 
363
  # ===================== Element-level Recognition =====================