root committed on
Commit
b9d6b1d
·
1 Parent(s): d7600ce

returnback

Browse files
Files changed (1) hide show
  1. app.py +23 -39
app.py CHANGED
@@ -64,46 +64,14 @@ def _escape_inequalities_in_math(md: str) -> str:
64
  return md
65
 
66
  def _get_examples_from_dir(dir_path: str) -> List[List[str]]:
67
-
68
- BASE_URL = "https://paddle-model-ecology.bj.bcebos.com/PPOCRVL/dataset/examples"
69
  supported_exts = {".png", ".jpg", ".jpeg", ".bmp", ".webp"}
70
  examples = []
71
- if not os.path.exists(dir_path):
72
- print(f"Warning: example dir {dir_path} not found.")
73
- return []
74
  for filename in sorted(os.listdir(dir_path)):
75
- ext = os.path.splitext(filename)[1].lower()
76
- if ext in supported_exts:
77
- subdir = os.path.basename(dir_path.rstrip("/"))
78
- img_url = f"{BASE_URL}/{subdir}/{filename}"
79
- examples.append([img_url])
80
  return examples
81
 
82
-
83
- import tempfile
84
- from urllib.parse import urlparse
85
-
86
- def _download_to_temp(url: str) -> str:
87
- r = requests.get(url, timeout=600)
88
- r.raise_for_status()
89
- ext = os.path.splitext(urlparse(url).path)[1] or ".jpg"
90
- fd, tmp_path = tempfile.mkstemp(suffix=ext)
91
- with os.fdopen(fd, "wb") as f:
92
- f.write(r.content)
93
- return tmp_path
94
-
95
- def _on_gallery_select(example_paths: List[str], evt: gr.SelectData):
96
- idx = evt.index
97
- selected_path = example_paths[idx]
98
- if isinstance(selected_path, list):
99
- selected_path = selected_path[0]
100
- if isinstance(selected_path, str) and selected_path.startswith(("http://", "https://")):
101
- tmp_file = _download_to_temp(selected_path)
102
- print(f"Selected remote example -> tmp file: {tmp_file}")
103
- return tmp_file
104
- else:
105
- return selected_path
106
-
107
  TARGETED_EXAMPLES_DIR = "examples/targeted"
108
  COMPLEX_EXAMPLES_DIR = "examples/complex"
109
  targeted_recognition_examples = _get_examples_from_dir(TARGETED_EXAMPLES_DIR)
@@ -127,7 +95,12 @@ def update_preview_visibility(file_path: Optional[str]) -> Dict:
127
  else:
128
  return gr.update(value="", visible=False)
129
 
130
-
 
 
 
 
 
131
 
132
  # =========================
133
  # API Call Logic
@@ -175,7 +148,6 @@ def _call_api(api_url: str, file_path: str, use_layout_detection: bool,
175
  return data
176
 
177
 
178
-
179
  def _process_api_response_page(result: Dict[str, Any]) -> Tuple[str, str, str]:
180
  """
181
  Processes the API response.
@@ -188,6 +160,7 @@ def _process_api_response_page(result: Dict[str, Any]) -> Tuple[str, str, str]:
188
 
189
  page0 = layout_results[0] or {}
190
 
 
191
  md_data = page0.get("markdown") or {}
192
  md_text = md_data.get("text", "") or ""
193
  md_images_map = md_data.get("images", {})
@@ -196,19 +169,24 @@ def _process_api_response_page(result: Dict[str, Any]) -> Tuple[str, str, str]:
196
  md_text = md_text.replace(f'src="{placeholder_path}"', f'src="{image_url}"') \
197
  .replace(f']({placeholder_path})', f']({image_url})')
198
 
 
199
  output_html = "<p style='text-align:center; color:#888;'>No visualization image available.</p>"
200
  out_imgs = page0.get("outputImages") or {}
201
 
 
202
  sorted_urls = [img_url for _, img_url in sorted(out_imgs.items()) if img_url]
203
 
 
204
  output_image_url: Optional[str] = None
205
  if len(sorted_urls) >= 2:
206
  output_image_url = sorted_urls[1]
207
  elif sorted_urls:
208
  output_image_url = sorted_urls[0]
209
 
 
210
  if output_image_url:
211
  print(f"Found visualization image URL: {output_image_url}")
 
212
  output_html = f'<img src="{output_image_url}" alt="Detection Visualization">'
213
  else:
214
  print("Warning: No visualization image URL found in the API response.")
@@ -216,10 +194,14 @@ def _process_api_response_page(result: Dict[str, Any]) -> Tuple[str, str, str]:
216
  md_text = _escape_inequalities_in_math(md_text)
217
  return md_text or "(Empty result)", output_html, md_text
218
 
 
 
 
219
  def handle_complex_doc(file_path: str, use_chart_recognition: bool) -> Tuple[str, str, str]:
220
  if not file_path: raise gr.Error("Please upload an image first.")
221
  data = _call_api(DEFAULT_API_URL, file_path, use_layout_detection=True, prompt_label=None, use_chart_recognition=use_chart_recognition)
222
  result = data.get("result", {})
 
223
  return _process_api_response_page(result)
224
 
225
  def handle_targeted_recognition(file_path: str, prompt_choice: str) -> Tuple[str, str]:
@@ -263,7 +245,8 @@ body, .gradio-container {
263
  with gr.Blocks(head=GOOGLE_FONTS_URL, css=custom_css, theme=gr.themes.Soft()) as demo:
264
  logo_data_url = image_to_base64_data_url(LOGO_IMAGE_PATH) if os.path.exists(LOGO_IMAGE_PATH) else ""
265
  gr.HTML(f"""<div class="app-header"><img src="{logo_data_url}" alt="App Logo" style="max-height:10%; width: auto; margin: 10px auto; display: block;"></div>""")
266
- gr.HTML("""<div class="quick-links"><a href="https://github.com/PaddlePaddle/PaddleOCR" target="_blank">GitHub</a> | <a href="https://ernie.baidu.com/blog/publication/PaddleOCR-VL_Technical_Report.pdf" target="_blank">Technical Report</a> | <a href="https://www.modelscope.cn/models/PaddlePaddle/PaddleOCR-VL" target="_blank">Model</a></div>""")
 
267
 
268
  with gr.Tabs():
269
  with gr.Tab("Document Parsing"):
@@ -288,6 +271,7 @@ with gr.Blocks(head=GOOGLE_FONTS_URL, css=custom_css, theme=gr.themes.Soft()) as
288
  with gr.Tab("Markdown Preview"):
289
  md_preview_doc = gr.Markdown("Please upload an image and click 'Parse Document'.", latex_delimiters=LATEX_DELIMS, elem_id="md_preview_doc")
290
  with gr.Tab("Visualization"):
 
291
  vis_image_doc = gr.HTML(label="Detection Visualization", elem_id="vis_image_doc")
292
  with gr.Tab("Markdown Source"):
293
  md_raw_doc = gr.Code(label="Markdown Source Code", language="markdown")
@@ -329,5 +313,5 @@ with gr.Blocks(head=GOOGLE_FONTS_URL, css=custom_css, theme=gr.themes.Soft()) as
329
  btn_chart.click(fn=handle_targeted_recognition, inputs=[file_vl, gr.State("Chart Recognition")], outputs=[md_preview_vl, md_raw_vl])
330
 
331
  if __name__ == "__main__":
332
- port = int(os.getenv("PORT", "7860"))
333
  demo.queue(max_size=64).launch(server_name="0.0.0.0", server_port=port,share=False)
 
64
  return md
65
 
66
  def _get_examples_from_dir(dir_path: str) -> List[List[str]]:
 
 
67
  supported_exts = {".png", ".jpg", ".jpeg", ".bmp", ".webp"}
68
  examples = []
69
+ if not os.path.exists(dir_path): return []
 
 
70
  for filename in sorted(os.listdir(dir_path)):
71
+ if os.path.splitext(filename)[1].lower() in supported_exts:
72
+ examples.append([os.path.join(dir_path, filename)])
 
 
 
73
  return examples
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  TARGETED_EXAMPLES_DIR = "examples/targeted"
76
  COMPLEX_EXAMPLES_DIR = "examples/complex"
77
  targeted_recognition_examples = _get_examples_from_dir(TARGETED_EXAMPLES_DIR)
 
95
  else:
96
  return gr.update(value="", visible=False)
97
 
98
+ def _on_gallery_select(example_paths: List[str], evt: gr.SelectData):
99
+ try:
100
+ idx = evt.index
101
+ return example_paths[idx]
102
+ except Exception:
103
+ return None
104
 
105
  # =========================
106
  # API Call Logic
 
148
  return data
149
 
150
 
 
151
  def _process_api_response_page(result: Dict[str, Any]) -> Tuple[str, str, str]:
152
  """
153
  Processes the API response.
 
160
 
161
  page0 = layout_results[0] or {}
162
 
163
+ # Step 1: Process Markdown content (unchanged from previous optimization)
164
  md_data = page0.get("markdown") or {}
165
  md_text = md_data.get("text", "") or ""
166
  md_images_map = md_data.get("images", {})
 
169
  md_text = md_text.replace(f'src="{placeholder_path}"', f'src="{image_url}"') \
170
  .replace(f']({placeholder_path})', f']({image_url})')
171
 
172
+ # 【核心改动点】 Step 2: Process Visualization images by creating an HTML string
173
  output_html = "<p style='text-align:center; color:#888;'>No visualization image available.</p>"
174
  out_imgs = page0.get("outputImages") or {}
175
 
176
+ # Get all image URLs and sort them
177
  sorted_urls = [img_url for _, img_url in sorted(out_imgs.items()) if img_url]
178
 
179
+ # Logic to select the final visualization image URL
180
  output_image_url: Optional[str] = None
181
  if len(sorted_urls) >= 2:
182
  output_image_url = sorted_urls[1]
183
  elif sorted_urls:
184
  output_image_url = sorted_urls[0]
185
 
186
+ # If a URL was found, create the <img> tag
187
  if output_image_url:
188
  print(f"Found visualization image URL: {output_image_url}")
189
+ # The CSS will style this `img` tag because of the `#vis_image_doc img` selector
190
  output_html = f'<img src="{output_image_url}" alt="Detection Visualization">'
191
  else:
192
  print("Warning: No visualization image URL found in the API response.")
 
194
  md_text = _escape_inequalities_in_math(md_text)
195
  return md_text or "(Empty result)", output_html, md_text
196
 
197
+ # =========================
198
+ # Handlers
199
+ # =========================
200
  def handle_complex_doc(file_path: str, use_chart_recognition: bool) -> Tuple[str, str, str]:
201
  if not file_path: raise gr.Error("Please upload an image first.")
202
  data = _call_api(DEFAULT_API_URL, file_path, use_layout_detection=True, prompt_label=None, use_chart_recognition=use_chart_recognition)
203
  result = data.get("result", {})
204
+ # Note the return types now align with the new function signature
205
  return _process_api_response_page(result)
206
 
207
  def handle_targeted_recognition(file_path: str, prompt_choice: str) -> Tuple[str, str]:
 
245
  with gr.Blocks(head=GOOGLE_FONTS_URL, css=custom_css, theme=gr.themes.Soft()) as demo:
246
  logo_data_url = image_to_base64_data_url(LOGO_IMAGE_PATH) if os.path.exists(LOGO_IMAGE_PATH) else ""
247
  gr.HTML(f"""<div class="app-header"><img src="{logo_data_url}" alt="App Logo" style="max-height:10%; width: auto; margin: 10px auto; display: block;"></div>""")
248
+ gr.HTML("""<div class="notice"><strong>Heads up:</strong> The Hugging Face demo can be slow at times. For a faster experience, please try <a href="https://aistudio.baidu.com/application/detail/98365" target="_blank" rel="noopener noreferrer">Baidu AI Studio</a> or <a href="https://modelscope.cn/studios/PaddlePaddle/PaddleOCR-VL_Online_Demo/summary" target="_blank" rel="noopener noreferrer">ModelScope</a>.</div>""")
249
+ gr.HTML("""<div class="quick-links"><a href="https://github.com/PaddlePaddle/PaddleOCR" target="_blank">GitHub</a> | <a href="https://ernie.baidu.com/blog/publication/PaddleOCR-VL_Technical_Report.pdf" target="_blank">Technical Report</a> | <a href="https://huggingface.co/PaddlePaddle/PaddleOCR-VL" target="_blank">Model</a></div>""")
250
 
251
  with gr.Tabs():
252
  with gr.Tab("Document Parsing"):
 
271
  with gr.Tab("Markdown Preview"):
272
  md_preview_doc = gr.Markdown("Please upload an image and click 'Parse Document'.", latex_delimiters=LATEX_DELIMS, elem_id="md_preview_doc")
273
  with gr.Tab("Visualization"):
274
+ # 【核心改动点】: 将 gr.Image 替换为 gr.HTML
275
  vis_image_doc = gr.HTML(label="Detection Visualization", elem_id="vis_image_doc")
276
  with gr.Tab("Markdown Source"):
277
  md_raw_doc = gr.Code(label="Markdown Source Code", language="markdown")
 
313
  btn_chart.click(fn=handle_targeted_recognition, inputs=[file_vl, gr.State("Chart Recognition")], outputs=[md_preview_vl, md_raw_vl])
314
 
315
  if __name__ == "__main__":
316
+ port = int(os.getenv("PORT", "8812"))
317
  demo.queue(max_size=64).launch(server_name="0.0.0.0", server_port=port,share=False)