Luis J Camargo committed on
Commit
b107ea6
·
1 Parent(s): 2147761

refactor: Migrate image inputs from `gr.File` to `gr.Image` for native preview support, removing custom preview logic, and add full progress display for inference tasks.

Browse files
Files changed (1) hide show
  1. app.py +42 -37
app.py CHANGED
@@ -167,22 +167,7 @@ def _escape_inequalities_in_math(md: str) -> str:
167
  md = pat.sub(lambda m: m.group(0).replace(m.group(1), fix(m.group(1))), md)
168
  return md
169
 
170
- def update_preview_visibility(path_or_url: Optional[str]) -> Dict:
171
- if not path_or_url:
172
- return gr.update(value="", visible=False)
173
-
174
- is_url = isinstance(path_or_url, str) and path_or_url.startswith(("http://", "https://"))
175
- if is_url:
176
- src = path_or_url
177
- else:
178
- src = image_to_base64_data_url(path_or_url)
179
-
180
- html_content = f"""
181
- <div class="uploaded-image" style="background: white; padding: 10px; border-radius: 8px;">
182
- <img src="{src}" alt="Preview" style="width:100%; height:auto; max-height:800px; object-fit:contain;"/>
183
- </div>
184
- """
185
- return gr.update(value=html_content, visible=True)
186
 
187
  # --- Inference Logic ---
188
 
@@ -211,20 +196,28 @@ def run_inference(img_path, task_type="ocr"):
211
  os.makedirs(run_output_dir, exist_ok=True)
212
 
213
  for i, res in enumerate(output):
 
 
214
  res.save_to_json(save_path=run_output_dir)
215
  res.save_to_markdown(save_path=run_output_dir)
216
  res.print()
217
 
 
 
218
  fnames = os.listdir(run_output_dir)
219
  for fname in fnames:
220
  fpath = os.path.join(run_output_dir, fname)
221
  if fname.endswith(".md"):
222
  with open(fpath, 'r', encoding='utf-8') as f:
223
- md_content += f.read() + "\n\n"
 
 
224
  elif fname.endswith(".json"):
225
  with open(fpath, 'r', encoding='utf-8') as f:
226
- json_content += f.read() + "\n\n"
227
- elif fname.endswith((".png", ".jpg", ".jpeg")) and "res" in fname:
 
 
228
  vis_src = image_to_base64_data_url(fpath)
229
  vis_html += f'<div style="margin-bottom:20px; border: 2px solid #10b981; border-radius: 12px; overflow: hidden; background:white;">'
230
  vis_html += f'<img src="{vis_src}" alt="Vis {i+1}" style="width:100%;">'
@@ -275,12 +268,12 @@ with gr.Blocks() as demo:
275
  gr.Markdown(f"**⚑ Status:** {status_text} | **Model:** `{REPO_ID}` | **Hardware:** CPU")
276
 
277
  with gr.Tabs():
 
278
  with gr.Tab("πŸ“„ Full Document Parsing"):
279
  with gr.Row():
280
  with gr.Column(scale=5):
281
- file_doc = gr.File(label="Upload Document", type="filepath")
282
- preview_doc_html = gr.HTML(visible=False)
283
- btn_parse = gr.Button("πŸ” Start Parsing", variant="primary")
284
  with gr.Row():
285
  chart_switch = gr.Checkbox(label="Chart OCR", value=True)
286
  unwarp_switch = gr.Checkbox(label="Unwarping", value=False)
@@ -290,23 +283,27 @@ with gr.Blocks() as demo:
290
  with gr.Tab("πŸ“ Markdown View"):
291
  md_preview_doc = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output-box")
292
  with gr.Tab("πŸ–ΌοΈ Visual Results"):
293
- vis_image_doc = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Waiting for results...</div>')
294
  with gr.Tab("πŸ“œ Raw Source"):
295
  md_raw_doc = gr.Code(language="markdown")
296
 
297
- file_doc.change(update_preview_visibility, file_doc, preview_doc_html)
298
-
299
  def parse_doc_wrapper(fp, ch, uw):
 
300
  res_preview, res_raw, res_vis, res_json = run_inference(fp, task_type="Document")
301
  return res_preview, res_vis, res_raw
302
 
303
- btn_parse.click(parse_doc_wrapper, [file_doc, chart_switch, unwarp_switch], [md_preview_doc, vis_image_doc, md_raw_doc])
 
 
 
 
 
304
 
 
305
  with gr.Tab("🧩 Specific Recognition"):
306
  with gr.Row():
307
  with gr.Column(scale=5):
308
- file_vl = gr.File(label="Upload Element", type="filepath")
309
- preview_vl_html = gr.HTML(visible=False)
310
  with gr.Row():
311
  btn_ocr = gr.Button("Text", variant="secondary")
312
  btn_formula = gr.Button("Formula", variant="secondary")
@@ -319,36 +316,44 @@ with gr.Blocks() as demo:
319
  with gr.Tab("πŸ“œ Source"):
320
  md_raw_vl = gr.Code(language="markdown")
321
 
322
- file_vl.change(update_preview_visibility, file_vl, preview_vl_html)
323
-
324
  def run_vl_wrapper(fp, prompt):
 
325
  res_preview, res_raw, _, _ = run_inference(fp, task_type=prompt)
326
  return res_preview, res_raw
327
 
328
  for btn, prompt in [(btn_ocr, "Text"), (btn_formula, "Formula"), (btn_table, "Table")]:
329
- btn.click(run_vl_wrapper, [file_vl, gr.State(prompt)], [md_preview_vl, md_raw_vl])
 
 
 
 
 
330
 
 
331
  with gr.Tab("πŸ“ Feature Spotting"):
332
  with gr.Row():
333
  with gr.Column(scale=5):
334
- file_spot = gr.File(label="Target Image", type="filepath")
335
- preview_spot_html = gr.HTML(visible=False)
336
  btn_run_spot = gr.Button("🎯 Run Spotting", variant="primary")
337
 
338
  with gr.Column(scale=7):
339
  with gr.Tabs():
340
  with gr.Tab("πŸ–ΌοΈ Detection"):
341
- vis_image_spot = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Visual detection here.</div>')
342
  with gr.Tab("πŸ’Ύ JSON Feed"):
343
  json_spot = gr.Code(label="JSON", language="json")
344
 
345
- file_spot.change(update_preview_visibility, file_spot, preview_spot_html)
346
-
347
  def run_spotting_wrapper(fp):
 
348
  _, _, vis, js = run_inference(fp, task_type="Spotting")
349
  return vis, js
350
 
351
- btn_run_spot.click(run_spotting_wrapper, file_spot, [vis_image_spot, json_spot])
 
 
 
 
 
352
 
353
  gr.Markdown("--- \n *Tachiwin Project: Indigenous Languages of Mexico.*")
354
 
 
167
  md = pat.sub(lambda m: m.group(0).replace(m.group(1), fix(m.group(1))), md)
168
  return md
169
 
170
+ # Removed update_preview_visibility as gr.Image handles previews natively.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  # --- Inference Logic ---
173
 
 
196
  os.makedirs(run_output_dir, exist_ok=True)
197
 
198
  for i, res in enumerate(output):
199
+ logger.info(f"Processing segment {i+1}...")
200
+ # Save results
201
  res.save_to_json(save_path=run_output_dir)
202
  res.save_to_markdown(save_path=run_output_dir)
203
  res.print()
204
 
205
+ # Read back generated files from this segment's save
206
+ # Paddle naming: res_{i}.md, res_{i}.json, etc.
207
  fnames = os.listdir(run_output_dir)
208
  for fname in fnames:
209
  fpath = os.path.join(run_output_dir, fname)
210
  if fname.endswith(".md"):
211
  with open(fpath, 'r', encoding='utf-8') as f:
212
+ content = f.read()
213
+ if content not in md_content: # Skip content that was already appended (e.g. a file that an earlier directory listing already returned)
214
+ md_content += content + "\n\n"
215
  elif fname.endswith(".json"):
216
  with open(fpath, 'r', encoding='utf-8') as f:
217
+ content = f.read()
218
+ if content not in json_content:
219
+ json_content += content + "\n\n"
220
+ elif fname.endswith((".png", ".jpg", ".jpeg")) and ("res" in fname or "vis" in fname):
221
  vis_src = image_to_base64_data_url(fpath)
222
  vis_html += f'<div style="margin-bottom:20px; border: 2px solid #10b981; border-radius: 12px; overflow: hidden; background:white;">'
223
  vis_html += f'<img src="{vis_src}" alt="Vis {i+1}" style="width:100%;">'
 
268
  gr.Markdown(f"**⚑ Status:** {status_text} | **Model:** `{REPO_ID}` | **Hardware:** CPU")
269
 
270
  with gr.Tabs():
271
+ # Document Parsing Tab
272
  with gr.Tab("πŸ“„ Full Document Parsing"):
273
  with gr.Row():
274
  with gr.Column(scale=5):
275
+ file_doc = gr.Image(label="Upload Image", type="filepath")
276
+ btn_parse = gr.Button("οΏ½ Start Parsing", variant="primary")
 
277
  with gr.Row():
278
  chart_switch = gr.Checkbox(label="Chart OCR", value=True)
279
  unwarp_switch = gr.Checkbox(label="Unwarping", value=False)
 
283
  with gr.Tab("πŸ“ Markdown View"):
284
  md_preview_doc = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output-box")
285
  with gr.Tab("πŸ–ΌοΈ Visual Results"):
286
+ vis_image_doc = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Results will appear here.</div>')
287
  with gr.Tab("πŸ“œ Raw Source"):
288
  md_raw_doc = gr.Code(language="markdown")
289
 
 
 
290
  def parse_doc_wrapper(fp, ch, uw):
291
+ if not fp: return "⚠️ Please upload an image.", "", ""
292
  res_preview, res_raw, res_vis, res_json = run_inference(fp, task_type="Document")
293
  return res_preview, res_vis, res_raw
294
 
295
+ btn_parse.click(
296
+ parse_doc_wrapper,
297
+ [file_doc, chart_switch, unwarp_switch],
298
+ [md_preview_doc, vis_image_doc, md_raw_doc],
299
+ show_progress="full"
300
+ )
301
 
302
+ # Element Recognition Tab
303
  with gr.Tab("🧩 Specific Recognition"):
304
  with gr.Row():
305
  with gr.Column(scale=5):
306
+ file_vl = gr.Image(label="Upload Element", type="filepath")
 
307
  with gr.Row():
308
  btn_ocr = gr.Button("Text", variant="secondary")
309
  btn_formula = gr.Button("Formula", variant="secondary")
 
316
  with gr.Tab("πŸ“œ Source"):
317
  md_raw_vl = gr.Code(language="markdown")
318
 
 
 
319
  def run_vl_wrapper(fp, prompt):
320
+ if not fp: return "⚠️ Please upload an image.", ""
321
  res_preview, res_raw, _, _ = run_inference(fp, task_type=prompt)
322
  return res_preview, res_raw
323
 
324
  for btn, prompt in [(btn_ocr, "Text"), (btn_formula, "Formula"), (btn_table, "Table")]:
325
+ btn.click(
326
+ run_vl_wrapper,
327
+ [file_vl, gr.State(prompt)],
328
+ [md_preview_vl, md_raw_vl],
329
+ show_progress="full"
330
+ )
331
 
332
+ # Spotting Tab
333
  with gr.Tab("πŸ“ Feature Spotting"):
334
  with gr.Row():
335
  with gr.Column(scale=5):
336
+ file_spot = gr.Image(label="Target Image", type="filepath")
 
337
  btn_run_spot = gr.Button("🎯 Run Spotting", variant="primary")
338
 
339
  with gr.Column(scale=7):
340
  with gr.Tabs():
341
  with gr.Tab("πŸ–ΌοΈ Detection"):
342
+ vis_image_spot = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Bboxes view.</div>')
343
  with gr.Tab("πŸ’Ύ JSON Feed"):
344
  json_spot = gr.Code(label="JSON", language="json")
345
 
 
 
346
  def run_spotting_wrapper(fp):
347
+ if not fp: return "", ""
348
  _, _, vis, js = run_inference(fp, task_type="Spotting")
349
  return vis, js
350
 
351
+ btn_run_spot.click(
352
+ run_spotting_wrapper,
353
+ file_spot,
354
+ [vis_image_spot, json_spot],
355
+ show_progress="full"
356
+ )
357
 
358
  gr.Markdown("--- \n *Tachiwin Project: Indigenous Languages of Mexico.*")
359