Spaces:

tachiwin
/

document-ocr

Running

App Files Files Community

Luis J Camargo committed 1 day ago

Commit

b107ea6

1 Parent(s): 2147761

refactor: Migrate image inputs from `gr.File` to `gr.Image` for native preview support, removing custom preview logic, and add full progress display for inference tasks.

Browse files

Files changed (1) hide show

app.py +42 -37

app.py CHANGED Viewed

@@ -167,22 +167,7 @@ def _escape_inequalities_in_math(md: str) -> str:
         md = pat.sub(lambda m: m.group(0).replace(m.group(1), fix(m.group(1))), md)
     return md
-def update_preview_visibility(path_or_url: Optional[str]) -> Dict:
-    if not path_or_url:
-        return gr.update(value="", visible=False)
-    is_url = isinstance(path_or_url, str) and path_or_url.startswith(("http://", "https://"))
-    if is_url:
-        src = path_or_url
-    else:
-        src = image_to_base64_data_url(path_or_url)
-    html_content = f"""
-    <div class="uploaded-image" style="background: white; padding: 10px; border-radius: 8px;">
-        <img src="{src}" alt="Preview" style="width:100%; height:auto; max-height:800px; object-fit:contain;"/>
-    </div>
-    """
-    return gr.update(value=html_content, visible=True)
 # --- Inference Logic ---
@@ -211,20 +196,28 @@ def run_inference(img_path, task_type="ocr"):
         os.makedirs(run_output_dir, exist_ok=True)
         for i, res in enumerate(output):
             res.save_to_json(save_path=run_output_dir)
             res.save_to_markdown(save_path=run_output_dir)
             res.print()
             fnames = os.listdir(run_output_dir)
             for fname in fnames:
                 fpath = os.path.join(run_output_dir, fname)
                 if fname.endswith(".md"):
                     with open(fpath, 'r', encoding='utf-8') as f:
-                        md_content += f.read() + "\n\n"
                 elif fname.endswith(".json"):
                     with open(fpath, 'r', encoding='utf-8') as f:
-                        json_content += f.read() + "\n\n"
-                elif fname.endswith((".png", ".jpg", ".jpeg")) and "res" in fname:
                     vis_src = image_to_base64_data_url(fpath)
                     vis_html += f'<div style="margin-bottom:20px; border: 2px solid #10b981; border-radius: 12px; overflow: hidden; background:white;">'
                     vis_html += f'<img src="{vis_src}" alt="Vis {i+1}" style="width:100%;">'
@@ -275,12 +268,12 @@ with gr.Blocks() as demo:
         gr.Markdown(f"**⚡ Status:** {status_text} | **Model:** `{REPO_ID}` | **Hardware:** CPU")
     with gr.Tabs():
         with gr.Tab("📄 Full Document Parsing"):
             with gr.Row():
                 with gr.Column(scale=5):
-                    file_doc = gr.File(label="Upload Document", type="filepath")
-                    preview_doc_html = gr.HTML(visible=False)
-                    btn_parse = gr.Button("🔍 Start Parsing", variant="primary")
                     with gr.Row():
                         chart_switch = gr.Checkbox(label="Chart OCR", value=True)
                         unwarp_switch = gr.Checkbox(label="Unwarping", value=False)
@@ -290,23 +283,27 @@ with gr.Blocks() as demo:
                         with gr.Tab("📝 Markdown View"):
                             md_preview_doc = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output-box")
                         with gr.Tab("🖼️ Visual Results"):
-                            vis_image_doc = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Waiting for results...</div>')
                         with gr.Tab("📜 Raw Source"):
                             md_raw_doc = gr.Code(language="markdown")
-            file_doc.change(update_preview_visibility, file_doc, preview_doc_html)
             def parse_doc_wrapper(fp, ch, uw):
                 res_preview, res_raw, res_vis, res_json = run_inference(fp, task_type="Document")
                 return res_preview, res_vis, res_raw
-            btn_parse.click(parse_doc_wrapper, [file_doc, chart_switch, unwarp_switch], [md_preview_doc, vis_image_doc, md_raw_doc])
         with gr.Tab("🧩 Specific Recognition"):
             with gr.Row():
                 with gr.Column(scale=5):
-                    file_vl = gr.File(label="Upload Element", type="filepath")
-                    preview_vl_html = gr.HTML(visible=False)
                     with gr.Row():
                         btn_ocr = gr.Button("Text", variant="secondary")
                         btn_formula = gr.Button("Formula", variant="secondary")
@@ -319,36 +316,44 @@ with gr.Blocks() as demo:
                         with gr.Tab("📜 Source"):
                             md_raw_vl = gr.Code(language="markdown")
-            file_vl.change(update_preview_visibility, file_vl, preview_vl_html)
             def run_vl_wrapper(fp, prompt):
                 res_preview, res_raw, _, _ = run_inference(fp, task_type=prompt)
                 return res_preview, res_raw
             for btn, prompt in [(btn_ocr, "Text"), (btn_formula, "Formula"), (btn_table, "Table")]:
-                btn.click(run_vl_wrapper, [file_vl, gr.State(prompt)], [md_preview_vl, md_raw_vl])
         with gr.Tab("📍 Feature Spotting"):
             with gr.Row():
                 with gr.Column(scale=5):
-                    file_spot = gr.File(label="Target Image", type="filepath")
-                    preview_spot_html = gr.HTML(visible=False)
                     btn_run_spot = gr.Button("🎯 Run Spotting", variant="primary")
                 with gr.Column(scale=7):
                     with gr.Tabs():
                         with gr.Tab("🖼️ Detection"):
-                            vis_image_spot = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Visual detection here.</div>')
                         with gr.Tab("💾 JSON Feed"):
                             json_spot = gr.Code(label="JSON", language="json")
-            file_spot.change(update_preview_visibility, file_spot, preview_spot_html)
             def run_spotting_wrapper(fp):
                 _, _, vis, js = run_inference(fp, task_type="Spotting")
                 return vis, js
-            btn_run_spot.click(run_spotting_wrapper, file_spot, [vis_image_spot, json_spot])
     gr.Markdown("--- \n *Tachiwin Project: Indigenous Languages of Mexico.*")

         md = pat.sub(lambda m: m.group(0).replace(m.group(1), fix(m.group(1))), md)
     return md
+# Removed update_preview_visibility as gr.Image handles previews natively.
 # --- Inference Logic ---
         os.makedirs(run_output_dir, exist_ok=True)
         for i, res in enumerate(output):
+            logger.info(f"Processing segment {i+1}...")
+            # Save results
             res.save_to_json(save_path=run_output_dir)
             res.save_to_markdown(save_path=run_output_dir)
             res.print()
+            # Read back generated files from this segment's save
+            # Paddle naming: res_{i}.md, res_{i}.json, etc.
             fnames = os.listdir(run_output_dir)
             for fname in fnames:
                 fpath = os.path.join(run_output_dir, fname)
                 if fname.endswith(".md"):
                     with open(fpath, 'r', encoding='utf-8') as f:
+                        content = f.read()
+                        if content not in md_content: # Avoid duplicates if listdir is messy
+                            md_content += content + "\n\n"
                 elif fname.endswith(".json"):
                     with open(fpath, 'r', encoding='utf-8') as f:
+                        content = f.read()
+                        if content not in json_content:
+                            json_content += content + "\n\n"
+                elif fname.endswith((".png", ".jpg", ".jpeg")) and ("res" in fname or "vis" in fname):
                     vis_src = image_to_base64_data_url(fpath)
                     vis_html += f'<div style="margin-bottom:20px; border: 2px solid #10b981; border-radius: 12px; overflow: hidden; background:white;">'
                     vis_html += f'<img src="{vis_src}" alt="Vis {i+1}" style="width:100%;">'
         gr.Markdown(f"**⚡ Status:** {status_text} | **Model:** `{REPO_ID}` | **Hardware:** CPU")
     with gr.Tabs():
+        # Document Parsing Tab
         with gr.Tab("📄 Full Document Parsing"):
             with gr.Row():
                 with gr.Column(scale=5):
+                    file_doc = gr.Image(label="Upload Image", type="filepath")
+                    btn_parse = gr.Button("🔍 Start Parsing", variant="primary")
                     with gr.Row():
                         chart_switch = gr.Checkbox(label="Chart OCR", value=True)
                         unwarp_switch = gr.Checkbox(label="Unwarping", value=False)
                         with gr.Tab("📝 Markdown View"):
                             md_preview_doc = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output-box")
                         with gr.Tab("🖼️ Visual Results"):
+                            vis_image_doc = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Results will appear here.</div>')
                         with gr.Tab("📜 Raw Source"):
                             md_raw_doc = gr.Code(language="markdown")
             def parse_doc_wrapper(fp, ch, uw):
+                if not fp: return "⚠️ Please upload an image.", "", ""
                 res_preview, res_raw, res_vis, res_json = run_inference(fp, task_type="Document")
                 return res_preview, res_vis, res_raw
+            btn_parse.click(
+                parse_doc_wrapper,
+                [file_doc, chart_switch, unwarp_switch],
+                [md_preview_doc, vis_image_doc, md_raw_doc],
+                show_progress="full"
+            )
+        # Element Recognition Tab
         with gr.Tab("🧩 Specific Recognition"):
             with gr.Row():
                 with gr.Column(scale=5):
+                    file_vl = gr.Image(label="Upload Element", type="filepath")
                     with gr.Row():
                         btn_ocr = gr.Button("Text", variant="secondary")
                         btn_formula = gr.Button("Formula", variant="secondary")
                         with gr.Tab("📜 Source"):
                             md_raw_vl = gr.Code(language="markdown")
             def run_vl_wrapper(fp, prompt):
+                if not fp: return "⚠️ Please upload an image.", ""
                 res_preview, res_raw, _, _ = run_inference(fp, task_type=prompt)
                 return res_preview, res_raw
             for btn, prompt in [(btn_ocr, "Text"), (btn_formula, "Formula"), (btn_table, "Table")]:
+                btn.click(
+                    run_vl_wrapper,
+                    [file_vl, gr.State(prompt)],
+                    [md_preview_vl, md_raw_vl],
+                    show_progress="full"
+                )
+        # Spotting Tab
         with gr.Tab("📍 Feature Spotting"):
             with gr.Row():
                 with gr.Column(scale=5):
+                    file_spot = gr.Image(label="Target Image", type="filepath")
                     btn_run_spot = gr.Button("🎯 Run Spotting", variant="primary")
                 with gr.Column(scale=7):
                     with gr.Tabs():
                         with gr.Tab("🖼️ Detection"):
+                            vis_image_spot = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Bboxes view.</div>')
                         with gr.Tab("💾 JSON Feed"):
                             json_spot = gr.Code(label="JSON", language="json")
             def run_spotting_wrapper(fp):
+                if not fp: return "", ""
                 _, _, vis, js = run_inference(fp, task_type="Spotting")
                 return vis, js
+            btn_run_spot.click(
+                run_spotting_wrapper,
+                file_spot,
+                [vis_image_spot, json_spot],
+                show_progress="full"
+            )
     gr.Markdown("--- \n *Tachiwin Project: Indigenous Languages of Mexico.*")