Spaces:

ricklon
/

DeepSeek-OCR-2-Math

Running on Zero

App Files Community

ricklon committed 7 days ago

Commit

7b8199f

1 Parent(s): 152c5bd

Simplify region workflow: visible selector and single Extract path

Browse files

Files changed (1) hide show

app.py +16 -68

app.py CHANGED Viewed

@@ -782,26 +782,6 @@ def _extract_editor_image(editor_value):
             return background
     return None
-def _dedupe_consecutive_lines(text: str) -> str:
-    if not text:
-        return text
-    out = []
-    prev = None
-    blank_count = 0
-    for line in text.splitlines():
-        if not line.strip():
-            blank_count += 1
-            if blank_count <= 2:
-                out.append("")
-            continue
-        blank_count = 0
-        norm = re.sub(r'\s+', ' ', line).strip()
-        if norm and norm == prev:
-            continue
-        out.append(line)
-        prev = norm
-    return "\n".join(out).strip()
 def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
     text_display = re.sub(
         r'\\\[(.+?)\\\]',
@@ -830,30 +810,8 @@ def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
         img_out,
         gallery_items,
         gr.DownloadButton(value=dl_tmp.name, visible=True),
-        text_display,
-        markdown_html,
     )
-def run_region(editor_value, task, custom_prompt, enable_equation_zoom):
-    image = _extract_editor_image(editor_value)
-    if image is None:
-        msg = "Draw/crop a region first, then click OCR Region."
-        return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False), msg, "")
-    cleaned, markdown, raw, img_out, crops = process_image(
-        image,
-        task,
-        custom_prompt,
-        enable_equation_zoom=enable_equation_zoom,
-        infer_crop_mode=False,
-    )
-    # Region workflows are single-area; collapse obvious duplicate lines.
-    cleaned = _dedupe_consecutive_lines(cleaned)
-    markdown = _dedupe_consecutive_lines(markdown)
-    gallery_items = [image] + (crops or [])
-    return _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items)
 def toggle_prompt(task):
     if task == "✏️ Custom":
         return gr.update(visible=True, label="Custom Prompt", placeholder="Add <|grounding|> for bounding boxes")
@@ -908,7 +866,6 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
     """)
     region_editor = None
-    region_btn = None
     with gr.Row():
         with gr.Column(scale=1):
             file_in = gr.File(label="Upload Image or PDF", file_types=["image", ".pdf"], type="filepath")
@@ -921,23 +878,22 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
                 """
                 **Quick use**
                 1. `Entire Page`: click **Extract**.
-                2. `Selected Region`: open **Region Selector**, draw a box around the target (no painting), crop, then click **Extract**.
                 3. Check **Cropped Images** to confirm the selected region used for OCR.
                 """
             )
             prompt = gr.Textbox(label="Prompt", lines=2, visible=False)
             btn = gr.Button("Extract", variant="primary", size="lg")
-            with gr.Accordion("Region Selector (Draw/Crop)", open=False):
-                if HAS_IMAGE_EDITOR:
-                    region_editor = gr.ImageEditor(
-                        label="Draw a rectangle around what you want (do not paint/fill), crop, then run Extract with Input Scope=Selected Region.",
-                        type="pil",
-                        height=300,
-                    )
-                    region_btn = gr.Button("OCR Region", variant="secondary")
-                else:
-                    gr.Markdown("Region drawing requires a newer Gradio version with `ImageEditor` support.")
-                    region_editor = gr.State(None)
         with gr.Column(scale=2):
             with gr.Tabs() as tabs:
@@ -957,9 +913,6 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
                     gallery = gr.Gallery(show_label=False, columns=3, height=400)
                 with gr.Tab("Raw Text", id="tab_raw"):
                     raw_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
-                with gr.Tab("Region OCR", id="tab_region"):
-                    region_text_out = gr.Textbox(lines=12, buttons=["copy"], label="Region OCR Text")
-                    region_html_out = gr.HTML("")
             download_btn = gr.DownloadButton("Download Markdown", visible=False, variant="secondary")
     with gr.Accordion("Image Examples", open=True):
@@ -1001,7 +954,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
         - **Locate**: Find and highlight where specific text appears (grounding ✅)
         - **Describe**: General image description
         - **Custom**: Your own prompt
-        - **Region OCR (new)**: In the left panel, open **Region Selector (Draw/Crop)**, draw/crop a target area, then click **OCR Region** (or set Input Scope to Selected Region and click Extract)
         - **Input Scope**: `Entire Page` or `Selected Region` (Selected Region uses the Region Selector crop as main input)
         - **Equation Zoom (multipass)**: Optional nested equation refinement for Markdown. Off by default for speed/stability.
@@ -1023,15 +976,10 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
     page_selector.change(load_image, [file_in, page_selector], [input_img])
     task.change(toggle_prompt, [task], [prompt])
     task.change(select_boxes, [task], [tabs])
-    if HAS_IMAGE_EDITOR and region_editor is not None and region_btn is not None:
         file_in.change(load_image, [file_in, page_selector], [region_editor])
         page_selector.change(load_image, [file_in, page_selector], [region_editor])
         input_img.change(lambda img: img, [input_img], [region_editor])
-        region_btn.click(
-            run_region,
-            [region_editor, task, prompt, equation_zoom],
-            [text_out, md_out, html_out, html_source_out, spatial_out, spatial_source_out, raw_out, img_out, gallery, download_btn, region_text_out, region_html_out]
-        )
     def run(image, file_path, task, custom_prompt, page_num, enable_equation_zoom, scope, region_value):
         selected_region = None
@@ -1039,7 +987,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
             selected_region = _extract_editor_image(region_value)
             if selected_region is None:
                 msg = "Select Input Scope=Selected Region, then draw/crop in Region Selector first."
-                return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False), msg, "")
             cleaned, markdown, raw, img_out, crops = process_image(
                 selected_region,
                 task,
@@ -1065,14 +1013,14 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
             )
         else:
             msg = "Error: Upload a file or image"
-            return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False), msg, "")
         return _compose_ui_outputs(cleaned, markdown, raw, img_out, crops)
     submit_event = btn.click(
         run,
         [input_img, file_in, task, prompt, page_selector, equation_zoom, input_scope, region_editor],
-        [text_out, md_out, html_out, html_source_out, spatial_out, spatial_source_out, raw_out, img_out, gallery, download_btn, region_text_out, region_html_out]
     )
     submit_event.then(select_boxes, [task], [tabs])

             return background
     return None
 def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
     text_display = re.sub(
         r'\\\[(.+?)\\\]',
         img_out,
         gallery_items,
         gr.DownloadButton(value=dl_tmp.name, visible=True),
     )
 def toggle_prompt(task):
     if task == "✏️ Custom":
         return gr.update(visible=True, label="Custom Prompt", placeholder="Add <|grounding|> for bounding boxes")
     """)
     region_editor = None
     with gr.Row():
         with gr.Column(scale=1):
             file_in = gr.File(label="Upload Image or PDF", file_types=["image", ".pdf"], type="filepath")
                 """
                 **Quick use**
                 1. `Entire Page`: click **Extract**.
+                2. `Selected Region`: use the Region Selector below, draw a box around the target (no painting), crop, then click **Extract**.
                 3. Check **Cropped Images** to confirm the selected region used for OCR.
                 """
             )
             prompt = gr.Textbox(label="Prompt", lines=2, visible=False)
             btn = gr.Button("Extract", variant="primary", size="lg")
+            gr.Markdown("**Region Selector (for Selected Region scope)**")
+            if HAS_IMAGE_EDITOR:
+                region_editor = gr.ImageEditor(
+                    label="Draw a rectangle around what you want (do not paint/fill), crop, then run Extract with Input Scope=Selected Region.",
+                    type="pil",
+                    height=300,
+                )
+            else:
+                gr.Markdown("Region drawing requires a newer Gradio version with `ImageEditor` support.")
+                region_editor = gr.State(None)
         with gr.Column(scale=2):
             with gr.Tabs() as tabs:
                     gallery = gr.Gallery(show_label=False, columns=3, height=400)
                 with gr.Tab("Raw Text", id="tab_raw"):
                     raw_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
             download_btn = gr.DownloadButton("Download Markdown", visible=False, variant="secondary")
     with gr.Accordion("Image Examples", open=True):
         - **Locate**: Find and highlight where specific text appears (grounding ✅)
         - **Describe**: General image description
         - **Custom**: Your own prompt
+        - **Region selection**: Use **Input Scope=Selected Region**, draw/crop in Region Selector, then click **Extract**
         - **Input Scope**: `Entire Page` or `Selected Region` (Selected Region uses the Region Selector crop as main input)
         - **Equation Zoom (multipass)**: Optional nested equation refinement for Markdown. Off by default for speed/stability.
     page_selector.change(load_image, [file_in, page_selector], [input_img])
     task.change(toggle_prompt, [task], [prompt])
     task.change(select_boxes, [task], [tabs])
+    if HAS_IMAGE_EDITOR and region_editor is not None:
         file_in.change(load_image, [file_in, page_selector], [region_editor])
         page_selector.change(load_image, [file_in, page_selector], [region_editor])
         input_img.change(lambda img: img, [input_img], [region_editor])
     def run(image, file_path, task, custom_prompt, page_num, enable_equation_zoom, scope, region_value):
         selected_region = None
             selected_region = _extract_editor_image(region_value)
             if selected_region is None:
                 msg = "Select Input Scope=Selected Region, then draw/crop in Region Selector first."
+                return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False))
             cleaned, markdown, raw, img_out, crops = process_image(
                 selected_region,
                 task,
             )
         else:
             msg = "Error: Upload a file or image"
+            return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False))
         return _compose_ui_outputs(cleaned, markdown, raw, img_out, crops)
     submit_event = btn.click(
         run,
         [input_img, file_in, task, prompt, page_selector, equation_zoom, input_scope, region_editor],
+        [text_out, md_out, html_out, html_source_out, spatial_out, spatial_source_out, raw_out, img_out, gallery, download_btn]
     )
     submit_event.then(select_boxes, [task], [tabs])