Spaces:

ricklon
/

DeepSeek-OCR-2-Math

Running on Zero

App Files Files Community

ricklon committed 7 days ago

Commit

fce2f1f

1 Parent(s): bc8cf96

Refactor UI into workflow-first layout with larger workspace

Browse files

Files changed (1) hide show

app.py +54 -14

app.py CHANGED Viewed

@@ -1113,6 +1113,31 @@ def select_boxes(task):
         return gr.update(selected="tab_boxes")
     return gr.update()
 def get_pdf_page_count(file_path):
     if not file_path or not file_path.lower().endswith('.pdf'):
         return 1
@@ -1172,10 +1197,16 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
     workspace_base_size = gr.State(None)
     workspace_base_image = gr.State(None)
     selected_regions_state = gr.State([])
     with gr.Row():
-        with gr.Column(scale=1):
             file_in = gr.File(label="Upload Image or PDF", file_types=["image", ".pdf"], type="filepath")
             page_selector = gr.Number(label="Select Page", value=1, minimum=1, step=1, visible=False)
             gr.Markdown("**Image Workspace (full page + region selection)**")
             if HAS_IMAGE_EDITOR:
                 editor_kwargs = {}
@@ -1203,35 +1234,42 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
                 region_editor = gr.ImageEditor(
                     label="Main image workspace. Recommended: freehand/highlight the target area, then click Add Region. (Crop tool for rectangles is optional.)",
                     type="pil",
-                    height=300,
                     **editor_kwargs,
                 )
             else:
                 gr.Markdown("Region drawing requires a newer Gradio version with `ImageEditor` support.")
                 region_editor = gr.State(None)
             input_scope = gr.Radio(["Entire Page", "Selected Region"], value="Entire Page", label="Input Scope")
-            selection_controls = gr.Row()
             with selection_controls:
                 add_region_btn = gr.Button("Add Region", variant="secondary")
                 clear_regions_btn = gr.Button("Clear Regions")
-            selection_status = gr.Textbox(label="Region Selection Status", value="No saved regions.", interactive=False)
-            selected_regions_gallery = gr.Gallery(label="Selected Regions", show_label=True, columns=3, height=170)
-            task = gr.Dropdown(list(TASK_PROMPTS.keys()), value="📋 Markdown", label="Task")
-            equation_zoom = gr.Checkbox(label="Equation Zoom (multipass)", value=False)
-            separate_eq_lines = gr.Checkbox(label="Detect Equation Lines Separately", value=False)
             prompt = gr.Textbox(label="Prompt", lines=2, visible=False)
             btn = gr.Button("Extract", variant="primary", size="lg")
-        with gr.Column(scale=2):
             with gr.Tabs() as tabs:
                 with gr.Tab("Text", id="tab_text"):
                     text_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
                 with gr.Tab("Markdown Preview", id="tab_markdown"):
                     md_out = gr.HTML("")
                 with gr.Tab("Boxes", id="tab_boxes"):
-                    img_out = gr.Image(type="pil", height=500, show_label=False)
                 with gr.Tab("Cropped Images", id="tab_crops"):
-                    gallery = gr.Gallery(show_label=False, columns=3, height=400)
                 with gr.Tab("Raw Text", id="tab_raw"):
                     raw_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
             download_btn = gr.DownloadButton("Download Markdown", visible=False, variant="secondary")
@@ -1272,7 +1310,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
         1024 base + 768 patches with dynamic cropping (2-6 patches). 144 tokens per patch + 256 base tokens.
         ### Faculty Quick Workflow
-        1. Choose a task (`Markdown`, `Free OCR`, or `Locate`).
         2. Choose **Input Scope**:
            - `Entire Page` for the full page.
            - `Selected Region` for a specific area.
@@ -1284,6 +1322,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
            Then click **Extract**.
         4. Use **Clear Regions** to reset multi-select state.
         5. Review **Cropped Images** and **Boxes**: both are labeled `Region 1`, `Region 2`, etc.
         ### Tasks
         - **Markdown**: Convert document to structured markdown with layout detection (grounding ✅)
@@ -1312,6 +1351,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
     file_in.change(update_page_selector, [file_in], [page_selector])
     task.change(toggle_prompt, [task], [prompt])
     task.change(select_boxes, [task], [tabs])
     if HAS_IMAGE_EDITOR and region_editor is not None:
         file_in.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
         page_selector.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
@@ -1398,7 +1438,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
         [file_in, task, prompt, page_selector, equation_zoom, separate_eq_lines, input_scope, region_editor, workspace_base_size, workspace_base_image, selected_regions_state],
         [text_out, md_out, raw_out, img_out, gallery, download_btn]
     )
-    submit_event.then(select_boxes, [task], [tabs])
 if __name__ == "__main__":
     # server_name="0.0.0.0" is needed locally (WSL2 → Windows access)

         return gr.update(selected="tab_boxes")
     return gr.update()
+def toggle_scope_ui(scope):  # Build four gr.update payloads (hint text + three visibility flags) for the chosen Input Scope.
+    if scope == "Selected Region":  # region mode: reveal the region-selection widgets
+        hint = (
+            "**Selected Region mode:** Draw/highlight on the workspace, click **Add Region** "
+            "for each target area, then click **Extract**."
+        )
+        return (  # slots line up with outputs [workspace_hint, selection_controls, selection_status, selected_regions_gallery]
+            gr.update(value=hint),
+            gr.update(visible=True),
+            gr.update(visible=True),
+            gr.update(visible=True),
+        )
+    hint = "**Entire Page mode:** No drawing needed. Click **Extract** to process the full page."  # any other scope value falls through to here
+    return (  # same four slots, with the three region widgets hidden
+        gr.update(value=hint),
+        gr.update(visible=False),
+        gr.update(visible=False),
+        gr.update(visible=False),
+    )
+def select_post_extract_tab(task, scope):  # Pick the result tab to select; chained after submit_event with [tabs] as its output.
+    if scope == "Selected Region" or task == "📍 Locate":  # region runs and the Locate task open the "Boxes" tab
+        return gr.update(selected="tab_boxes")
+    return gr.update(selected="tab_text")  # every other combination selects the "Text" tab
 def get_pdf_page_count(file_path):
     if not file_path or not file_path.lower().endswith('.pdf'):
         return 1
     workspace_base_size = gr.State(None)
     workspace_base_image = gr.State(None)
     selected_regions_state = gr.State([])
     with gr.Row():
+        with gr.Column(scale=3):
             file_in = gr.File(label="Upload Image or PDF", file_types=["image", ".pdf"], type="filepath")
+        with gr.Column(scale=1):
             page_selector = gr.Number(label="Select Page", value=1, minimum=1, step=1, visible=False)
+    with gr.Row():
+        with gr.Column(scale=3):
+            workspace_hint = gr.Markdown("**Entire Page mode:** No drawing needed. Click **Extract** to process the full page.")
             gr.Markdown("**Image Workspace (full page + region selection)**")
             if HAS_IMAGE_EDITOR:
                 editor_kwargs = {}
                 region_editor = gr.ImageEditor(
                     label="Main image workspace. Recommended: freehand/highlight the target area, then click Add Region. (Crop tool for rectangles is optional.)",
                     type="pil",
+                    height=640,
                     **editor_kwargs,
                 )
             else:
                 gr.Markdown("Region drawing requires a newer Gradio version with `ImageEditor` support.")
                 region_editor = gr.State(None)
+        with gr.Column(scale=1):
+            gr.Markdown("### OCR Workflow")
+            task = gr.Dropdown(list(TASK_PROMPTS.keys()), value="📋 Markdown", label="Task")
             input_scope = gr.Radio(["Entire Page", "Selected Region"], value="Entire Page", label="Input Scope")
+            selection_controls = gr.Row(visible=False)
             with selection_controls:
                 add_region_btn = gr.Button("Add Region", variant="secondary")
                 clear_regions_btn = gr.Button("Clear Regions")
+            selection_status = gr.Textbox(label="Region Selection Status", value="No saved regions.", interactive=False, visible=False)
+            selected_regions_gallery = gr.Gallery(label="Selected Regions", show_label=True, columns=2, height=190, visible=False)
+            with gr.Accordion("Advanced Options", open=False):
+                equation_zoom = gr.Checkbox(label="Equation Zoom (multipass)", value=False)
+                separate_eq_lines = gr.Checkbox(label="Detect Equation Lines Separately", value=False)
             prompt = gr.Textbox(label="Prompt", lines=2, visible=False)
             btn = gr.Button("Extract", variant="primary", size="lg")
+    with gr.Row():
+        with gr.Column(scale=1):
             with gr.Tabs() as tabs:
                 with gr.Tab("Text", id="tab_text"):
                     text_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
                 with gr.Tab("Markdown Preview", id="tab_markdown"):
                     md_out = gr.HTML("")
                 with gr.Tab("Boxes", id="tab_boxes"):
+                    img_out = gr.Image(type="pil", height=560, show_label=False)
                 with gr.Tab("Cropped Images", id="tab_crops"):
+                    gallery = gr.Gallery(show_label=False, columns=3, height=420)
                 with gr.Tab("Raw Text", id="tab_raw"):
                     raw_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
             download_btn = gr.DownloadButton("Download Markdown", visible=False, variant="secondary")
         1024 base + 768 patches with dynamic cropping (2-6 patches). 144 tokens per patch + 256 base tokens.
         ### Faculty Quick Workflow
+        1. Upload a page/image, then confirm **Task**.
         2. Choose **Input Scope**:
            - `Entire Page` for the full page.
            - `Selected Region` for a specific area.
            Then click **Extract**.
         4. Use **Clear Regions** to reset multi-select state.
         5. Review **Cropped Images** and **Boxes**: both are labeled `Region 1`, `Region 2`, etc.
+        6. Use **Advanced Options** only when needed (Equation Zoom / line-by-line equation OCR).
         ### Tasks
         - **Markdown**: Convert document to structured markdown with layout detection (grounding ✅)
     file_in.change(update_page_selector, [file_in], [page_selector])
     task.change(toggle_prompt, [task], [prompt])
     task.change(select_boxes, [task], [tabs])
+    input_scope.change(toggle_scope_ui, [input_scope], [workspace_hint, selection_controls, selection_status, selected_regions_gallery])
     if HAS_IMAGE_EDITOR and region_editor is not None:
         file_in.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
         page_selector.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
         [file_in, task, prompt, page_selector, equation_zoom, separate_eq_lines, input_scope, region_editor, workspace_base_size, workspace_base_image, selected_regions_state],
         [text_out, md_out, raw_out, img_out, gallery, download_btn]
     )
+    submit_event.then(select_post_extract_tab, [task, input_scope], [tabs])
 if __name__ == "__main__":
     # server_name="0.0.0.0" is needed locally (WSL2 → Windows access)