Spaces:
Running on Zero
Running on Zero
Improve faculty-facing region selection guidance in UI
Browse files
app.py
CHANGED
|
@@ -915,19 +915,29 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 915 |
input_img = gr.Image(label="Input Image", type="pil", height=300)
|
| 916 |
page_selector = gr.Number(label="Select Page", value=1, minimum=1, step=1, visible=False)
|
| 917 |
task = gr.Dropdown(list(TASK_PROMPTS.keys()), value="📋 Markdown", label="Task")
|
|
|
|
| 918 |
equation_zoom = gr.Checkbox(label="Equation Zoom (multipass)", value=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 919 |
prompt = gr.Textbox(label="Prompt", lines=2, visible=False)
|
| 920 |
btn = gr.Button("Extract", variant="primary", size="lg")
|
| 921 |
-
with gr.Accordion("Region
|
| 922 |
if HAS_IMAGE_EDITOR:
|
| 923 |
region_editor = gr.ImageEditor(
|
| 924 |
-
label="Draw a
|
| 925 |
type="pil",
|
| 926 |
height=300,
|
| 927 |
)
|
| 928 |
region_btn = gr.Button("OCR Region", variant="secondary")
|
| 929 |
else:
|
| 930 |
gr.Markdown("Region drawing requires a newer Gradio version with `ImageEditor` support.")
|
|
|
|
| 931 |
|
| 932 |
with gr.Column(scale=2):
|
| 933 |
with gr.Tabs() as tabs:
|
|
@@ -977,13 +987,22 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 977 |
### Configuration
|
| 978 |
1024 base + 768 patches with dynamic cropping (2-6 patches). 144 tokens per patch + 256 base tokens.
|
| 979 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 980 |
### Tasks
|
| 981 |
- **Markdown**: Convert document to structured markdown with layout detection (grounding ✅)
|
| 982 |
- **Free OCR**: Read all visible text from the full page/image (no boxes, no targeting)
|
| 983 |
- **Locate**: Find and highlight where specific text appears (grounding ✅)
|
| 984 |
- **Describe**: General image description
|
| 985 |
- **Custom**: Your own prompt
|
| 986 |
-
- **Region OCR (new)**: In the left panel, open **Region
|
|
|
|
| 987 |
- **Equation Zoom (multipass)**: Optional nested equation refinement for Markdown. Off by default for speed/stability.
|
| 988 |
|
| 989 |
### Free OCR vs Locate (important)
|
|
@@ -1014,8 +1033,22 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1014 |
[text_out, md_out, html_out, html_source_out, spatial_out, spatial_source_out, raw_out, img_out, gallery, download_btn, region_text_out, region_html_out]
|
| 1015 |
)
|
| 1016 |
|
| 1017 |
-
def run(image, file_path, task, custom_prompt, page_num, enable_equation_zoom):
|
| 1018 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1019 |
cleaned, markdown, raw, img_out, crops = process_file(
|
| 1020 |
file_path,
|
| 1021 |
task,
|
|
@@ -1038,7 +1071,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1038 |
|
| 1039 |
submit_event = btn.click(
|
| 1040 |
run,
|
| 1041 |
-
[input_img, file_in, task, prompt, page_selector, equation_zoom],
|
| 1042 |
[text_out, md_out, html_out, html_source_out, spatial_out, spatial_source_out, raw_out, img_out, gallery, download_btn, region_text_out, region_html_out]
|
| 1043 |
)
|
| 1044 |
submit_event.then(select_boxes, [task], [tabs])
|
|
|
|
| 915 |
input_img = gr.Image(label="Input Image", type="pil", height=300)
|
| 916 |
page_selector = gr.Number(label="Select Page", value=1, minimum=1, step=1, visible=False)
|
| 917 |
task = gr.Dropdown(list(TASK_PROMPTS.keys()), value="📋 Markdown", label="Task")
|
| 918 |
+
input_scope = gr.Radio(["Entire Page", "Selected Region"], value="Entire Page", label="Input Scope")
|
| 919 |
equation_zoom = gr.Checkbox(label="Equation Zoom (multipass)", value=False)
|
| 920 |
+
gr.Markdown(
|
| 921 |
+
"""
|
| 922 |
+
**Quick use**
|
| 923 |
+
1. `Entire Page`: click **Extract**.
|
| 924 |
+
2. `Selected Region`: open **Region Selector**, draw a box around the target (no painting), crop, then click **Extract**.
|
| 925 |
+
3. Check **Cropped Images** to confirm the selected region used for OCR.
|
| 926 |
+
"""
|
| 927 |
+
)
|
| 928 |
prompt = gr.Textbox(label="Prompt", lines=2, visible=False)
|
| 929 |
btn = gr.Button("Extract", variant="primary", size="lg")
|
| 930 |
+
with gr.Accordion("Region Selector (Draw/Crop)", open=False):
|
| 931 |
if HAS_IMAGE_EDITOR:
|
| 932 |
region_editor = gr.ImageEditor(
|
| 933 |
+
label="Draw a rectangle around what you want (do not paint/fill), crop, then run Extract with Input Scope=Selected Region.",
|
| 934 |
type="pil",
|
| 935 |
height=300,
|
| 936 |
)
|
| 937 |
region_btn = gr.Button("OCR Region", variant="secondary")
|
| 938 |
else:
|
| 939 |
gr.Markdown("Region drawing requires a newer Gradio version with `ImageEditor` support.")
|
| 940 |
+
region_editor = gr.State(None)
|
| 941 |
|
| 942 |
with gr.Column(scale=2):
|
| 943 |
with gr.Tabs() as tabs:
|
|
|
|
| 987 |
### Configuration
|
| 988 |
1024 base + 768 patches with dynamic cropping (2-6 patches). 144 tokens per patch + 256 base tokens.
|
| 989 |
|
| 990 |
+
### Faculty Quick Workflow
|
| 991 |
+
1. Choose a task (`Markdown`, `Free OCR`, or `Locate`).
|
| 992 |
+
2. Choose **Input Scope**:
|
| 993 |
+
- `Entire Page` for the full page.
|
| 994 |
+
- `Selected Region` for a specific area.
|
| 995 |
+
3. For `Selected Region`, open **Region Selector (Draw/Crop)**, draw a box around the target (no painting/fill), crop, then click **Extract**.
|
| 996 |
+
4. Review **Cropped Images** to confirm the selected region used for OCR.
|
| 997 |
+
|
| 998 |
### Tasks
|
| 999 |
- **Markdown**: Convert document to structured markdown with layout detection (grounding ✅)
|
| 1000 |
- **Free OCR**: Read all visible text from the full page/image (no boxes, no targeting)
|
| 1001 |
- **Locate**: Find and highlight where specific text appears (grounding ✅)
|
| 1002 |
- **Describe**: General image description
|
| 1003 |
- **Custom**: Your own prompt
|
| 1004 |
+
- **Region OCR (new)**: In the left panel, open **Region Selector (Draw/Crop)**, draw/crop a target area, then click **OCR Region** (or set Input Scope to Selected Region and click Extract)
|
| 1005 |
+
- **Input Scope**: `Entire Page` or `Selected Region` (Selected Region uses the Region Selector crop as main input)
|
| 1006 |
- **Equation Zoom (multipass)**: Optional nested equation refinement for Markdown. Off by default for speed/stability.
|
| 1007 |
|
| 1008 |
### Free OCR vs Locate (important)
|
|
|
|
| 1033 |
[text_out, md_out, html_out, html_source_out, spatial_out, spatial_source_out, raw_out, img_out, gallery, download_btn, region_text_out, region_html_out]
|
| 1034 |
)
|
| 1035 |
|
| 1036 |
+
def run(image, file_path, task, custom_prompt, page_num, enable_equation_zoom, scope, region_value):
|
| 1037 |
+
selected_region = None
|
| 1038 |
+
if scope == "Selected Region":
|
| 1039 |
+
selected_region = _extract_editor_image(region_value)
|
| 1040 |
+
if selected_region is None:
|
| 1041 |
+
msg = "Select Input Scope=Selected Region, then draw/crop in Region Selector first."
|
| 1042 |
+
return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False), msg, "")
|
| 1043 |
+
cleaned, markdown, raw, img_out, crops = process_image(
|
| 1044 |
+
selected_region,
|
| 1045 |
+
task,
|
| 1046 |
+
custom_prompt,
|
| 1047 |
+
enable_equation_zoom=enable_equation_zoom,
|
| 1048 |
+
infer_crop_mode=False,
|
| 1049 |
+
)
|
| 1050 |
+
crops = [selected_region] + (crops or [])
|
| 1051 |
+
elif file_path:
|
| 1052 |
cleaned, markdown, raw, img_out, crops = process_file(
|
| 1053 |
file_path,
|
| 1054 |
task,
|
|
|
|
| 1071 |
|
| 1072 |
submit_event = btn.click(
|
| 1073 |
run,
|
| 1074 |
+
[input_img, file_in, task, prompt, page_selector, equation_zoom, input_scope, region_editor],
|
| 1075 |
[text_out, md_out, html_out, html_source_out, spatial_out, spatial_source_out, raw_out, img_out, gallery, download_btn, region_text_out, region_html_out]
|
| 1076 |
)
|
| 1077 |
submit_event.then(select_boxes, [task], [tabs])
|