Spaces:
Running on Zero
Running on Zero
Simplify region workflow: visible selector and single Extract path
Browse files
app.py
CHANGED
|
@@ -782,26 +782,6 @@ def _extract_editor_image(editor_value):
|
|
| 782 |
return background
|
| 783 |
return None
|
| 784 |
|
| 785 |
-
def _dedupe_consecutive_lines(text: str) -> str:
|
| 786 |
-
if not text:
|
| 787 |
-
return text
|
| 788 |
-
out = []
|
| 789 |
-
prev = None
|
| 790 |
-
blank_count = 0
|
| 791 |
-
for line in text.splitlines():
|
| 792 |
-
if not line.strip():
|
| 793 |
-
blank_count += 1
|
| 794 |
-
if blank_count <= 2:
|
| 795 |
-
out.append("")
|
| 796 |
-
continue
|
| 797 |
-
blank_count = 0
|
| 798 |
-
norm = re.sub(r'\s+', ' ', line).strip()
|
| 799 |
-
if norm and norm == prev:
|
| 800 |
-
continue
|
| 801 |
-
out.append(line)
|
| 802 |
-
prev = norm
|
| 803 |
-
return "\n".join(out).strip()
|
| 804 |
-
|
| 805 |
def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
|
| 806 |
text_display = re.sub(
|
| 807 |
r'\\\[(.+?)\\\]',
|
|
@@ -830,30 +810,8 @@ def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
|
|
| 830 |
img_out,
|
| 831 |
gallery_items,
|
| 832 |
gr.DownloadButton(value=dl_tmp.name, visible=True),
|
| 833 |
-
text_display,
|
| 834 |
-
markdown_html,
|
| 835 |
)
|
| 836 |
|
| 837 |
-
def run_region(editor_value, task, custom_prompt, enable_equation_zoom):
|
| 838 |
-
image = _extract_editor_image(editor_value)
|
| 839 |
-
if image is None:
|
| 840 |
-
msg = "Draw/crop a region first, then click OCR Region."
|
| 841 |
-
return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False), msg, "")
|
| 842 |
-
|
| 843 |
-
cleaned, markdown, raw, img_out, crops = process_image(
|
| 844 |
-
image,
|
| 845 |
-
task,
|
| 846 |
-
custom_prompt,
|
| 847 |
-
enable_equation_zoom=enable_equation_zoom,
|
| 848 |
-
infer_crop_mode=False,
|
| 849 |
-
)
|
| 850 |
-
|
| 851 |
-
# Region workflows are single-area; collapse obvious duplicate lines.
|
| 852 |
-
cleaned = _dedupe_consecutive_lines(cleaned)
|
| 853 |
-
markdown = _dedupe_consecutive_lines(markdown)
|
| 854 |
-
gallery_items = [image] + (crops or [])
|
| 855 |
-
return _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items)
|
| 856 |
-
|
| 857 |
def toggle_prompt(task):
|
| 858 |
if task == "✏️ Custom":
|
| 859 |
return gr.update(visible=True, label="Custom Prompt", placeholder="Add <|grounding|> for bounding boxes")
|
|
@@ -908,7 +866,6 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 908 |
""")
|
| 909 |
|
| 910 |
region_editor = None
|
| 911 |
-
region_btn = None
|
| 912 |
with gr.Row():
|
| 913 |
with gr.Column(scale=1):
|
| 914 |
file_in = gr.File(label="Upload Image or PDF", file_types=["image", ".pdf"], type="filepath")
|
|
@@ -921,23 +878,22 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 921 |
"""
|
| 922 |
**Quick use**
|
| 923 |
1. `Entire Page`: click **Extract**.
|
| 924 |
-
2. `Selected Region`:
|
| 925 |
3. Check **Cropped Images** to confirm the selected region used for OCR.
|
| 926 |
"""
|
| 927 |
)
|
| 928 |
prompt = gr.Textbox(label="Prompt", lines=2, visible=False)
|
| 929 |
btn = gr.Button("Extract", variant="primary", size="lg")
|
| 930 |
-
|
| 931 |
-
|
| 932 |
-
|
| 933 |
-
|
| 934 |
-
|
| 935 |
-
|
| 936 |
-
|
| 937 |
-
|
| 938 |
-
|
| 939 |
-
|
| 940 |
-
region_editor = gr.State(None)
|
| 941 |
|
| 942 |
with gr.Column(scale=2):
|
| 943 |
with gr.Tabs() as tabs:
|
|
@@ -957,9 +913,6 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 957 |
gallery = gr.Gallery(show_label=False, columns=3, height=400)
|
| 958 |
with gr.Tab("Raw Text", id="tab_raw"):
|
| 959 |
raw_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
|
| 960 |
-
with gr.Tab("Region OCR", id="tab_region"):
|
| 961 |
-
region_text_out = gr.Textbox(lines=12, buttons=["copy"], label="Region OCR Text")
|
| 962 |
-
region_html_out = gr.HTML("")
|
| 963 |
download_btn = gr.DownloadButton("Download Markdown", visible=False, variant="secondary")
|
| 964 |
|
| 965 |
with gr.Accordion("Image Examples", open=True):
|
|
@@ -1001,7 +954,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1001 |
- **Locate**: Find and highlight where specific text appears (grounding ✅)
|
| 1002 |
- **Describe**: General image description
|
| 1003 |
- **Custom**: Your own prompt
|
| 1004 |
-
- **Region
|
| 1005 |
- **Input Scope**: `Entire Page` or `Selected Region` (Selected Region uses the Region Selector crop as main input)
|
| 1006 |
- **Equation Zoom (multipass)**: Optional nested equation refinement for Markdown. Off by default for speed/stability.
|
| 1007 |
|
|
@@ -1023,15 +976,10 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1023 |
page_selector.change(load_image, [file_in, page_selector], [input_img])
|
| 1024 |
task.change(toggle_prompt, [task], [prompt])
|
| 1025 |
task.change(select_boxes, [task], [tabs])
|
| 1026 |
-
if HAS_IMAGE_EDITOR and region_editor is not None and region_btn is not None:
|
| 1027 |
file_in.change(load_image, [file_in, page_selector], [region_editor])
|
| 1028 |
page_selector.change(load_image, [file_in, page_selector], [region_editor])
|
| 1029 |
input_img.change(lambda img: img, [input_img], [region_editor])
|
| 1030 |
-
region_btn.click(
|
| 1031 |
-
run_region,
|
| 1032 |
-
[region_editor, task, prompt, equation_zoom],
|
| 1033 |
-
[text_out, md_out, html_out, html_source_out, spatial_out, spatial_source_out, raw_out, img_out, gallery, download_btn, region_text_out, region_html_out]
|
| 1034 |
-
)
|
| 1035 |
|
| 1036 |
def run(image, file_path, task, custom_prompt, page_num, enable_equation_zoom, scope, region_value):
|
| 1037 |
selected_region = None
|
|
@@ -1039,7 +987,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1039 |
selected_region = _extract_editor_image(region_value)
|
| 1040 |
if selected_region is None:
|
| 1041 |
msg = "Select Input Scope=Selected Region, then draw/crop in Region Selector first."
|
| 1042 |
-
return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False), msg, "")
|
| 1043 |
cleaned, markdown, raw, img_out, crops = process_image(
|
| 1044 |
selected_region,
|
| 1045 |
task,
|
|
@@ -1065,14 +1013,14 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1065 |
)
|
| 1066 |
else:
|
| 1067 |
msg = "Error: Upload a file or image"
|
| 1068 |
-
return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False), msg, "")
|
| 1069 |
|
| 1070 |
return _compose_ui_outputs(cleaned, markdown, raw, img_out, crops)
|
| 1071 |
|
| 1072 |
submit_event = btn.click(
|
| 1073 |
run,
|
| 1074 |
[input_img, file_in, task, prompt, page_selector, equation_zoom, input_scope, region_editor],
|
| 1075 |
-
[text_out, md_out, html_out, html_source_out, spatial_out, spatial_source_out, raw_out, img_out, gallery, download_btn, region_text_out, region_html_out]
|
| 1076 |
)
|
| 1077 |
submit_event.then(select_boxes, [task], [tabs])
|
| 1078 |
|
|
|
|
| 782 |
return background
|
| 783 |
return None
|
| 784 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 785 |
def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
|
| 786 |
text_display = re.sub(
|
| 787 |
r'\\\[(.+?)\\\]',
|
|
|
|
| 810 |
img_out,
|
| 811 |
gallery_items,
|
| 812 |
gr.DownloadButton(value=dl_tmp.name, visible=True),
|
|
|
|
|
|
|
| 813 |
)
|
| 814 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 815 |
def toggle_prompt(task):
|
| 816 |
if task == "✏️ Custom":
|
| 817 |
return gr.update(visible=True, label="Custom Prompt", placeholder="Add <|grounding|> for bounding boxes")
|
|
|
|
| 866 |
""")
|
| 867 |
|
| 868 |
region_editor = None
|
|
|
|
| 869 |
with gr.Row():
|
| 870 |
with gr.Column(scale=1):
|
| 871 |
file_in = gr.File(label="Upload Image or PDF", file_types=["image", ".pdf"], type="filepath")
|
|
|
|
| 878 |
"""
|
| 879 |
**Quick use**
|
| 880 |
1. `Entire Page`: click **Extract**.
|
| 881 |
+
2. `Selected Region`: use the Region Selector below, draw a box around the target (no painting), crop, then click **Extract**.
|
| 882 |
3. Check **Cropped Images** to confirm the selected region used for OCR.
|
| 883 |
"""
|
| 884 |
)
|
| 885 |
prompt = gr.Textbox(label="Prompt", lines=2, visible=False)
|
| 886 |
btn = gr.Button("Extract", variant="primary", size="lg")
|
| 887 |
+
gr.Markdown("**Region Selector (for Selected Region scope)**")
|
| 888 |
+
if HAS_IMAGE_EDITOR:
|
| 889 |
+
region_editor = gr.ImageEditor(
|
| 890 |
+
label="Draw a rectangle around what you want (do not paint/fill), crop, then run Extract with Input Scope=Selected Region.",
|
| 891 |
+
type="pil",
|
| 892 |
+
height=300,
|
| 893 |
+
)
|
| 894 |
+
else:
|
| 895 |
+
gr.Markdown("Region drawing requires a newer Gradio version with `ImageEditor` support.")
|
| 896 |
+
region_editor = gr.State(None)
|
|
|
|
| 897 |
|
| 898 |
with gr.Column(scale=2):
|
| 899 |
with gr.Tabs() as tabs:
|
|
|
|
| 913 |
gallery = gr.Gallery(show_label=False, columns=3, height=400)
|
| 914 |
with gr.Tab("Raw Text", id="tab_raw"):
|
| 915 |
raw_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
|
|
|
|
|
|
|
|
|
|
| 916 |
download_btn = gr.DownloadButton("Download Markdown", visible=False, variant="secondary")
|
| 917 |
|
| 918 |
with gr.Accordion("Image Examples", open=True):
|
|
|
|
| 954 |
- **Locate**: Find and highlight where specific text appears (grounding ✅)
|
| 955 |
- **Describe**: General image description
|
| 956 |
- **Custom**: Your own prompt
|
| 957 |
+
- **Region selection**: Use **Input Scope=Selected Region**, draw/crop in Region Selector, then click **Extract**
|
| 958 |
- **Input Scope**: `Entire Page` or `Selected Region` (Selected Region uses the Region Selector crop as main input)
|
| 959 |
- **Equation Zoom (multipass)**: Optional nested equation refinement for Markdown. Off by default for speed/stability.
|
| 960 |
|
|
|
|
| 976 |
page_selector.change(load_image, [file_in, page_selector], [input_img])
|
| 977 |
task.change(toggle_prompt, [task], [prompt])
|
| 978 |
task.change(select_boxes, [task], [tabs])
|
| 979 |
+
if HAS_IMAGE_EDITOR and region_editor is not None:
|
| 980 |
file_in.change(load_image, [file_in, page_selector], [region_editor])
|
| 981 |
page_selector.change(load_image, [file_in, page_selector], [region_editor])
|
| 982 |
input_img.change(lambda img: img, [input_img], [region_editor])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 983 |
|
| 984 |
def run(image, file_path, task, custom_prompt, page_num, enable_equation_zoom, scope, region_value):
|
| 985 |
selected_region = None
|
|
|
|
| 987 |
selected_region = _extract_editor_image(region_value)
|
| 988 |
if selected_region is None:
|
| 989 |
msg = "Select Input Scope=Selected Region, then draw/crop in Region Selector first."
|
| 990 |
+
return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False))
|
| 991 |
cleaned, markdown, raw, img_out, crops = process_image(
|
| 992 |
selected_region,
|
| 993 |
task,
|
|
|
|
| 1013 |
)
|
| 1014 |
else:
|
| 1015 |
msg = "Error: Upload a file or image"
|
| 1016 |
+
return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False))
|
| 1017 |
|
| 1018 |
return _compose_ui_outputs(cleaned, markdown, raw, img_out, crops)
|
| 1019 |
|
| 1020 |
submit_event = btn.click(
|
| 1021 |
run,
|
| 1022 |
[input_img, file_in, task, prompt, page_selector, equation_zoom, input_scope, region_editor],
|
| 1023 |
+
[text_out, md_out, html_out, html_source_out, spatial_out, spatial_source_out, raw_out, img_out, gallery, download_btn]
|
| 1024 |
)
|
| 1025 |
submit_event.then(select_boxes, [task], [tabs])
|
| 1026 |
|