ricklon committed on
Commit
7b8199f
·
1 Parent(s): 152c5bd

Simplify region workflow: visible selector and single Extract path

Browse files
Files changed (1) hide show
  1. app.py +16 -68
app.py CHANGED
@@ -782,26 +782,6 @@ def _extract_editor_image(editor_value):
782
  return background
783
  return None
784
 
785
- def _dedupe_consecutive_lines(text: str) -> str:
786
- if not text:
787
- return text
788
- out = []
789
- prev = None
790
- blank_count = 0
791
- for line in text.splitlines():
792
- if not line.strip():
793
- blank_count += 1
794
- if blank_count <= 2:
795
- out.append("")
796
- continue
797
- blank_count = 0
798
- norm = re.sub(r'\s+', ' ', line).strip()
799
- if norm and norm == prev:
800
- continue
801
- out.append(line)
802
- prev = norm
803
- return "\n".join(out).strip()
804
-
805
  def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
806
  text_display = re.sub(
807
  r'\\\[(.+?)\\\]',
@@ -830,30 +810,8 @@ def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
830
  img_out,
831
  gallery_items,
832
  gr.DownloadButton(value=dl_tmp.name, visible=True),
833
- text_display,
834
- markdown_html,
835
  )
836
 
837
- def run_region(editor_value, task, custom_prompt, enable_equation_zoom):
838
- image = _extract_editor_image(editor_value)
839
- if image is None:
840
- msg = "Draw/crop a region first, then click OCR Region."
841
- return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False), msg, "")
842
-
843
- cleaned, markdown, raw, img_out, crops = process_image(
844
- image,
845
- task,
846
- custom_prompt,
847
- enable_equation_zoom=enable_equation_zoom,
848
- infer_crop_mode=False,
849
- )
850
-
851
- # Region workflows are single-area; collapse obvious duplicate lines.
852
- cleaned = _dedupe_consecutive_lines(cleaned)
853
- markdown = _dedupe_consecutive_lines(markdown)
854
- gallery_items = [image] + (crops or [])
855
- return _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items)
856
-
857
  def toggle_prompt(task):
858
  if task == "✏️ Custom":
859
  return gr.update(visible=True, label="Custom Prompt", placeholder="Add <|grounding|> for bounding boxes")
@@ -908,7 +866,6 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
908
  """)
909
 
910
  region_editor = None
911
- region_btn = None
912
  with gr.Row():
913
  with gr.Column(scale=1):
914
  file_in = gr.File(label="Upload Image or PDF", file_types=["image", ".pdf"], type="filepath")
@@ -921,23 +878,22 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
921
  """
922
  **Quick use**
923
  1. `Entire Page`: click **Extract**.
924
- 2. `Selected Region`: open **Region Selector**, draw a box around the target (no painting), crop, then click **Extract**.
925
  3. Check **Cropped Images** to confirm the selected region used for OCR.
926
  """
927
  )
928
  prompt = gr.Textbox(label="Prompt", lines=2, visible=False)
929
  btn = gr.Button("Extract", variant="primary", size="lg")
930
- with gr.Accordion("Region Selector (Draw/Crop)", open=False):
931
- if HAS_IMAGE_EDITOR:
932
- region_editor = gr.ImageEditor(
933
- label="Draw a rectangle around what you want (do not paint/fill), crop, then run Extract with Input Scope=Selected Region.",
934
- type="pil",
935
- height=300,
936
- )
937
- region_btn = gr.Button("OCR Region", variant="secondary")
938
- else:
939
- gr.Markdown("Region drawing requires a newer Gradio version with `ImageEditor` support.")
940
- region_editor = gr.State(None)
941
 
942
  with gr.Column(scale=2):
943
  with gr.Tabs() as tabs:
@@ -957,9 +913,6 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
957
  gallery = gr.Gallery(show_label=False, columns=3, height=400)
958
  with gr.Tab("Raw Text", id="tab_raw"):
959
  raw_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
960
- with gr.Tab("Region OCR", id="tab_region"):
961
- region_text_out = gr.Textbox(lines=12, buttons=["copy"], label="Region OCR Text")
962
- region_html_out = gr.HTML("")
963
  download_btn = gr.DownloadButton("Download Markdown", visible=False, variant="secondary")
964
 
965
  with gr.Accordion("Image Examples", open=True):
@@ -1001,7 +954,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1001
  - **Locate**: Find and highlight where specific text appears (grounding ✅)
1002
  - **Describe**: General image description
1003
  - **Custom**: Your own prompt
1004
- - **Region OCR (new)**: In the left panel, open **Region Selector (Draw/Crop)**, draw/crop a target area, then click **OCR Region** (or set Input Scope to Selected Region and click Extract)
1005
  - **Input Scope**: `Entire Page` or `Selected Region` (Selected Region uses the Region Selector crop as main input)
1006
  - **Equation Zoom (multipass)**: Optional nested equation refinement for Markdown. Off by default for speed/stability.
1007
 
@@ -1023,15 +976,10 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1023
  page_selector.change(load_image, [file_in, page_selector], [input_img])
1024
  task.change(toggle_prompt, [task], [prompt])
1025
  task.change(select_boxes, [task], [tabs])
1026
- if HAS_IMAGE_EDITOR and region_editor is not None and region_btn is not None:
1027
  file_in.change(load_image, [file_in, page_selector], [region_editor])
1028
  page_selector.change(load_image, [file_in, page_selector], [region_editor])
1029
  input_img.change(lambda img: img, [input_img], [region_editor])
1030
- region_btn.click(
1031
- run_region,
1032
- [region_editor, task, prompt, equation_zoom],
1033
- [text_out, md_out, html_out, html_source_out, spatial_out, spatial_source_out, raw_out, img_out, gallery, download_btn, region_text_out, region_html_out]
1034
- )
1035
 
1036
  def run(image, file_path, task, custom_prompt, page_num, enable_equation_zoom, scope, region_value):
1037
  selected_region = None
@@ -1039,7 +987,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1039
  selected_region = _extract_editor_image(region_value)
1040
  if selected_region is None:
1041
  msg = "Select Input Scope=Selected Region, then draw/crop in Region Selector first."
1042
- return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False), msg, "")
1043
  cleaned, markdown, raw, img_out, crops = process_image(
1044
  selected_region,
1045
  task,
@@ -1065,14 +1013,14 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1065
  )
1066
  else:
1067
  msg = "Error: Upload a file or image"
1068
- return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False), msg, "")
1069
 
1070
  return _compose_ui_outputs(cleaned, markdown, raw, img_out, crops)
1071
 
1072
  submit_event = btn.click(
1073
  run,
1074
  [input_img, file_in, task, prompt, page_selector, equation_zoom, input_scope, region_editor],
1075
- [text_out, md_out, html_out, html_source_out, spatial_out, spatial_source_out, raw_out, img_out, gallery, download_btn, region_text_out, region_html_out]
1076
  )
1077
  submit_event.then(select_boxes, [task], [tabs])
1078
 
 
782
  return background
783
  return None
784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
785
  def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
786
  text_display = re.sub(
787
  r'\\\[(.+?)\\\]',
 
810
  img_out,
811
  gallery_items,
812
  gr.DownloadButton(value=dl_tmp.name, visible=True),
 
 
813
  )
814
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
815
  def toggle_prompt(task):
816
  if task == "✏️ Custom":
817
  return gr.update(visible=True, label="Custom Prompt", placeholder="Add <|grounding|> for bounding boxes")
 
866
  """)
867
 
868
  region_editor = None
 
869
  with gr.Row():
870
  with gr.Column(scale=1):
871
  file_in = gr.File(label="Upload Image or PDF", file_types=["image", ".pdf"], type="filepath")
 
878
  """
879
  **Quick use**
880
  1. `Entire Page`: click **Extract**.
881
+ 2. `Selected Region`: use the Region Selector below, draw a box around the target (no painting), crop, then click **Extract**.
882
  3. Check **Cropped Images** to confirm the selected region used for OCR.
883
  """
884
  )
885
  prompt = gr.Textbox(label="Prompt", lines=2, visible=False)
886
  btn = gr.Button("Extract", variant="primary", size="lg")
887
+ gr.Markdown("**Region Selector (for Selected Region scope)**")
888
+ if HAS_IMAGE_EDITOR:
889
+ region_editor = gr.ImageEditor(
890
+ label="Draw a rectangle around what you want (do not paint/fill), crop, then run Extract with Input Scope=Selected Region.",
891
+ type="pil",
892
+ height=300,
893
+ )
894
+ else:
895
+ gr.Markdown("Region drawing requires a newer Gradio version with `ImageEditor` support.")
896
+ region_editor = gr.State(None)
 
897
 
898
  with gr.Column(scale=2):
899
  with gr.Tabs() as tabs:
 
913
  gallery = gr.Gallery(show_label=False, columns=3, height=400)
914
  with gr.Tab("Raw Text", id="tab_raw"):
915
  raw_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
 
 
 
916
  download_btn = gr.DownloadButton("Download Markdown", visible=False, variant="secondary")
917
 
918
  with gr.Accordion("Image Examples", open=True):
 
954
  - **Locate**: Find and highlight where specific text appears (grounding ✅)
955
  - **Describe**: General image description
956
  - **Custom**: Your own prompt
957
+ - **Region selection**: Use **Input Scope=Selected Region**, draw/crop in Region Selector, then click **Extract**
958
  - **Input Scope**: `Entire Page` or `Selected Region` (Selected Region uses the Region Selector crop as main input)
959
  - **Equation Zoom (multipass)**: Optional nested equation refinement for Markdown. Off by default for speed/stability.
960
 
 
976
  page_selector.change(load_image, [file_in, page_selector], [input_img])
977
  task.change(toggle_prompt, [task], [prompt])
978
  task.change(select_boxes, [task], [tabs])
979
+ if HAS_IMAGE_EDITOR and region_editor is not None:
980
  file_in.change(load_image, [file_in, page_selector], [region_editor])
981
  page_selector.change(load_image, [file_in, page_selector], [region_editor])
982
  input_img.change(lambda img: img, [input_img], [region_editor])
 
 
 
 
 
983
 
984
  def run(image, file_path, task, custom_prompt, page_num, enable_equation_zoom, scope, region_value):
985
  selected_region = None
 
987
  selected_region = _extract_editor_image(region_value)
988
  if selected_region is None:
989
  msg = "Select Input Scope=Selected Region, then draw/crop in Region Selector first."
990
+ return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False))
991
  cleaned, markdown, raw, img_out, crops = process_image(
992
  selected_region,
993
  task,
 
1013
  )
1014
  else:
1015
  msg = "Error: Upload a file or image"
1016
+ return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False))
1017
 
1018
  return _compose_ui_outputs(cleaned, markdown, raw, img_out, crops)
1019
 
1020
  submit_event = btn.click(
1021
  run,
1022
  [input_img, file_in, task, prompt, page_selector, equation_zoom, input_scope, region_editor],
1023
+ [text_out, md_out, html_out, html_source_out, spatial_out, spatial_source_out, raw_out, img_out, gallery, download_btn]
1024
  )
1025
  submit_event.then(select_boxes, [task], [tabs])
1026