ricklon committed on
Commit
930195f
·
1 Parent(s): 3affffd

Label selected regions across boxes and cropped outputs

Browse files
Files changed (1) hide show
  1. app.py +197 -69
app.py CHANGED
@@ -804,7 +804,66 @@ def _to_rgba_image(obj):
804
  return Image.fromarray(arr.astype(np.uint8), mode="RGBA")
805
  return None
806
 
807
- def _extract_selected_region(editor_value, base_size=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
808
  """Extract a clean selected region from ImageEditor data.
809
 
810
  Strategy:
@@ -815,10 +874,11 @@ def _extract_selected_region(editor_value, base_size=None):
815
  return None
816
  if isinstance(editor_value, Image.Image):
817
  if base_size and tuple(editor_value.size) == tuple(base_size):
818
- return None
819
- return editor_value
 
820
  if not isinstance(editor_value, dict):
821
- return None
822
 
823
  background = _to_rgba_image(editor_value.get("background"))
824
  composite = _to_rgba_image(editor_value.get("composite"))
@@ -826,14 +886,16 @@ def _extract_selected_region(editor_value, base_size=None):
826
 
827
  if background is None:
828
  if composite is None:
829
- return None
830
  background = composite
831
 
832
  if not isinstance(layers, list) or not layers:
833
  # No annotation layers; treat as explicit crop only if size changed from base.
834
  if base_size and tuple(background.size) == tuple(base_size):
835
- return None
836
- return background.convert("RGB")
 
 
837
 
838
  alpha_acc = np.zeros((background.height, background.width), dtype=np.uint8)
839
  for layer in layers:
@@ -848,7 +910,7 @@ def _extract_selected_region(editor_value, base_size=None):
848
 
849
  ys, xs = np.where(alpha_acc > 0)
850
  if xs.size == 0 or ys.size == 0:
851
- return None
852
 
853
  x1, y1 = int(xs.min()), int(ys.min())
854
  x2, y2 = int(xs.max()) + 1, int(ys.max()) + 1
@@ -859,9 +921,46 @@ def _extract_selected_region(editor_value, base_size=None):
859
  x2 = min(background.width, x2 + pad_x)
860
  y2 = min(background.height, y2 + pad_y)
861
  if x2 <= x1 or y2 <= y1:
862
- return None
863
 
864
- return background.crop((x1, y1, x2, y2)).convert("RGB")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
865
 
866
  def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
867
  text_display = re.sub(
@@ -877,16 +976,9 @@ def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
877
  dl_tmp.close()
878
 
879
  markdown_html = to_math_html(markdown)
880
- mathjax_html = to_mathjax_html(markdown)
881
- spatial_html = to_spatial_html(raw, markdown)
882
-
883
  return (
884
  text_display,
885
  markdown_html,
886
- mathjax_html,
887
- mathjax_html,
888
- spatial_html,
889
- spatial_html,
890
  raw,
891
  img_out,
892
  gallery_items,
@@ -930,8 +1022,8 @@ def load_image(file_path, page_num=1):
930
  def load_image_with_size(file_path, page_num=1):
931
  img = load_image(file_path, page_num)
932
  if img is None:
933
- return None, None
934
- return img, (int(img.width), int(img.height))
935
 
936
  def update_page_selector(file_path):
937
  if not file_path:
@@ -954,25 +1046,12 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
954
 
955
  region_editor = None
956
  workspace_base_size = gr.State(None)
 
 
957
  with gr.Row():
958
  with gr.Column(scale=1):
959
  file_in = gr.File(label="Upload Image or PDF", file_types=["image", ".pdf"], type="filepath")
960
  page_selector = gr.Number(label="Select Page", value=1, minimum=1, step=1, visible=False)
961
- task = gr.Dropdown(list(TASK_PROMPTS.keys()), value="📋 Markdown", label="Task")
962
- input_scope = gr.Radio(["Entire Page", "Selected Region"], value="Entire Page", label="Input Scope")
963
- equation_zoom = gr.Checkbox(label="Equation Zoom (multipass)", value=False)
964
- gr.Markdown(
965
- """
966
- **Quick use**
967
- 1. Load a page/image into the workspace below.
968
- 2. `Entire Page`: click **Extract**.
969
- 3. `Selected Region`: use the **Crop** tool for a rectangle selection, or draw/highlight freehand; then click **Extract**.
970
- 4. Freehand/highlight uses semi-transparent blue ink so text stays visible.
971
- 5. Check **Cropped Images** to confirm the selected region used for OCR.
972
- """
973
- )
974
- prompt = gr.Textbox(label="Prompt", lines=2, visible=False)
975
- btn = gr.Button("Extract", variant="primary", size="lg")
976
  gr.Markdown("**Image Workspace (full page + region selection)**")
977
  if HAS_IMAGE_EDITOR:
978
  editor_kwargs = {}
@@ -1006,6 +1085,17 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1006
  else:
1007
  gr.Markdown("Region drawing requires a newer Gradio version with `ImageEditor` support.")
1008
  region_editor = gr.State(None)
 
 
 
 
 
 
 
 
 
 
 
1009
 
1010
  with gr.Column(scale=2):
1011
  with gr.Tabs() as tabs:
@@ -1013,12 +1103,6 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1013
  text_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
1014
  with gr.Tab("Markdown Preview", id="tab_markdown"):
1015
  md_out = gr.HTML("")
1016
- with gr.Tab("HTML + MathJax", id="tab_html"):
1017
- html_out = gr.HTML("")
1018
- html_source_out = gr.Code(label="Generated HTML Source", language="html", lines=16)
1019
- with gr.Tab("Spatial HTML", id="tab_spatial"):
1020
- spatial_out = gr.HTML("")
1021
- spatial_source_out = gr.Code(label="Spatial HTML Source", language="html", lines=16)
1022
  with gr.Tab("Boxes", id="tab_boxes"):
1023
  img_out = gr.Image(type="pil", height=500, show_label=False)
1024
  with gr.Tab("Cropped Images", id="tab_crops"):
@@ -1028,15 +1112,23 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1028
  download_btn = gr.DownloadButton("Download Markdown", visible=False, variant="secondary")
1029
 
1030
  with gr.Accordion("Image Examples", open=True):
1031
- gr.Examples(
1032
- examples=[
1033
- ["examples/2022-0922 Section 13 Notes.png", "📋 Markdown", ""],
1034
- ["examples/2022-0922 Section 14 Notes.png", "📋 Markdown", ""],
1035
- ["examples/2022-0922 Section 15 Notes.png", "📋 Markdown", ""],
1036
- ],
1037
- inputs=[file_in, task, prompt],
1038
- cache_examples=False
1039
- )
 
 
 
 
 
 
 
 
1040
 
1041
  with gr.Accordion("PDF Examples", open=True):
1042
  gr.Examples(
@@ -1061,8 +1153,10 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1061
  - Rectangle selection: use the **Crop** tool.
1062
  - Freehand selection: draw/highlight the target; app uses an automatic bounding box around your marks.
1063
  - Freehand/highlight ink is semi-transparent so underlying content stays visible.
 
1064
  Then click **Extract**.
1065
- 4. Review **Cropped Images** to confirm the selected region used for OCR.
 
1066
 
1067
  ### Tasks
1068
  - **Markdown**: Convert document to structured markdown with layout detection (grounding ✅)
@@ -1091,24 +1185,58 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1091
  task.change(toggle_prompt, [task], [prompt])
1092
  task.change(select_boxes, [task], [tabs])
1093
  if HAS_IMAGE_EDITOR and region_editor is not None:
1094
- file_in.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size])
1095
- page_selector.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size])
 
 
 
 
 
 
 
 
 
 
 
 
1096
 
1097
- def run(file_path, task, custom_prompt, page_num, enable_equation_zoom, scope, region_value, base_size):
1098
- selected_region = None
1099
  if scope == "Selected Region":
1100
- selected_region = _extract_selected_region(region_value, base_size=base_size)
1101
- if selected_region is None:
1102
- msg = "Select Input Scope=Selected Region, then crop or annotate a target area in the Image Workspace first."
1103
- return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False))
1104
- cleaned, markdown, raw, img_out, crops = process_image(
1105
- selected_region,
1106
- task,
1107
- custom_prompt,
1108
- enable_equation_zoom=enable_equation_zoom,
1109
- infer_crop_mode=False,
1110
- )
1111
- crops = [selected_region] + (crops or [])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1112
  elif (full_image := _extract_editor_background(region_value)) is not None:
1113
  cleaned, markdown, raw, img_out, crops = process_image(
1114
  full_image,
@@ -1126,14 +1254,14 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1126
  )
1127
  else:
1128
  msg = "Error: Upload a file or image"
1129
- return (msg, "", "", "", "", "", "", None, [], gr.DownloadButton(visible=False))
1130
 
1131
  return _compose_ui_outputs(cleaned, markdown, raw, img_out, crops)
1132
 
1133
  submit_event = btn.click(
1134
  run,
1135
- [file_in, task, prompt, page_selector, equation_zoom, input_scope, region_editor, workspace_base_size],
1136
- [text_out, md_out, html_out, html_source_out, spatial_out, spatial_source_out, raw_out, img_out, gallery, download_btn]
1137
  )
1138
  submit_event.then(select_boxes, [task], [tabs])
1139
 
 
804
  return Image.fromarray(arr.astype(np.uint8), mode="RGBA")
805
  return None
806
 
807
def _locate_patch_bbox(base_image: Image.Image, patch_image: Image.Image):
    """Approximate the location of ``patch_image`` inside ``base_image``.

    Both images are converted to grayscale. When the longer side of the base
    image exceeds 320 pixels, base and patch are downscaled by the same factor
    before searching. The search slides the patch over every valid offset and
    keeps the offset with the lowest mean squared difference.

    Args:
        base_image: The full image to search in.
        patch_image: The cropped image to look for.

    Returns:
        ``(x1, y1, x2, y2)`` in base-image pixel coordinates, where the box is
        exactly the patch's width and height and lies inside the base image.
        ``None`` if either image is ``None``, the patch is empty, or the patch
        is larger than the base image in either dimension.
    """
    if base_image is None or patch_image is None:
        return None

    base_gray = base_image.convert("L")
    patch_gray = patch_image.convert("L")
    base = np.asarray(base_gray, dtype=np.float32)
    patch = np.asarray(patch_gray, dtype=np.float32)
    bh, bw = base.shape[:2]
    ph, pw = patch.shape[:2]
    if ph <= 0 or pw <= 0 or ph > bh or pw > bw:
        return None

    # The guard above guarantees bh >= ph >= 1, so the division is safe.
    scale = min(1.0, 320.0 / max(bh, bw))
    if scale < 1.0:
        small_base_size = (
            max(1, int(round(bw * scale))),
            max(1, int(round(bh * scale))),
        )
        small_patch_size = (
            max(1, int(round(pw * scale))),
            max(1, int(round(ph * scale))),
        )
        base_small = np.asarray(
            base_gray.resize(small_base_size, Image.Resampling.BILINEAR),
            dtype=np.float32,
        )
        patch_small = np.asarray(
            patch_gray.resize(small_patch_size, Image.Resampling.BILINEAR),
            dtype=np.float32,
        )
    else:
        base_small = base
        patch_small = patch

    sbh, sbw = base_small.shape
    sph, spw = patch_small.shape
    # Independent rounding of the two sizes can leave the small patch one
    # pixel larger than the small base; there is nothing to search then.
    if sph > sbh or spw > sbw:
        return None

    best_score = float("inf")
    best_x = 0
    best_y = 0
    for y in range(sbh - sph + 1):
        band = base_small[y:y + sph, :]
        # windows has shape (sph, sbw - spw + 1, spw): one candidate per x.
        windows = np.lib.stride_tricks.sliding_window_view(band, spw, axis=1)
        diff = windows - patch_small[:, None, :]
        scores = np.mean(diff * diff, axis=(0, 2))
        x = int(np.argmin(scores))
        score = float(scores[x])
        if score < best_score:
            best_score = score
            best_x = x
            best_y = y

    if scale < 1.0:
        x1 = int(round(best_x / scale))
        y1 = int(round(best_y / scale))
    else:
        x1, y1 = best_x, best_y

    # Only the top-left corner is estimated; the patch's real size is known.
    # Rounding both corners separately would let the box drift from pw x ph,
    # so clamp the corner so a full-size box fits and derive the other corner.
    x1 = max(0, min(bw - pw, x1))
    y1 = max(0, min(bh - ph, y1))
    return (x1, y1, x1 + pw, y1 + ph)
865
+
866
+ def _extract_selected_region(editor_value, base_size=None, base_image=None):
867
  """Extract a clean selected region from ImageEditor data.
868
 
869
  Strategy:
 
874
  return None
875
  if isinstance(editor_value, Image.Image):
876
  if base_size and tuple(editor_value.size) == tuple(base_size):
877
+ return None, None
878
+ bbox = _locate_patch_bbox(base_image, editor_value) if base_image is not None else None
879
+ return editor_value, bbox
880
  if not isinstance(editor_value, dict):
881
+ return None, None
882
 
883
  background = _to_rgba_image(editor_value.get("background"))
884
  composite = _to_rgba_image(editor_value.get("composite"))
 
886
 
887
  if background is None:
888
  if composite is None:
889
+ return None, None
890
  background = composite
891
 
892
  if not isinstance(layers, list) or not layers:
893
  # No annotation layers; treat as explicit crop only if size changed from base.
894
  if base_size and tuple(background.size) == tuple(base_size):
895
+ return None, None
896
+ patch = background.convert("RGB")
897
+ bbox = _locate_patch_bbox(base_image, patch) if base_image is not None else None
898
+ return patch, bbox
899
 
900
  alpha_acc = np.zeros((background.height, background.width), dtype=np.uint8)
901
  for layer in layers:
 
910
 
911
  ys, xs = np.where(alpha_acc > 0)
912
  if xs.size == 0 or ys.size == 0:
913
+ return None, None
914
 
915
  x1, y1 = int(xs.min()), int(ys.min())
916
  x2, y2 = int(xs.max()) + 1, int(ys.max()) + 1
 
921
  x2 = min(background.width, x2 + pad_x)
922
  y2 = min(background.height, y2 + pad_y)
923
  if x2 <= x1 or y2 <= y1:
924
+ return None, None
925
 
926
+ return background.crop((x1, y1, x2, y2)).convert("RGB"), (x1, y1, x2, y2)
927
+
928
def _draw_selected_region_boxes(image, boxes):
    """Render numbered region boxes onto ``image`` via ``draw_bounding_boxes``.

    Each pixel-space box ``(x1, y1, x2, y2)`` is rescaled to a 0-999 range
    relative to the image size and wrapped in a ``<|ref|>...<|det|>`` string.
    Boxes are labelled ``Region 1``, ``Region 2``, ... in input order.

    Returns the first element of the ``draw_bounding_boxes`` result, or
    ``None`` when there is no image or no boxes.
    """
    if image is None or not boxes:
        return None

    width, height = image.size
    # Avoid dividing by zero on a degenerate image.
    x_extent = max(1, width)
    y_extent = max(1, height)

    def _to_grid(value, extent):
        return max(0.0, min(999.0, value / extent * 999.0))

    refs = []
    for index, (left, top, right, bottom) in enumerate(boxes, 1):
        coords = [[
            _to_grid(left, x_extent),
            _to_grid(top, y_extent),
            _to_grid(right, x_extent),
            _to_grid(bottom, y_extent),
        ]]
        coord_text = repr(coords)
        raw = f'<|ref|>region_{index}<|/ref|><|det|>{coord_text}<|/det|>'
        refs.append((raw, f"Region {index}", coord_text))

    annotated, _ = draw_bounding_boxes(image, refs, extract_images=False)
    return annotated
945
+
946
+ def _region_gallery_items(regions):
947
+ return [(r["image"], f"Region {i}") for i, r in enumerate(regions, 1)]
948
+
949
+ def _reset_selected_regions():
950
+ return [], [], "No saved regions."
951
+
952
def add_selected_region(editor_value, base_size, base_image, selected_regions):
    """Append the region currently selected in the editor to the saved list.

    Args:
        editor_value: Current editor value, passed to
            ``_extract_selected_region``.
        base_size: Size of the loaded page, passed through unchanged.
        base_image: The loaded page image, passed through unchanged.
        selected_regions: Previously saved region dicts, or ``None``.

    Returns:
        ``(regions, gallery_items, status_message)``. ``regions`` is a new
        list; the input list is never mutated.
    """
    regions = list(selected_regions or [])

    extracted = _extract_selected_region(
        editor_value, base_size=base_size, base_image=base_image
    )
    # One early exit of _extract_selected_region appears to still return a
    # bare None rather than a pair; unpacking that directly would raise.
    region_img, bbox = extracted if extracted is not None else (None, None)

    if region_img is None:
        msg = "No region detected. Use Crop or draw/highlight a region first."
        return regions, _region_gallery_items(regions), msg

    regions.append({"image": region_img, "bbox": bbox})
    return regions, _region_gallery_items(regions), f"{len(regions)} region(s) saved."
961
+
962
def clear_selected_regions():
    """Return the empty-selection values produced by ``_reset_selected_regions``."""
    cleared = _reset_selected_regions()
    return cleared
964
 
965
  def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
966
  text_display = re.sub(
 
976
  dl_tmp.close()
977
 
978
  markdown_html = to_math_html(markdown)
 
 
 
979
  return (
980
  text_display,
981
  markdown_html,
 
 
 
 
982
  raw,
983
  img_out,
984
  gallery_items,
 
1022
  def load_image_with_size(file_path, page_num=1):
1023
  img = load_image(file_path, page_num)
1024
  if img is None:
1025
+ return None, None, None
1026
+ return img, (int(img.width), int(img.height)), img
1027
 
1028
  def update_page_selector(file_path):
1029
  if not file_path:
 
1046
 
1047
  region_editor = None
1048
  workspace_base_size = gr.State(None)
1049
+ workspace_base_image = gr.State(None)
1050
+ selected_regions_state = gr.State([])
1051
  with gr.Row():
1052
  with gr.Column(scale=1):
1053
  file_in = gr.File(label="Upload Image or PDF", file_types=["image", ".pdf"], type="filepath")
1054
  page_selector = gr.Number(label="Select Page", value=1, minimum=1, step=1, visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1055
  gr.Markdown("**Image Workspace (full page + region selection)**")
1056
  if HAS_IMAGE_EDITOR:
1057
  editor_kwargs = {}
 
1085
  else:
1086
  gr.Markdown("Region drawing requires a newer Gradio version with `ImageEditor` support.")
1087
  region_editor = gr.State(None)
1088
+ input_scope = gr.Radio(["Entire Page", "Selected Region"], value="Entire Page", label="Input Scope")
1089
+ selection_controls = gr.Row()
1090
+ with selection_controls:
1091
+ add_region_btn = gr.Button("Add Region", variant="secondary")
1092
+ clear_regions_btn = gr.Button("Clear Regions")
1093
+ selection_status = gr.Textbox(label="Region Selection Status", value="No saved regions.", interactive=False)
1094
+ selected_regions_gallery = gr.Gallery(label="Selected Regions", show_label=True, columns=3, height=170)
1095
+ task = gr.Dropdown(list(TASK_PROMPTS.keys()), value="📋 Markdown", label="Task")
1096
+ equation_zoom = gr.Checkbox(label="Equation Zoom (multipass)", value=False)
1097
+ prompt = gr.Textbox(label="Prompt", lines=2, visible=False)
1098
+ btn = gr.Button("Extract", variant="primary", size="lg")
1099
 
1100
  with gr.Column(scale=2):
1101
  with gr.Tabs() as tabs:
 
1103
  text_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
1104
  with gr.Tab("Markdown Preview", id="tab_markdown"):
1105
  md_out = gr.HTML("")
 
 
 
 
 
 
1106
  with gr.Tab("Boxes", id="tab_boxes"):
1107
  img_out = gr.Image(type="pil", height=500, show_label=False)
1108
  with gr.Tab("Cropped Images", id="tab_crops"):
 
1112
  download_btn = gr.DownloadButton("Download Markdown", visible=False, variant="secondary")
1113
 
1114
  with gr.Accordion("Image Examples", open=True):
1115
+ image_examples = [
1116
+ ["examples/2022-0922 Section 13 Notes.png", "📋 Markdown", ""],
1117
+ ["examples/2022-0922 Section 14 Notes.png", "📋 Markdown", ""],
1118
+ ["examples/2022-0922 Section 15 Notes.png", "📋 Markdown", ""],
1119
+ ]
1120
+ if HAS_IMAGE_EDITOR and region_editor is not None:
1121
+ gr.Examples(
1122
+ examples=image_examples,
1123
+ inputs=[region_editor, task, prompt],
1124
+ cache_examples=False
1125
+ )
1126
+ else:
1127
+ gr.Examples(
1128
+ examples=image_examples,
1129
+ inputs=[file_in, task, prompt],
1130
+ cache_examples=False
1131
+ )
1132
 
1133
  with gr.Accordion("PDF Examples", open=True):
1134
  gr.Examples(
 
1153
  - Rectangle selection: use the **Crop** tool.
1154
  - Freehand selection: draw/highlight the target; app uses an automatic bounding box around your marks.
1155
  - Freehand/highlight ink is semi-transparent so underlying content stays visible.
1156
+ - Optional multi-select: click **Add Region** after each selection.
1157
  Then click **Extract**.
1158
+ 4. Use **Clear Regions** to reset multi-select state.
1159
+ 5. Review **Cropped Images** and **Boxes**: both are labeled `Region 1`, `Region 2`, etc.
1160
 
1161
  ### Tasks
1162
  - **Markdown**: Convert document to structured markdown with layout detection (grounding ✅)
 
1185
  task.change(toggle_prompt, [task], [prompt])
1186
  task.change(select_boxes, [task], [tabs])
1187
  if HAS_IMAGE_EDITOR and region_editor is not None:
1188
+ file_in.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
1189
+ page_selector.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
1190
+ file_in.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
1191
+ page_selector.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
1192
+
1193
+ add_region_btn.click(
1194
+ add_selected_region,
1195
+ [region_editor, workspace_base_size, workspace_base_image, selected_regions_state],
1196
+ [selected_regions_state, selected_regions_gallery, selection_status],
1197
+ )
1198
+ clear_regions_btn.click(
1199
+ clear_selected_regions,
1200
+ outputs=[selected_regions_state, selected_regions_gallery, selection_status],
1201
+ )
1202
 
1203
+ def run(file_path, task, custom_prompt, page_num, enable_equation_zoom, scope, region_value, base_size, base_image, selected_regions):
 
1204
  if scope == "Selected Region":
1205
+ regions = list(selected_regions or [])
1206
+ if not regions:
1207
+ selected_region, selected_bbox = _extract_selected_region(region_value, base_size=base_size, base_image=base_image)
1208
+ if selected_region is None:
1209
+ msg = "Select Input Scope=Selected Region, then crop or annotate a target area in the Image Workspace first."
1210
+ return (msg, "", "", None, [], gr.DownloadButton(visible=False))
1211
+ regions = [{"image": selected_region, "bbox": selected_bbox}]
1212
+
1213
+ cleaned_parts = []
1214
+ markdown_parts = []
1215
+ raw_parts = []
1216
+ for i, r in enumerate(regions, 1):
1217
+ cleaned_i, markdown_i, raw_i, _, _ = process_image(
1218
+ r["image"],
1219
+ task,
1220
+ custom_prompt,
1221
+ enable_equation_zoom=enable_equation_zoom,
1222
+ infer_crop_mode=False,
1223
+ )
1224
+ if len(regions) > 1:
1225
+ cleaned_parts.append(f"## Region {i}\n\n{cleaned_i}")
1226
+ markdown_parts.append(f"## Region {i}\n\n{markdown_i}")
1227
+ raw_parts.append(f"## Region {i}\n\n{raw_i}")
1228
+ else:
1229
+ cleaned_parts.append(cleaned_i)
1230
+ markdown_parts.append(markdown_i)
1231
+ raw_parts.append(raw_i)
1232
+
1233
+ cleaned = "\n\n".join(cleaned_parts).strip()
1234
+ markdown = "\n\n".join(markdown_parts).strip()
1235
+ raw = "\n\n".join(raw_parts).strip()
1236
+ crops = _region_gallery_items(regions)
1237
+ full_img = base_image if isinstance(base_image, Image.Image) else _extract_editor_background(region_value)
1238
+ region_boxes = [r["bbox"] for r in regions if r.get("bbox") is not None]
1239
+ img_out = _draw_selected_region_boxes(full_img, region_boxes)
1240
  elif (full_image := _extract_editor_background(region_value)) is not None:
1241
  cleaned, markdown, raw, img_out, crops = process_image(
1242
  full_image,
 
1254
  )
1255
  else:
1256
  msg = "Error: Upload a file or image"
1257
+ return (msg, "", "", None, [], gr.DownloadButton(visible=False))
1258
 
1259
  return _compose_ui_outputs(cleaned, markdown, raw, img_out, crops)
1260
 
1261
  submit_event = btn.click(
1262
  run,
1263
+ [file_in, task, prompt, page_selector, equation_zoom, input_scope, region_editor, workspace_base_size, workspace_base_image, selected_regions_state],
1264
+ [text_out, md_out, raw_out, img_out, gallery, download_btn]
1265
  )
1266
  submit_event.then(select_boxes, [task], [tabs])
1267