ricklon committed on
Commit
7763137
·
1 Parent(s): d3dfd44

Preserve workspace zoom by delta-based region snapshots

Browse files
Files changed (1) hide show
  1. app.py +101 -57
app.py CHANGED
@@ -1006,6 +1006,55 @@ def _component_boxes(binary_mask, min_pixels=24):
1006
  boxes.append((min_x, min_y, max_x + 1, max_y + 1, count))
1007
  return boxes
1008
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1009
  def _extract_selected_regions(editor_value, base_size=None, base_image=None):
1010
  if editor_value is None:
1011
  return []
@@ -1017,14 +1066,10 @@ def _extract_selected_regions(editor_value, base_size=None, base_image=None):
1017
  if not isinstance(editor_value, dict):
1018
  return []
1019
 
1020
- background = _to_rgba_image(editor_value.get("background"))
1021
- composite = _to_rgba_image(editor_value.get("composite"))
1022
  layers = editor_value.get("layers") or []
1023
-
1024
  if background is None:
1025
- if composite is None:
1026
- return []
1027
- background = composite
1028
 
1029
  if not isinstance(layers, list) or not layers:
1030
  # No annotation layers; treat as explicit crop only if size changed from base.
@@ -1034,39 +1079,34 @@ def _extract_selected_regions(editor_value, base_size=None, base_image=None):
1034
  bbox = _locate_patch_bbox(base_image, patch) if base_image is not None else None
1035
  return [(patch, bbox)]
1036
 
1037
- alpha_acc = np.zeros((background.height, background.width), dtype=np.uint8)
1038
- for layer in layers:
1039
- layer_img = _to_rgba_image(layer)
1040
- if layer_img is None:
1041
- continue
1042
- if layer_img.size != background.size:
1043
- nearest = Image.Resampling.NEAREST if hasattr(Image, "Resampling") else Image.NEAREST
1044
- layer_img = layer_img.resize(background.size, nearest)
1045
- layer_alpha = np.asarray(layer_img, dtype=np.uint8)[:, :, 3]
1046
- alpha_acc = np.maximum(alpha_acc, layer_alpha)
1047
-
1048
- components = _component_boxes(alpha_acc > 0, min_pixels=24)
1049
- if not components:
1050
  return []
 
1051
 
1052
- regions = []
1053
- for x1, y1, x2, y2, _ in components:
1054
- pad_x = max(2, int((x2 - x1) * 0.02))
1055
- pad_y = max(2, int((y2 - y1) * 0.02))
1056
- px1 = max(0, x1 - pad_x)
1057
- py1 = max(0, y1 - pad_y)
1058
- px2 = min(background.width, x2 + pad_x)
1059
- py2 = min(background.height, y2 + pad_y)
1060
- if px2 <= px1 or py2 <= py1:
1061
- continue
1062
- crop = background.crop((px1, py1, px2, py2)).convert("RGB")
1063
- regions.append((crop, (px1, py1, px2, py2)))
1064
 
1065
- regions.sort(
1066
- key=lambda item: (item[1][2] - item[1][0]) * (item[1][3] - item[1][1]),
1067
- reverse=True,
1068
- )
1069
- return regions
 
 
 
 
 
 
 
 
 
 
 
1070
 
1071
  def _extract_selected_region(editor_value, base_size=None, base_image=None):
1072
  regions = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
@@ -1093,14 +1133,6 @@ def _is_duplicate_bbox(candidate_bbox, existing_bbox):
1093
  cover_cand, cover_exist = _bbox_overlap_ratio(candidate_bbox, existing_bbox)
1094
  return iou >= 0.85 or cover_cand >= 0.92 or cover_exist >= 0.97
1095
 
1096
- def _clear_editor_overlays(editor_value):
1097
- if isinstance(editor_value, dict):
1098
- bg = _to_rgba_image(editor_value.get("background")) or _to_rgba_image(editor_value.get("composite"))
1099
- if isinstance(bg, Image.Image):
1100
- clean_bg = bg.convert("RGB")
1101
- return {"background": clean_bg, "layers": [], "composite": clean_bg}
1102
- return editor_value
1103
-
1104
  def _draw_selected_region_boxes(image, boxes):
1105
  if image is None or not boxes:
1106
  return None
@@ -1137,12 +1169,20 @@ def _label_gallery_items(items, prefix=None):
1137
  def _reset_selected_regions():
1138
  return [], [], "No saved regions."
1139
 
1140
- def add_selected_region(editor_value, base_size, base_image, selected_regions):
1141
- candidates = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
 
 
 
 
 
 
 
 
1142
  regions = list(selected_regions or [])
1143
  if not candidates:
1144
  msg = "No region detected. Use Crop or draw/highlight a region first."
1145
- return regions, _region_gallery_items(regions), msg, editor_value
1146
 
1147
  existing_boxes = [r.get("bbox") for r in regions if r.get("bbox") is not None]
1148
  added = 0
@@ -1156,17 +1196,18 @@ def add_selected_region(editor_value, base_size, base_image, selected_regions):
1156
 
1157
  if added == 0:
1158
  msg = "No new region added. Draw one region, click Add Region, then draw the next region."
1159
- return regions, _region_gallery_items(regions), msg, editor_value
1160
 
1161
- msg = f"Added {added} region(s). {len(regions)} total."
1162
- return regions, _region_gallery_items(regions), msg, _clear_editor_overlays(editor_value)
1163
 
1164
  def clear_selected_regions():
1165
  return _reset_selected_regions()
1166
 
1167
- def clear_regions_and_editor(editor_value):
1168
  regions, gallery_items, msg = _reset_selected_regions()
1169
- return regions, gallery_items, msg, _clear_editor_overlays(editor_value)
 
1170
 
1171
  def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
1172
  text_display = re.sub(
@@ -1287,6 +1328,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1287
  workspace_base_size = gr.State(None)
1288
  workspace_base_image = gr.State(None)
1289
  selected_regions_state = gr.State([])
 
1290
 
1291
  with gr.Row():
1292
  with gr.Column(scale=3):
@@ -1409,7 +1451,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1409
  - Optional rectangle selection: use the **Crop** tool.
1410
  - Freehand/highlight ink is semi-transparent so underlying content stays visible.
1411
  - Optional multi-select: click **Add Region** after each selection.
1412
- - After **Add Region**, drawing marks are cleared automatically so the next region stays separate.
1413
  Then click **Extract**.
1414
  4. Use **Clear Regions** to reset multi-select state.
1415
  5. Review **Cropped Images** and **Boxes**: both are labeled `Region 1`, `Region 2`, etc.
@@ -1449,16 +1491,18 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1449
  region_editor.change(sync_workspace_state, [region_editor, workspace_base_image], [workspace_base_size, workspace_base_image])
1450
  file_in.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
1451
  page_selector.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
 
 
1452
 
1453
  add_region_btn.click(
1454
  add_selected_region,
1455
- [region_editor, workspace_base_size, workspace_base_image, selected_regions_state],
1456
- [selected_regions_state, selected_regions_gallery, selection_status, region_editor],
1457
  )
1458
  clear_regions_btn.click(
1459
- clear_regions_and_editor,
1460
  inputs=[region_editor],
1461
- outputs=[selected_regions_state, selected_regions_gallery, selection_status, region_editor],
1462
  )
1463
 
1464
  def run(file_path, task, custom_prompt, page_num, enable_equation_zoom, detect_eq_lines, scope, region_value, base_size, base_image, selected_regions):
 
1006
  boxes.append((min_x, min_y, max_x + 1, max_y + 1, count))
1007
  return boxes
1008
 
1009
def _extract_regions_from_mask(background, mask):
    """Crop one padded RGB patch per connected component found in ``mask``.

    Args:
        background: Image the crops are taken from. It must expose ``width``,
            ``height`` and ``crop()`` (presumably a PIL image -- confirm
            against callers).
        mask: Binary mask passed unchanged to ``_component_boxes``; components
            smaller than 24 pixels are filtered out there via ``min_pixels``.

    Returns:
        A list of ``(crop, (x1, y1, x2, y2))`` tuples sorted by bounding-box
        area, largest first. The list is empty when no component is found.
    """
    components = _component_boxes(mask, min_pixels=24)
    if not components:
        return []

    regions = []
    for x1, y1, x2, y2, _ in components:
        # Pad each box by 2% of its size (never less than 2 px) ...
        pad_x = max(2, int((x2 - x1) * 0.02))
        pad_y = max(2, int((y2 - y1) * 0.02))
        # ... and clamp the padded box to the background bounds.
        px1 = max(0, x1 - pad_x)
        py1 = max(0, y1 - pad_y)
        px2 = min(background.width, x2 + pad_x)
        py2 = min(background.height, y2 + pad_y)
        if px2 <= px1 or py2 <= py1:
            # Degenerate box after clamping: nothing to crop.
            continue
        crop = background.crop((px1, py1, px2, py2)).convert("RGB")
        regions.append((crop, (px1, py1, px2, py2)))

    # Largest region first, so callers that take a single region get the
    # biggest one.
    regions.sort(
        key=lambda item: (item[1][2] - item[1][0]) * (item[1][3] - item[1][1]),
        reverse=True,
    )
    return regions
1032
+
1033
+ def _editor_background_and_mask(editor_value):
1034
+ if not isinstance(editor_value, dict):
1035
+ return None, None
1036
+ background = _to_rgba_image(editor_value.get("background"))
1037
+ composite = _to_rgba_image(editor_value.get("composite"))
1038
+ layers = editor_value.get("layers") or []
1039
+ if background is None:
1040
+ if composite is None:
1041
+ return None, None
1042
+ background = composite
1043
+ if not isinstance(layers, list) or not layers:
1044
+ return background, None
1045
+
1046
+ alpha_acc = np.zeros((background.height, background.width), dtype=np.uint8)
1047
+ for layer in layers:
1048
+ layer_img = _to_rgba_image(layer)
1049
+ if layer_img is None:
1050
+ continue
1051
+ if layer_img.size != background.size:
1052
+ nearest = Image.Resampling.NEAREST if hasattr(Image, "Resampling") else Image.NEAREST
1053
+ layer_img = layer_img.resize(background.size, nearest)
1054
+ layer_alpha = np.asarray(layer_img, dtype=np.uint8)[:, :, 3]
1055
+ alpha_acc = np.maximum(alpha_acc, layer_alpha)
1056
+ return background, (alpha_acc > 0)
1057
+
1058
  def _extract_selected_regions(editor_value, base_size=None, base_image=None):
1059
  if editor_value is None:
1060
  return []
 
1066
  if not isinstance(editor_value, dict):
1067
  return []
1068
 
1069
+ background, mask = _editor_background_and_mask(editor_value)
 
1070
  layers = editor_value.get("layers") or []
 
1071
  if background is None:
1072
+ return []
 
 
1073
 
1074
  if not isinstance(layers, list) or not layers:
1075
  # No annotation layers; treat as explicit crop only if size changed from base.
 
1079
  bbox = _locate_patch_bbox(base_image, patch) if base_image is not None else None
1080
  return [(patch, bbox)]
1081
 
1082
+ if mask is None:
 
 
 
 
 
 
 
 
 
 
 
 
1083
  return []
1084
+ return _extract_regions_from_mask(background, mask)
1085
 
1086
def _extract_new_drawn_regions(editor_value, base_size=None, base_image=None, consumed_mask=None):
    """Extract only the regions drawn since the previous snapshot.

    Args:
        editor_value: Editor payload: either a bare image or a dict.
        base_size: Forwarded to ``_extract_selected_regions`` on the crop path.
        base_image: Forwarded to ``_extract_selected_regions`` on the crop path.
        consumed_mask: Boolean mask of pixels already turned into regions by an
            earlier call, or ``None``. It is ignored unless it is a NumPy
            array with the same shape as the current mask.

    Returns:
        ``(regions, mask_state)``. ``regions`` is the list of
        ``(crop, bbox)`` tuples for the newly drawn pixels. ``mask_state`` is
        the full current drawn-pixel mask when layers were processed, and the
        incoming ``consumed_mask`` unchanged on every other path.
    """
    # Crop mode / explicit cropped image: fall back to classic extraction.
    if isinstance(editor_value, Image.Image):
        regions = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
        return regions, consumed_mask
    if not isinstance(editor_value, dict):
        return [], consumed_mask

    background, mask = _editor_background_and_mask(editor_value)
    if background is None:
        return [], consumed_mask

    # With a background present, the helper returns mask=None exactly when the
    # payload has no usable layer list, i.e. nothing was drawn: treat that as
    # explicit crop mode.
    if mask is None:
        regions = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
        return regions, consumed_mask

    if isinstance(consumed_mask, np.ndarray) and consumed_mask.shape == mask.shape:
        # Keep only pixels that are drawn now but were not drawn last time.
        delta_mask = np.logical_and(mask, np.logical_not(consumed_mask))
    else:
        # No comparable snapshot (first call, or the canvas size changed).
        delta_mask = mask
    regions = _extract_regions_from_mask(background, delta_mask)
    return regions, mask
1110
 
1111
  def _extract_selected_region(editor_value, base_size=None, base_image=None):
1112
  regions = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
 
1133
  cover_cand, cover_exist = _bbox_overlap_ratio(candidate_bbox, existing_bbox)
1134
  return iou >= 0.85 or cover_cand >= 0.92 or cover_exist >= 0.97
1135
 
 
 
 
 
 
 
 
 
1136
  def _draw_selected_region_boxes(image, boxes):
1137
  if image is None or not boxes:
1138
  return None
 
1169
  def _reset_selected_regions():
1170
  return [], [], "No saved regions."
1171
 
1172
+ def _reset_drawn_mask():
1173
+ return None
1174
+
1175
+ def add_selected_region(editor_value, base_size, base_image, selected_regions, consumed_mask):
1176
+ candidates, updated_mask = _extract_new_drawn_regions(
1177
+ editor_value,
1178
+ base_size=base_size,
1179
+ base_image=base_image,
1180
+ consumed_mask=consumed_mask,
1181
+ )
1182
  regions = list(selected_regions or [])
1183
  if not candidates:
1184
  msg = "No region detected. Use Crop or draw/highlight a region first."
1185
+ return regions, _region_gallery_items(regions), msg, updated_mask
1186
 
1187
  existing_boxes = [r.get("bbox") for r in regions if r.get("bbox") is not None]
1188
  added = 0
 
1196
 
1197
  if added == 0:
1198
  msg = "No new region added. Draw one region, click Add Region, then draw the next region."
1199
+ return regions, _region_gallery_items(regions), msg, updated_mask
1200
 
1201
+ msg = f"Added {added} region(s). {len(regions)} total. Zoom/pan is preserved."
1202
+ return regions, _region_gallery_items(regions), msg, updated_mask
1203
 
1204
  def clear_selected_regions():
1205
  return _reset_selected_regions()
1206
 
1207
def clear_regions_preserve_view(editor_value):
    """Reset the saved regions and re-snapshot the current drawn-pixel mask.

    Args:
        editor_value: Current editor payload; it is only read, never modified
            or returned.

    Returns:
        ``(regions, gallery_items, status_message, mask)``. The first three
        values are the empty selection state. ``mask`` is the drawn-pixel mask
        of the current payload, or ``None`` when the payload has no layers or
        is unusable.
    """
    regions, gallery_items, msg = _reset_selected_regions()
    # Only the mask is needed here; the background image is discarded.
    _, mask = _editor_background_and_mask(editor_value)
    return regions, gallery_items, msg, mask
1211
 
1212
  def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
1213
  text_display = re.sub(
 
1328
  workspace_base_size = gr.State(None)
1329
  workspace_base_image = gr.State(None)
1330
  selected_regions_state = gr.State([])
1331
+ drawn_mask_state = gr.State(None)
1332
 
1333
  with gr.Row():
1334
  with gr.Column(scale=3):
 
1451
  - Optional rectangle selection: use the **Crop** tool.
1452
  - Freehand/highlight ink is semi-transparent so underlying content stays visible.
1453
  - Optional multi-select: click **Add Region** after each selection.
1454
+ - **Add Region** snapshots only newly drawn pixels so zoom/pan stays in place while you continue selecting.
1455
  Then click **Extract**.
1456
  4. Use **Clear Regions** to reset multi-select state.
1457
  5. Review **Cropped Images** and **Boxes**: both are labeled `Region 1`, `Region 2`, etc.
 
1491
  region_editor.change(sync_workspace_state, [region_editor, workspace_base_image], [workspace_base_size, workspace_base_image])
1492
  file_in.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
1493
  page_selector.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
1494
+ file_in.change(_reset_drawn_mask, outputs=[drawn_mask_state])
1495
+ page_selector.change(_reset_drawn_mask, outputs=[drawn_mask_state])
1496
 
1497
  add_region_btn.click(
1498
  add_selected_region,
1499
+ [region_editor, workspace_base_size, workspace_base_image, selected_regions_state, drawn_mask_state],
1500
+ [selected_regions_state, selected_regions_gallery, selection_status, drawn_mask_state],
1501
  )
1502
  clear_regions_btn.click(
1503
+ clear_regions_preserve_view,
1504
  inputs=[region_editor],
1505
+ outputs=[selected_regions_state, selected_regions_gallery, selection_status, drawn_mask_state],
1506
  )
1507
 
1508
  def run(file_path, task, custom_prompt, page_num, enable_equation_zoom, detect_eq_lines, scope, region_value, base_size, base_image, selected_regions):