ricklon committed on
Commit
d3dfd44
·
1 Parent(s): fce2f1f

Keep region selections separate and clear overlays after add

Browse files
Files changed (1) hide show
  1. app.py +128 -36
app.py CHANGED
@@ -15,6 +15,7 @@ import base64
15
  import html as html_lib
16
  import markdown as md_lib
17
  import latex2mathml.converter
 
18
 
19
  from io import StringIO, BytesIO
20
 
@@ -967,22 +968,54 @@ def _locate_patch_bbox(base_image: Image.Image, patch_image: Image.Image):
967
  y2 = max(y1 + 1, min(bh, y2))
968
  return (x1, y1, x2, y2)
969
 
970
- def _extract_selected_region(editor_value, base_size=None, base_image=None):
971
- """Extract a clean selected region from ImageEditor data.
 
 
 
972
 
973
- Strategy:
974
- 1) Prefer explicit crop (editor background/composite already reduced in size).
975
- 2) Otherwise, infer region from drawn layers' alpha mask bbox and crop background.
976
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
977
  if editor_value is None:
978
- return None
979
  if isinstance(editor_value, Image.Image):
980
  if base_size and tuple(editor_value.size) == tuple(base_size):
981
- return None, None
982
  bbox = _locate_patch_bbox(base_image, editor_value) if base_image is not None else None
983
- return editor_value, bbox
984
  if not isinstance(editor_value, dict):
985
- return None, None
986
 
987
  background = _to_rgba_image(editor_value.get("background"))
988
  composite = _to_rgba_image(editor_value.get("composite"))
@@ -990,16 +1023,16 @@ def _extract_selected_region(editor_value, base_size=None, base_image=None):
990
 
991
  if background is None:
992
  if composite is None:
993
- return None, None
994
  background = composite
995
 
996
  if not isinstance(layers, list) or not layers:
997
  # No annotation layers; treat as explicit crop only if size changed from base.
998
  if base_size and tuple(background.size) == tuple(base_size):
999
- return None, None
1000
  patch = background.convert("RGB")
1001
  bbox = _locate_patch_bbox(base_image, patch) if base_image is not None else None
1002
- return patch, bbox
1003
 
1004
  alpha_acc = np.zeros((background.height, background.width), dtype=np.uint8)
1005
  for layer in layers:
@@ -1012,22 +1045,61 @@ def _extract_selected_region(editor_value, base_size=None, base_image=None):
1012
  layer_alpha = np.asarray(layer_img, dtype=np.uint8)[:, :, 3]
1013
  alpha_acc = np.maximum(alpha_acc, layer_alpha)
1014
 
1015
- ys, xs = np.where(alpha_acc > 0)
1016
- if xs.size == 0 or ys.size == 0:
1017
- return None, None
1018
 
1019
- x1, y1 = int(xs.min()), int(ys.min())
1020
- x2, y2 = int(xs.max()) + 1, int(ys.max()) + 1
1021
- pad_x = max(2, int((x2 - x1) * 0.02))
1022
- pad_y = max(2, int((y2 - y1) * 0.02))
1023
- x1 = max(0, x1 - pad_x)
1024
- y1 = max(0, y1 - pad_y)
1025
- x2 = min(background.width, x2 + pad_x)
1026
- y2 = min(background.height, y2 + pad_y)
1027
- if x2 <= x1 or y2 <= y1:
 
 
 
 
 
 
 
 
 
 
 
 
 
1028
  return None, None
 
1029
 
1030
- return background.crop((x1, y1, x2, y2)).convert("RGB"), (x1, y1, x2, y2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1031
 
1032
  def _draw_selected_region_boxes(image, boxes):
1033
  if image is None or not boxes:
@@ -1066,18 +1138,36 @@ def _reset_selected_regions():
1066
  return [], [], "No saved regions."
1067
 
1068
  def add_selected_region(editor_value, base_size, base_image, selected_regions):
1069
- region_img, bbox = _extract_selected_region(editor_value, base_size=base_size, base_image=base_image)
1070
- if region_img is None:
1071
- msg = "No region detected. Use Crop or draw/highlight a region first."
1072
- regions = selected_regions or []
1073
- return regions, _region_gallery_items(regions), msg
1074
  regions = list(selected_regions or [])
1075
- regions.append({"image": region_img, "bbox": bbox})
1076
- return regions, _region_gallery_items(regions), f"{len(regions)} region(s) saved."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1077
 
1078
  def clear_selected_regions():
1079
  return _reset_selected_regions()
1080
 
 
 
 
 
1081
  def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
1082
  text_display = re.sub(
1083
  r'\\\[(.+?)\\\]',
@@ -1319,6 +1409,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1319
  - Optional rectangle selection: use the **Crop** tool.
1320
  - Freehand/highlight ink is semi-transparent so underlying content stays visible.
1321
  - Optional multi-select: click **Add Region** after each selection.
 
1322
  Then click **Extract**.
1323
  4. Use **Clear Regions** to reset multi-select state.
1324
  5. Review **Cropped Images** and **Boxes**: both are labeled `Region 1`, `Region 2`, etc.
@@ -1362,11 +1453,12 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1362
  add_region_btn.click(
1363
  add_selected_region,
1364
  [region_editor, workspace_base_size, workspace_base_image, selected_regions_state],
1365
- [selected_regions_state, selected_regions_gallery, selection_status],
1366
  )
1367
  clear_regions_btn.click(
1368
- clear_selected_regions,
1369
- outputs=[selected_regions_state, selected_regions_gallery, selection_status],
 
1370
  )
1371
 
1372
  def run(file_path, task, custom_prompt, page_num, enable_equation_zoom, detect_eq_lines, scope, region_value, base_size, base_image, selected_regions):
 
15
  import html as html_lib
16
  import markdown as md_lib
17
  import latex2mathml.converter
18
+ from collections import deque
19
 
20
  from io import StringIO, BytesIO
21
 
 
968
  y2 = max(y1 + 1, min(bh, y2))
969
  return (x1, y1, x2, y2)
970
 
971
def _component_boxes(binary_mask, min_pixels=24):
    """Find bounding boxes of 8-connected foreground components in a 2-D mask.

    Args:
        binary_mask: 2-D array; truthy cells are treated as foreground.
        min_pixels: Components with fewer pixels than this are discarded.

    Returns:
        A list of ``(x1, y1, x2, y2, pixel_count)`` tuples where ``x2`` and
        ``y2`` are exclusive. Components appear in the order their first
        pixel is reached when scanning the ``np.where`` result.
    """
    rows, cols = binary_mask.shape
    seen = np.zeros((rows, cols), dtype=bool)
    # 8-connectivity: diagonal neighbours belong to the same component.
    offsets = [
        (dy, dx)
        for dy in (-1, 0, 1)
        for dx in (-1, 0, 1)
        if (dy, dx) != (0, 0)
    ]
    found = []

    seed_rows, seed_cols = np.where(binary_mask)
    for seed in zip(seed_rows.tolist(), seed_cols.tolist()):
        if seen[seed]:
            # Already absorbed into a previously flooded component.
            continue
        seen[seed] = True
        pending = deque([seed])
        top = bottom = seed[0]
        left = right = seed[1]
        size = 0
        # Breadth-first flood fill, growing the bounding box as pixels are popped.
        while pending:
            cy, cx = pending.popleft()
            size += 1
            left, right = min(left, cx), max(right, cx)
            top, bottom = min(top, cy), max(bottom, cy)
            for dy, dx in offsets:
                ny, nx = cy + dy, cx + dx
                if not (0 <= ny < rows and 0 <= nx < cols):
                    continue
                if binary_mask[ny, nx] and not seen[ny, nx]:
                    seen[ny, nx] = True
                    pending.append((ny, nx))
        if size >= min_pixels:
            found.append((left, top, right + 1, bottom + 1, size))
    return found
1008
+
1009
+ def _extract_selected_regions(editor_value, base_size=None, base_image=None):
1010
  if editor_value is None:
1011
+ return []
1012
  if isinstance(editor_value, Image.Image):
1013
  if base_size and tuple(editor_value.size) == tuple(base_size):
1014
+ return []
1015
  bbox = _locate_patch_bbox(base_image, editor_value) if base_image is not None else None
1016
+ return [(editor_value, bbox)]
1017
  if not isinstance(editor_value, dict):
1018
+ return []
1019
 
1020
  background = _to_rgba_image(editor_value.get("background"))
1021
  composite = _to_rgba_image(editor_value.get("composite"))
 
1023
 
1024
  if background is None:
1025
  if composite is None:
1026
+ return []
1027
  background = composite
1028
 
1029
  if not isinstance(layers, list) or not layers:
1030
  # No annotation layers; treat as explicit crop only if size changed from base.
1031
  if base_size and tuple(background.size) == tuple(base_size):
1032
+ return []
1033
  patch = background.convert("RGB")
1034
  bbox = _locate_patch_bbox(base_image, patch) if base_image is not None else None
1035
+ return [(patch, bbox)]
1036
 
1037
  alpha_acc = np.zeros((background.height, background.width), dtype=np.uint8)
1038
  for layer in layers:
 
1045
  layer_alpha = np.asarray(layer_img, dtype=np.uint8)[:, :, 3]
1046
  alpha_acc = np.maximum(alpha_acc, layer_alpha)
1047
 
1048
+ components = _component_boxes(alpha_acc > 0, min_pixels=24)
1049
+ if not components:
1050
+ return []
1051
 
1052
+ regions = []
1053
+ for x1, y1, x2, y2, _ in components:
1054
+ pad_x = max(2, int((x2 - x1) * 0.02))
1055
+ pad_y = max(2, int((y2 - y1) * 0.02))
1056
+ px1 = max(0, x1 - pad_x)
1057
+ py1 = max(0, y1 - pad_y)
1058
+ px2 = min(background.width, x2 + pad_x)
1059
+ py2 = min(background.height, y2 + pad_y)
1060
+ if px2 <= px1 or py2 <= py1:
1061
+ continue
1062
+ crop = background.crop((px1, py1, px2, py2)).convert("RGB")
1063
+ regions.append((crop, (px1, py1, px2, py2)))
1064
+
1065
+ regions.sort(
1066
+ key=lambda item: (item[1][2] - item[1][0]) * (item[1][3] - item[1][1]),
1067
+ reverse=True,
1068
+ )
1069
+ return regions
1070
+
1071
def _extract_selected_region(editor_value, base_size=None, base_image=None):
    """Return a single ``(image, bbox)`` selection from the editor value.

    Delegates to ``_extract_selected_regions`` and returns the first entry
    of its result, or ``(None, None)`` when it yields nothing.
    """
    found = _extract_selected_regions(
        editor_value,
        base_size=base_size,
        base_image=base_image,
    )
    return found[0] if found else (None, None)
1076
 
1077
def _bbox_overlap_ratio(a, b):
    """Return how much of each box is covered by their intersection.

    Args:
        a: Box as ``(x1, y1, x2, y2)``.
        b: Box as ``(x1, y1, x2, y2)``.

    Returns:
        ``(intersection / area(a), intersection / area(b))``; ``(0.0, 0.0)``
        when the boxes do not overlap.
    """
    a_left, a_top, a_right, a_bottom = a
    b_left, b_top, b_right, b_bottom = b

    left = max(a_left, b_left)
    top = max(a_top, b_top)
    right = min(a_right, b_right)
    bottom = min(a_bottom, b_bottom)
    if right <= left or bottom <= top:
        return 0.0, 0.0

    shared = float((right - left) * (bottom - top))
    # Areas are floored at 1 so a degenerate box cannot cause a division by zero.
    size_a = float(max(1, (a_right - a_left) * (a_bottom - a_top)))
    size_b = float(max(1, (b_right - b_left) * (b_bottom - b_top)))
    return shared / size_a, shared / size_b
1090
+
1091
def _is_duplicate_bbox(candidate_bbox, existing_bbox):
    """Decide whether ``candidate_bbox`` repeats ``existing_bbox``.

    The candidate counts as a duplicate when any of these holds:
    the ``_box_iou`` score is at least 0.85 (presumably
    intersection-over-union; the helper is defined elsewhere), at least 92%
    of the candidate lies inside the existing box, or at least 97% of the
    existing box lies inside the candidate.
    """
    overlap_score = _box_iou(candidate_bbox, existing_bbox)
    candidate_covered, existing_covered = _bbox_overlap_ratio(
        candidate_bbox, existing_bbox
    )
    return (
        overlap_score >= 0.85
        or candidate_covered >= 0.92
        or existing_covered >= 0.97
    )
1095
+
1096
def _clear_editor_overlays(editor_value):
    """Return an editor value with all drawn layers removed.

    For a dict value, the ``"background"`` image (or ``"composite"`` when the
    background is unavailable) is converted to RGB and used as both the new
    background and composite, with an empty layer list. Any other value, or
    a dict with no usable image, is returned unchanged.
    """
    if not isinstance(editor_value, dict):
        return editor_value

    base = _to_rgba_image(editor_value.get("background"))
    if not base:
        base = _to_rgba_image(editor_value.get("composite"))
    if not isinstance(base, Image.Image):
        return editor_value

    flattened = base.convert("RGB")
    return {"background": flattened, "layers": [], "composite": flattened}
1103
 
1104
  def _draw_selected_region_boxes(image, boxes):
1105
  if image is None or not boxes:
 
1138
  return [], [], "No saved regions."
1139
 
1140
  def add_selected_region(editor_value, base_size, base_image, selected_regions):
1141
+ candidates = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
 
 
 
 
1142
  regions = list(selected_regions or [])
1143
+ if not candidates:
1144
+ msg = "No region detected. Use Crop or draw/highlight a region first."
1145
+ return regions, _region_gallery_items(regions), msg, editor_value
1146
+
1147
+ existing_boxes = [r.get("bbox") for r in regions if r.get("bbox") is not None]
1148
+ added = 0
1149
+ for region_img, bbox in candidates:
1150
+ if bbox is not None and any(_is_duplicate_bbox(bbox, eb) for eb in existing_boxes):
1151
+ continue
1152
+ regions.append({"image": region_img, "bbox": bbox})
1153
+ if bbox is not None:
1154
+ existing_boxes.append(bbox)
1155
+ added += 1
1156
+
1157
+ if added == 0:
1158
+ msg = "No new region added. Draw one region, click Add Region, then draw the next region."
1159
+ return regions, _region_gallery_items(regions), msg, editor_value
1160
+
1161
+ msg = f"Added {added} region(s). {len(regions)} total."
1162
+ return regions, _region_gallery_items(regions), msg, _clear_editor_overlays(editor_value)
1163
 
1164
  def clear_selected_regions():
1165
  return _reset_selected_regions()
1166
 
1167
def clear_regions_and_editor(editor_value):
    """Reset the saved regions and strip drawn overlays from the editor.

    Returns:
        ``(regions, gallery_items, status_message, editor_value)`` where the
        first three come from ``_reset_selected_regions`` and the last is
        the result of ``_clear_editor_overlays(editor_value)``.
    """
    reset_state = _reset_selected_regions()
    cleaned_editor = _clear_editor_overlays(editor_value)
    return (*reset_state, cleaned_editor)
1170
+
1171
  def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
1172
  text_display = re.sub(
1173
  r'\\\[(.+?)\\\]',
 
1409
  - Optional rectangle selection: use the **Crop** tool.
1410
  - Freehand/highlight ink is semi-transparent so underlying content stays visible.
1411
  - Optional multi-select: click **Add Region** after each selection.
1412
+ - After **Add Region**, drawing marks are cleared automatically so the next region stays separate.
1413
  Then click **Extract**.
1414
  4. Use **Clear Regions** to reset multi-select state.
1415
  5. Review **Cropped Images** and **Boxes**: both are labeled `Region 1`, `Region 2`, etc.
 
1453
  add_region_btn.click(
1454
  add_selected_region,
1455
  [region_editor, workspace_base_size, workspace_base_image, selected_regions_state],
1456
+ [selected_regions_state, selected_regions_gallery, selection_status, region_editor],
1457
  )
1458
  clear_regions_btn.click(
1459
+ clear_regions_and_editor,
1460
+ inputs=[region_editor],
1461
+ outputs=[selected_regions_state, selected_regions_gallery, selection_status, region_editor],
1462
  )
1463
 
1464
  def run(file_path, task, custom_prompt, page_num, enable_equation_zoom, detect_eq_lines, scope, region_value, base_size, base_image, selected_regions):