Spaces:
Running on Zero
Running on Zero
Keep region selections separate and clear overlays after add
Browse files
app.py
CHANGED
|
@@ -15,6 +15,7 @@ import base64
|
|
| 15 |
import html as html_lib
|
| 16 |
import markdown as md_lib
|
| 17 |
import latex2mathml.converter
|
|
|
|
| 18 |
|
| 19 |
from io import StringIO, BytesIO
|
| 20 |
|
|
@@ -967,22 +968,54 @@ def _locate_patch_bbox(base_image: Image.Image, patch_image: Image.Image):
|
|
| 967 |
y2 = max(y1 + 1, min(bh, y2))
|
| 968 |
return (x1, y1, x2, y2)
|
| 969 |
|
| 970 |
-
def
|
| 971 |
-
|
|
|
|
|
|
|
|
|
|
| 972 |
|
| 973 |
-
|
| 974 |
-
|
| 975 |
-
|
| 976 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 977 |
if editor_value is None:
|
| 978 |
-
return
|
| 979 |
if isinstance(editor_value, Image.Image):
|
| 980 |
if base_size and tuple(editor_value.size) == tuple(base_size):
|
| 981 |
-
return
|
| 982 |
bbox = _locate_patch_bbox(base_image, editor_value) if base_image is not None else None
|
| 983 |
-
return editor_value, bbox
|
| 984 |
if not isinstance(editor_value, dict):
|
| 985 |
-
return
|
| 986 |
|
| 987 |
background = _to_rgba_image(editor_value.get("background"))
|
| 988 |
composite = _to_rgba_image(editor_value.get("composite"))
|
|
@@ -990,16 +1023,16 @@ def _extract_selected_region(editor_value, base_size=None, base_image=None):
|
|
| 990 |
|
| 991 |
if background is None:
|
| 992 |
if composite is None:
|
| 993 |
-
return
|
| 994 |
background = composite
|
| 995 |
|
| 996 |
if not isinstance(layers, list) or not layers:
|
| 997 |
# No annotation layers; treat as explicit crop only if size changed from base.
|
| 998 |
if base_size and tuple(background.size) == tuple(base_size):
|
| 999 |
-
return
|
| 1000 |
patch = background.convert("RGB")
|
| 1001 |
bbox = _locate_patch_bbox(base_image, patch) if base_image is not None else None
|
| 1002 |
-
return patch, bbox
|
| 1003 |
|
| 1004 |
alpha_acc = np.zeros((background.height, background.width), dtype=np.uint8)
|
| 1005 |
for layer in layers:
|
|
@@ -1012,22 +1045,61 @@ def _extract_selected_region(editor_value, base_size=None, base_image=None):
|
|
| 1012 |
layer_alpha = np.asarray(layer_img, dtype=np.uint8)[:, :, 3]
|
| 1013 |
alpha_acc = np.maximum(alpha_acc, layer_alpha)
|
| 1014 |
|
| 1015 |
-
|
| 1016 |
-
if
|
| 1017 |
-
return
|
| 1018 |
|
| 1019 |
-
|
| 1020 |
-
|
| 1021 |
-
|
| 1022 |
-
|
| 1023 |
-
|
| 1024 |
-
|
| 1025 |
-
|
| 1026 |
-
|
| 1027 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1028 |
return None, None
|
|
|
|
| 1029 |
|
| 1030 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1031 |
|
| 1032 |
def _draw_selected_region_boxes(image, boxes):
|
| 1033 |
if image is None or not boxes:
|
|
@@ -1066,18 +1138,36 @@ def _reset_selected_regions():
|
|
| 1066 |
return [], [], "No saved regions."
|
| 1067 |
|
| 1068 |
def add_selected_region(editor_value, base_size, base_image, selected_regions):
|
| 1069 |
-
|
| 1070 |
-
if region_img is None:
|
| 1071 |
-
msg = "No region detected. Use Crop or draw/highlight a region first."
|
| 1072 |
-
regions = selected_regions or []
|
| 1073 |
-
return regions, _region_gallery_items(regions), msg
|
| 1074 |
regions = list(selected_regions or [])
|
| 1075 |
-
|
| 1076 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1077 |
|
| 1078 |
def clear_selected_regions():
|
| 1079 |
return _reset_selected_regions()
|
| 1080 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1081 |
def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
|
| 1082 |
text_display = re.sub(
|
| 1083 |
r'\\\[(.+?)\\\]',
|
|
@@ -1319,6 +1409,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1319 |
- Optional rectangle selection: use the **Crop** tool.
|
| 1320 |
- Freehand/highlight ink is semi-transparent so underlying content stays visible.
|
| 1321 |
- Optional multi-select: click **Add Region** after each selection.
|
|
|
|
| 1322 |
Then click **Extract**.
|
| 1323 |
4. Use **Clear Regions** to reset multi-select state.
|
| 1324 |
5. Review **Cropped Images** and **Boxes**: both are labeled `Region 1`, `Region 2`, etc.
|
|
@@ -1362,11 +1453,12 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1362 |
add_region_btn.click(
|
| 1363 |
add_selected_region,
|
| 1364 |
[region_editor, workspace_base_size, workspace_base_image, selected_regions_state],
|
| 1365 |
-
[selected_regions_state, selected_regions_gallery, selection_status],
|
| 1366 |
)
|
| 1367 |
clear_regions_btn.click(
|
| 1368 |
-
|
| 1369 |
-
|
|
|
|
| 1370 |
)
|
| 1371 |
|
| 1372 |
def run(file_path, task, custom_prompt, page_num, enable_equation_zoom, detect_eq_lines, scope, region_value, base_size, base_image, selected_regions):
|
|
|
|
| 15 |
import html as html_lib
|
| 16 |
import markdown as md_lib
|
| 17 |
import latex2mathml.converter
|
| 18 |
+
from collections import deque
|
| 19 |
|
| 20 |
from io import StringIO, BytesIO
|
| 21 |
|
|
|
|
| 968 |
y2 = max(y1 + 1, min(bh, y2))
|
| 969 |
return (x1, y1, x2, y2)
|
| 970 |
|
| 971 |
+
def _component_boxes(binary_mask, min_pixels=24):
|
| 972 |
+
h, w = binary_mask.shape
|
| 973 |
+
visited = np.zeros((h, w), dtype=bool)
|
| 974 |
+
boxes = []
|
| 975 |
+
neighbors = [(-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1), (1, 0), (1, 1)]
|
| 976 |
|
| 977 |
+
ys, xs = np.where(binary_mask)
|
| 978 |
+
for sy, sx in zip(ys.tolist(), xs.tolist()):
|
| 979 |
+
if visited[sy, sx]:
|
| 980 |
+
continue
|
| 981 |
+
q = deque([(sy, sx)])
|
| 982 |
+
visited[sy, sx] = True
|
| 983 |
+
min_x = max_x = sx
|
| 984 |
+
min_y = max_y = sy
|
| 985 |
+
count = 0
|
| 986 |
+
while q:
|
| 987 |
+
y, x = q.popleft()
|
| 988 |
+
count += 1
|
| 989 |
+
if x < min_x:
|
| 990 |
+
min_x = x
|
| 991 |
+
if x > max_x:
|
| 992 |
+
max_x = x
|
| 993 |
+
if y < min_y:
|
| 994 |
+
min_y = y
|
| 995 |
+
if y > max_y:
|
| 996 |
+
max_y = y
|
| 997 |
+
for dy, dx in neighbors:
|
| 998 |
+
ny, nx = y + dy, x + dx
|
| 999 |
+
if ny < 0 or ny >= h or nx < 0 or nx >= w:
|
| 1000 |
+
continue
|
| 1001 |
+
if visited[ny, nx] or not binary_mask[ny, nx]:
|
| 1002 |
+
continue
|
| 1003 |
+
visited[ny, nx] = True
|
| 1004 |
+
q.append((ny, nx))
|
| 1005 |
+
if count >= min_pixels:
|
| 1006 |
+
boxes.append((min_x, min_y, max_x + 1, max_y + 1, count))
|
| 1007 |
+
return boxes
|
| 1008 |
+
|
| 1009 |
+
def _extract_selected_regions(editor_value, base_size=None, base_image=None):
|
| 1010 |
if editor_value is None:
|
| 1011 |
+
return []
|
| 1012 |
if isinstance(editor_value, Image.Image):
|
| 1013 |
if base_size and tuple(editor_value.size) == tuple(base_size):
|
| 1014 |
+
return []
|
| 1015 |
bbox = _locate_patch_bbox(base_image, editor_value) if base_image is not None else None
|
| 1016 |
+
return [(editor_value, bbox)]
|
| 1017 |
if not isinstance(editor_value, dict):
|
| 1018 |
+
return []
|
| 1019 |
|
| 1020 |
background = _to_rgba_image(editor_value.get("background"))
|
| 1021 |
composite = _to_rgba_image(editor_value.get("composite"))
|
|
|
|
| 1023 |
|
| 1024 |
if background is None:
|
| 1025 |
if composite is None:
|
| 1026 |
+
return []
|
| 1027 |
background = composite
|
| 1028 |
|
| 1029 |
if not isinstance(layers, list) or not layers:
|
| 1030 |
# No annotation layers; treat as explicit crop only if size changed from base.
|
| 1031 |
if base_size and tuple(background.size) == tuple(base_size):
|
| 1032 |
+
return []
|
| 1033 |
patch = background.convert("RGB")
|
| 1034 |
bbox = _locate_patch_bbox(base_image, patch) if base_image is not None else None
|
| 1035 |
+
return [(patch, bbox)]
|
| 1036 |
|
| 1037 |
alpha_acc = np.zeros((background.height, background.width), dtype=np.uint8)
|
| 1038 |
for layer in layers:
|
|
|
|
| 1045 |
layer_alpha = np.asarray(layer_img, dtype=np.uint8)[:, :, 3]
|
| 1046 |
alpha_acc = np.maximum(alpha_acc, layer_alpha)
|
| 1047 |
|
| 1048 |
+
components = _component_boxes(alpha_acc > 0, min_pixels=24)
|
| 1049 |
+
if not components:
|
| 1050 |
+
return []
|
| 1051 |
|
| 1052 |
+
regions = []
|
| 1053 |
+
for x1, y1, x2, y2, _ in components:
|
| 1054 |
+
pad_x = max(2, int((x2 - x1) * 0.02))
|
| 1055 |
+
pad_y = max(2, int((y2 - y1) * 0.02))
|
| 1056 |
+
px1 = max(0, x1 - pad_x)
|
| 1057 |
+
py1 = max(0, y1 - pad_y)
|
| 1058 |
+
px2 = min(background.width, x2 + pad_x)
|
| 1059 |
+
py2 = min(background.height, y2 + pad_y)
|
| 1060 |
+
if px2 <= px1 or py2 <= py1:
|
| 1061 |
+
continue
|
| 1062 |
+
crop = background.crop((px1, py1, px2, py2)).convert("RGB")
|
| 1063 |
+
regions.append((crop, (px1, py1, px2, py2)))
|
| 1064 |
+
|
| 1065 |
+
regions.sort(
|
| 1066 |
+
key=lambda item: (item[1][2] - item[1][0]) * (item[1][3] - item[1][1]),
|
| 1067 |
+
reverse=True,
|
| 1068 |
+
)
|
| 1069 |
+
return regions
|
| 1070 |
+
|
| 1071 |
+
def _extract_selected_region(editor_value, base_size=None, base_image=None):
    """Single-region wrapper around ``_extract_selected_regions``.

    Returns the first entry of the list produced by
    ``_extract_selected_regions``, or ``(None, None)`` when that list is
    empty.
    """
    found = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
    return found[0] if found else (None, None)
|
| 1076 |
|
| 1077 |
+
def _bbox_overlap_ratio(a, b):
|
| 1078 |
+
ax1, ay1, ax2, ay2 = a
|
| 1079 |
+
bx1, by1, bx2, by2 = b
|
| 1080 |
+
ix1 = max(ax1, bx1)
|
| 1081 |
+
iy1 = max(ay1, by1)
|
| 1082 |
+
ix2 = min(ax2, bx2)
|
| 1083 |
+
iy2 = min(ay2, by2)
|
| 1084 |
+
if ix2 <= ix1 or iy2 <= iy1:
|
| 1085 |
+
return 0.0, 0.0
|
| 1086 |
+
inter = float((ix2 - ix1) * (iy2 - iy1))
|
| 1087 |
+
area_a = float(max(1, (ax2 - ax1) * (ay2 - ay1)))
|
| 1088 |
+
area_b = float(max(1, (bx2 - bx1) * (by2 - by1)))
|
| 1089 |
+
return inter / area_a, inter / area_b
|
| 1090 |
+
|
| 1091 |
+
def _is_duplicate_bbox(candidate_bbox, existing_bbox):
    """Decide whether ``candidate_bbox`` duplicates ``existing_bbox``.

    A candidate counts as a duplicate when any of these holds:
    the IoU from ``_box_iou`` is at least 0.85, the intersection covers at
    least 92% of the candidate, or the intersection covers at least 97% of
    the existing box.
    """
    iou = _box_iou(candidate_bbox, existing_bbox)
    candidate_cover, existing_cover = _bbox_overlap_ratio(candidate_bbox, existing_bbox)

    if iou >= 0.85:
        return True
    if candidate_cover >= 0.92:
        return True
    return existing_cover >= 0.97
|
| 1095 |
+
|
| 1096 |
+
def _clear_editor_overlays(editor_value):
    """Return an editor value with its layers removed.

    For a dict value, the ``"background"`` image (or ``"composite"`` as a
    fallback) is converted to RGB and reused as both background and
    composite, with an empty ``"layers"`` list. Any other value, or a dict
    from which no image can be obtained, is returned unchanged.
    """
    if not isinstance(editor_value, dict):
        return editor_value

    base = _to_rgba_image(editor_value.get("background")) or _to_rgba_image(editor_value.get("composite"))
    if not isinstance(base, Image.Image):
        return editor_value

    flattened = base.convert("RGB")
    return {"background": flattened, "layers": [], "composite": flattened}
|
| 1103 |
|
| 1104 |
def _draw_selected_region_boxes(image, boxes):
|
| 1105 |
if image is None or not boxes:
|
|
|
|
| 1138 |
return [], [], "No saved regions."
|
| 1139 |
|
| 1140 |
def add_selected_region(editor_value, base_size, base_image, selected_regions):
    """Append newly detected regions to the saved list.

    Regions come from ``_extract_selected_regions``. A candidate with a
    bounding box is skipped if ``_is_duplicate_bbox`` matches it against any
    box already saved or added earlier in this call; candidates without a
    box are always added.

    Returns ``(regions, gallery_items, status_message, editor_value)``. The
    editor value is passed through ``_clear_editor_overlays`` only when at
    least one region was added; otherwise it is returned unchanged.
    """
    detected = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
    # Copy so the caller's list is not mutated in place.
    saved = list(selected_regions or [])

    if not detected:
        return (
            saved,
            _region_gallery_items(saved),
            "No region detected. Use Crop or draw/highlight a region first.",
            editor_value,
        )

    known_boxes = [entry.get("bbox") for entry in saved if entry.get("bbox") is not None]
    count_before = len(saved)

    for patch, box in detected:
        has_box = box is not None
        if has_box and any(_is_duplicate_bbox(box, known) for known in known_boxes):
            continue
        saved.append({"image": patch, "bbox": box})
        if has_box:
            # Later candidates in this batch are compared against this one too.
            known_boxes.append(box)

    added = len(saved) - count_before
    gallery = _region_gallery_items(saved)

    if added == 0:
        return (
            saved,
            gallery,
            "No new region added. Draw one region, click Add Region, then draw the next region.",
            editor_value,
        )

    return (
        saved,
        gallery,
        f"Added {added} region(s). {len(saved)} total.",
        _clear_editor_overlays(editor_value),
    )
|
| 1163 |
|
| 1164 |
def clear_selected_regions():
    """Reset the saved-region state by delegating to ``_reset_selected_regions``."""
    cleared_state = _reset_selected_regions()
    return cleared_state
|
| 1166 |
|
| 1167 |
+
def clear_regions_and_editor(editor_value):
    """Reset saved regions and strip overlays from the editor value.

    Returns the three values from ``_reset_selected_regions`` followed by
    the result of ``_clear_editor_overlays(editor_value)``.
    """
    empty_regions, empty_gallery, status = _reset_selected_regions()
    cleaned_editor = _clear_editor_overlays(editor_value)
    return empty_regions, empty_gallery, status, cleaned_editor
|
| 1170 |
+
|
| 1171 |
def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
|
| 1172 |
text_display = re.sub(
|
| 1173 |
r'\\\[(.+?)\\\]',
|
|
|
|
| 1409 |
- Optional rectangle selection: use the **Crop** tool.
|
| 1410 |
- Freehand/highlight ink is semi-transparent so underlying content stays visible.
|
| 1411 |
- Optional multi-select: click **Add Region** after each selection.
|
| 1412 |
+
- After **Add Region**, drawing marks are cleared automatically so the next region stays separate.
|
| 1413 |
Then click **Extract**.
|
| 1414 |
4. Use **Clear Regions** to reset multi-select state.
|
| 1415 |
5. Review **Cropped Images** and **Boxes**: both are labeled `Region 1`, `Region 2`, etc.
|
|
|
|
| 1453 |
add_region_btn.click(
|
| 1454 |
add_selected_region,
|
| 1455 |
[region_editor, workspace_base_size, workspace_base_image, selected_regions_state],
|
| 1456 |
+
[selected_regions_state, selected_regions_gallery, selection_status, region_editor],
|
| 1457 |
)
|
| 1458 |
clear_regions_btn.click(
|
| 1459 |
+
clear_regions_and_editor,
|
| 1460 |
+
inputs=[region_editor],
|
| 1461 |
+
outputs=[selected_regions_state, selected_regions_gallery, selection_status, region_editor],
|
| 1462 |
)
|
| 1463 |
|
| 1464 |
def run(file_path, task, custom_prompt, page_num, enable_equation_zoom, detect_eq_lines, scope, region_value, base_size, base_image, selected_regions):
|