Spaces:
Running on Zero
Running on Zero
Preserve workspace zoom by delta-based region snapshots
Browse files
app.py
CHANGED
|
@@ -1006,6 +1006,55 @@ def _component_boxes(binary_mask, min_pixels=24):
|
|
| 1006 |
boxes.append((min_x, min_y, max_x + 1, max_y + 1, count))
|
| 1007 |
return boxes
|
| 1008 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1009 |
def _extract_selected_regions(editor_value, base_size=None, base_image=None):
|
| 1010 |
if editor_value is None:
|
| 1011 |
return []
|
|
@@ -1017,14 +1066,10 @@ def _extract_selected_regions(editor_value, base_size=None, base_image=None):
|
|
| 1017 |
if not isinstance(editor_value, dict):
|
| 1018 |
return []
|
| 1019 |
|
| 1020 |
-
background =
|
| 1021 |
-
composite = _to_rgba_image(editor_value.get("composite"))
|
| 1022 |
layers = editor_value.get("layers") or []
|
| 1023 |
-
|
| 1024 |
if background is None:
|
| 1025 |
-
|
| 1026 |
-
return []
|
| 1027 |
-
background = composite
|
| 1028 |
|
| 1029 |
if not isinstance(layers, list) or not layers:
|
| 1030 |
# No annotation layers; treat as explicit crop only if size changed from base.
|
|
@@ -1034,39 +1079,34 @@ def _extract_selected_regions(editor_value, base_size=None, base_image=None):
|
|
| 1034 |
bbox = _locate_patch_bbox(base_image, patch) if base_image is not None else None
|
| 1035 |
return [(patch, bbox)]
|
| 1036 |
|
| 1037 |
-
|
| 1038 |
-
for layer in layers:
|
| 1039 |
-
layer_img = _to_rgba_image(layer)
|
| 1040 |
-
if layer_img is None:
|
| 1041 |
-
continue
|
| 1042 |
-
if layer_img.size != background.size:
|
| 1043 |
-
nearest = Image.Resampling.NEAREST if hasattr(Image, "Resampling") else Image.NEAREST
|
| 1044 |
-
layer_img = layer_img.resize(background.size, nearest)
|
| 1045 |
-
layer_alpha = np.asarray(layer_img, dtype=np.uint8)[:, :, 3]
|
| 1046 |
-
alpha_acc = np.maximum(alpha_acc, layer_alpha)
|
| 1047 |
-
|
| 1048 |
-
components = _component_boxes(alpha_acc > 0, min_pixels=24)
|
| 1049 |
-
if not components:
|
| 1050 |
return []
|
|
|
|
| 1051 |
|
| 1052 |
-
|
| 1053 |
-
|
| 1054 |
-
|
| 1055 |
-
|
| 1056 |
-
|
| 1057 |
-
|
| 1058 |
-
|
| 1059 |
-
py2 = min(background.height, y2 + pad_y)
|
| 1060 |
-
if px2 <= px1 or py2 <= py1:
|
| 1061 |
-
continue
|
| 1062 |
-
crop = background.crop((px1, py1, px2, py2)).convert("RGB")
|
| 1063 |
-
regions.append((crop, (px1, py1, px2, py2)))
|
| 1064 |
|
| 1065 |
-
|
| 1066 |
-
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1070 |
|
| 1071 |
def _extract_selected_region(editor_value, base_size=None, base_image=None):
|
| 1072 |
regions = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
|
|
@@ -1093,14 +1133,6 @@ def _is_duplicate_bbox(candidate_bbox, existing_bbox):
|
|
| 1093 |
cover_cand, cover_exist = _bbox_overlap_ratio(candidate_bbox, existing_bbox)
|
| 1094 |
return iou >= 0.85 or cover_cand >= 0.92 or cover_exist >= 0.97
|
| 1095 |
|
| 1096 |
-
def _clear_editor_overlays(editor_value):
|
| 1097 |
-
if isinstance(editor_value, dict):
|
| 1098 |
-
bg = _to_rgba_image(editor_value.get("background")) or _to_rgba_image(editor_value.get("composite"))
|
| 1099 |
-
if isinstance(bg, Image.Image):
|
| 1100 |
-
clean_bg = bg.convert("RGB")
|
| 1101 |
-
return {"background": clean_bg, "layers": [], "composite": clean_bg}
|
| 1102 |
-
return editor_value
|
| 1103 |
-
|
| 1104 |
def _draw_selected_region_boxes(image, boxes):
|
| 1105 |
if image is None or not boxes:
|
| 1106 |
return None
|
|
@@ -1137,12 +1169,20 @@ def _label_gallery_items(items, prefix=None):
|
|
| 1137 |
def _reset_selected_regions():
|
| 1138 |
return [], [], "No saved regions."
|
| 1139 |
|
| 1140 |
-
def
|
| 1141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1142 |
regions = list(selected_regions or [])
|
| 1143 |
if not candidates:
|
| 1144 |
msg = "No region detected. Use Crop or draw/highlight a region first."
|
| 1145 |
-
return regions, _region_gallery_items(regions), msg,
|
| 1146 |
|
| 1147 |
existing_boxes = [r.get("bbox") for r in regions if r.get("bbox") is not None]
|
| 1148 |
added = 0
|
|
@@ -1156,17 +1196,18 @@ def add_selected_region(editor_value, base_size, base_image, selected_regions):
|
|
| 1156 |
|
| 1157 |
if added == 0:
|
| 1158 |
msg = "No new region added. Draw one region, click Add Region, then draw the next region."
|
| 1159 |
-
return regions, _region_gallery_items(regions), msg,
|
| 1160 |
|
| 1161 |
-
msg = f"Added {added} region(s). {len(regions)} total."
|
| 1162 |
-
return regions, _region_gallery_items(regions), msg,
|
| 1163 |
|
| 1164 |
def clear_selected_regions():
|
| 1165 |
return _reset_selected_regions()
|
| 1166 |
|
| 1167 |
-
def
|
| 1168 |
regions, gallery_items, msg = _reset_selected_regions()
|
| 1169 |
-
|
|
|
|
| 1170 |
|
| 1171 |
def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
|
| 1172 |
text_display = re.sub(
|
|
@@ -1287,6 +1328,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1287 |
workspace_base_size = gr.State(None)
|
| 1288 |
workspace_base_image = gr.State(None)
|
| 1289 |
selected_regions_state = gr.State([])
|
|
|
|
| 1290 |
|
| 1291 |
with gr.Row():
|
| 1292 |
with gr.Column(scale=3):
|
|
@@ -1409,7 +1451,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1409 |
- Optional rectangle selection: use the **Crop** tool.
|
| 1410 |
- Freehand/highlight ink is semi-transparent so underlying content stays visible.
|
| 1411 |
- Optional multi-select: click **Add Region** after each selection.
|
| 1412 |
-
-
|
| 1413 |
Then click **Extract**.
|
| 1414 |
4. Use **Clear Regions** to reset multi-select state.
|
| 1415 |
5. Review **Cropped Images** and **Boxes**: both are labeled `Region 1`, `Region 2`, etc.
|
|
@@ -1449,16 +1491,18 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1449 |
region_editor.change(sync_workspace_state, [region_editor, workspace_base_image], [workspace_base_size, workspace_base_image])
|
| 1450 |
file_in.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
|
| 1451 |
page_selector.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
|
|
|
|
|
|
|
| 1452 |
|
| 1453 |
add_region_btn.click(
|
| 1454 |
add_selected_region,
|
| 1455 |
-
[region_editor, workspace_base_size, workspace_base_image, selected_regions_state],
|
| 1456 |
-
[selected_regions_state, selected_regions_gallery, selection_status,
|
| 1457 |
)
|
| 1458 |
clear_regions_btn.click(
|
| 1459 |
-
|
| 1460 |
inputs=[region_editor],
|
| 1461 |
-
outputs=[selected_regions_state, selected_regions_gallery, selection_status,
|
| 1462 |
)
|
| 1463 |
|
| 1464 |
def run(file_path, task, custom_prompt, page_num, enable_equation_zoom, detect_eq_lines, scope, region_value, base_size, base_image, selected_regions):
|
|
|
|
| 1006 |
boxes.append((min_x, min_y, max_x + 1, max_y + 1, count))
|
| 1007 |
return boxes
|
| 1008 |
|
| 1009 |
+
def _extract_regions_from_mask(background, mask):
    """Crop one padded RGB patch per connected component of ``mask``.

    Args:
        background: Image the crops are taken from. It must provide
            ``width``, ``height`` and ``crop`` (callers pass the editor's
            background image).
        mask: Binary mask forwarded unchanged to ``_component_boxes``;
            components smaller than 24 pixels are ignored there.

    Returns:
        A list of ``(crop, (x1, y1, x2, y2))`` tuples, largest padded box
        area first. The list is empty when no component is found.
    """
    components = _component_boxes(mask, min_pixels=24)
    if not components:
        return []

    regions = []
    # The fifth field of each component (its pixel count) is not needed here.
    for x1, y1, x2, y2, _ in components:
        # Pad by 2% of the box extent on each axis, with a 2 px minimum.
        pad_x = max(2, int((x2 - x1) * 0.02))
        pad_y = max(2, int((y2 - y1) * 0.02))
        # Clamp the padded box to the image bounds.
        px1 = max(0, x1 - pad_x)
        py1 = max(0, y1 - pad_y)
        px2 = min(background.width, x2 + pad_x)
        py2 = min(background.height, y2 + pad_y)
        if px2 <= px1 or py2 <= py1:
            # Degenerate box after clamping; nothing to crop.
            continue
        crop = background.crop((px1, py1, px2, py2)).convert("RGB")
        regions.append((crop, (px1, py1, px2, py2)))

    # Largest area first; the stable sort keeps discovery order for ties.
    regions.sort(
        key=lambda item: (item[1][2] - item[1][0]) * (item[1][3] - item[1][1]),
        reverse=True,
    )
    return regions
|
| 1032 |
+
|
| 1033 |
+
def _editor_background_and_mask(editor_value):
|
| 1034 |
+
if not isinstance(editor_value, dict):
|
| 1035 |
+
return None, None
|
| 1036 |
+
background = _to_rgba_image(editor_value.get("background"))
|
| 1037 |
+
composite = _to_rgba_image(editor_value.get("composite"))
|
| 1038 |
+
layers = editor_value.get("layers") or []
|
| 1039 |
+
if background is None:
|
| 1040 |
+
if composite is None:
|
| 1041 |
+
return None, None
|
| 1042 |
+
background = composite
|
| 1043 |
+
if not isinstance(layers, list) or not layers:
|
| 1044 |
+
return background, None
|
| 1045 |
+
|
| 1046 |
+
alpha_acc = np.zeros((background.height, background.width), dtype=np.uint8)
|
| 1047 |
+
for layer in layers:
|
| 1048 |
+
layer_img = _to_rgba_image(layer)
|
| 1049 |
+
if layer_img is None:
|
| 1050 |
+
continue
|
| 1051 |
+
if layer_img.size != background.size:
|
| 1052 |
+
nearest = Image.Resampling.NEAREST if hasattr(Image, "Resampling") else Image.NEAREST
|
| 1053 |
+
layer_img = layer_img.resize(background.size, nearest)
|
| 1054 |
+
layer_alpha = np.asarray(layer_img, dtype=np.uint8)[:, :, 3]
|
| 1055 |
+
alpha_acc = np.maximum(alpha_acc, layer_alpha)
|
| 1056 |
+
return background, (alpha_acc > 0)
|
| 1057 |
+
|
| 1058 |
def _extract_selected_regions(editor_value, base_size=None, base_image=None):
|
| 1059 |
if editor_value is None:
|
| 1060 |
return []
|
|
|
|
| 1066 |
if not isinstance(editor_value, dict):
|
| 1067 |
return []
|
| 1068 |
|
| 1069 |
+
background, mask = _editor_background_and_mask(editor_value)
|
|
|
|
| 1070 |
layers = editor_value.get("layers") or []
|
|
|
|
| 1071 |
if background is None:
|
| 1072 |
+
return []
|
|
|
|
|
|
|
| 1073 |
|
| 1074 |
if not isinstance(layers, list) or not layers:
|
| 1075 |
# No annotation layers; treat as explicit crop only if size changed from base.
|
|
|
|
| 1079 |
bbox = _locate_patch_bbox(base_image, patch) if base_image is not None else None
|
| 1080 |
return [(patch, bbox)]
|
| 1081 |
|
| 1082 |
+
if mask is None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1083 |
return []
|
| 1084 |
+
return _extract_regions_from_mask(background, mask)
|
| 1085 |
|
| 1086 |
+
def _extract_new_drawn_regions(editor_value, base_size=None, base_image=None, consumed_mask=None):
    """Extract only the regions drawn since ``consumed_mask`` was captured.

    Args:
        editor_value: Editor payload; either an image (explicit crop) or a
            dict as read by ``_editor_background_and_mask``.
        base_size: Forwarded to ``_extract_selected_regions`` on the crop
            fallback paths.
        base_image: Forwarded to ``_extract_selected_regions`` on the crop
            fallback paths.
        consumed_mask: Mask of drawn pixels already turned into regions by a
            previous call, or ``None`` if nothing has been consumed yet.

    Returns:
        A ``(regions, mask)`` pair. ``regions`` is a list of
        ``(crop, bbox)`` tuples. ``mask`` is the value the caller should
        store as the next ``consumed_mask``: the full current drawing mask
        when layers were processed, otherwise ``consumed_mask`` unchanged.
    """
    # For crop mode / explicit cropped image, fall back to classic extraction.
    if isinstance(editor_value, Image.Image):
        regions = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
        return regions, consumed_mask
    if not isinstance(editor_value, dict):
        return [], consumed_mask

    background, mask = _editor_background_and_mask(editor_value)
    if background is None:
        return [], consumed_mask

    # If there are no drawn layers, treat as explicit crop mode. With a
    # usable background the helper returns mask=None exactly when "layers"
    # is missing, empty, or not a list, so one check covers all cases.
    if mask is None:
        regions = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
        return regions, consumed_mask

    # A missing, non-array, or differently shaped snapshot cannot be
    # subtracted, so treat every drawn pixel as new in that case.
    if not isinstance(consumed_mask, np.ndarray) or consumed_mask.shape != mask.shape:
        delta_mask = mask
    else:
        delta_mask = np.logical_and(mask, np.logical_not(consumed_mask))

    regions = _extract_regions_from_mask(background, delta_mask)
    # Hand back the full mask so the next call only sees later strokes.
    return regions, mask
|
| 1110 |
|
| 1111 |
def _extract_selected_region(editor_value, base_size=None, base_image=None):
|
| 1112 |
regions = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
|
|
|
|
| 1133 |
cover_cand, cover_exist = _bbox_overlap_ratio(candidate_bbox, existing_bbox)
|
| 1134 |
return iou >= 0.85 or cover_cand >= 0.92 or cover_exist >= 0.97
|
| 1135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1136 |
def _draw_selected_region_boxes(image, boxes):
|
| 1137 |
if image is None or not boxes:
|
| 1138 |
return None
|
|
|
|
| 1169 |
def _reset_selected_regions():
|
| 1170 |
return [], [], "No saved regions."
|
| 1171 |
|
| 1172 |
+
def _reset_drawn_mask():
|
| 1173 |
+
return None
|
| 1174 |
+
|
| 1175 |
+
def add_selected_region(editor_value, base_size, base_image, selected_regions, consumed_mask):
|
| 1176 |
+
candidates, updated_mask = _extract_new_drawn_regions(
|
| 1177 |
+
editor_value,
|
| 1178 |
+
base_size=base_size,
|
| 1179 |
+
base_image=base_image,
|
| 1180 |
+
consumed_mask=consumed_mask,
|
| 1181 |
+
)
|
| 1182 |
regions = list(selected_regions or [])
|
| 1183 |
if not candidates:
|
| 1184 |
msg = "No region detected. Use Crop or draw/highlight a region first."
|
| 1185 |
+
return regions, _region_gallery_items(regions), msg, updated_mask
|
| 1186 |
|
| 1187 |
existing_boxes = [r.get("bbox") for r in regions if r.get("bbox") is not None]
|
| 1188 |
added = 0
|
|
|
|
| 1196 |
|
| 1197 |
if added == 0:
|
| 1198 |
msg = "No new region added. Draw one region, click Add Region, then draw the next region."
|
| 1199 |
+
return regions, _region_gallery_items(regions), msg, updated_mask
|
| 1200 |
|
| 1201 |
+
msg = f"Added {added} region(s). {len(regions)} total. Zoom/pan is preserved."
|
| 1202 |
+
return regions, _region_gallery_items(regions), msg, updated_mask
|
| 1203 |
|
| 1204 |
def clear_selected_regions():
    """Clear all saved regions.

    Returns:
        Whatever ``_reset_selected_regions`` returns.
    """
    return _reset_selected_regions()
|
| 1206 |
|
| 1207 |
+
def clear_regions_preserve_view(editor_value):
    """Clear saved regions without modifying the editor value.

    Args:
        editor_value: Current editor payload, only read to compute the mask
            of pixels that are already drawn.

    Returns:
        A ``(regions, gallery_items, status, mask)`` tuple: the three reset
        values from ``_reset_selected_regions`` followed by the current
        drawing mask (``None`` when the payload has no usable layers).
    """
    regions, gallery_items, msg = _reset_selected_regions()
    # Only the mask half of the helper's result is needed here.
    _, mask = _editor_background_and_mask(editor_value)
    return regions, gallery_items, msg, mask
|
| 1211 |
|
| 1212 |
def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
|
| 1213 |
text_display = re.sub(
|
|
|
|
| 1328 |
workspace_base_size = gr.State(None)
|
| 1329 |
workspace_base_image = gr.State(None)
|
| 1330 |
selected_regions_state = gr.State([])
|
| 1331 |
+
drawn_mask_state = gr.State(None)
|
| 1332 |
|
| 1333 |
with gr.Row():
|
| 1334 |
with gr.Column(scale=3):
|
|
|
|
| 1451 |
- Optional rectangle selection: use the **Crop** tool.
|
| 1452 |
- Freehand/highlight ink is semi-transparent so underlying content stays visible.
|
| 1453 |
- Optional multi-select: click **Add Region** after each selection.
|
| 1454 |
+
- **Add Region** snapshots only newly drawn pixels so zoom/pan stays in place while you continue selecting.
|
| 1455 |
Then click **Extract**.
|
| 1456 |
4. Use **Clear Regions** to reset multi-select state.
|
| 1457 |
5. Review **Cropped Images** and **Boxes**: both are labeled `Region 1`, `Region 2`, etc.
|
|
|
|
| 1491 |
region_editor.change(sync_workspace_state, [region_editor, workspace_base_image], [workspace_base_size, workspace_base_image])
|
| 1492 |
file_in.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
|
| 1493 |
page_selector.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
|
| 1494 |
+
file_in.change(_reset_drawn_mask, outputs=[drawn_mask_state])
|
| 1495 |
+
page_selector.change(_reset_drawn_mask, outputs=[drawn_mask_state])
|
| 1496 |
|
| 1497 |
add_region_btn.click(
|
| 1498 |
add_selected_region,
|
| 1499 |
+
[region_editor, workspace_base_size, workspace_base_image, selected_regions_state, drawn_mask_state],
|
| 1500 |
+
[selected_regions_state, selected_regions_gallery, selection_status, drawn_mask_state],
|
| 1501 |
)
|
| 1502 |
clear_regions_btn.click(
|
| 1503 |
+
clear_regions_preserve_view,
|
| 1504 |
inputs=[region_editor],
|
| 1505 |
+
outputs=[selected_regions_state, selected_regions_gallery, selection_status, drawn_mask_state],
|
| 1506 |
)
|
| 1507 |
|
| 1508 |
def run(file_path, task, custom_prompt, page_num, enable_equation_zoom, detect_eq_lines, scope, region_value, base_size, base_image, selected_regions):
|