Spaces:

ricklon
/

DeepSeek-OCR-2-Math

Running on Zero

App Files Files Community

ricklon committed 7 days ago

Commit

d3dfd44

1 Parent(s): fce2f1f

Keep region selections separate and clear overlays after add

Browse files

Files changed (1) hide show

app.py +128 -36

app.py CHANGED Viewed

@@ -15,6 +15,7 @@ import base64
 import html as html_lib
 import markdown as md_lib
 import latex2mathml.converter
 from io import StringIO, BytesIO
@@ -967,22 +968,54 @@ def _locate_patch_bbox(base_image: Image.Image, patch_image: Image.Image):
     y2 = max(y1 + 1, min(bh, y2))
     return (x1, y1, x2, y2)
-def _extract_selected_region(editor_value, base_size=None, base_image=None):
-    """Extract a clean selected region from ImageEditor data.
-    Strategy:
-    1) Prefer explicit crop (editor background/composite already reduced in size).
-    2) Otherwise, infer region from drawn layers' alpha mask bbox and crop background.
-    """
     if editor_value is None:
-        return None
     if isinstance(editor_value, Image.Image):
         if base_size and tuple(editor_value.size) == tuple(base_size):
-            return None, None
         bbox = _locate_patch_bbox(base_image, editor_value) if base_image is not None else None
-        return editor_value, bbox
     if not isinstance(editor_value, dict):
-        return None, None
     background = _to_rgba_image(editor_value.get("background"))
     composite = _to_rgba_image(editor_value.get("composite"))
@@ -990,16 +1023,16 @@ def _extract_selected_region(editor_value, base_size=None, base_image=None):
     if background is None:
         if composite is None:
-            return None, None
         background = composite
     if not isinstance(layers, list) or not layers:
         # No annotation layers; treat as explicit crop only if size changed from base.
         if base_size and tuple(background.size) == tuple(base_size):
-            return None, None
         patch = background.convert("RGB")
         bbox = _locate_patch_bbox(base_image, patch) if base_image is not None else None
-        return patch, bbox
     alpha_acc = np.zeros((background.height, background.width), dtype=np.uint8)
     for layer in layers:
@@ -1012,22 +1045,61 @@ def _extract_selected_region(editor_value, base_size=None, base_image=None):
         layer_alpha = np.asarray(layer_img, dtype=np.uint8)[:, :, 3]
         alpha_acc = np.maximum(alpha_acc, layer_alpha)
-    ys, xs = np.where(alpha_acc > 0)
-    if xs.size == 0 or ys.size == 0:
-        return None, None
-    x1, y1 = int(xs.min()), int(ys.min())
-    x2, y2 = int(xs.max()) + 1, int(ys.max()) + 1
-    pad_x = max(2, int((x2 - x1) * 0.02))
-    pad_y = max(2, int((y2 - y1) * 0.02))
-    x1 = max(0, x1 - pad_x)
-    y1 = max(0, y1 - pad_y)
-    x2 = min(background.width, x2 + pad_x)
-    y2 = min(background.height, y2 + pad_y)
-    if x2 <= x1 or y2 <= y1:
         return None, None
-    return background.crop((x1, y1, x2, y2)).convert("RGB"), (x1, y1, x2, y2)
 def _draw_selected_region_boxes(image, boxes):
     if image is None or not boxes:
@@ -1066,18 +1138,36 @@ def _reset_selected_regions():
     return [], [], "No saved regions."
 def add_selected_region(editor_value, base_size, base_image, selected_regions):
-    region_img, bbox = _extract_selected_region(editor_value, base_size=base_size, base_image=base_image)
-    if region_img is None:
-        msg = "No region detected. Use Crop or draw/highlight a region first."
-        regions = selected_regions or []
-        return regions, _region_gallery_items(regions), msg
     regions = list(selected_regions or [])
-    regions.append({"image": region_img, "bbox": bbox})
-    return regions, _region_gallery_items(regions), f"{len(regions)} region(s) saved."
 def clear_selected_regions():
     return _reset_selected_regions()
 def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
     text_display = re.sub(
         r'\\\[(.+?)\\\]',
@@ -1319,6 +1409,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
            - Optional rectangle selection: use the **Crop** tool.
            - Freehand/highlight ink is semi-transparent so underlying content stays visible.
            - Optional multi-select: click **Add Region** after each selection.
            Then click **Extract**.
         4. Use **Clear Regions** to reset multi-select state.
         5. Review **Cropped Images** and **Boxes**: both are labeled `Region 1`, `Region 2`, etc.
@@ -1362,11 +1453,12 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
     add_region_btn.click(
         add_selected_region,
         [region_editor, workspace_base_size, workspace_base_image, selected_regions_state],
-        [selected_regions_state, selected_regions_gallery, selection_status],
     )
     clear_regions_btn.click(
-        clear_selected_regions,
-        outputs=[selected_regions_state, selected_regions_gallery, selection_status],
     )
     def run(file_path, task, custom_prompt, page_num, enable_equation_zoom, detect_eq_lines, scope, region_value, base_size, base_image, selected_regions):

 import html as html_lib
 import markdown as md_lib
 import latex2mathml.converter
+from collections import deque
 from io import StringIO, BytesIO
     y2 = max(y1 + 1, min(bh, y2))
     return (x1, y1, x2, y2)
+def _component_boxes(binary_mask, min_pixels=24):
+    """Return bounding boxes of the connected components of a 2-D mask.
+
+    Every truthy pixel is flood-filled breadth-first together with its eight
+    surrounding pixels. Components with fewer than ``min_pixels`` pixels are
+    dropped.
+
+    Returns a list of ``(x1, y1, x2, y2, pixel_count)`` tuples, with ``x2`` and
+    ``y2`` exclusive, in the order in which ``np.where`` yields the first
+    pixel of each component.
+    """
+    h, w = binary_mask.shape
+    visited = np.zeros((h, w), dtype=bool)
+    boxes = []
+    # Offsets of the eight surrounding pixels (diagonals included).
+    neighbors = [(-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1), (1, 0), (1, 1)]
+    ys, xs = np.where(binary_mask)
+    for sy, sx in zip(ys.tolist(), xs.tolist()):
+        # A pixel already reached from an earlier seed belongs to that component.
+        if visited[sy, sx]:
+            continue
+        q = deque([(sy, sx)])
+        visited[sy, sx] = True
+        min_x = max_x = sx
+        min_y = max_y = sy
+        count = 0
+        while q:
+            y, x = q.popleft()
+            count += 1
+            if x < min_x:
+                min_x = x
+            if x > max_x:
+                max_x = x
+            if y < min_y:
+                min_y = y
+            if y > max_y:
+                max_y = y
+            for dy, dx in neighbors:
+                ny, nx = y + dy, x + dx
+                if ny < 0 or ny >= h or nx < 0 or nx >= w:
+                    continue
+                if visited[ny, nx] or not binary_mask[ny, nx]:
+                    continue
+                # Mark on enqueue so the same pixel is never queued twice.
+                visited[ny, nx] = True
+                q.append((ny, nx))
+        if count >= min_pixels:
+            # +1 turns the inclusive max coordinates into exclusive box edges.
+            boxes.append((min_x, min_y, max_x + 1, max_y + 1, count))
+    return boxes
+def _extract_selected_regions(editor_value, base_size=None, base_image=None):
     if editor_value is None:
+        return []
     if isinstance(editor_value, Image.Image):
         if base_size and tuple(editor_value.size) == tuple(base_size):
+            return []
         bbox = _locate_patch_bbox(base_image, editor_value) if base_image is not None else None
+        return [(editor_value, bbox)]
     if not isinstance(editor_value, dict):
+        return []
     background = _to_rgba_image(editor_value.get("background"))
     composite = _to_rgba_image(editor_value.get("composite"))
     if background is None:
         if composite is None:
+            return []
         background = composite
     if not isinstance(layers, list) or not layers:
         # No annotation layers; treat as explicit crop only if size changed from base.
         if base_size and tuple(background.size) == tuple(base_size):
+            return []
         patch = background.convert("RGB")
         bbox = _locate_patch_bbox(base_image, patch) if base_image is not None else None
+        return [(patch, bbox)]
     alpha_acc = np.zeros((background.height, background.width), dtype=np.uint8)
     for layer in layers:
         layer_alpha = np.asarray(layer_img, dtype=np.uint8)[:, :, 3]
         alpha_acc = np.maximum(alpha_acc, layer_alpha)
+    components = _component_boxes(alpha_acc > 0, min_pixels=24)
+    if not components:
+        return []
+    regions = []
+    for x1, y1, x2, y2, _ in components:
+        pad_x = max(2, int((x2 - x1) * 0.02))
+        pad_y = max(2, int((y2 - y1) * 0.02))
+        px1 = max(0, x1 - pad_x)
+        py1 = max(0, y1 - pad_y)
+        px2 = min(background.width, x2 + pad_x)
+        py2 = min(background.height, y2 + pad_y)
+        if px2 <= px1 or py2 <= py1:
+            continue
+        crop = background.crop((px1, py1, px2, py2)).convert("RGB")
+        regions.append((crop, (px1, py1, px2, py2)))
+    regions.sort(
+        key=lambda item: (item[1][2] - item[1][0]) * (item[1][3] - item[1][1]),
+        reverse=True,
+    )
+    return regions
+def _extract_selected_region(editor_value, base_size=None, base_image=None):
+    """Return the first ``(image, bbox)`` pair from ``_extract_selected_regions``.
+
+    Single-region wrapper around the multi-region extractor: the arguments are
+    forwarded unchanged, and ``(None, None)`` is returned when the extractor
+    yields no regions.
+    """
+    regions = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
+    if not regions:
         return None, None
+    return regions[0]
+def _bbox_overlap_ratio(a, b):
+    """Return how much of each box is covered by the boxes' intersection.
+
+    ``a`` and ``b`` are ``(x1, y1, x2, y2)`` boxes. The result is the pair
+    ``(intersection / area_a, intersection / area_b)``, or ``(0.0, 0.0)`` when
+    the boxes do not overlap with positive area.
+    """
+    ax1, ay1, ax2, ay2 = a
+    bx1, by1, bx2, by2 = b
+    ix1 = max(ax1, bx1)
+    iy1 = max(ay1, by1)
+    ix2 = min(ax2, bx2)
+    iy2 = min(ay2, by2)
+    if ix2 <= ix1 or iy2 <= iy1:
+        return 0.0, 0.0
+    inter = float((ix2 - ix1) * (iy2 - iy1))
+    # Clamp each area to at least 1 so the divisions below cannot divide by zero.
+    area_a = float(max(1, (ax2 - ax1) * (ay2 - ay1)))
+    area_b = float(max(1, (bx2 - bx1) * (by2 - by1)))
+    return inter / area_a, inter / area_b
+def _is_duplicate_bbox(candidate_bbox, existing_bbox):
+    """Return True when ``candidate_bbox`` counts as a repeat of ``existing_bbox``.
+
+    A candidate is a duplicate if any one of these holds:
+    - ``_box_iou`` of the two boxes is at least 0.85,
+    - the intersection covers at least 92% of the candidate,
+    - the intersection covers at least 97% of the existing box.
+    """
+    # NOTE(review): _box_iou is defined outside this view; presumably it returns
+    # intersection-over-union in [0, 1] - confirm against its definition.
+    iou = _box_iou(candidate_bbox, existing_bbox)
+    cover_cand, cover_exist = _bbox_overlap_ratio(candidate_bbox, existing_bbox)
+    return iou >= 0.85 or cover_cand >= 0.92 or cover_exist >= 0.97
+def _clear_editor_overlays(editor_value):
+    """Return an editor value with its annotation layers removed.
+
+    For a dict value, the "background" entry (or, failing that, "composite") is
+    passed through ``_to_rgba_image``; if that yields an image, a new dict is
+    returned that uses its RGB conversion as both "background" and "composite"
+    and has an empty "layers" list. Any other input is returned unchanged.
+    """
+    if isinstance(editor_value, dict):
+        bg = _to_rgba_image(editor_value.get("background")) or _to_rgba_image(editor_value.get("composite"))
+        if isinstance(bg, Image.Image):
+            clean_bg = bg.convert("RGB")
+            return {"background": clean_bg, "layers": [], "composite": clean_bg}
+    return editor_value
 def _draw_selected_region_boxes(image, boxes):
     if image is None or not boxes:
     return [], [], "No saved regions."
 def add_selected_region(editor_value, base_size, base_image, selected_regions):
+    """Append newly selected regions to the saved list, skipping duplicates.
+
+    Returns ``(regions, gallery_items, status_message, editor_value)``. The
+    editor value is returned with its overlays cleared only when at least one
+    region was added; otherwise it is returned as received.
+    """
+    candidates = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
     regions = list(selected_regions or [])
+    if not candidates:
+        msg = "No region detected. Use Crop or draw/highlight a region first."
+        return regions, _region_gallery_items(regions), msg, editor_value
+    existing_boxes = [r.get("bbox") for r in regions if r.get("bbox") is not None]
+    added = 0
+    for region_img, bbox in candidates:
+        # Candidates without a bbox cannot be compared, so they are always added.
+        if bbox is not None and any(_is_duplicate_bbox(bbox, eb) for eb in existing_boxes):
+            continue
+        regions.append({"image": region_img, "bbox": bbox})
+        if bbox is not None:
+            # Later candidates in the same batch are also checked against this one.
+            existing_boxes.append(bbox)
+        added += 1
+    if added == 0:
+        msg = "No new region added. Draw one region, click Add Region, then draw the next region."
+        return regions, _region_gallery_items(regions), msg, editor_value
+    msg = f"Added {added} region(s). {len(regions)} total."
+    return regions, _region_gallery_items(regions), msg, _clear_editor_overlays(editor_value)
 def clear_selected_regions():
+    """Return the reset values produced by ``_reset_selected_regions``."""
     return _reset_selected_regions()
+def clear_regions_and_editor(editor_value):
+    """Reset the saved regions and strip overlays from the editor value.
+
+    Returns the three values from ``_reset_selected_regions`` followed by
+    ``editor_value`` passed through ``_clear_editor_overlays``.
+    """
+    regions, gallery_items, msg = _reset_selected_regions()
+    return regions, gallery_items, msg, _clear_editor_overlays(editor_value)
 def _compose_ui_outputs(cleaned, markdown, raw, img_out, gallery_items):
     text_display = re.sub(
         r'\\\[(.+?)\\\]',
            - Optional rectangle selection: use the **Crop** tool.
            - Freehand/highlight ink is semi-transparent so underlying content stays visible.
            - Optional multi-select: click **Add Region** after each selection.
+           - After **Add Region**, drawing marks are cleared automatically so the next region stays separate.
            Then click **Extract**.
         4. Use **Clear Regions** to reset multi-select state.
         5. Review **Cropped Images** and **Boxes**: both are labeled `Region 1`, `Region 2`, etc.
+    # region_editor is listed as an output as well as an input because
+    # add_selected_region returns an editor value as its fourth result.
     add_region_btn.click(
         add_selected_region,
         [region_editor, workspace_base_size, workspace_base_image, selected_regions_state],
+        [selected_regions_state, selected_regions_gallery, selection_status, region_editor],
     )
+    # clear_regions_and_editor likewise returns four values, the last of which
+    # is the editor value with its overlays cleared.
     clear_regions_btn.click(
+        clear_regions_and_editor,
+        inputs=[region_editor],
+        outputs=[selected_regions_state, selected_regions_gallery, selection_status, region_editor],
     )
     def run(file_path, task, custom_prompt, page_num, enable_equation_zoom, detect_eq_lines, scope, region_value, base_size, base_image, selected_regions):