Spaces:
Running on Zero
Running on Zero
Prefer Paint workspace for live drawing with ImageEditor fallback
Browse files
app.py
CHANGED
|
@@ -20,8 +20,10 @@ from collections import deque
|
|
| 20 |
from io import StringIO, BytesIO
|
| 21 |
|
| 22 |
HAS_IMAGE_EDITOR = hasattr(gr, "ImageEditor")
|
|
|
|
| 23 |
HAS_BRUSH = hasattr(gr, "Brush")
|
| 24 |
HAS_ERASER = hasattr(gr, "Eraser")
|
|
|
|
| 25 |
|
| 26 |
# Model options — swap MODEL_NAME to reduce VRAM usage on GPUs with <= 8GB
|
| 27 |
#
|
|
@@ -891,7 +893,7 @@ def _extract_editor_background(editor_value):
|
|
| 891 |
|
| 892 |
def _to_rgba_image(obj):
|
| 893 |
if isinstance(obj, dict):
|
| 894 |
-
for k in ("image", "layer", "composite", "background"):
|
| 895 |
if k in obj:
|
| 896 |
return _to_rgba_image(obj[k])
|
| 897 |
return None
|
|
@@ -909,6 +911,32 @@ def _to_rgba_image(obj):
|
|
| 909 |
return Image.fromarray(arr.astype(np.uint8), mode="RGBA")
|
| 910 |
return None
|
| 911 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 912 |
def _locate_patch_bbox(base_image: Image.Image, patch_image: Image.Image):
|
| 913 |
"""Approximate patch location in base image using downscaled SSD search."""
|
| 914 |
if base_image is None or patch_image is None:
|
|
@@ -1034,12 +1062,23 @@ def _editor_background_and_mask(editor_value):
|
|
| 1034 |
if not isinstance(editor_value, dict):
|
| 1035 |
return None, None
|
| 1036 |
background = _to_rgba_image(editor_value.get("background"))
|
|
|
|
|
|
|
| 1037 |
composite = _to_rgba_image(editor_value.get("composite"))
|
| 1038 |
layers = editor_value.get("layers") or []
|
| 1039 |
if background is None:
|
| 1040 |
if composite is None:
|
| 1041 |
return None, None
|
| 1042 |
background = composite
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1043 |
if not isinstance(layers, list) or not layers:
|
| 1044 |
return background, None
|
| 1045 |
|
|
@@ -1096,11 +1135,17 @@ def _extract_new_drawn_regions(editor_value, base_size=None, base_image=None, co
|
|
| 1096 |
if background is None:
|
| 1097 |
return [], consumed_mask
|
| 1098 |
|
| 1099 |
-
|
| 1100 |
-
|
|
|
|
|
|
|
|
|
|
| 1101 |
regions = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
|
| 1102 |
return regions, consumed_mask
|
| 1103 |
|
|
|
|
|
|
|
|
|
|
| 1104 |
if consumed_mask is None or not isinstance(consumed_mask, np.ndarray) or consumed_mask.shape != mask.shape:
|
| 1105 |
delta_mask = mask
|
| 1106 |
else:
|
|
@@ -1349,13 +1394,14 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1349 |
with gr.Column(scale=3):
|
| 1350 |
workspace_hint = gr.Markdown("**Entire Page mode:** No drawing needed. Click **Extract** to process the full page.")
|
| 1351 |
gr.Markdown("**Image Workspace (full page + region selection)**")
|
| 1352 |
-
if
|
| 1353 |
editor_kwargs = {}
|
| 1354 |
if HAS_BRUSH:
|
| 1355 |
try:
|
|
|
|
| 1356 |
editor_kwargs["brush"] = gr.Brush(
|
| 1357 |
-
colors=[
|
| 1358 |
-
default_color=
|
| 1359 |
color_mode="fixed",
|
| 1360 |
default_size=22,
|
| 1361 |
)
|
|
@@ -1374,15 +1420,39 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1374 |
editor_kwargs["eraser"] = gr.Eraser(default_size=26)
|
| 1375 |
except TypeError:
|
| 1376 |
editor_kwargs["eraser"] = gr.Eraser()
|
| 1377 |
-
|
| 1378 |
-
|
| 1379 |
-
|
| 1380 |
-
|
| 1381 |
-
|
| 1382 |
-
|
| 1383 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1384 |
else:
|
| 1385 |
-
gr.Markdown("Region drawing requires a newer Gradio version with `ImageEditor` support.")
|
| 1386 |
region_editor = gr.State(None)
|
| 1387 |
|
| 1388 |
with gr.Column(scale=1):
|
|
@@ -1433,7 +1503,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1433 |
"examples/2022-0922 Section 14 Notes.png",
|
| 1434 |
"examples/2022-0922 Section 15 Notes.png",
|
| 1435 |
]
|
| 1436 |
-
if
|
| 1437 |
gr.Examples(
|
| 1438 |
label="Image Examples (click thumbnail to load into workspace)",
|
| 1439 |
examples=image_examples,
|
|
@@ -1504,7 +1574,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1504 |
task.change(toggle_prompt, [task], [prompt])
|
| 1505 |
task.change(select_boxes, [task], [tabs])
|
| 1506 |
input_scope.change(toggle_scope_ui, [input_scope], [workspace_hint, selection_controls, selection_status, selected_regions_gallery])
|
| 1507 |
-
if
|
| 1508 |
file_in.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
|
| 1509 |
page_selector.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
|
| 1510 |
region_editor.change(sync_workspace_state, [region_editor, workspace_base_image], [workspace_base_size, workspace_base_image])
|
|
|
|
| 20 |
from io import StringIO, BytesIO
|
| 21 |
|
| 22 |
HAS_IMAGE_EDITOR = hasattr(gr, "ImageEditor")
|
| 23 |
+
HAS_PAINT = hasattr(gr, "Paint")
|
| 24 |
HAS_BRUSH = hasattr(gr, "Brush")
|
| 25 |
HAS_ERASER = hasattr(gr, "Eraser")
|
| 26 |
+
HAS_REGION_WORKSPACE = HAS_PAINT or HAS_IMAGE_EDITOR
|
| 27 |
|
| 28 |
# Model options — swap MODEL_NAME to reduce VRAM usage on GPUs with <= 8GB
|
| 29 |
#
|
|
|
|
| 893 |
|
| 894 |
def _to_rgba_image(obj):
|
| 895 |
if isinstance(obj, dict):
|
| 896 |
+
for k in ("image", "layer", "composite", "background", "mask"):
|
| 897 |
if k in obj:
|
| 898 |
return _to_rgba_image(obj[k])
|
| 899 |
return None
|
|
|
|
| 911 |
return Image.fromarray(arr.astype(np.uint8), mode="RGBA")
|
| 912 |
return None
|
| 913 |
|
| 914 |
+
def _to_mask_array(obj):
    """Convert an editor mask payload into a 2-D boolean array.

    Args:
        obj: ``None``, a dict, a PIL image, or a NumPy array. For a dict the
            keys ``"mask"``, ``"image"``, ``"layer"``, ``"composite"`` and
            ``"background"`` are tried in that order and the first value
            that converts successfully is used.

    Returns:
        A boolean array that is ``True`` where the input is considered
        drawn, or ``None`` when ``obj`` is ``None``, is of an unsupported
        type, has an unsupported number of dimensions, or is a dict with no
        convertible entry.
    """
    if obj is None:
        return None

    if isinstance(obj, dict):
        for key in ("mask", "image", "layer", "composite", "background"):
            if key in obj:
                found = _to_mask_array(obj[key])
                if found is not None:
                    return found
        return None

    # ndarray is tested first; PIL images are not ndarrays, so the order
    # does not affect which branch a given input takes.
    if isinstance(obj, np.ndarray):
        arr = obj
    elif isinstance(obj, Image.Image):
        arr = np.asarray(obj)
    else:
        return None

    # Single-channel input: any non-zero sample counts as drawn.
    if arr.ndim == 2:
        return arr > 0

    # Only (H, W, C) with at least one channel is supported; a zero-length
    # channel axis would make the reduction below raise.
    if arr.ndim != 3 or arr.shape[2] == 0:
        return None

    if arr.shape[2] >= 4:
        alpha = arr[:, :, 3]
        # Transparency carries the mask only if some pixel is actually
        # transparent. A fully opaque image would otherwise mark every
        # pixel as drawn, so it falls through to the colour channels.
        # NOTE(review): assumes opaque masks encode strokes as non-black
        # pixels - confirm against the Gradio version in use.
        if alpha.size == 0 or np.any(alpha == 0):
            return alpha > 0

    # Colour input (or opaque RGBA): drawn where any colour channel is
    # non-zero.
    return np.max(arr[:, :, :3], axis=2) > 0
|
| 939 |
+
|
| 940 |
def _locate_patch_bbox(base_image: Image.Image, patch_image: Image.Image):
|
| 941 |
"""Approximate patch location in base image using downscaled SSD search."""
|
| 942 |
if base_image is None or patch_image is None:
|
|
|
|
| 1062 |
if not isinstance(editor_value, dict):
|
| 1063 |
return None, None
|
| 1064 |
background = _to_rgba_image(editor_value.get("background"))
|
| 1065 |
+
if background is None:
|
| 1066 |
+
background = _to_rgba_image(editor_value.get("image"))
|
| 1067 |
composite = _to_rgba_image(editor_value.get("composite"))
|
| 1068 |
layers = editor_value.get("layers") or []
|
| 1069 |
if background is None:
|
| 1070 |
if composite is None:
|
| 1071 |
return None, None
|
| 1072 |
background = composite
|
| 1073 |
+
|
| 1074 |
+
mask = _to_mask_array(editor_value.get("mask"))
|
| 1075 |
+
if mask is not None:
|
| 1076 |
+
if mask.shape[:2] != (background.height, background.width):
|
| 1077 |
+
mask_img = Image.fromarray(mask.astype(np.uint8) * 255, mode="L")
|
| 1078 |
+
nearest = Image.Resampling.NEAREST if hasattr(Image, "Resampling") else Image.NEAREST
|
| 1079 |
+
mask = np.asarray(mask_img.resize((background.width, background.height), nearest)) > 0
|
| 1080 |
+
return background, mask
|
| 1081 |
+
|
| 1082 |
if not isinstance(layers, list) or not layers:
|
| 1083 |
return background, None
|
| 1084 |
|
|
|
|
| 1135 |
if background is None:
|
| 1136 |
return [], consumed_mask
|
| 1137 |
|
| 1138 |
+
has_layer_data = isinstance(layers, list) and len(layers) > 0
|
| 1139 |
+
has_draw_data = (mask is not None) or has_layer_data
|
| 1140 |
+
|
| 1141 |
+
# If there are no draw layers/mask, treat as explicit crop mode.
|
| 1142 |
+
if not has_draw_data:
|
| 1143 |
regions = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
|
| 1144 |
return regions, consumed_mask
|
| 1145 |
|
| 1146 |
+
if mask is None:
|
| 1147 |
+
return [], consumed_mask
|
| 1148 |
+
|
| 1149 |
if consumed_mask is None or not isinstance(consumed_mask, np.ndarray) or consumed_mask.shape != mask.shape:
|
| 1150 |
delta_mask = mask
|
| 1151 |
else:
|
|
|
|
| 1394 |
with gr.Column(scale=3):
|
| 1395 |
workspace_hint = gr.Markdown("**Entire Page mode:** No drawing needed. Click **Extract** to process the full page.")
|
| 1396 |
gr.Markdown("**Image Workspace (full page + region selection)**")
|
| 1397 |
+
if HAS_REGION_WORKSPACE:
|
| 1398 |
editor_kwargs = {}
|
| 1399 |
if HAS_BRUSH:
|
| 1400 |
try:
|
| 1401 |
+
highlight = ("#2563eb", 0.35)
|
| 1402 |
editor_kwargs["brush"] = gr.Brush(
|
| 1403 |
+
colors=[highlight],
|
| 1404 |
+
default_color=highlight,
|
| 1405 |
color_mode="fixed",
|
| 1406 |
default_size=22,
|
| 1407 |
)
|
|
|
|
| 1420 |
editor_kwargs["eraser"] = gr.Eraser(default_size=26)
|
| 1421 |
except TypeError:
|
| 1422 |
editor_kwargs["eraser"] = gr.Eraser()
|
| 1423 |
+
if HAS_PAINT:
|
| 1424 |
+
try:
|
| 1425 |
+
region_editor = gr.Paint(
|
| 1426 |
+
label="Image Workspace",
|
| 1427 |
+
show_label=False,
|
| 1428 |
+
type="pil",
|
| 1429 |
+
height=640,
|
| 1430 |
+
**editor_kwargs,
|
| 1431 |
+
)
|
| 1432 |
+
except TypeError:
|
| 1433 |
+
try:
|
| 1434 |
+
region_editor = gr.Paint(
|
| 1435 |
+
label="Image Workspace",
|
| 1436 |
+
show_label=False,
|
| 1437 |
+
height=640,
|
| 1438 |
+
**editor_kwargs,
|
| 1439 |
+
)
|
| 1440 |
+
except TypeError:
|
| 1441 |
+
region_editor = gr.Paint(
|
| 1442 |
+
label="Image Workspace",
|
| 1443 |
+
show_label=False,
|
| 1444 |
+
height=640,
|
| 1445 |
+
)
|
| 1446 |
+
else:
|
| 1447 |
+
region_editor = gr.ImageEditor(
|
| 1448 |
+
label="Image Workspace",
|
| 1449 |
+
show_label=False,
|
| 1450 |
+
type="pil",
|
| 1451 |
+
height=640,
|
| 1452 |
+
**editor_kwargs,
|
| 1453 |
+
)
|
| 1454 |
else:
|
| 1455 |
+
gr.Markdown("Region drawing requires a newer Gradio version with `Paint` or `ImageEditor` support.")
|
| 1456 |
region_editor = gr.State(None)
|
| 1457 |
|
| 1458 |
with gr.Column(scale=1):
|
|
|
|
| 1503 |
"examples/2022-0922 Section 14 Notes.png",
|
| 1504 |
"examples/2022-0922 Section 15 Notes.png",
|
| 1505 |
]
|
| 1506 |
+
if HAS_REGION_WORKSPACE and region_editor is not None:
|
| 1507 |
gr.Examples(
|
| 1508 |
label="Image Examples (click thumbnail to load into workspace)",
|
| 1509 |
examples=image_examples,
|
|
|
|
| 1574 |
task.change(toggle_prompt, [task], [prompt])
|
| 1575 |
task.change(select_boxes, [task], [tabs])
|
| 1576 |
input_scope.change(toggle_scope_ui, [input_scope], [workspace_hint, selection_controls, selection_status, selected_regions_gallery])
|
| 1577 |
+
if HAS_REGION_WORKSPACE and region_editor is not None:
|
| 1578 |
file_in.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
|
| 1579 |
page_selector.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
|
| 1580 |
region_editor.change(sync_workspace_state, [region_editor, workspace_base_image], [workspace_base_size, workspace_base_image])
|