ricklon committed on
Commit
a09a317
·
1 Parent(s): e8612b6

Prefer Paint workspace for live drawing with ImageEditor fallback

Browse files
Files changed (1) hide show
  1. app.py +86 -16
app.py CHANGED
@@ -20,8 +20,10 @@ from collections import deque
20
  from io import StringIO, BytesIO
21
 
22
  HAS_IMAGE_EDITOR = hasattr(gr, "ImageEditor")
 
23
  HAS_BRUSH = hasattr(gr, "Brush")
24
  HAS_ERASER = hasattr(gr, "Eraser")
 
25
 
26
  # Model options — swap MODEL_NAME to reduce VRAM usage on GPUs with <= 8GB
27
  #
@@ -891,7 +893,7 @@ def _extract_editor_background(editor_value):
891
 
892
  def _to_rgba_image(obj):
893
  if isinstance(obj, dict):
894
- for k in ("image", "layer", "composite", "background"):
895
  if k in obj:
896
  return _to_rgba_image(obj[k])
897
  return None
@@ -909,6 +911,32 @@ def _to_rgba_image(obj):
909
  return Image.fromarray(arr.astype(np.uint8), mode="RGBA")
910
  return None
911
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
912
  def _locate_patch_bbox(base_image: Image.Image, patch_image: Image.Image):
913
  """Approximate patch location in base image using downscaled SSD search."""
914
  if base_image is None or patch_image is None:
@@ -1034,12 +1062,23 @@ def _editor_background_and_mask(editor_value):
1034
  if not isinstance(editor_value, dict):
1035
  return None, None
1036
  background = _to_rgba_image(editor_value.get("background"))
 
 
1037
  composite = _to_rgba_image(editor_value.get("composite"))
1038
  layers = editor_value.get("layers") or []
1039
  if background is None:
1040
  if composite is None:
1041
  return None, None
1042
  background = composite
 
 
 
 
 
 
 
 
 
1043
  if not isinstance(layers, list) or not layers:
1044
  return background, None
1045
 
@@ -1096,11 +1135,17 @@ def _extract_new_drawn_regions(editor_value, base_size=None, base_image=None, co
1096
  if background is None:
1097
  return [], consumed_mask
1098
 
1099
- # If there are no drawn layers, treat as explicit crop mode.
1100
- if not isinstance(layers, list) or not layers or mask is None:
 
 
 
1101
  regions = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
1102
  return regions, consumed_mask
1103
 
 
 
 
1104
  if consumed_mask is None or not isinstance(consumed_mask, np.ndarray) or consumed_mask.shape != mask.shape:
1105
  delta_mask = mask
1106
  else:
@@ -1349,13 +1394,14 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1349
  with gr.Column(scale=3):
1350
  workspace_hint = gr.Markdown("**Entire Page mode:** No drawing needed. Click **Extract** to process the full page.")
1351
  gr.Markdown("**Image Workspace (full page + region selection)**")
1352
- if HAS_IMAGE_EDITOR:
1353
  editor_kwargs = {}
1354
  if HAS_BRUSH:
1355
  try:
 
1356
  editor_kwargs["brush"] = gr.Brush(
1357
- colors=["#2563eb"],
1358
- default_color="#2563eb",
1359
  color_mode="fixed",
1360
  default_size=22,
1361
  )
@@ -1374,15 +1420,39 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1374
  editor_kwargs["eraser"] = gr.Eraser(default_size=26)
1375
  except TypeError:
1376
  editor_kwargs["eraser"] = gr.Eraser()
1377
- region_editor = gr.ImageEditor(
1378
- label="Image Workspace",
1379
- show_label=False,
1380
- type="pil",
1381
- height=640,
1382
- **editor_kwargs,
1383
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1384
  else:
1385
- gr.Markdown("Region drawing requires a newer Gradio version with `ImageEditor` support.")
1386
  region_editor = gr.State(None)
1387
 
1388
  with gr.Column(scale=1):
@@ -1433,7 +1503,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1433
  "examples/2022-0922 Section 14 Notes.png",
1434
  "examples/2022-0922 Section 15 Notes.png",
1435
  ]
1436
- if HAS_IMAGE_EDITOR and region_editor is not None:
1437
  gr.Examples(
1438
  label="Image Examples (click thumbnail to load into workspace)",
1439
  examples=image_examples,
@@ -1504,7 +1574,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1504
  task.change(toggle_prompt, [task], [prompt])
1505
  task.change(select_boxes, [task], [tabs])
1506
  input_scope.change(toggle_scope_ui, [input_scope], [workspace_hint, selection_controls, selection_status, selected_regions_gallery])
1507
- if HAS_IMAGE_EDITOR and region_editor is not None:
1508
  file_in.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
1509
  page_selector.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
1510
  region_editor.change(sync_workspace_state, [region_editor, workspace_base_image], [workspace_base_size, workspace_base_image])
 
20
  from io import StringIO, BytesIO
21
 
22
  HAS_IMAGE_EDITOR = hasattr(gr, "ImageEditor")
23
+ HAS_PAINT = hasattr(gr, "Paint")
24
  HAS_BRUSH = hasattr(gr, "Brush")
25
  HAS_ERASER = hasattr(gr, "Eraser")
26
+ HAS_REGION_WORKSPACE = HAS_PAINT or HAS_IMAGE_EDITOR
27
 
28
  # Model options — swap MODEL_NAME to reduce VRAM usage on GPUs with <= 8GB
29
  #
 
893
 
894
  def _to_rgba_image(obj):
895
  if isinstance(obj, dict):
896
+ for k in ("image", "layer", "composite", "background", "mask"):
897
  if k in obj:
898
  return _to_rgba_image(obj[k])
899
  return None
 
911
  return Image.fromarray(arr.astype(np.uint8), mode="RGBA")
912
  return None
913
 
914
+ def _to_mask_array(obj):
915
+ if obj is None:
916
+ return None
917
+ if isinstance(obj, dict):
918
+ for k in ("mask", "image", "layer", "composite", "background"):
919
+ if k in obj:
920
+ arr = _to_mask_array(obj[k])
921
+ if arr is not None:
922
+ return arr
923
+ return None
924
+
925
+ if isinstance(obj, Image.Image):
926
+ arr = np.asarray(obj)
927
+ elif isinstance(obj, np.ndarray):
928
+ arr = obj
929
+ else:
930
+ return None
931
+
932
+ if arr.ndim == 2:
933
+ return arr > 0
934
+ if arr.ndim == 3:
935
+ if arr.shape[2] >= 4:
936
+ return arr[:, :, 3] > 0
937
+ return np.max(arr[:, :, :3], axis=2) > 0
938
+ return None
939
+
940
  def _locate_patch_bbox(base_image: Image.Image, patch_image: Image.Image):
941
  """Approximate patch location in base image using downscaled SSD search."""
942
  if base_image is None or patch_image is None:
 
1062
  if not isinstance(editor_value, dict):
1063
  return None, None
1064
  background = _to_rgba_image(editor_value.get("background"))
1065
+ if background is None:
1066
+ background = _to_rgba_image(editor_value.get("image"))
1067
  composite = _to_rgba_image(editor_value.get("composite"))
1068
  layers = editor_value.get("layers") or []
1069
  if background is None:
1070
  if composite is None:
1071
  return None, None
1072
  background = composite
1073
+
1074
+ mask = _to_mask_array(editor_value.get("mask"))
1075
+ if mask is not None:
1076
+ if mask.shape[:2] != (background.height, background.width):
1077
+ mask_img = Image.fromarray(mask.astype(np.uint8) * 255, mode="L")
1078
+ nearest = Image.Resampling.NEAREST if hasattr(Image, "Resampling") else Image.NEAREST
1079
+ mask = np.asarray(mask_img.resize((background.width, background.height), nearest)) > 0
1080
+ return background, mask
1081
+
1082
  if not isinstance(layers, list) or not layers:
1083
  return background, None
1084
 
 
1135
  if background is None:
1136
  return [], consumed_mask
1137
 
1138
+ has_layer_data = isinstance(layers, list) and len(layers) > 0
1139
+ has_draw_data = (mask is not None) or has_layer_data
1140
+
1141
+ # If there are no draw layers/mask, treat as explicit crop mode.
1142
+ if not has_draw_data:
1143
  regions = _extract_selected_regions(editor_value, base_size=base_size, base_image=base_image)
1144
  return regions, consumed_mask
1145
 
1146
+ if mask is None:
1147
+ return [], consumed_mask
1148
+
1149
  if consumed_mask is None or not isinstance(consumed_mask, np.ndarray) or consumed_mask.shape != mask.shape:
1150
  delta_mask = mask
1151
  else:
 
1394
  with gr.Column(scale=3):
1395
  workspace_hint = gr.Markdown("**Entire Page mode:** No drawing needed. Click **Extract** to process the full page.")
1396
  gr.Markdown("**Image Workspace (full page + region selection)**")
1397
+ if HAS_REGION_WORKSPACE:
1398
  editor_kwargs = {}
1399
  if HAS_BRUSH:
1400
  try:
1401
+ highlight = ("#2563eb", 0.35)
1402
  editor_kwargs["brush"] = gr.Brush(
1403
+ colors=[highlight],
1404
+ default_color=highlight,
1405
  color_mode="fixed",
1406
  default_size=22,
1407
  )
 
1420
  editor_kwargs["eraser"] = gr.Eraser(default_size=26)
1421
  except TypeError:
1422
  editor_kwargs["eraser"] = gr.Eraser()
1423
+ if HAS_PAINT:
1424
+ try:
1425
+ region_editor = gr.Paint(
1426
+ label="Image Workspace",
1427
+ show_label=False,
1428
+ type="pil",
1429
+ height=640,
1430
+ **editor_kwargs,
1431
+ )
1432
+ except TypeError:
1433
+ try:
1434
+ region_editor = gr.Paint(
1435
+ label="Image Workspace",
1436
+ show_label=False,
1437
+ height=640,
1438
+ **editor_kwargs,
1439
+ )
1440
+ except TypeError:
1441
+ region_editor = gr.Paint(
1442
+ label="Image Workspace",
1443
+ show_label=False,
1444
+ height=640,
1445
+ )
1446
+ else:
1447
+ region_editor = gr.ImageEditor(
1448
+ label="Image Workspace",
1449
+ show_label=False,
1450
+ type="pil",
1451
+ height=640,
1452
+ **editor_kwargs,
1453
+ )
1454
  else:
1455
+ gr.Markdown("Region drawing requires a newer Gradio version with `Paint` or `ImageEditor` support.")
1456
  region_editor = gr.State(None)
1457
 
1458
  with gr.Column(scale=1):
 
1503
  "examples/2022-0922 Section 14 Notes.png",
1504
  "examples/2022-0922 Section 15 Notes.png",
1505
  ]
1506
+ if HAS_REGION_WORKSPACE and region_editor is not None:
1507
  gr.Examples(
1508
  label="Image Examples (click thumbnail to load into workspace)",
1509
  examples=image_examples,
 
1574
  task.change(toggle_prompt, [task], [prompt])
1575
  task.change(select_boxes, [task], [tabs])
1576
  input_scope.change(toggle_scope_ui, [input_scope], [workspace_hint, selection_controls, selection_status, selected_regions_gallery])
1577
+ if HAS_REGION_WORKSPACE and region_editor is not None:
1578
  file_in.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
1579
  page_selector.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
1580
  region_editor.change(sync_workspace_state, [region_editor, workspace_base_image], [workspace_base_size, workspace_base_image])