Spaces:
Running on Zero
Running on Zero
Populate full-page cropped gallery from detected refs
Browse files
app.py
CHANGED
|
@@ -242,6 +242,32 @@ def draw_bounding_boxes(image, refs, extract_images=False):
|
|
| 242 |
img_draw.paste(overlay, (0, 0), overlay)
|
| 243 |
return img_draw, crops
|
| 244 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
def clean_output(text, include_images=False):
|
| 246 |
if not text:
|
| 247 |
return ""
|
|
@@ -994,6 +1020,7 @@ def process_image(image, task, custom_prompt, enable_equation_zoom=True, infer_c
|
|
| 994 |
|
| 995 |
img_out = None
|
| 996 |
crops = []
|
|
|
|
| 997 |
result_for_layout = result
|
| 998 |
|
| 999 |
if has_grounding and '<|ref|>' in result:
|
|
@@ -1001,12 +1028,15 @@ def process_image(image, task, custom_prompt, enable_equation_zoom=True, infer_c
|
|
| 1001 |
if task == "📋 Markdown" and enable_equation_zoom:
|
| 1002 |
refs.extend(_refine_equation_refs(image, result))
|
| 1003 |
if refs:
|
| 1004 |
-
img_out, crops = draw_bounding_boxes(image, refs, True)
|
|
|
|
| 1005 |
synthetic = [r[0] for r in refs if r[1] == "equation_detail"]
|
| 1006 |
if synthetic:
|
| 1007 |
result_for_layout = result + "\n" + "\n".join(synthetic)
|
| 1008 |
-
|
| 1009 |
-
markdown = embed_images(markdown, crops)
|
|
|
|
|
|
|
| 1010 |
|
| 1011 |
return cleaned, markdown, result_for_layout, img_out, crops
|
| 1012 |
|
|
|
|
| 242 |
img_draw.paste(overlay, (0, 0), overlay)
|
| 243 |
return img_draw, crops
|
| 244 |
|
| 245 |
+
def _extract_labeled_crops_from_refs(image, refs, max_items=24):
    """Crop each unique, sufficiently large referenced box out of ``image``.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and a
            ``crop((x1, y1, x2, y2))`` method whose result has ``width`` and
            ``height`` attributes (presumably a PIL image; confirm against
            callers).
        refs: Iterable of indexable records in which ``ref[1]`` is the label
            and ``ref[2]`` is the coordinate payload passed to
            ``_parse_coord_payload``.
        max_items: Maximum number of crops to return. Values ``<= 0`` yield
            an empty list.

    Returns:
        A list of ``(crop, caption)`` tuples, where the caption is
        ``"<label> (<width>x<height>)"``. At most ``max_items`` entries.
    """
    # Without this guard the cap is only checked after the first append, so a
    # non-positive limit would still return one crop.
    if max_items <= 0:
        return []

    img_w, img_h = image.size

    def _to_pixels(value, extent):
        # Box values are scaled by 1/999 of the image extent (presumably a
        # 0..999 normalized grid; confirm against the model output format).
        # Clamp so an out-of-range value cannot push the crop box outside the
        # image, which would pad the crop and misreport its size.
        return min(max(int(value / 999.0 * extent), 0), extent)

    items = []
    seen = set()

    for ref in refs:
        label = str(ref[1])
        label_key = label.lower()
        for box in _parse_coord_payload(ref[2]):
            x1 = _to_pixels(box[0], img_w)
            y1 = _to_pixels(box[1], img_h)
            x2 = _to_pixels(box[2], img_w)
            y2 = _to_pixels(box[3], img_h)

            # Skip boxes thinner than 8 px on either axis; this also rejects
            # inverted boxes, whose extent is negative.
            if x2 - x1 < 8 or y2 - y1 < 8:
                continue

            # De-duplicate on case-insensitive label plus pixel box.
            key = (label_key, x1, y1, x2, y2)
            if key in seen:
                continue
            seen.add(key)

            crop = image.crop((x1, y1, x2, y2))
            items.append((crop, f"{label} ({crop.width}x{crop.height})"))
            if len(items) >= max_items:
                return items

    return items
|
| 270 |
+
|
| 271 |
def clean_output(text, include_images=False):
|
| 272 |
if not text:
|
| 273 |
return ""
|
|
|
|
| 1020 |
|
| 1021 |
img_out = None
|
| 1022 |
crops = []
|
| 1023 |
+
figure_crops = []
|
| 1024 |
result_for_layout = result
|
| 1025 |
|
| 1026 |
if has_grounding and '<|ref|>' in result:
|
|
|
|
| 1028 |
if task == "📋 Markdown" and enable_equation_zoom:
|
| 1029 |
refs.extend(_refine_equation_refs(image, result))
|
| 1030 |
if refs:
|
| 1031 |
+
img_out, figure_crops = draw_bounding_boxes(image, refs, True)
|
| 1032 |
+
crops = _extract_labeled_crops_from_refs(image, refs)
|
| 1033 |
synthetic = [r[0] for r in refs if r[1] == "equation_detail"]
|
| 1034 |
if synthetic:
|
| 1035 |
result_for_layout = result + "\n" + "\n".join(synthetic)
|
| 1036 |
+
|
| 1037 |
+
markdown = embed_images(markdown, figure_crops)
|
| 1038 |
+
if not crops and figure_crops:
|
| 1039 |
+
crops = _label_gallery_items(figure_crops, prefix="Figure")
|
| 1040 |
|
| 1041 |
return cleaned, markdown, result_for_layout, img_out, crops
|
| 1042 |
|