Spaces:
Sleeping
Sleeping
Yaz Hobooti
committed on
Commit
·
535aaab
1
Parent(s):
9878852
Fix barcode detection: reduce duplicates, restore 115mm bottom filter, remove rotations
Browse files
app.py
CHANGED
|
@@ -1048,10 +1048,13 @@ def _decode_cv2_qr(pil: Image.Image) -> List[Dict[str,Any]]:
|
|
| 1048 |
def _dedupe_hits(hits: List[Dict[str,Any]]) -> List[Dict[str,Any]]:
|
| 1049 |
seen=set(); out=[]
|
| 1050 |
for r in hits:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1051 |
# Round coords to reduce jitter then dedupe
|
| 1052 |
-
key=(r.get("type",""),
|
| 1053 |
-
int(round(r.get("left",0)/
|
| 1054 |
-
int(round(r.get("width",0)/3)*3), int(round(r.get("height",0)/3)*3))
|
| 1055 |
if key in seen:
|
| 1056 |
continue
|
| 1057 |
seen.add(key)
|
|
@@ -1059,21 +1062,20 @@ def _dedupe_hits(hits: List[Dict[str,Any]]) -> List[Dict[str,Any]]:
|
|
| 1059 |
return out
|
| 1060 |
|
| 1061 |
def _decode_variants(pil: Image.Image) -> List[Dict[str,Any]]:
|
| 1062 |
-
|
| 1063 |
-
variants
|
| 1064 |
w,h = pil.size
|
| 1065 |
if max(w,h) < 1600:
|
| 1066 |
up = pil.resize((w*2,h*2), resample=Image.NEAREST)
|
| 1067 |
-
variants += [up
|
|
|
|
| 1068 |
hits=[]
|
| 1069 |
for v in variants:
|
| 1070 |
-
# try
|
| 1071 |
-
|
| 1072 |
-
|
| 1073 |
-
|
| 1074 |
-
|
| 1075 |
-
hits += _decode_dmtx(vv)
|
| 1076 |
-
hits += _decode_cv2_qr(vv)
|
| 1077 |
return _dedupe_hits(hits)
|
| 1078 |
|
| 1079 |
def _pix_to_pil(pix) -> Image.Image:
|
|
@@ -1460,6 +1462,12 @@ def find_barcodes_in_pdf(pdf_path: str, image_size: Optional[Tuple[int,int]]=Non
|
|
| 1460 |
epix = fitz.Pixmap(doc, xref)
|
| 1461 |
epil = _pix_to_pil(epix)
|
| 1462 |
for r in _decode_variants(epil):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1463 |
b = Box(r["top"]+y_offset, r["left"], r["top"]+y_offset+r["height"], r["left"]+r["width"], r["width"]*r["height"])
|
| 1464 |
boxes.append(b)
|
| 1465 |
infos.append({**r, "valid": _validate(r.get("type",""), r.get("data","")), "page": page_idx+1, "source": f"embed:{ix+1}"})
|
|
@@ -1467,6 +1475,12 @@ def find_barcodes_in_pdf(pdf_path: str, image_size: Optional[Tuple[int,int]]=Non
|
|
| 1467 |
pass
|
| 1468 |
# 2) page raster
|
| 1469 |
for r in _decode_variants(pil):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1470 |
b = Box(r["top"]+y_offset, r["left"], r["top"]+y_offset+r["height"], r["left"]+r["width"], r["width"]*r["height"])
|
| 1471 |
boxes.append(b)
|
| 1472 |
infos.append({**r, "valid": _validate(r.get("type",""), r.get("data","")), "page": page_idx+1, "source": f"page@scale{scale:.2f}"})
|
|
|
|
| 1048 |
def _dedupe_hits(hits: List[Dict[str,Any]]) -> List[Dict[str,Any]]:
|
| 1049 |
seen=set(); out=[]
|
| 1050 |
for r in hits:
|
| 1051 |
+
# More aggressive deduplication based on content and approximate location
|
| 1052 |
+
data = r.get("data", "").strip()
|
| 1053 |
+
if not data: # Skip empty detections
|
| 1054 |
+
continue
|
| 1055 |
# Round coords to reduce jitter then dedupe
|
| 1056 |
+
key=(r.get("type",""), data,
|
| 1057 |
+
int(round(r.get("left",0)/10)*10), int(round(r.get("top",0)/10)*10))
|
|
|
|
| 1058 |
if key in seen:
|
| 1059 |
continue
|
| 1060 |
seen.add(key)
|
|
|
|
| 1062 |
return out
|
| 1063 |
|
| 1064 |
def _decode_variants(pil: Image.Image) -> List[Dict[str,Any]]:
|
| 1065 |
+
# Start with original image only to avoid false positives
|
| 1066 |
+
variants=[pil]
|
| 1067 |
w,h = pil.size
|
| 1068 |
if max(w,h) < 1600:
|
| 1069 |
up = pil.resize((w*2,h*2), resample=Image.NEAREST)
|
| 1070 |
+
variants += [up]
|
| 1071 |
+
|
| 1072 |
hits=[]
|
| 1073 |
for v in variants:
|
| 1074 |
+
# Only try original orientation to avoid coordinate mapping issues
|
| 1075 |
+
hits += _decode_zxing_all(v)
|
| 1076 |
+
hits += _decode_zbar(v)
|
| 1077 |
+
hits += _decode_dmtx(v)
|
| 1078 |
+
hits += _decode_cv2_qr(v)
|
|
|
|
|
|
|
| 1079 |
return _dedupe_hits(hits)
|
| 1080 |
|
| 1081 |
def _pix_to_pil(pix) -> Image.Image:
|
|
|
|
| 1462 |
epix = fitz.Pixmap(doc, xref)
|
| 1463 |
epil = _pix_to_pil(epix)
|
| 1464 |
for r in _decode_variants(epil):
|
| 1465 |
+
# Check if barcode is in the excluded bottom 115mm area
|
| 1466 |
+
per_page_box = Box(r["top"], r["left"], r["top"]+r["height"], r["left"]+r["width"], r["width"]*r["height"])
|
| 1467 |
+
effective_dpi = int(round(72.0 * scale))
|
| 1468 |
+
if _is_in_excluded_bottom_area(per_page_box, pil.size[1], excluded_height_mm=115.0, dpi=effective_dpi):
|
| 1469 |
+
continue
|
| 1470 |
+
|
| 1471 |
b = Box(r["top"]+y_offset, r["left"], r["top"]+y_offset+r["height"], r["left"]+r["width"], r["width"]*r["height"])
|
| 1472 |
boxes.append(b)
|
| 1473 |
infos.append({**r, "valid": _validate(r.get("type",""), r.get("data","")), "page": page_idx+1, "source": f"embed:{ix+1}"})
|
|
|
|
| 1475 |
pass
|
| 1476 |
# 2) page raster
|
| 1477 |
for r in _decode_variants(pil):
|
| 1478 |
+
# Check if barcode is in the excluded bottom 115mm area
|
| 1479 |
+
per_page_box = Box(r["top"], r["left"], r["top"]+r["height"], r["left"]+r["width"], r["width"]*r["height"])
|
| 1480 |
+
effective_dpi = int(round(72.0 * scale))
|
| 1481 |
+
if _is_in_excluded_bottom_area(per_page_box, pil.size[1], excluded_height_mm=115.0, dpi=effective_dpi):
|
| 1482 |
+
continue
|
| 1483 |
+
|
| 1484 |
b = Box(r["top"]+y_offset, r["left"], r["top"]+y_offset+r["height"], r["left"]+r["width"], r["width"]*r["height"])
|
| 1485 |
boxes.append(b)
|
| 1486 |
infos.append({**r, "valid": _validate(r.get("type",""), r.get("data","")), "page": page_idx+1, "source": f"page@scale{scale:.2f}"})
|