Yaz Hobooti committed on
Commit
535aaab
·
1 Parent(s): 9878852

Fix barcode detection: reduce duplicates, restore 115mm bottom filter, remove rotations

Browse files
Files changed (1) hide show
  1. app.py +27 -13
app.py CHANGED
@@ -1048,10 +1048,13 @@ def _decode_cv2_qr(pil: Image.Image) -> List[Dict[str,Any]]:
1048
  def _dedupe_hits(hits: List[Dict[str,Any]]) -> List[Dict[str,Any]]:
1049
  seen=set(); out=[]
1050
  for r in hits:
 
 
 
 
1051
  # Round coords to reduce jitter then dedupe
1052
- key=(r.get("type",""), r.get("data",""),
1053
- int(round(r.get("left",0)/3)*3), int(round(r.get("top",0)/3)*3),
1054
- int(round(r.get("width",0)/3)*3), int(round(r.get("height",0)/3)*3))
1055
  if key in seen:
1056
  continue
1057
  seen.add(key)
@@ -1059,21 +1062,20 @@ def _dedupe_hits(hits: List[Dict[str,Any]]) -> List[Dict[str,Any]]:
1059
  return out
1060
 
1061
  def _decode_variants(pil: Image.Image) -> List[Dict[str,Any]]:
1062
- variants=[pil, ImageOps.grayscale(pil)]
1063
- variants.append(_binarize(pil))
1064
  w,h = pil.size
1065
  if max(w,h) < 1600:
1066
  up = pil.resize((w*2,h*2), resample=Image.NEAREST)
1067
- variants += [up, _binarize(up)]
 
1068
  hits=[]
1069
  for v in variants:
1070
- # try 0/90/180/270
1071
- for angle in (0,90,180,270):
1072
- vv = v if angle==0 else v.rotate(angle, expand=True)
1073
- hits += _decode_zxing_all(vv)
1074
- hits += _decode_zbar(vv)
1075
- hits += _decode_dmtx(vv)
1076
- hits += _decode_cv2_qr(vv)
1077
  return _dedupe_hits(hits)
1078
 
1079
  def _pix_to_pil(pix) -> Image.Image:
@@ -1460,6 +1462,12 @@ def find_barcodes_in_pdf(pdf_path: str, image_size: Optional[Tuple[int,int]]=Non
1460
  epix = fitz.Pixmap(doc, xref)
1461
  epil = _pix_to_pil(epix)
1462
  for r in _decode_variants(epil):
 
 
 
 
 
 
1463
  b = Box(r["top"]+y_offset, r["left"], r["top"]+y_offset+r["height"], r["left"]+r["width"], r["width"]*r["height"])
1464
  boxes.append(b)
1465
  infos.append({**r, "valid": _validate(r.get("type",""), r.get("data","")), "page": page_idx+1, "source": f"embed:{ix+1}"})
@@ -1467,6 +1475,12 @@ def find_barcodes_in_pdf(pdf_path: str, image_size: Optional[Tuple[int,int]]=Non
1467
  pass
1468
  # 2) page raster
1469
  for r in _decode_variants(pil):
 
 
 
 
 
 
1470
  b = Box(r["top"]+y_offset, r["left"], r["top"]+y_offset+r["height"], r["left"]+r["width"], r["width"]*r["height"])
1471
  boxes.append(b)
1472
  infos.append({**r, "valid": _validate(r.get("type",""), r.get("data","")), "page": page_idx+1, "source": f"page@scale{scale:.2f}"})
 
1048
  def _dedupe_hits(hits: List[Dict[str,Any]]) -> List[Dict[str,Any]]:
1049
  seen=set(); out=[]
1050
  for r in hits:
1051
+ # More aggressive deduplication based on content and approximate location
1052
+ data = r.get("data", "").strip()
1053
+ if not data: # Skip empty detections
1054
+ continue
1055
  # Round coords to reduce jitter then dedupe
1056
+ key=(r.get("type",""), data,
1057
+ int(round(r.get("left",0)/10)*10), int(round(r.get("top",0)/10)*10))
 
1058
  if key in seen:
1059
  continue
1060
  seen.add(key)
 
1062
  return out
1063
 
1064
  def _decode_variants(pil: Image.Image) -> List[Dict[str,Any]]:
1065
+ # Start with original image only to avoid false positives
1066
+ variants=[pil]
1067
  w,h = pil.size
1068
  if max(w,h) < 1600:
1069
  up = pil.resize((w*2,h*2), resample=Image.NEAREST)
1070
+ variants += [up]
1071
+
1072
  hits=[]
1073
  for v in variants:
1074
+ # Only try original orientation to avoid coordinate mapping issues
1075
+ hits += _decode_zxing_all(v)
1076
+ hits += _decode_zbar(v)
1077
+ hits += _decode_dmtx(v)
1078
+ hits += _decode_cv2_qr(v)
 
 
1079
  return _dedupe_hits(hits)
1080
 
1081
  def _pix_to_pil(pix) -> Image.Image:
 
1462
  epix = fitz.Pixmap(doc, xref)
1463
  epil = _pix_to_pil(epix)
1464
  for r in _decode_variants(epil):
1465
+ # Check if barcode is in the excluded bottom 115mm area
1466
+ per_page_box = Box(r["top"], r["left"], r["top"]+r["height"], r["left"]+r["width"], r["width"]*r["height"])
1467
+ effective_dpi = int(round(72.0 * scale))
1468
+ if _is_in_excluded_bottom_area(per_page_box, pil.size[1], excluded_height_mm=115.0, dpi=effective_dpi):
1469
+ continue
1470
+
1471
  b = Box(r["top"]+y_offset, r["left"], r["top"]+y_offset+r["height"], r["left"]+r["width"], r["width"]*r["height"])
1472
  boxes.append(b)
1473
  infos.append({**r, "valid": _validate(r.get("type",""), r.get("data","")), "page": page_idx+1, "source": f"embed:{ix+1}"})
 
1475
  pass
1476
  # 2) page raster
1477
  for r in _decode_variants(pil):
1478
+ # Check if barcode is in the excluded bottom 115mm area
1479
+ per_page_box = Box(r["top"], r["left"], r["top"]+r["height"], r["left"]+r["width"], r["width"]*r["height"])
1480
+ effective_dpi = int(round(72.0 * scale))
1481
+ if _is_in_excluded_bottom_area(per_page_box, pil.size[1], excluded_height_mm=115.0, dpi=effective_dpi):
1482
+ continue
1483
+
1484
  b = Box(r["top"]+y_offset, r["left"], r["top"]+y_offset+r["height"], r["left"]+r["width"], r["width"]*r["height"])
1485
  boxes.append(b)
1486
  infos.append({**r, "valid": _validate(r.get("type",""), r.get("data","")), "page": page_idx+1, "source": f"page@scale{scale:.2f}"})