Yaz Hobooti committed on
Commit
9878852
·
1 Parent(s): 257098e

Implement ChatGPT barcode fixes: unified backends, better ZXing, accumulator approach, no bottom filter

Browse files
Files changed (1) hide show
  1. app.py +172 -123
app.py CHANGED
@@ -892,6 +892,12 @@ try:
892
  import cv2; HAS_CV2=True
893
  except Exception: HAS_CV2=False
894
 
 
 
 
 
 
 
895
  # your Box(y1,x1,y2,x2,area) assumed to exist
896
 
897
  def _binarize(img: Image.Image) -> Image.Image:
@@ -934,58 +940,83 @@ def parse_gs1(text: str) -> Optional[dict]:
934
  out[ai] = val
935
  return out or None
936
 
937
- def _decode_zxing(pil: Image.Image) -> List[Dict[str,Any]]:
938
- if not HAS_ZXING:
939
- print("ZXing not available")
940
  return []
941
  arr = np.asarray(pil.convert("L"))
942
- out=[]
943
- hints = _zxing_hints_all()
 
 
944
  try:
945
- res = zxingcpp.read_barcodes(arr, hints=hints) if hints is not None else zxingcpp.read_barcodes(arr)
946
- print(f"ZXing found {len(res) if res else 0} barcodes")
947
- except Exception as e:
948
- print(f"ZXing error: {e}")
949
- res = []
950
-
951
- for r in res or []:
952
- x1=y1=x2=y2=w=h=0
953
- pos = getattr(r, "position", None)
954
- pts: List[Any] = []
955
- if pos is not None:
956
- try:
957
- pts = list(pos)
958
- except TypeError:
959
- corner_names = (
960
- "top_left","topLeft","top_right","topRight",
961
- "bottom_left","bottomLeft","bottom_right","bottomRight",
962
- "point1","point2","point3","point4",
963
- )
964
- seen=set()
965
- for name in corner_names:
966
- if hasattr(pos, name):
967
- p = getattr(pos, name)
968
- if id(p) not in seen and hasattr(p,"x") and hasattr(p,"y"):
969
- pts.append(p); seen.add(id(p))
970
- if pts:
971
- xs=[int(getattr(p,"x",0)) for p in pts]
972
- ys=[int(getattr(p,"y",0)) for p in pts]
973
- x1,x2=min(xs),max(xs); y1,y2=min(ys),max(ys); w,h=x2-x1,y2-y1
974
-
975
- fmt = getattr(getattr(r,"format",None),"name", None) or str(getattr(r,"format",""))
976
- out.append({
977
- "type": fmt,
978
- "data": r.text or "",
979
- "left": x1, "top": y1, "width": w, "height": h,
980
- })
981
- return out
 
 
 
 
 
 
 
 
 
982
 
983
  def _decode_zbar(pil: Image.Image) -> List[Dict[str,Any]]:
984
- if not HAS_ZBAR: return []
985
- syms=[ZBarSymbol.QRCODE,ZBarSymbol.EAN13,ZBarSymbol.EAN8,ZBarSymbol.UPCA,ZBarSymbol.CODE128] if ZBarSymbol else None
986
- res=zbar_decode(pil, symbols=syms) if syms else zbar_decode(pil)
987
- return [{"type": d.type, "data": (d.data.decode("utf-8","ignore") if isinstance(d.data,(bytes,bytearray)) else str(d.data)),
988
- "left": d.rect.left, "top": d.rect.top, "width": d.rect.width, "height": d.rect.height} for d in res]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
989
 
990
  def _decode_dmtx(pil: Image.Image) -> List[Dict[str,Any]]:
991
  if not HAS_DMTX: return []
@@ -1014,70 +1045,36 @@ def _decode_cv2_qr(pil: Image.Image) -> List[Dict[str,Any]]:
1014
  pass
1015
  return []
1016
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1017
  def _decode_variants(pil: Image.Image) -> List[Dict[str,Any]]:
1018
- """
1019
- Multi-variant decode with coord-safe upscales and a tiling fallback.
1020
- We rely on ZXing's internal rotation search via hints (so no manual rotate).
1021
- """
1022
- def _decode_and_rescale(img: Image.Image, scale: float) -> List[Dict[str,Any]]:
1023
- res = _decode_zxing(img) or _decode_zbar(img) or _decode_dmtx(img) or _decode_cv2_qr(img)
1024
- if not res: return []
1025
- if scale != 1.0:
1026
- for r in res:
1027
- r["left"] = int(round(r.get("left", 0) / scale))
1028
- r["top"] = int(round(r.get("top", 0) / scale))
1029
- r["width"] = int(round(r.get("width", 0) / scale))
1030
- r["height"] = int(round(r.get("height",0) / scale))
1031
- return res
1032
-
1033
- print(f"Starting _decode_variants on image {pil.size}")
1034
-
1035
- # 1) Whole-page variants
1036
- W,H = pil.size
1037
- variants = [
1038
- (pil, 1.0),
1039
- (ImageOps.grayscale(pil).convert("RGB"), 1.0),
1040
- (_binarize(pil).convert("RGB"), 1.0),
1041
- ]
1042
- if max(W,H) < 1800:
1043
- up2 = pil.resize((W*2, H*2), resample=Image.NEAREST)
1044
- variants += [
1045
- (up2, 2.0),
1046
- (_binarize(up2).convert("RGB"), 2.0),
1047
- ]
1048
- for vimg, sc in variants:
1049
- res = _decode_and_rescale(vimg, sc)
1050
- if res:
1051
- print(f"Found {len(res)} barcodes in whole-page variant at scale {sc}")
1052
- return res
1053
-
1054
- # 2) Tiled fallback (helps tiny or stacked GS1)
1055
- # Overlapping 3x3 grid
1056
- grid = 3
1057
- step_x = W // grid
1058
- step_y = H // grid
1059
- ovx, ovy = step_x // 6, step_y // 6
1060
- hits: List[Dict[str,Any]] = []
1061
- for iy in range(grid):
1062
- for ix in range(grid):
1063
- x0 = max(ix*step_x - ovx, 0)
1064
- y0 = max(iy*step_y - ovy, 0)
1065
- x1 = min((ix+1)*step_x + ovx, W)
1066
- y1 = min((iy+1)*step_y + ovy, H)
1067
- tile = pil.crop((x0,y0,x1,y1))
1068
- # light variants per tile
1069
- for vimg, sc in [(tile,1.0), (_binarize(tile).convert("RGB"),1.0)]:
1070
- res = _decode_and_rescale(vimg, sc)
1071
- for r in res:
1072
- r["left"] += x0
1073
- r["top"] += y0
1074
- # width/height already scaled
1075
- hits.append(r)
1076
- if hits:
1077
- print(f"Found {len(hits)} barcodes in tiled fallback")
1078
- return hits
1079
- print("No barcodes found in any variant")
1080
- return []
1081
 
1082
  def _pix_to_pil(pix) -> Image.Image:
1083
  # convert PyMuPDF Pixmap to grayscale PIL without alpha (avoids blur)
@@ -1213,19 +1210,23 @@ def compare_pdfs(file_a, file_b):
1213
  # Debug: Print spell check results
1214
  print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
1215
 
1216
- # Always attempt barcode scan. The PDF path uses ZXing-CPP / pyzbar / dmtx / cv2 if available.
1217
- try:
1218
- print(f"Starting barcode detection for file A: {file_a.name}")
1219
- bar_a, info_a = find_barcode_boxes_and_info_from_pdf(file_a.name, image_size=image_size)
1220
- print(f"Barcode detection A complete: {len(bar_a)} boxes, {len(info_a)} infos")
1221
-
1222
- print(f"Starting barcode detection for file B: {file_b.name}")
1223
- bar_b, info_b = find_barcode_boxes_and_info_from_pdf(file_b.name, image_size=image_size)
1224
- print(f"Barcode detection B complete: {len(bar_b)} boxes, {len(info_b)} infos")
1225
- except Exception as e:
1226
- print(f"Barcode detection error: {e}")
1227
- import traceback
1228
- traceback.print_exc()
 
 
 
 
1229
  bar_a, info_a = [], []
1230
  bar_b, info_b = [], []
1231
 
@@ -1435,6 +1436,54 @@ def debug_scan_pdf(pdf_path: str, outdir: str = "barcode_debug", max_pages=2):
1435
  print(f"\nDebug images saved to: {outdir}/")
1436
  print("Open the PNGs and zoom in to check bar width. If narrow bars are <2px at 600 DPI, you need 900-1200 DPI.")
1437
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1438
  def find_barcode_boxes_and_info_from_pdf(pdf_path: str, image_size: Optional[Tuple[int, int]] = None, max_pages: int = 10):
1439
  """Detect barcodes from the original PDF and return boxes in the same
1440
  coordinate space as the combined display image.
 
892
  import cv2; HAS_CV2=True
893
  except Exception: HAS_CV2=False
894
 
895
# Barcode capability is considered present if ANY backend imported successfully.
# globals() is used on purpose: locals() only aliases the module namespace when
# the statement runs at top level, and would silently report False if this code
# were ever moved inside a function or comprehension.
HAS_ANY_BARCODE = any(
    globals().get(flag, False)
    for flag in ("HAS_ZXING", "HAS_ZBAR", "HAS_DMTX", "HAS_CV2")
)
900
+
901
  # your Box(y1,x1,y2,x2,area) assumed to exist
902
 
903
  def _binarize(img: Image.Image) -> Image.Image:
 
940
  out[ai] = val
941
  return out or None
942
 
943
def _zxing_format_mask(fmt_enum):
    """Return the OR of every wanted ``BarcodeFormat`` member that exists, or None.

    Each entry lists alternative spellings for one symbology. Which spelling a
    given zxing-cpp build exposes is not visible from this file, so members are
    looked up defensively and missing ones are skipped instead of raising.
    """
    wanted = (
        ("QR_CODE", "QRCode"), ("MICRO_QR", "MicroQRCode"),
        ("DATA_MATRIX", "DataMatrix"),
        ("PDF417",),  # stacked rows
        ("AZTEC", "Aztec"),
        ("MAXICODE", "MaxiCode"),
        ("EAN_13", "EAN13"), ("EAN_8", "EAN8"),
        ("UPC_A", "UPCA"), ("UPC_E", "UPCE"),
        ("CODE_128", "Code128"), ("CODE_39", "Code39"), ("CODE_93", "Code93"),
        ("ITF",), ("CODABAR", "Codabar"),
        ("RSS_14", "DataBar"), ("RSS_EXPANDED", "DataBarExpanded"),  # GS1 DataBar
    )
    mask = None
    for spellings in wanted:
        member = next(
            (getattr(fmt_enum, s) for s in spellings if hasattr(fmt_enum, s)),
            None,
        )
        if member is None:
            continue
        mask = member if mask is None else mask | member
    return mask


def _zxing_corner_points(pos) -> List[Any]:
    """Collect the corner points of a ZXing result position.

    Iterable positions are used directly; otherwise known corner attribute
    names are probed and every attribute exposing ``x`` and ``y`` is kept.
    """
    if pos is None:
        return []
    try:
        return list(pos)
    except TypeError:
        pass
    corner_names = (
        "top_left", "topLeft", "top_right", "topRight",
        "bottom_left", "bottomLeft", "bottom_right", "bottomRight",
        "point1", "point2", "point3", "point4",
    )
    pts = []
    for name in corner_names:
        if hasattr(pos, name):
            p = getattr(pos, name)
            if hasattr(p, "x") and hasattr(p, "y"):
                pts.append(p)
    return pts


def _decode_zxing_all(pil: Image.Image) -> List[Dict[str, Any]]:
    """Decode every barcode ZXing-CPP finds in ``pil``.

    Args:
        pil: image to scan; it is converted to 8-bit grayscale first.

    Returns:
        A list of dicts with keys ``type``, ``data``, ``left``, ``top``,
        ``width`` and ``height`` (axis-aligned bounding box in the pixel space
        of ``pil``). Returns ``[]`` when ZXing is unavailable or decoding fails.
    """
    if not HAS_ZXING:
        return []
    arr = np.asarray(pil.convert("L"))

    # Configured path: only when the binding exposes both classes.
    reader_options_cls = getattr(zxingcpp, "ReaderOptions", None)
    fmt_enum = getattr(zxingcpp, "BarcodeFormat", None)
    zx = None
    if reader_options_cls and fmt_enum:
        try:
            opts = reader_options_cls()
            mask = _zxing_format_mask(fmt_enum)
            if mask is not None:
                opts.formats = mask
            opts.try_harder = True
            opts.try_rotate = True
            zx = zxingcpp.read_barcodes(arr, opts)
        except Exception as e:
            # A binding mismatch here must not disable the backend entirely;
            # fall through to the default call below.
            print(f"ZXing options path failed, retrying with defaults: {e}")
            zx = None

    if zx is None:
        try:
            zx = zxingcpp.read_barcodes(arr)
        except Exception as e:
            print(f"ZXing error: {e}")
            return []

    results: List[Dict[str, Any]] = []
    for r in zx or []:
        x1 = y1 = w = h = 0
        pts = _zxing_corner_points(getattr(r, "position", None))
        if pts:
            xs = [int(getattr(p, "x", 0)) for p in pts]
            ys = [int(getattr(p, "y", 0)) for p in pts]
            x1, x2 = min(xs), max(xs)
            y1, y2 = min(ys), max(ys)
            w, h = x2 - x1, y2 - y1
        # Prefer the enum member's short name (e.g. "EAN13") over its str()
        # form so the type string is not prefixed with the enum class name.
        fmt = getattr(r, "format", None)
        fmt_name = getattr(fmt, "name", None) or ("" if fmt is None else str(fmt))
        results.append({
            "type": fmt_name,
            "data": getattr(r, "text", "") or "",
            "left": x1, "top": y1, "width": w, "height": h,
        })
    return results
998
 
999
def _decode_zbar(pil: Image.Image) -> List[Dict[str,Any]]:
    """Decode barcodes in ``pil`` with ZBar (pyzbar).

    Returns a list of dicts with keys ``type``, ``data``, ``left``, ``top``,
    ``width`` and ``height``. Returns ``[]`` when ZBar is unavailable or any
    error occurs while decoding.
    """
    if not HAS_ZBAR:
        return []
    wanted_names = ("QRCODE","EAN13","EAN8","UPCA","UPCE","CODE128","CODE39","I25","CODABAR")
    try:
        # Restrict to the symbologies this ZBar build actually exposes.
        symbols = [getattr(ZBarSymbol, name) for name in wanted_names if hasattr(ZBarSymbol, name)]
        if symbols:
            decoded = zbar_decode(pil, symbols=symbols)
        else:
            decoded = zbar_decode(pil)
        return [
            {
                "type": item.type,
                # Payload may arrive as bytes; undecodable bytes are dropped.
                "data": (
                    item.data.decode("utf-8","ignore")
                    if isinstance(item.data,(bytes,bytearray))
                    else str(item.data)
                ),
                "left": item.rect.left,
                "top": item.rect.top,
                "width": item.rect.width,
                "height": item.rect.height,
            }
            for item in decoded
        ]
    except Exception:
        return []
1020
 
1021
  def _decode_dmtx(pil: Image.Image) -> List[Dict[str,Any]]:
1022
  if not HAS_DMTX: return []
 
1045
  pass
1046
  return []
1047
 
1048
+ def _dedupe_hits(hits: List[Dict[str,Any]]) -> List[Dict[str,Any]]:
1049
+ seen=set(); out=[]
1050
+ for r in hits:
1051
+ # Round coords to reduce jitter then dedupe
1052
+ key=(r.get("type",""), r.get("data",""),
1053
+ int(round(r.get("left",0)/3)*3), int(round(r.get("top",0)/3)*3),
1054
+ int(round(r.get("width",0)/3)*3), int(round(r.get("height",0)/3)*3))
1055
+ if key in seen:
1056
+ continue
1057
+ seen.add(key)
1058
+ out.append(r)
1059
+ return out
1060
+
1061
def _unrotate_box(r: Dict[str,Any], angle: int, w: int, h: int) -> Tuple[int, int, int, int]:
    """Map a box found in a rotated copy back into the unrotated frame.

    ``(w, h)`` is the image size before rotation. PIL's ``Image.rotate`` turns
    the image counter-clockwise, so the inverse mapping is applied here.
    Returns ``(left, top, width, height)``.
    """
    left, top = r.get("left", 0), r.get("top", 0)
    bw, bh = r.get("width", 0), r.get("height", 0)
    if angle == 90:
        left, top, bw, bh = w - (top + bh), left, bh, bw
    elif angle == 180:
        left, top = w - (left + bw), h - (top + bh)
    elif angle == 270:
        left, top, bw, bh = top, h - (left + bw), bh, bw
    return left, top, bw, bh


def _decode_variants(pil: Image.Image) -> List[Dict[str,Any]]:
    """Run every decoder over several renderings of ``pil`` and merge the hits.

    Variants are the image itself, its grayscale and binarized forms and, for
    images whose longest side is under 1600 px, a 2x nearest-neighbour upscale
    (plain and binarized). Each variant is tried at 0/90/180/270 degrees.

    Every hit is converted back to the pixel space of ``pil`` (rotation undone,
    upscale divided out) before de-duplication, so the returned boxes can be
    drawn directly on ``pil`` and repeated finds of one symbol collapse.

    Returns:
        De-duplicated hit dicts with ``type``, ``data``, ``left``, ``top``,
        ``width`` and ``height``.
    """
    w, h = pil.size
    variants = [
        (pil, 1.0),
        (ImageOps.grayscale(pil), 1.0),
        (_binarize(pil), 1.0),
    ]
    if max(w, h) < 1600:
        up = pil.resize((w*2, h*2), resample=Image.NEAREST)
        variants += [(up, 2.0), (_binarize(up), 2.0)]

    decoders = (_decode_zxing_all, _decode_zbar, _decode_dmtx, _decode_cv2_qr)
    hits: List[Dict[str,Any]] = []
    for img, scale in variants:
        vw, vh = img.size
        for angle in (0, 90, 180, 270):
            rotated = img if angle == 0 else img.rotate(angle, expand=True)
            for decode in decoders:
                for r in decode(rotated) or []:
                    left, top, bw, bh = _unrotate_box(r, angle, vw, vh)
                    # Copy instead of mutating the decoder's dict; clamp the
                    # origin because rounding can push it slightly negative.
                    hits.append({
                        **r,
                        "left": max(0, int(round(left / scale))),
                        "top": max(0, int(round(top / scale))),
                        "width": int(round(bw / scale)),
                        "height": int(round(bh / scale)),
                    })
    return _dedupe_hits(hits)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1078
 
1079
  def _pix_to_pil(pix) -> Image.Image:
1080
  # convert PyMuPDF Pixmap to grayscale PIL without alpha (avoids blur)
 
1210
  # Debug: Print spell check results
1211
  print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
1212
 
1213
+ if HAS_ANY_BARCODE:
1214
+ try:
1215
+ print(f"Starting barcode detection for file A: {file_a.name}")
1216
+ bar_a, info_a = find_barcodes_in_pdf(file_a.name, image_size=image_size) if HAS_PYMUPDF else find_barcodes_in_image(a)
1217
+ print(f"Barcode detection A complete: {len(bar_a)} boxes, {len(info_a)} infos")
1218
+
1219
+ print(f"Starting barcode detection for file B: {file_b.name}")
1220
+ bar_b, info_b = find_barcodes_in_pdf(file_b.name, image_size=image_size) if HAS_PYMUPDF else find_barcodes_in_image(b)
1221
+ print(f"Barcode detection B complete: {len(bar_b)} boxes, {len(info_b)} infos")
1222
+ except Exception as e:
1223
+ print(f"Barcode detection error: {e}")
1224
+ import traceback
1225
+ traceback.print_exc()
1226
+ bar_a, info_a = [], []
1227
+ bar_b, info_b = [], []
1228
+ else:
1229
+ print("No barcode backends available")
1230
  bar_a, info_a = [], []
1231
  bar_b, info_b = [], []
1232
 
 
1436
  print(f"\nDebug images saved to: {outdir}/")
1437
  print("Open the PNGs and zoom in to check bar width. If narrow bars are <2px at 600 DPI, you need 900-1200 DPI.")
1438
 
1439
def _embedded_hit_to_page(r: Dict[str,Any], img_size, placement, page_rect, scale: float) -> Dict[str,Any]:
    """Re-express a hit found in an embedded image in page-raster pixels.

    ``placement`` is the rectangle (page units) where the image is drawn.
    Only the axis-aligned bounding box is mapped.
    NOTE(review): rotated pages or rotated image placements are not handled
    here - confirm whether such PDFs occur in practice.
    """
    img_w, img_h = img_size
    sx = (placement.width * scale) / float(img_w or 1)
    sy = (placement.height * scale) / float(img_h or 1)
    return {
        **r,
        "left": int(round((placement.x0 - page_rect.x0) * scale + r["left"] * sx)),
        "top": int(round((placement.y0 - page_rect.y0) * scale + r["top"] * sy)),
        "width": int(round(r["width"] * sx)),
        "height": int(round(r["height"] * sy)),
    }


def find_barcodes_in_pdf(pdf_path: str, image_size: Optional[Tuple[int,int]]=None, max_pages: int = 10):
    """Detect barcodes in the first ``max_pages`` pages of a PDF.

    Each page is rasterized (scaled to ``image_size[0]`` pixels wide but never
    below 1.0x, or at 600 DPI when ``image_size`` is not given) and scanned,
    together with the images embedded on that page. Box rows are offset by the
    cumulative height of the preceding page rasters.

    Args:
        pdf_path: path of the PDF to open.
        image_size: optional ``(width, height)``; only the width is used.
        max_pages: maximum number of pages to scan.

    Returns:
        ``(boxes, infos)``: ``Box(y1, x1, y2, x2, area)`` objects and the
        matching hit dicts extended with ``valid``, ``page`` and ``source``.
        ``([], [])`` is returned if the document cannot be processed.
    """
    boxes: List[Box] = []
    infos: List[Dict[str,Any]] = []

    def _record(r: Dict[str,Any], y_offset: int, page_no: int, source: str) -> None:
        boxes.append(Box(
            r["top"] + y_offset, r["left"],
            r["top"] + y_offset + r["height"], r["left"] + r["width"],
            r["width"] * r["height"],
        ))
        infos.append({
            **r,
            "valid": _validate(r.get("type",""), r.get("data","")),
            "page": page_no,
            "source": source,
        })

    doc = None
    try:
        doc = fitz.open(pdf_path)
        n = min(len(doc), max_pages)
        y_offset = 0
        target_width = int(image_size[0]) if image_size else None
        for page_idx in range(n):
            page = doc[page_idx]
            if target_width:
                scale = max(1.0, float(target_width)/float(page.rect.width))
            else:
                scale = 600.0/72.0
            try:
                pix = page.get_pixmap(matrix=fitz.Matrix(scale,scale), colorspace=fitz.csGRAY, alpha=False)
            except TypeError:
                # Fallback for builds that reject the colorspace keyword.
                pix = page.get_pixmap(matrix=fitz.Matrix(scale,scale), alpha=False)
            pil = _pix_to_pil(pix)

            # 1) embedded XObjects (often crisp)
            for ix, (xref, *_) in enumerate(page.get_images(full=True)):
                try:
                    epix = fitz.Pixmap(doc, xref)
                    epil = _pix_to_pil(epix)
                    # Hits are in the embedded image's own pixels; look up where
                    # the image sits on the page so boxes land on the raster.
                    placement = None
                    get_rects = getattr(page, "get_image_rects", None)
                    if get_rects is not None:
                        rects = get_rects(xref)
                        placement = rects[0] if rects else None
                    for r in _decode_variants(epil):
                        if placement is not None:
                            r = _embedded_hit_to_page(r, epil.size, placement, page.rect, scale)
                        # Without a placement the raw image coordinates are kept.
                        _record(r, y_offset, page_idx+1, f"embed:{ix+1}")
                except Exception as e:
                    # Best effort: one unreadable image must not abort the page.
                    print(f"Skipping embedded image {ix+1} on page {page_idx+1}: {e}")

            # 2) page raster
            for r in _decode_variants(pil):
                _record(r, y_offset, page_idx+1, f"page@scale{scale:.2f}")
            y_offset += pil.size[1]
    except Exception as e:
        print(f"Barcode detection failed for {pdf_path}: {e}")
        return [], []
    finally:
        # Release the document even when a page fails midway.
        if doc is not None:
            doc.close()
    return boxes, infos
1478
+
1479
def find_barcodes_in_image(pil: Image.Image):
    """Detect barcodes in a single image.

    Returns ``(boxes, infos)``: one ``Box(y1, x1, y2, x2, area)`` per hit from
    ``_decode_variants`` and the matching hit dict extended with ``valid``,
    ``page`` (always 1) and ``source`` (always ``"image"``).
    """
    boxes: List[Box] = []
    infos: List[Dict[str,Any]] = []
    for hit in _decode_variants(pil):
        top, left = hit["top"], hit["left"]
        width, height = hit["width"], hit["height"]
        boxes.append(Box(top, left, top + height, left + width, width * height))
        is_valid = _validate(hit.get("type",""), hit.get("data",""))
        infos.append({**hit, "valid": is_valid, "page": 1, "source": "image"})
    return boxes, infos
1486
+
1487
  def find_barcode_boxes_and_info_from_pdf(pdf_path: str, image_size: Optional[Tuple[int, int]] = None, max_pages: int = 10):
1488
  """Detect barcodes from the original PDF and return boxes in the same
1489
  coordinate space as the combined display image.