Spaces:

Digitaljoint
/

ProofCheck

Sleeping

App Files Files Community

Yaz Hobooti committed on Sep 27, 2025

Commit

9878852

1 Parent(s): 257098e

Implement ChatGPT barcode fixes: unified backends, better ZXing, accumulator approach, no bottom filter

Browse files

Files changed (1) hide show

app.py +172 -123

app.py CHANGED Viewed

@@ -892,6 +892,12 @@ try:
     import cv2; HAS_CV2=True
 except Exception: HAS_CV2=False
 # your Box(y1,x1,y2,x2,area) assumed to exist
 def _binarize(img: Image.Image) -> Image.Image:
@@ -934,58 +940,83 @@ def parse_gs1(text: str) -> Optional[dict]:
         out[ai] = val
     return out or None
-def _decode_zxing(pil: Image.Image) -> List[Dict[str,Any]]:
-    if not HAS_ZXING:
-        print("ZXing not available")
         return []
     arr = np.asarray(pil.convert("L"))
-    out=[]
-    hints = _zxing_hints_all()
     try:
-        res = zxingcpp.read_barcodes(arr, hints=hints) if hints is not None else zxingcpp.read_barcodes(arr)
-        print(f"ZXing found {len(res) if res else 0} barcodes")
-    except Exception as e:
-        print(f"ZXing error: {e}")
-        res = []
-    for r in res or []:
-        x1=y1=x2=y2=w=h=0
-        pos = getattr(r, "position", None)
-        pts: List[Any] = []
-        if pos is not None:
-            try:
-                pts = list(pos)
-            except TypeError:
-                corner_names = (
-                    "top_left","topLeft","top_right","topRight",
-                    "bottom_left","bottomLeft","bottom_right","bottomRight",
-                    "point1","point2","point3","point4",
-                )
-                seen=set()
-                for name in corner_names:
-                    if hasattr(pos, name):
-                        p = getattr(pos, name)
-                        if id(p) not in seen and hasattr(p,"x") and hasattr(p,"y"):
-                            pts.append(p); seen.add(id(p))
-        if pts:
-            xs=[int(getattr(p,"x",0)) for p in pts]
-            ys=[int(getattr(p,"y",0)) for p in pts]
-            x1,x2=min(xs),max(xs); y1,y2=min(ys),max(ys); w,h=x2-x1,y2-y1
-        fmt = getattr(getattr(r,"format",None),"name", None) or str(getattr(r,"format",""))
-        out.append({
-            "type": fmt,
-            "data": r.text or "",
-            "left": x1, "top": y1, "width": w, "height": h,
-        })
-    return out
 def _decode_zbar(pil: Image.Image) -> List[Dict[str,Any]]:
-    if not HAS_ZBAR: return []
-    syms=[ZBarSymbol.QRCODE,ZBarSymbol.EAN13,ZBarSymbol.EAN8,ZBarSymbol.UPCA,ZBarSymbol.CODE128] if ZBarSymbol else None
-    res=zbar_decode(pil, symbols=syms) if syms else zbar_decode(pil)
-    return [{"type": d.type, "data": (d.data.decode("utf-8","ignore") if isinstance(d.data,(bytes,bytearray)) else str(d.data)),
-             "left": d.rect.left, "top": d.rect.top, "width": d.rect.width, "height": d.rect.height} for d in res]
 def _decode_dmtx(pil: Image.Image) -> List[Dict[str,Any]]:
     if not HAS_DMTX: return []
@@ -1014,70 +1045,36 @@ def _decode_cv2_qr(pil: Image.Image) -> List[Dict[str,Any]]:
         pass
     return []
 def _decode_variants(pil: Image.Image) -> List[Dict[str,Any]]:
-    """
-    Multi-variant decode with coord-safe upscales and a tiling fallback.
-    We rely on ZXing's internal rotation search via hints (so no manual rotate).
-    """
-    def _decode_and_rescale(img: Image.Image, scale: float) -> List[Dict[str,Any]]:
-        res = _decode_zxing(img) or _decode_zbar(img) or _decode_dmtx(img) or _decode_cv2_qr(img)
-        if not res: return []
-        if scale != 1.0:
-            for r in res:
-                r["left"]   = int(round(r.get("left", 0) / scale))
-                r["top"]    = int(round(r.get("top",  0) / scale))
-                r["width"]  = int(round(r.get("width", 0) / scale))
-                r["height"] = int(round(r.get("height",0) / scale))
-        return res
-    print(f"Starting _decode_variants on image {pil.size}")
-    # 1) Whole-page variants
-    W,H = pil.size
-    variants = [
-        (pil, 1.0),
-        (ImageOps.grayscale(pil).convert("RGB"), 1.0),
-        (_binarize(pil).convert("RGB"), 1.0),
-    ]
-    if max(W,H) < 1800:
-        up2 = pil.resize((W*2, H*2), resample=Image.NEAREST)
-        variants += [
-            (up2, 2.0),
-            (_binarize(up2).convert("RGB"), 2.0),
-        ]
-    for vimg, sc in variants:
-        res = _decode_and_rescale(vimg, sc)
-        if res:
-            print(f"Found {len(res)} barcodes in whole-page variant at scale {sc}")
-            return res
-    # 2) Tiled fallback (helps tiny or stacked GS1)
-    # Overlapping 3x3 grid
-    grid = 3
-    step_x = W // grid
-    step_y = H // grid
-    ovx, ovy = step_x // 6, step_y // 6
-    hits: List[Dict[str,Any]] = []
-    for iy in range(grid):
-        for ix in range(grid):
-            x0 = max(ix*step_x - ovx, 0)
-            y0 = max(iy*step_y - ovy, 0)
-            x1 = min((ix+1)*step_x + ovx, W)
-            y1 = min((iy+1)*step_y + ovy, H)
-            tile = pil.crop((x0,y0,x1,y1))
-            # light variants per tile
-            for vimg, sc in [(tile,1.0), (_binarize(tile).convert("RGB"),1.0)]:
-                res = _decode_and_rescale(vimg, sc)
-                for r in res:
-                    r["left"]  += x0
-                    r["top"]   += y0
-                    # width/height already scaled
-                    hits.append(r)
-            if hits:
-                print(f"Found {len(hits)} barcodes in tiled fallback")
-                return hits
-    print("No barcodes found in any variant")
-    return []
 def _pix_to_pil(pix) -> Image.Image:
     # convert PyMuPDF Pixmap to grayscale PIL without alpha (avoids blur)
@@ -1213,19 +1210,23 @@ def compare_pdfs(file_a, file_b):
         # Debug: Print spell check results
         print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
-        # Always attempt barcode scan. The PDF path uses ZXing-CPP / pyzbar / dmtx / cv2 if available.
-        try:
-            print(f"Starting barcode detection for file A: {file_a.name}")
-            bar_a, info_a = find_barcode_boxes_and_info_from_pdf(file_a.name, image_size=image_size)
-            print(f"Barcode detection A complete: {len(bar_a)} boxes, {len(info_a)} infos")
-            print(f"Starting barcode detection for file B: {file_b.name}")
-            bar_b, info_b = find_barcode_boxes_and_info_from_pdf(file_b.name, image_size=image_size)
-            print(f"Barcode detection B complete: {len(bar_b)} boxes, {len(info_b)} infos")
-        except Exception as e:
-            print(f"Barcode detection error: {e}")
-            import traceback
-            traceback.print_exc()
             bar_a, info_a = [], []
             bar_b, info_b = [], []
@@ -1435,6 +1436,54 @@ def debug_scan_pdf(pdf_path: str, outdir: str = "barcode_debug", max_pages=2):
     print(f"\nDebug images saved to: {outdir}/")
     print("Open the PNGs and zoom in to check bar width. If narrow bars are <2px at 600 DPI, you need 900-1200 DPI.")
 def find_barcode_boxes_and_info_from_pdf(pdf_path: str, image_size: Optional[Tuple[int, int]] = None, max_pages: int = 10):
     """Detect barcodes from the original PDF and return boxes in the same
     coordinate space as the combined display image.

     import cv2; HAS_CV2=True
 except Exception: HAS_CV2=False
+# Barcode scanning is considered available when at least one backend imported.
+# The flags are read through globals() rather than locals(): the result is the
+# same at module scope, but it stays correct inside the generator expression
+# below and if this statement is ever moved into a function. A flag that was
+# never defined counts as False.
+HAS_ANY_BARCODE = any(
+    globals().get(flag, False)
+    for flag in ("HAS_ZXING", "HAS_ZBAR", "HAS_DMTX", "HAS_CV2")
+)
 # your Box(y1,x1,y2,x2,area) assumed to exist
 def _binarize(img: Image.Image) -> Image.Image:
         out[ai] = val
     return out or None
+def _decode_zxing_all(pil: Image.Image) -> List[Dict[str, Any]]:
+    """Decode every barcode ZXing-CPP can find in ``pil``.
+
+    Returns one dict per hit with keys ``type``, ``data``, ``left``, ``top``,
+    ``width`` and ``height`` (bounding box in ``pil``'s own pixel frame; all
+    zeros when the result exposes no usable position). Returns ``[]`` when
+    ZXing is unavailable. Decode errors are logged and whatever was collected
+    before the error is still returned.
+    """
+    if not HAS_ZXING:
         return []
     arr = np.asarray(pil.convert("L"))
+    # ReaderOptions / BarcodeFormat are looked up dynamically because not every
+    # zxing-cpp binding exposes them.
+    reader_options_cls = getattr(zxingcpp, "ReaderOptions", None)
+    barcode_format = getattr(zxingcpp, "BarcodeFormat", None)
+    results = []
+    zx = None
+    if reader_options_cls and barcode_format:
+        try:
+            # OR together only the members this binding actually defines.
+            # A missing name is skipped instead of raising (or being OR-ed in
+            # as the int 0, which an enum may reject), so one unknown format
+            # can no longer disable ZXing decoding altogether.
+            mask = None
+            for name in (
+                "QR_CODE", "MICRO_QR", "DATA_MATRIX",
+                "PDF417",  # stacked rows
+                "AZTEC", "MAXICODE",
+                "EAN_13", "EAN_8", "UPC_A", "UPC_E",
+                "CODE_128", "CODE_39", "CODE_93",
+                "ITF", "CODABAR",
+                "RSS_14", "RSS_EXPANDED",  # GS1 DataBar
+            ):
+                member = getattr(barcode_format, name, None)
+                if member is None:
+                    continue
+                mask = member if mask is None else mask | member
+            opts = reader_options_cls()
+            if mask is not None:
+                # With no resolvable member the library default is kept.
+                opts.formats = mask
+            opts.try_harder = True
+            opts.try_rotate = True
+            zx = zxingcpp.read_barcodes(arr, opts)
+        except Exception as e:
+            # The options API differs between releases; fall back to a plain
+            # call rather than reporting "no barcodes".
+            print(f"ZXing options path failed, retrying with defaults: {e}")
+            zx = None
     try:
+        if zx is None:
+            # Older binding, or the options path failed: default behavior.
+            zx = zxingcpp.read_barcodes(arr)
+        for r in zx or []:
+            x1=y1=w=h=0
+            pos = getattr(r, "position", None)
+            pts=[]
+            if pos is not None:
+                try:
+                    pts=list(pos)
+                except TypeError:
+                    # Position is not iterable: probe the corner attribute
+                    # spellings used by different binding versions.
+                    for name in ("top_left","topLeft","top_right","topRight","bottom_left","bottomLeft","bottom_right","bottomRight",
+                                 "point1","point2","point3","point4"):
+                        if hasattr(pos, name):
+                            p=getattr(pos,name)
+                            if hasattr(p,"x") and hasattr(p,"y"):
+                                pts.append(p)
+            if pts:
+                xs=[int(getattr(p,"x",0)) for p in pts]; ys=[int(getattr(p,"y",0)) for p in pts]
+                x1, x2 = min(xs), max(xs); y1, y2 = min(ys), max(ys); w, h = x2-x1, y2-y1
+            # Prefer the enum member's name; str() of the member is only the
+            # fallback because it may carry the enum class name as a prefix.
+            fmt = getattr(getattr(r,"format",None),"name",None) or str(getattr(r,"format",""))
+            results.append({
+                "type": fmt,
+                "data": getattr(r,"text","") or "",
+                "left": x1, "top": y1, "width": w, "height": h
+            })
+    except Exception as e:
+        print(f"ZXing error: {e}")
+    return results
 def _decode_zbar(pil: Image.Image) -> List[Dict[str,Any]]:
+    """Decode barcodes in ``pil`` with ZBar (pyzbar).
+
+    Returns one dict per hit with keys ``type``, ``data``, ``left``, ``top``,
+    ``width`` and ``height``. Returns ``[]`` when ZBar is unavailable or the
+    decode fails; a failure is logged instead of being dropped silently.
+    """
+    if not HAS_ZBAR:
+        return []
+    try:
+        # Restrict to the symbologies of interest, keeping only those this
+        # pyzbar build defines; with none resolvable, ZBar's default set is used.
+        wanted = ("QRCODE","EAN13","EAN8","UPCA","UPCE","CODE128","CODE39","I25","CODABAR")
+        syms = [getattr(ZBarSymbol, nm) for nm in wanted if hasattr(ZBarSymbol, nm)]
+        res = zbar_decode(pil, symbols=syms) if syms else zbar_decode(pil)
+        out=[]
+        for d in res:
+            # Payload may arrive as bytes; undecodable bytes are ignored.
+            data = d.data.decode("utf-8","ignore") if isinstance(d.data,(bytes,bytearray)) else str(d.data)
+            out.append({
+                "type": d.type, "data": data,
+                "left": d.rect.left, "top": d.rect.top,
+                "width": d.rect.width, "height": d.rect.height
+            })
+        return out
+    except Exception as e:
+        print(f"ZBar error: {e}")
+        return []
 def _decode_dmtx(pil: Image.Image) -> List[Dict[str,Any]]:
     if not HAS_DMTX: return []
         pass
     return []
+def _dedupe_hits(hits: List[Dict[str,Any]], tol: int = 3) -> List[Dict[str,Any]]:
+    """Drop repeated detections of the same barcode, keeping first occurrences.
+
+    Two hits are duplicates when they carry the same payload, the same
+    symbology (compared case-insensitively, ignoring any ``Enum.`` prefix and
+    punctuation, so "BarcodeFormat.EAN13", "EAN_13" and "EAN13" match) and
+    their ``left``/``top``/``width``/``height`` each differ by at most ``tol``
+    pixels. A direct tolerance test is used instead of snapping coordinates to
+    a grid, because snapping separates boxes that straddle a grid boundary
+    even when they are one pixel apart.
+
+    Args:
+        hits: decoder result dicts; missing or ``None`` coordinates count as 0.
+        tol: maximum per-coordinate difference, in pixels, still treated as
+            the same box.
+
+    Returns:
+        The surviving hit dicts (same objects, original order).
+    """
+    def _norm_type(value: Any) -> str:
+        name = str(value or "").rsplit(".", 1)[-1]
+        return "".join(ch for ch in name.upper() if ch.isalnum())
+    seen: Dict[Any, List[Any]] = {}
+    out: List[Dict[str,Any]] = []
+    for r in hits:
+        key = (_norm_type(r.get("type")), r.get("data", ""))
+        box = tuple(float(r.get(k) or 0) for k in ("left", "top", "width", "height"))
+        prior = seen.setdefault(key, [])
+        if any(all(abs(a - b) <= tol for a, b in zip(box, other)) for other in prior):
+            continue
+        prior.append(box)
+        out.append(r)
+    return out
 def _decode_variants(pil: Image.Image) -> List[Dict[str,Any]]:
+    """Run every decoder over several renderings of ``pil`` and merge the hits.
+
+    Variants are the image itself, a grayscale copy and a binarized copy, plus
+    a 2x nearest-neighbour upscale (plain and binarized) when the longer side
+    is under 1600 px. Each variant is tried at 0/90/180/270 degrees. Every
+    hit's box is mapped back into ``pil``'s own pixel frame before merging, so
+    boxes from rotated or upscaled variants land on the barcode and repeated
+    detections can be deduplicated.
+
+    Returns:
+        Deduplicated hit dicts with ``left``/``top``/``width``/``height``
+        expressed in ``pil`` coordinates.
+    """
+    w,h = pil.size
+    # (image, factor): factor is how many times larger the variant is than pil.
+    variants = [(pil, 1), (ImageOps.grayscale(pil), 1), (_binarize(pil), 1)]
+    if max(w,h) < 1600:
+        up = pil.resize((w*2,h*2), resample=Image.NEAREST)
+        variants += [(up, 2), (_binarize(up), 2)]
+    decoders = (_decode_zxing_all, _decode_zbar, _decode_dmtx, _decode_cv2_qr)
+    hits=[]
+    for v, factor in variants:
+        vw, vh = v.size
+        # try 0/90/180/270
+        for angle in (0,90,180,270):
+            vv = v if angle==0 else v.rotate(angle, expand=True)
+            for decode in decoders:
+                for r in decode(vv):
+                    left = r.get("left") or 0; top = r.get("top") or 0
+                    bw = r.get("width") or 0; bh = r.get("height") or 0
+                    # Undo the rotation (PIL rotates counter-clockwise): map the
+                    # box from the rotated frame back to the unrotated variant.
+                    if angle == 90:
+                        left, top, bw, bh = vw - top - bh, left, bh, bw
+                    elif angle == 180:
+                        left, top = vw - left - bw, vh - top - bh
+                    elif angle == 270:
+                        left, top, bw, bh = top, vh - left - bw, bh, bw
+                    # Undo the upscale so coordinates refer to pil itself.
+                    r["left"] = int(round(left / factor))
+                    r["top"] = int(round(top / factor))
+                    r["width"] = int(round(bw / factor))
+                    r["height"] = int(round(bh / factor))
+                    hits.append(r)
+    return _dedupe_hits(hits)
 def _pix_to_pil(pix) -> Image.Image:
     # convert PyMuPDF Pixmap to grayscale PIL without alpha (avoids blur)
         # Debug: Print spell check results
         print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
+        if HAS_ANY_BARCODE:
+            try:
+                print(f"Starting barcode detection for file A: {file_a.name}")
+                bar_a, info_a = find_barcodes_in_pdf(file_a.name, image_size=image_size) if HAS_PYMUPDF else find_barcodes_in_image(a)
+                print(f"Barcode detection A complete: {len(bar_a)} boxes, {len(info_a)} infos")
+                print(f"Starting barcode detection for file B: {file_b.name}")
+                bar_b, info_b = find_barcodes_in_pdf(file_b.name, image_size=image_size) if HAS_PYMUPDF else find_barcodes_in_image(b)
+                print(f"Barcode detection B complete: {len(bar_b)} boxes, {len(info_b)} infos")
+            except Exception as e:
+                print(f"Barcode detection error: {e}")
+                import traceback
+                traceback.print_exc()
+                bar_a, info_a = [], []
+                bar_b, info_b = [], []
+        else:
+            print("No barcode backends available")
             bar_a, info_a = [], []
             bar_b, info_b = [], []
     print(f"\nDebug images saved to: {outdir}/")
     print("Open the PNGs and zoom in to check bar width. If narrow bars are <2px at 600 DPI, you need 900-1200 DPI.")
+def find_barcodes_in_pdf(pdf_path: str, image_size: Optional[Tuple[int,int]]=None, max_pages: int = 10):
+    """Scan the first ``max_pages`` pages of a PDF for barcodes.
+
+    Each page is rasterized (scaled towards ``image_size[0]`` pixels wide but
+    never below 1.0x, or at 600/72 when no size is given) and decoded, and
+    every embedded image on the page is decoded as well. Box rows are shifted
+    by the summed heights of the preceding page rasters.
+
+    Args:
+        pdf_path: path of the PDF to open.
+        image_size: optional (width, height); only the width is used.
+        max_pages: upper bound on the number of pages scanned.
+
+    Returns:
+        ``(boxes, infos)``: parallel lists of ``Box(y1, x1, y2, x2, area)``
+        and hit dicts extended with ``valid``, ``page`` (1-based) and
+        ``source``. If the file cannot be opened both lists are empty; if a
+        later step fails, the error is logged and the hits gathered so far
+        are returned.
+    """
+    boxes: List[Box] = []; infos: List[Dict[str,Any]]=[]
+    def _record(r: Dict[str,Any], y_off: int, page_no: int, source: str) -> None:
+        # Build both entries before appending so the lists stay in step even
+        # if validation or Box construction raises.
+        info = {**r, "valid": _validate(r.get("type",""), r.get("data","")), "page": page_no, "source": source}
+        b = Box(r["top"]+y_off, r["left"], r["top"]+y_off+r["height"], r["left"]+r["width"], r["width"]*r["height"])
+        boxes.append(b)
+        infos.append(info)
+    try:
+        doc = fitz.open(pdf_path)
+    except Exception as e:
+        print(f"Barcode scan: cannot open {pdf_path}: {e}")
+        return [], []
+    try:
+        n = min(len(doc), max_pages)
+        y_offset = 0
+        target_width = int(image_size[0]) if image_size else None
+        for page_idx in range(n):
+            page = doc[page_idx]
+            if target_width:
+                scale = max(1.0, float(target_width)/float(page.rect.width))
+            else:
+                scale = 600.0/72.0
+            try:
+                pix = page.get_pixmap(matrix=fitz.Matrix(scale,scale), colorspace=fitz.csGRAY, alpha=False)
+            except TypeError:
+                # Retry without the colorspace argument if this call signature
+                # rejects it.
+                pix = page.get_pixmap(matrix=fitz.Matrix(scale,scale), alpha=False)
+            pil = _pix_to_pil(pix)
+            # 1) embedded XObjects (often crisp)
+            # NOTE(review): these hits are in the embedded image's own pixel
+            # grid, not the page raster's, yet they are stored with the same
+            # y_offset as page hits -- confirm whether they should be mapped
+            # through the image's placement on the page first.
+            for ix,(xref,*_) in enumerate(page.get_images(full=True)):
+                try:
+                    epix = fitz.Pixmap(doc, xref)
+                    epil = _pix_to_pil(epix)
+                    for r in _decode_variants(epil):
+                        _record(r, y_offset, page_idx+1, f"embed:{ix+1}")
+                except Exception as e:
+                    # Best effort: an unreadable embedded image must not stop
+                    # the page scan.
+                    print(f"Barcode scan: skipping embedded image {ix+1} on page {page_idx+1}: {e}")
+            # 2) page raster
+            for r in _decode_variants(pil):
+                _record(r, y_offset, page_idx+1, f"page@scale{scale:.2f}")
+            y_offset += pil.size[1]
+    except Exception as e:
+        print(f"Barcode scan error in {pdf_path}: {e}")
+    finally:
+        # Always release the document, including on failure.
+        doc.close()
+    return boxes, infos
+def find_barcodes_in_image(pil: Image.Image):
+    """Decode barcodes in a single image.
+
+    Returns ``(boxes, infos)``: parallel lists holding, for each hit from
+    ``_decode_variants``, a ``Box(y1, x1, y2, x2, area)`` and the hit dict
+    extended with ``valid``, ``page`` (always 1) and ``source`` ("image").
+    """
+    found_boxes: List[Box] = []
+    found_infos: List[Dict[str,Any]] = []
+    for hit in _decode_variants(pil):
+        y1 = hit["top"]
+        x1 = hit["left"]
+        y2 = hit["top"] + hit["height"]
+        x2 = hit["left"] + hit["width"]
+        area = hit["width"] * hit["height"]
+        found_boxes.append(Box(y1, x1, y2, x2, area))
+        found_infos.append({
+            **hit,
+            "valid": _validate(hit.get("type",""), hit.get("data","")),
+            "page": 1,
+            "source": "image",
+        })
+    return found_boxes, found_infos
 def find_barcode_boxes_and_info_from_pdf(pdf_path: str, image_size: Optional[Tuple[int, int]] = None, max_pages: int = 10):
     """Detect barcodes from the original PDF and return boxes in the same
     coordinate space as the combined display image.