Spaces:

Digitaljoint
/

ProofCheck

Sleeping

App Files Files Community

Yaz Hobooti committed on Sep 27, 2025

Commit

204147c

1 Parent(s): 3401128

Implement improved barcode detection: Use OpenCV contrib with PDF XObject extraction

Browse files

Files changed (3) hide show

app.py +59 -8
barcode_utils.py +169 -0
requirements.txt +1 -4

app.py CHANGED Viewed

@@ -52,10 +52,10 @@ except Exception:
     HAS_REGEX = False
 try:
-    from pyzbar.pyzbar import decode as zbar_decode
     HAS_BARCODE = True
 except Exception:
-    zbar_decode = None
     HAS_BARCODE = False
 # -------------------- Core Data --------------------
@@ -1106,12 +1106,63 @@ def compare_pdfs(file_a, file_b):
         print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
         if HAS_BARCODE:
-            # Use PDF-based barcode detection instead of rasterized image
-            bar_a, info_a = find_barcode_boxes_and_info_from_pdf(file_a.name, image_size=image_size) if HAS_PYMUPDF else find_barcode_boxes_and_info(a)
-            bar_b, info_b = find_barcode_boxes_and_info_from_pdf(file_b.name, image_size=image_size) if HAS_PYMUPDF else find_barcode_boxes_and_info(b)
-            # Debug: Print barcode detection results
-            print(f"Barcode detection results - A: {len(bar_a)} codes, B: {len(bar_b)} codes")
         else:
             bar_a, info_a = [], []
             bar_b, info_b = [], []

     HAS_REGEX = False
 try:
+    from barcode_utils import read_barcodes_from_path
     HAS_BARCODE = True
 except Exception:
+    read_barcodes_from_path = None
     HAS_BARCODE = False
 # -------------------- Core Data --------------------
         print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
         if HAS_BARCODE:
+            # Use new barcode detection from barcode_utils
+            try:
+                codes_a = read_barcodes_from_path(file_a.name, max_pages=5, raster_dpi=900)
+                codes_b = read_barcodes_from_path(file_b.name, max_pages=5, raster_dpi=900)
+                # Convert to old format for compatibility
+                bar_a, info_a = [], []
+                bar_b, info_b = []
+                for code in codes_a:
+                    if "error" not in code:
+                        # Create a simple box for visualization (center of polygon)
+                        if "polygon" in code:
+                            pts = np.array(code["polygon"])
+                            x1, y1 = pts.min(axis=0)
+                            x2, y2 = pts.max(axis=0)
+                            box = Box(y1=int(y1), x1=int(x1), y2=int(y2), x2=int(x2), area=int((x2-x1)*(y2-y1)))
+                            bar_a.append(box)
+                            info_a.append({
+                                "type": code.get("type", ""),
+                                "data": code.get("text", ""),
+                                "left": int(x1),
+                                "top": int(y1),
+                                "width": int(x2-x1),
+                                "height": int(y2-y1),
+                                "valid": True,
+                                "page": code.get("page", 0) + 1,
+                                "source": code.get("source", "")
+                            })
+                for code in codes_b:
+                    if "error" not in code:
+                        # Create a simple box for visualization (center of polygon)
+                        if "polygon" in code:
+                            pts = np.array(code["polygon"])
+                            x1, y1 = pts.min(axis=0)
+                            x2, y2 = pts.max(axis=0)
+                            box = Box(y1=int(y1), x1=int(x1), y2=int(y2), x2=int(x2), area=int((x2-x1)*(y2-y1)))
+                            bar_b.append(box)
+                            info_b.append({
+                                "type": code.get("type", ""),
+                                "data": code.get("text", ""),
+                                "left": int(x1),
+                                "top": int(y1),
+                                "width": int(x2-x1),
+                                "height": int(y2-y1),
+                                "valid": True,
+                                "page": code.get("page", 0) + 1,
+                                "source": code.get("source", "")
+                            })
+                # Debug: Print barcode detection results
+                print(f"Barcode detection results - A: {len(bar_a)} codes, B: {len(bar_b)} codes")
+            except Exception as e:
+                print(f"Barcode detection error: {e}")
+                bar_a, info_a = [], []
+                bar_b, info_b = [], []
         else:
             bar_a, info_a = [], []
             bar_b, info_b = [], []

barcode_utils.py ADDED Viewed

	@@ -0,0 +1,169 @@

+import io
+import os
+from typing import List, Dict, Any, Tuple, Optional
+import cv2
+import numpy as np
+from PIL import Image
+# PDF support via PyMuPDF (preferred for extracting original image XObjects)
+try:
+    import fitz  # PyMuPDF
+    HAS_PYMUPDF = True
+except Exception:
+    fitz = None
+    HAS_PYMUPDF = False
+def _ensure_contrib():
+    if not hasattr(cv2, "barcode") or not hasattr(cv2.barcode, "BarcodeDetector"):
+        raise RuntimeError(
+            "OpenCV was built without the 'barcode' module. "
+            "Install 'opencv-contrib-python-headless' (not 'opencv-python-headless')."
+        )
+def _pil_to_bgr(pil: Image.Image) -> np.ndarray:
+    arr = np.array(pil.convert("RGB"))
+    return cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)
+def _decode_with_opencv(img_bgr: np.ndarray) -> List[Dict[str, Any]]:
+    _ensure_contrib()
+    det = cv2.barcode.BarcodeDetector()
+    # Try 4 orientations
+    results: List[Dict[str, Any]] = []
+    for k, rot in enumerate([0, 1, 2, 3]):  # 0, 90, 180, 270
+        if rot > 0:
+            img = np.ascontiguousarray(np.rot90(img_bgr, k=rot))
+        else:
+            img = img_bgr
+        # Optional light preproc to help 1D codes
+        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        gray = cv2.bilateralFilter(gray, d=5, sigmaColor=50, sigmaSpace=50)
+        ok, decoded_info, decoded_type, corners = det.detectAndDecode(gray)
+        if not ok:
+            continue
+        # corners: list of Nx4x2
+        for txt, typ, pts in zip(decoded_info, decoded_type, corners):
+            if not txt:
+                continue
+            pts = np.asarray(pts, dtype=np.float32)
+            # rotate points back to original orientation
+            if rot > 0:
+                h, w = img_bgr.shape[:2]
+                if rot == 1:   # 90
+                    pts = np.stack([h - pts[:,1], pts[:,0]], axis=1)
+                elif rot == 2: # 180
+                    pts = np.stack([w - pts[:,0], h - pts[:,1]], axis=1)
+                elif rot == 3: # 270
+                    pts = np.stack([pts[:,1], w - pts[:,0]], axis=1)
+            results.append({
+                "text": txt,
+                "type": typ,
+                "polygon": pts.tolist(),  # four points
+                "rotation_quarters": rot
+            })
+    return results
+def _extract_pdf_images_bgr(path: str, page_index: Optional[int] = None) -> List[Tuple[int, np.ndarray]]:
+    """
+    Returns list of (page_idx, img_bgr) extracted at native resolution from image XObjects.
+    """
+    if not HAS_PYMUPDF:
+        return []
+    out: List[Tuple[int, np.ndarray]] = []
+    doc = fitz.open(path)
+    pages = range(len(doc)) if page_index is None else [page_index]
+    for pno in pages:
+        page = doc[pno]
+        for imginfo in page.get_images(full=True):
+            xref = imginfo[0]
+            pix = fitz.Pixmap(doc, xref)
+            # Convert to RGB if needed
+            if pix.n >= 4:  # RGBA or CMYK+alpha
+                pix = fitz.Pixmap(fitz.csRGB, pix)
+            pil = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
+            out.append((pno, _pil_to_bgr(pil)))
+            pix = None
+    doc.close()
+    return out
+def _render_pdf_page_bgr(path: str, pno: int, dpi: int = 600) -> np.ndarray:
+    if not HAS_PYMUPDF:
+        raise RuntimeError("PyMuPDF not available to render PDF pages.")
+    doc = fitz.open(path)
+    if pno >= len(doc):
+        doc.close()
+        raise ValueError(f"Page {pno} out of range (PDF has {len(doc)} pages).")
+    page = doc[pno]
+    scale = dpi / 72.0
+    mat = fitz.Matrix(scale, scale)
+    pix = page.get_pixmap(matrix=mat, alpha=False)
+    pil = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
+    doc.close()
+    return _pil_to_bgr(pil)
+def read_barcodes_from_path(path: str, max_pages: int = 5, raster_dpi: int = 900) -> List[Dict[str, Any]]:
+    """
+    Unified entry point:
+    - For images: decode directly with OpenCV.
+    - For PDFs: try original image XObjects first (raw), then rasterize pages at high DPI as fallback.
+    Returns a list of dicts: {source, page, type, text, polygon}
+    """
+    ext = os.path.splitext(path.lower())[1]
+    results: List[Dict[str, Any]] = []
+    if ext == ".pdf":
+        # 1) Try native images embedded in the PDF
+        for pno, img in _extract_pdf_images_bgr(path):
+            hits = _decode_with_opencv(img)
+            for h in hits:
+                results.append({
+                    "source": "pdf_xobject_image",
+                    "page": pno,
+                    **h
+                })
+        if results:
+            return results
+        # 2) Fallback: rasterize a few pages crisply and decode
+        if not HAS_PYMUPDF:
+            raise RuntimeError("No PyMuPDF; cannot rasterize PDF pages. Add 'pymupdf' to requirements.")
+        doc = fitz.open(path)
+        for pno in range(min(len(doc), max_pages)):
+            page_img = _render_pdf_page_bgr(path, pno, dpi=raster_dpi)
+            hits = _decode_with_opencv(page_img)
+            for h in hits:
+                results.append({
+                    "source": "pdf_rasterized",
+                    "page": pno,
+                    **h
+                })
+        doc.close()
+        return results
+    else:
+        # Image path
+        pil = Image.open(path).convert("RGB")
+        img = _pil_to_bgr(pil)
+        hits = _decode_with_opencv(img)
+        for h in hits:
+            results.append({
+                "source": "image",
+                "page": 0,
+                **h
+            })
+        return results
+def draw_polys(bgr: np.ndarray, polys: list) -> np.ndarray:
+    """Draw polygons on the image for visualization"""
+    out = bgr.copy()
+    for p in polys:
+        if "polygon" in p:
+            pts = np.array(p["polygon"], dtype=np.int32).reshape(-1,1,2)
+            cv2.polylines(out, [pts], True, (0, 255, 0), 2)
+    return out

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-opencv-python-headless==4.10.0.84
 numpy
 pillow
 pdf2image
@@ -7,7 +7,4 @@ PyMuPDF>=1.24
 pytesseract
 pyspellchecker
 regex
-pyzbar
-zxing-cpp
-pylibdmtx
 scikit-image

+opencv-contrib-python-headless==4.10.0.84
 numpy
 pillow
 pdf2image
 pytesseract
 pyspellchecker
 regex
 scikit-image