Yaz Hobooti committed on
Commit
204147c
·
1 Parent(s): 3401128

Implement improved barcode detection: Use OpenCV contrib with PDF XObject extraction

Browse files
Files changed (3) hide show
  1. app.py +59 -8
  2. barcode_utils.py +169 -0
  3. requirements.txt +1 -4
app.py CHANGED
@@ -52,10 +52,10 @@ except Exception:
52
  HAS_REGEX = False
53
 
54
  try:
55
- from pyzbar.pyzbar import decode as zbar_decode
56
  HAS_BARCODE = True
57
  except Exception:
58
- zbar_decode = None
59
  HAS_BARCODE = False
60
 
61
  # -------------------- Core Data --------------------
@@ -1106,12 +1106,63 @@ def compare_pdfs(file_a, file_b):
1106
  print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
1107
 
1108
  if HAS_BARCODE:
1109
- # Use PDF-based barcode detection instead of rasterized image
1110
- bar_a, info_a = find_barcode_boxes_and_info_from_pdf(file_a.name, image_size=image_size) if HAS_PYMUPDF else find_barcode_boxes_and_info(a)
1111
- bar_b, info_b = find_barcode_boxes_and_info_from_pdf(file_b.name, image_size=image_size) if HAS_PYMUPDF else find_barcode_boxes_and_info(b)
1112
-
1113
- # Debug: Print barcode detection results
1114
- print(f"Barcode detection results - A: {len(bar_a)} codes, B: {len(bar_b)} codes")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1115
  else:
1116
  bar_a, info_a = [], []
1117
  bar_b, info_b = [], []
 
52
  HAS_REGEX = False
53
 
54
  try:
55
+ from barcode_utils import read_barcodes_from_path
56
  HAS_BARCODE = True
57
  except Exception:
58
+ read_barcodes_from_path = None
59
  HAS_BARCODE = False
60
 
61
  # -------------------- Core Data --------------------
 
1106
  print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
1107
 
1108
  if HAS_BARCODE:
1109
+ # Use new barcode detection from barcode_utils
1110
try:
    # Decode both PDFs with the barcode_utils reader.
    codes_a = read_barcodes_from_path(file_a.name, max_pages=5, raster_dpi=900)
    codes_b = read_barcodes_from_path(file_b.name, max_pages=5, raster_dpi=900)

    # Convert to the old (boxes, info dicts) format for compatibility.
    # Each pair must be initialised with TWO lists: unpacking a single empty
    # list raises ValueError, which the handler below would silently turn
    # into "no barcodes found".
    bar_a, info_a = [], []
    bar_b, info_b = [], []

    # One conversion loop shared by both documents.
    for codes, boxes, infos in ((codes_a, bar_a, info_a), (codes_b, bar_b, info_b)):
        for code in codes:
            if "error" in code or "polygon" not in code:
                continue
            # Axis-aligned bounding box of the detected polygon.
            # NOTE(review): polygon coordinates are in the pixel space of the
            # image that was decoded (embedded image or high-DPI raster);
            # confirm they match the coordinate space the rest of
            # compare_pdfs draws in.
            pts = np.array(code["polygon"])
            x1, y1 = pts.min(axis=0)
            x2, y2 = pts.max(axis=0)
            boxes.append(
                Box(y1=int(y1), x1=int(x1), y2=int(y2), x2=int(x2), area=int((x2-x1)*(y2-y1)))
            )
            infos.append({
                "type": code.get("type", ""),
                "data": code.get("text", ""),
                "left": int(x1),
                "top": int(y1),
                "width": int(x2-x1),
                "height": int(y2-y1),
                "valid": True,
                # barcode_utils reports 0-based pages; expose them 1-based.
                "page": code.get("page", 0) + 1,
                "source": code.get("source", "")
            })

    # Debug: Print barcode detection results
    print(f"Barcode detection results - A: {len(bar_a)} codes, B: {len(bar_b)} codes")
except Exception as e:
    # Best-effort: a decoder failure must not abort the whole comparison.
    print(f"Barcode detection error: {e}")
    bar_a, info_a = [], []
    bar_b, info_b = [], []
1166
  else:
1167
  bar_a, info_a = [], []
1168
  bar_b, info_b = [], []
barcode_utils.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import os
3
+ from typing import List, Dict, Any, Tuple, Optional
4
+
5
+ import cv2
6
+ import numpy as np
7
+ from PIL import Image
8
+
9
+ # PDF support via PyMuPDF (preferred for extracting original image XObjects)
10
+ try:
11
+ import fitz # PyMuPDF
12
+ HAS_PYMUPDF = True
13
+ except Exception:
14
+ fitz = None
15
+ HAS_PYMUPDF = False
16
+
17
+
18
def _ensure_contrib():
    """Raise RuntimeError unless ``cv2.barcode.BarcodeDetector`` is available."""
    barcode_module = getattr(cv2, "barcode", None)
    if barcode_module is not None and hasattr(barcode_module, "BarcodeDetector"):
        return
    raise RuntimeError(
        "OpenCV was built without the 'barcode' module. "
        "Install 'opencv-contrib-python-headless' (not 'opencv-python-headless')."
    )
24
+
25
def _pil_to_bgr(pil: Image.Image) -> np.ndarray:
    """Convert a PIL image to an OpenCV-style BGR array (via an RGB copy)."""
    rgb_pixels = np.array(pil.convert("RGB"))
    bgr_pixels = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
    return bgr_pixels
28
+
29
def _decode_with_opencv(img_bgr: np.ndarray) -> List[Dict[str, Any]]:
    """Detect and decode barcodes in a BGR image at four quarter-turn rotations.

    The image is tried as-is and rotated by 90, 180 and 270 degrees
    (``np.rot90`` with k = 1, 2, 3). Corner points found in a rotated copy are
    mapped back into the coordinate frame of ``img_bgr``.

    Args:
        img_bgr: 3-channel BGR image.

    Returns:
        One dict per decoded code with keys ``text``, ``type``, ``polygon``
        (list of ``[x, y]`` points in ``img_bgr`` coordinates) and
        ``rotation_quarters``. A code readable at several rotations appears
        once per rotation.

    Raises:
        RuntimeError: if the OpenCV build has no barcode detector.
    """
    _ensure_contrib()
    det = cv2.barcode.BarcodeDetector()
    src_h, src_w = img_bgr.shape[:2]

    results: List[Dict[str, Any]] = []
    for rot in range(4):  # quarter turns: 0, 90, 180, 270 degrees
        if rot > 0:
            img = np.ascontiguousarray(np.rot90(img_bgr, k=rot))
        else:
            img = img_bgr

        # Optional light preproc to help 1D codes
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray = cv2.bilateralFilter(gray, d=5, sigmaColor=50, sigmaSpace=50)

        # Newer OpenCV builds return (ok, texts, types, corners) from
        # detectAndDecodeWithType, while detectAndDecode returns a different
        # tuple there; older contrib builds return the 4-tuple from
        # detectAndDecode itself.
        if hasattr(det, "detectAndDecodeWithType"):
            ok, decoded_info, decoded_type, corners = det.detectAndDecodeWithType(gray)
        else:
            ok, decoded_info, decoded_type, corners = det.detectAndDecode(gray)
        if not ok or corners is None:
            continue

        # corners: one 4x2 array of points per detected code
        for txt, typ, pts in zip(decoded_info, decoded_type, corners):
            if not txt:
                continue
            pts = np.asarray(pts, dtype=np.float32).reshape(-1, 2)
            xs, ys = pts[:, 0], pts[:, 1]
            # Invert np.rot90 on pixel indices. For k=1 the rotated pixel
            # (row i, col j) comes from the source pixel (row j, col W-1-i),
            # so the source x depends on the source WIDTH (and, for k=3, the
            # source y on the source HEIGHT).
            if rot == 1:  # 90
                pts = np.stack([(src_w - 1) - ys, xs], axis=1)
            elif rot == 2:  # 180
                pts = np.stack([(src_w - 1) - xs, (src_h - 1) - ys], axis=1)
            elif rot == 3:  # 270
                pts = np.stack([ys, (src_h - 1) - xs], axis=1)

            results.append({
                "text": txt,
                "type": typ,
                "polygon": pts.tolist(),  # four points
                "rotation_quarters": rot
            })
    return results
71
+
72
def _extract_pdf_images_bgr(path: str, page_index: Optional[int] = None) -> List[Tuple[int, np.ndarray]]:
    """Extract the images referenced by PDF pages as BGR arrays.

    Args:
        path: Path of the PDF file.
        page_index: Page to inspect; ``None`` inspects every page.

    Returns:
        List of ``(page_idx, img_bgr)`` tuples, one per image reference
        reported by ``page.get_images(full=True)``. Empty when PyMuPDF is not
        installed.
    """
    if not HAS_PYMUPDF:
        return []
    out: List[Tuple[int, np.ndarray]] = []
    doc = fitz.open(path)
    try:
        pages = range(len(doc)) if page_index is None else [page_index]
        for pno in pages:
            page = doc[pno]
            for imginfo in page.get_images(full=True):
                xref = imginfo[0]
                pix = fitz.Pixmap(doc, xref)
                # Convert to RGB if needed
                if pix.n >= 4:  # RGBA or CMYK(+alpha)
                    pix = fitz.Pixmap(fitz.csRGB, pix)
                pil = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
                out.append((pno, _pil_to_bgr(pil)))
                pix = None  # drop the pixmap reference before the next image
    finally:
        # Close the document even when a page lookup or image decode fails.
        doc.close()
    return out
94
+
95
def _render_pdf_page_bgr(path: str, pno: int, dpi: int = 600) -> np.ndarray:
    """Rasterize one PDF page and return it as a BGR array.

    Args:
        path: Path of the PDF file.
        pno: Page index to render.
        dpi: Render resolution; the page is scaled by ``dpi / 72``.

    Returns:
        The rendered page as a BGR image (no alpha channel).

    Raises:
        RuntimeError: if PyMuPDF is not installed.
        ValueError: if ``pno`` is at or beyond the page count.
    """
    if not HAS_PYMUPDF:
        raise RuntimeError("PyMuPDF not available to render PDF pages.")
    doc = fitz.open(path)
    try:
        # Read the page count while the document is still open, so the error
        # message below never has to query a closed document.
        page_count = len(doc)
        if pno >= page_count:
            raise ValueError(f"Page {pno} out of range (PDF has {page_count} pages).")
        page = doc[pno]
        scale = dpi / 72.0
        mat = fitz.Matrix(scale, scale)
        pix = page.get_pixmap(matrix=mat, alpha=False)
        pil = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
    finally:
        # Close the document on every path, including render failures.
        doc.close()
    return _pil_to_bgr(pil)
109
+
110
def read_barcodes_from_path(path: str, max_pages: int = 5, raster_dpi: int = 900) -> List[Dict[str, Any]]:
    """Decode barcodes from an image file or a PDF.

    - For images: decode directly with OpenCV.
    - For PDFs: try the images embedded in the PDF first; if none of them
      yields a code, rasterize up to ``max_pages`` pages at ``raster_dpi``
      and decode those. ``max_pages`` limits only the rasterization
      fallback, not the embedded-image pass.

    Args:
        path: File path; a ``.pdf`` extension (case-insensitive) selects the
            PDF route, anything else is opened as an image.
        max_pages: Maximum number of pages rasterized in the PDF fallback.
        raster_dpi: Resolution used when rasterizing PDF pages.

    Returns:
        A list of dicts with keys ``source``, ``page`` (0-based), ``type``,
        ``text``, ``polygon`` and ``rotation_quarters``.

    Raises:
        RuntimeError: if a PDF needs rasterizing but PyMuPDF is missing, or
            if the OpenCV build has no barcode detector.
    """
    ext = os.path.splitext(path.lower())[1]
    results: List[Dict[str, Any]] = []

    if ext == ".pdf":
        # 1) Try native images embedded in the PDF
        for pno, img in _extract_pdf_images_bgr(path):
            results.extend(
                {"source": "pdf_xobject_image", "page": pno, **h}
                for h in _decode_with_opencv(img)
            )
        if results:
            return results

        # 2) Fallback: rasterize a few pages crisply and decode
        if not HAS_PYMUPDF:
            raise RuntimeError("No PyMuPDF; cannot rasterize PDF pages. Add 'pymupdf' to requirements.")
        # The document is needed here only for its page count; the page
        # renderer opens the file itself. Close it right away so it is not
        # held (or leaked on error) during rendering.
        doc = fitz.open(path)
        try:
            page_count = len(doc)
        finally:
            doc.close()
        for pno in range(min(page_count, max_pages)):
            page_img = _render_pdf_page_bgr(path, pno, dpi=raster_dpi)
            results.extend(
                {"source": "pdf_rasterized", "page": pno, **h}
                for h in _decode_with_opencv(page_img)
            )
        return results

    # Image path: the context manager releases the file handle once the
    # pixel data has been converted.
    with Image.open(path) as opened:
        pil = opened.convert("RGB")
    img = _pil_to_bgr(pil)
    results.extend(
        {"source": "image", "page": 0, **h}
        for h in _decode_with_opencv(img)
    )
    return results
161
+
162
def draw_polys(bgr: np.ndarray, polys: list) -> np.ndarray:
    """Return a copy of ``bgr`` with each entry's "polygon" outlined in green.

    Entries without a "polygon" key are ignored; the input image is not
    modified.
    """
    canvas = bgr.copy()
    outlines = (entry["polygon"] for entry in polys if "polygon" in entry)
    for outline in outlines:
        # cv2.polylines is given int32 points shaped (N, 1, 2).
        contour = np.array(outline, dtype=np.int32).reshape(-1, 1, 2)
        cv2.polylines(canvas, [contour], True, (0, 255, 0), 2)
    return canvas
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- opencv-python-headless==4.10.0.84
2
  numpy
3
  pillow
4
  pdf2image
@@ -7,7 +7,4 @@ PyMuPDF>=1.24
7
  pytesseract
8
  pyspellchecker
9
  regex
10
- pyzbar
11
- zxing-cpp
12
- pylibdmtx
13
  scikit-image
 
1
+ opencv-contrib-python-headless==4.10.0.84
2
  numpy
3
  pillow
4
  pdf2image
 
7
  pytesseract
8
  pyspellchecker
9
  regex
 
 
 
10
  scikit-image