Yaz Hobooti committed on
Commit
6830c6e
·
1 Parent(s): 828bfe1

Final barcode reader implementation: ZXing-CPP with exact versions and improved robustness

Browse files
Files changed (3) hide show
  1. app.py +5 -3
  2. barcode_reader.py +43 -40
  3. requirements.txt +6 -5
app.py CHANGED
@@ -1124,7 +1124,7 @@ def compare_pdfs(file_a, file_b):
1124
 
1125
  # Convert to old format for compatibility
1126
  bar_a, info_a = [], []
1127
- bar_b, info_b = []
1128
 
1129
  for code in codes_a:
1130
  if "error" not in code:
@@ -1144,7 +1144,8 @@ def compare_pdfs(file_a, file_b):
1144
  "height": int(y2-y1),
1145
  "valid": True,
1146
  "page": code.get("page", 0) + 1,
1147
- "source": code.get("source", "")
 
1148
  })
1149
 
1150
  for code in codes_b:
@@ -1165,7 +1166,8 @@ def compare_pdfs(file_a, file_b):
1165
  "height": int(y2-y1),
1166
  "valid": True,
1167
  "page": code.get("page", 0) + 1,
1168
- "source": code.get("source", "")
 
1169
  })
1170
 
1171
  # Debug: Print barcode detection results
 
1124
 
1125
  # Convert to old format for compatibility
1126
  bar_a, info_a = [], []
1127
+ bar_b, info_b = [], []
1128
 
1129
  for code in codes_a:
1130
  if "error" not in code:
 
1144
  "height": int(y2-y1),
1145
  "valid": True,
1146
  "page": code.get("page", 0) + 1,
1147
+ "source": code.get("source", ""),
1148
+ "engine": code.get("engine", "")
1149
  })
1150
 
1151
  for code in codes_b:
 
1166
  "height": int(y2-y1),
1167
  "valid": True,
1168
  "page": code.get("page", 0) + 1,
1169
+ "source": code.get("source", ""),
1170
+ "engine": code.get("engine", "")
1171
  })
1172
 
1173
  # Debug: Print barcode detection results
barcode_reader.py CHANGED
@@ -1,17 +1,13 @@
1
  """
2
- Robust barcode reader for images and PDFs.
3
 
4
- Strategy (in order):
5
- 1) PDF -> extract embedded image XObjects at native resolution (no raster loss) and decode.
6
- 2) If nothing found, rasterize PDF page(s) at high DPI (400/600/900) and decode.
7
- 3) For plain images, decode directly.
8
 
9
- Engines:
10
- - Primary: ZXing-CPP (zxingcpp) -> no system packages required
11
- - Fallback: OpenCV contrib barcode (if available)
12
-
13
- Outputs are normalized dicts:
14
- { 'engine', 'source', 'page', 'type', 'text', 'polygon': [[x,y] * 4] }
15
  """
16
 
17
  from __future__ import annotations
@@ -24,23 +20,23 @@ from PIL import Image
24
  import cv2
25
 
26
  # ---------- Engines ----------
27
- HAS_ZXING = False
28
  try:
29
  import zxingcpp # pip install zxing-cpp
30
- HAS_ZXING = True
31
  except Exception:
32
  zxingcpp = None
33
- HAS_ZXING = False
34
 
35
- HAS_OCV_BARCODE = hasattr(cv2, "barcode") and hasattr(getattr(cv2, "barcode"), "BarcodeDetector")
36
 
37
  # ---------- PDF (PyMuPDF) ----------
38
  try:
39
  import fitz # PyMuPDF
40
- HAS_PYMUPDF = True
41
  except Exception:
42
  fitz = None
43
- HAS_PYMUPDF = False
44
 
45
 
46
  # =========================
@@ -56,25 +52,25 @@ def _as_gray(arr_bgr: np.ndarray) -> np.ndarray:
56
 
57
  def _preprocess_candidates(bgr: np.ndarray) -> List[np.ndarray]:
58
  """
59
- Generate a small set of preprocess variants to improve 1D and 2D decoding.
60
- Keep this list short—HF Spaces need to stay responsive.
61
  """
62
  out = [bgr]
63
  h, w = bgr.shape[:2]
64
 
65
- # Slight sharpening helps thin 1D bars
66
  k = np.array([[0, -1, 0],
67
  [-1, 5, -1],
68
  [0, -1, 0]], dtype=np.float32)
69
  sharp = cv2.filter2D(bgr, -1, k)
70
  out.append(sharp)
71
 
72
- # CLAHE on gray
73
  g = _as_gray(bgr)
74
  clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8)).apply(g)
75
  out.append(cv2.cvtColor(clahe, cv2.COLOR_GRAY2BGR))
76
 
77
- # Slight upscale for tiny barcodes
78
  if max(h, w) < 1600:
79
  up = cv2.resize(bgr, (0, 0), fx=1.5, fy=1.5, interpolation=cv2.INTER_CUBIC)
80
  out.append(up)
@@ -83,15 +79,14 @@ def _preprocess_candidates(bgr: np.ndarray) -> List[np.ndarray]:
83
 
84
  def _norm_polygon(pts: Any, w: int, h: int) -> List[List[float]]:
85
  """
86
- Normalize whatever the engine returns into 4 point polygon [[x,y],...].
87
- If fewer than 4 points are given, approximate with a bounding box.
88
  """
89
  try:
90
  p = np.array(pts, dtype=np.float32).reshape(-1, 2)
91
  if p.shape[0] >= 4:
92
  p = p[:4]
93
  else:
94
- # make a box
95
  x1, y1 = p.min(axis=0)
96
  x2, y2 = p.max(axis=0)
97
  p = np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]], dtype=np.float32)
@@ -133,13 +128,12 @@ def _dedupe(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
133
  # =========================
134
 
135
  def _decode_zxing(bgr: np.ndarray) -> List[Dict[str, Any]]:
136
- if not HAS_ZXING:
137
  return []
138
  hits: List[Dict[str, Any]] = []
139
- # ZXing works on gray or color; we'll try a couple of variants
140
  for candidate in _preprocess_candidates(bgr):
141
  try:
142
- res = zxingcpp.read_barcodes(candidate) # returns list
143
  except Exception:
144
  continue
145
  for r in res or []:
@@ -161,12 +155,12 @@ def _decode_zxing(bgr: np.ndarray) -> List[Dict[str, Any]]:
161
  "polygon": poly,
162
  })
163
  if hits:
164
- break # good enough
165
  return hits
166
 
167
 
168
  def _decode_opencv(bgr: np.ndarray) -> List[Dict[str, Any]]:
169
- if not HAS_OCV_BARCODE:
170
  return []
171
  det = cv2.barcode.BarcodeDetector()
172
  hits: List[Dict[str, Any]] = []
@@ -207,7 +201,7 @@ def _pdf_extract_xobject_images(path: str, page_index: Optional[int] = None) ->
207
  """
208
  Return (page, image_bgr) tuples for image XObjects extracted at native resolution.
209
  """
210
- if not HAS_PYMUPDF:
211
  return []
212
  out: List[Tuple[int, np.ndarray]] = []
213
  doc = fitz.open(path)
@@ -217,8 +211,7 @@ def _pdf_extract_xobject_images(path: str, page_index: Optional[int] = None) ->
217
  for info in page.get_images(full=True):
218
  xref = info[0]
219
  pix = fitz.Pixmap(doc, xref)
220
- # Convert to RGB if not already
221
- if pix.n >= 4: # includes alpha or CMYK+alpha
222
  pix = fitz.Pixmap(fitz.csRGB, pix)
223
  pil = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
224
  out.append((pno, _to_bgr(pil)))
@@ -228,9 +221,9 @@ def _pdf_extract_xobject_images(path: str, page_index: Optional[int] = None) ->
228
 
229
  def _pdf_render_page(path: str, page: int, dpi: int) -> np.ndarray:
230
  """
231
- Rasterize one page at the given DPI (for vector codes).
232
  """
233
- if not HAS_PYMUPDF:
234
  raise RuntimeError("PyMuPDF not available; cannot rasterize PDF.")
235
  doc = fitz.open(path)
236
  if page >= len(doc):
@@ -256,7 +249,8 @@ def _decode_image_path(path: str) -> List[Dict[str, Any]]:
256
 
257
  def _decode_pdf_path(path: str, max_pages: int = 8, raster_dpis: Tuple[int, ...] = (400, 600, 900)) -> List[Dict[str, Any]]:
258
  results: List[Dict[str, Any]] = []
259
- # 1) Try original embedded images first
 
260
  for pno, img_bgr in _pdf_extract_xobject_images(path):
261
  hits = _decode_any(img_bgr)
262
  for h in hits:
@@ -265,9 +259,8 @@ def _decode_pdf_path(path: str, max_pages: int = 8, raster_dpis: Tuple[int, ...]
265
  if results:
266
  return _dedupe(results)
267
 
268
- # 2) Fallback: rasterize pages at increasing DPIs
269
- if not HAS_PYMUPDF:
270
- # No way to rasterize; return empty
271
  return []
272
  doc = fitz.open(path)
273
  n = min(len(doc), max_pages)
@@ -303,7 +296,7 @@ def read_barcodes_from_path(path: str,
303
 
304
 
305
  # =========================
306
- # Optional: drawing helper
307
  # =========================
308
 
309
  def draw_barcodes(bgr: np.ndarray, detections: List[Dict[str, Any]]) -> np.ndarray:
@@ -315,3 +308,13 @@ def draw_barcodes(bgr: np.ndarray, detections: List[Dict[str, Any]]) -> np.ndarr
315
  x, y = poly[0, 0, 0], poly[0, 0, 1]
316
  cv2.putText(out, txt[:48], (x, max(15, y - 6)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 50, 255), 1, cv2.LINE_AA)
317
  return out
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ Robust, HF-friendly barcode reader for images & PDFs.
3
 
4
+ Order of operations:
5
+ 1) PDF -> extract embedded image XObjects (native pixels), decode (ZXing-CPP preferred)
6
+ 2) If none found, rasterize the PDF at 400/600/900 DPI and decode
7
+ 3) For plain images, decode directly
8
 
9
+ Outputs: list of dicts:
10
+ {engine, source, page, type, text, polygon: [[x,y]*4]}
 
 
 
 
11
  """
12
 
13
  from __future__ import annotations
 
20
  import cv2
21
 
22
  # ---------- Engines ----------
23
+ _HAS_ZXING = False
24
  try:
25
  import zxingcpp # pip install zxing-cpp
26
+ _HAS_ZXING = True
27
  except Exception:
28
  zxingcpp = None
29
+ _HAS_ZXING = False
30
 
31
+ _HAS_OCV_BARCODE = hasattr(cv2, "barcode") and hasattr(getattr(cv2, "barcode"), "BarcodeDetector")
32
 
33
  # ---------- PDF (PyMuPDF) ----------
34
  try:
35
  import fitz # PyMuPDF
36
+ _HAS_PYMUPDF = True
37
  except Exception:
38
  fitz = None
39
+ _HAS_PYMUPDF = False
40
 
41
 
42
  # =========================
 
52
 
53
  def _preprocess_candidates(bgr: np.ndarray) -> List[np.ndarray]:
54
  """
55
+ Generate a tiny set of preprocess variants to improve robustness
56
+ without slowing HF Spaces too much.
57
  """
58
  out = [bgr]
59
  h, w = bgr.shape[:2]
60
 
61
+ # mild sharpen
62
  k = np.array([[0, -1, 0],
63
  [-1, 5, -1],
64
  [0, -1, 0]], dtype=np.float32)
65
  sharp = cv2.filter2D(bgr, -1, k)
66
  out.append(sharp)
67
 
68
+ # CLAHE on gray (helps low-contrast/photographed codes)
69
  g = _as_gray(bgr)
70
  clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8)).apply(g)
71
  out.append(cv2.cvtColor(clahe, cv2.COLOR_GRAY2BGR))
72
 
73
+ # slight upscale for tiny codes
74
  if max(h, w) < 1600:
75
  up = cv2.resize(bgr, (0, 0), fx=1.5, fy=1.5, interpolation=cv2.INTER_CUBIC)
76
  out.append(up)
 
79
 
80
  def _norm_polygon(pts: Any, w: int, h: int) -> List[List[float]]:
81
  """
82
+ Normalize whatever the engine returns into a 4-point polygon [[x,y], ...].
83
+ If fewer points are given, approximate with a bbox.
84
  """
85
  try:
86
  p = np.array(pts, dtype=np.float32).reshape(-1, 2)
87
  if p.shape[0] >= 4:
88
  p = p[:4]
89
  else:
 
90
  x1, y1 = p.min(axis=0)
91
  x2, y2 = p.max(axis=0)
92
  p = np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]], dtype=np.float32)
 
128
  # =========================
129
 
130
  def _decode_zxing(bgr: np.ndarray) -> List[Dict[str, Any]]:
131
+ if not _HAS_ZXING:
132
  return []
133
  hits: List[Dict[str, Any]] = []
 
134
  for candidate in _preprocess_candidates(bgr):
135
  try:
136
+ res = zxingcpp.read_barcodes(candidate) # list
137
  except Exception:
138
  continue
139
  for r in res or []:
 
155
  "polygon": poly,
156
  })
157
  if hits:
158
+ break
159
  return hits
160
 
161
 
162
  def _decode_opencv(bgr: np.ndarray) -> List[Dict[str, Any]]:
163
+ if not _HAS_OCV_BARCODE:
164
  return []
165
  det = cv2.barcode.BarcodeDetector()
166
  hits: List[Dict[str, Any]] = []
 
201
  """
202
  Return (page, image_bgr) tuples for image XObjects extracted at native resolution.
203
  """
204
+ if not _HAS_PYMUPDF:
205
  return []
206
  out: List[Tuple[int, np.ndarray]] = []
207
  doc = fitz.open(path)
 
211
  for info in page.get_images(full=True):
212
  xref = info[0]
213
  pix = fitz.Pixmap(doc, xref)
214
+ if pix.n >= 4: # RGBA or CMYK+alpha -> convert to RGB
 
215
  pix = fitz.Pixmap(fitz.csRGB, pix)
216
  pil = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
217
  out.append((pno, _to_bgr(pil)))
 
221
 
222
  def _pdf_render_page(path: str, page: int, dpi: int) -> np.ndarray:
223
  """
224
+ Rasterize one page at the given DPI (for vector-only codes).
225
  """
226
+ if not _HAS_PYMUPDF:
227
  raise RuntimeError("PyMuPDF not available; cannot rasterize PDF.")
228
  doc = fitz.open(path)
229
  if page >= len(doc):
 
249
 
250
  def _decode_pdf_path(path: str, max_pages: int = 8, raster_dpis: Tuple[int, ...] = (400, 600, 900)) -> List[Dict[str, Any]]:
251
  results: List[Dict[str, Any]] = []
252
+
253
+ # 1) try embedded images first
254
  for pno, img_bgr in _pdf_extract_xobject_images(path):
255
  hits = _decode_any(img_bgr)
256
  for h in hits:
 
259
  if results:
260
  return _dedupe(results)
261
 
262
+ # 2) rasterize a handful of pages at increasing DPI
263
+ if not _HAS_PYMUPDF:
 
264
  return []
265
  doc = fitz.open(path)
266
  n = min(len(doc), max_pages)
 
296
 
297
 
298
  # =========================
299
+ # Optional helpers for UI
300
  # =========================
301
 
302
  def draw_barcodes(bgr: np.ndarray, detections: List[Dict[str, Any]]) -> np.ndarray:
 
308
  x, y = poly[0, 0, 0], poly[0, 0, 1]
309
  cv2.putText(out, txt[:48], (x, max(15, y - 6)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 50, 255), 1, cv2.LINE_AA)
310
  return out
311
+
312
+ def render_preview_bgr(path: str, page: int = 0, dpi: int = 220) -> np.ndarray:
313
+ """
314
+ For UI preview only (small DPI). For PDFs, render one page; for images, just load.
315
+ """
316
+ ext = os.path.splitext(path.lower())[1]
317
+ if ext == ".pdf":
318
+ return _pdf_render_page(path, page=page, dpi=dpi)
319
+ pil = Image.open(path).convert("RGB")
320
+ return _to_bgr(pil)
requirements.txt CHANGED
@@ -1,10 +1,11 @@
1
- numpy
2
- pillow
3
- pymupdf
4
  opencv-contrib-python-headless==4.10.0.84
5
- zxing-cpp>=2.2.0
 
 
6
  pdf2image
7
- gradio
8
  pytesseract
9
  pyspellchecker
10
  regex
 
1
+ numpy==1.26.4
2
+ pillow>=10.3
3
+ pymupdf>=1.24.9
4
  opencv-contrib-python-headless==4.10.0.84
5
+ zxing-cpp==2.2.0
6
+ gradio>=4.44.0
7
+ qrcode>=7.4
8
  pdf2image
 
9
  pytesseract
10
  pyspellchecker
11
  regex