Spaces:
Sleeping
Sleeping
Yaz Hobooti
committed on
Commit
·
6830c6e
1
Parent(s):
828bfe1
Final barcode reader implementation: ZXing-CPP with exact versions and improved robustness
Browse files
- app.py +5 -3
- barcode_reader.py +43 -40
- requirements.txt +6 -5
app.py
CHANGED
|
@@ -1124,7 +1124,7 @@ def compare_pdfs(file_a, file_b):
|
|
| 1124 |
|
| 1125 |
# Convert to old format for compatibility
|
| 1126 |
bar_a, info_a = [], []
|
| 1127 |
-
bar_b, info_b = []
|
| 1128 |
|
| 1129 |
for code in codes_a:
|
| 1130 |
if "error" not in code:
|
|
@@ -1144,7 +1144,8 @@ def compare_pdfs(file_a, file_b):
|
|
| 1144 |
"height": int(y2-y1),
|
| 1145 |
"valid": True,
|
| 1146 |
"page": code.get("page", 0) + 1,
|
| 1147 |
-
"source": code.get("source", "")
|
|
|
|
| 1148 |
})
|
| 1149 |
|
| 1150 |
for code in codes_b:
|
|
@@ -1165,7 +1166,8 @@ def compare_pdfs(file_a, file_b):
|
|
| 1165 |
"height": int(y2-y1),
|
| 1166 |
"valid": True,
|
| 1167 |
"page": code.get("page", 0) + 1,
|
| 1168 |
-
"source": code.get("source", "")
|
|
|
|
| 1169 |
})
|
| 1170 |
|
| 1171 |
# Debug: Print barcode detection results
|
|
|
|
| 1124 |
|
| 1125 |
# Convert to old format for compatibility
|
| 1126 |
bar_a, info_a = [], []
|
| 1127 |
+
bar_b, info_b = [], []
|
| 1128 |
|
| 1129 |
for code in codes_a:
|
| 1130 |
if "error" not in code:
|
|
|
|
| 1144 |
"height": int(y2-y1),
|
| 1145 |
"valid": True,
|
| 1146 |
"page": code.get("page", 0) + 1,
|
| 1147 |
+
"source": code.get("source", ""),
|
| 1148 |
+
"engine": code.get("engine", "")
|
| 1149 |
})
|
| 1150 |
|
| 1151 |
for code in codes_b:
|
|
|
|
| 1166 |
"height": int(y2-y1),
|
| 1167 |
"valid": True,
|
| 1168 |
"page": code.get("page", 0) + 1,
|
| 1169 |
+
"source": code.get("source", ""),
|
| 1170 |
+
"engine": code.get("engine", "")
|
| 1171 |
})
|
| 1172 |
|
| 1173 |
# Debug: Print barcode detection results
|
barcode_reader.py
CHANGED
|
@@ -1,17 +1,13 @@
|
|
| 1 |
"""
|
| 2 |
-
Robust barcode reader for images
|
| 3 |
|
| 4 |
-
|
| 5 |
-
1) PDF -> extract embedded image XObjects
|
| 6 |
-
2) If
|
| 7 |
-
3) For plain images, decode directly
|
| 8 |
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
- Fallback: OpenCV contrib barcode (if available)
|
| 12 |
-
|
| 13 |
-
Outputs are normalized dicts:
|
| 14 |
-
{ 'engine', 'source', 'page', 'type', 'text', 'polygon': [[x,y] * 4] }
|
| 15 |
"""
|
| 16 |
|
| 17 |
from __future__ import annotations
|
|
@@ -24,23 +20,23 @@ from PIL import Image
|
|
| 24 |
import cv2
|
| 25 |
|
| 26 |
# ---------- Engines ----------
|
| 27 |
-
|
| 28 |
try:
|
| 29 |
import zxingcpp # pip install zxing-cpp
|
| 30 |
-
|
| 31 |
except Exception:
|
| 32 |
zxingcpp = None
|
| 33 |
-
|
| 34 |
|
| 35 |
-
|
| 36 |
|
| 37 |
# ---------- PDF (PyMuPDF) ----------
|
| 38 |
try:
|
| 39 |
import fitz # PyMuPDF
|
| 40 |
-
|
| 41 |
except Exception:
|
| 42 |
fitz = None
|
| 43 |
-
|
| 44 |
|
| 45 |
|
| 46 |
# =========================
|
|
@@ -56,25 +52,25 @@ def _as_gray(arr_bgr: np.ndarray) -> np.ndarray:
|
|
| 56 |
|
| 57 |
def _preprocess_candidates(bgr: np.ndarray) -> List[np.ndarray]:
|
| 58 |
"""
|
| 59 |
-
Generate a
|
| 60 |
-
|
| 61 |
"""
|
| 62 |
out = [bgr]
|
| 63 |
h, w = bgr.shape[:2]
|
| 64 |
|
| 65 |
-
#
|
| 66 |
k = np.array([[0, -1, 0],
|
| 67 |
[-1, 5, -1],
|
| 68 |
[0, -1, 0]], dtype=np.float32)
|
| 69 |
sharp = cv2.filter2D(bgr, -1, k)
|
| 70 |
out.append(sharp)
|
| 71 |
|
| 72 |
-
# CLAHE on gray
|
| 73 |
g = _as_gray(bgr)
|
| 74 |
clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8)).apply(g)
|
| 75 |
out.append(cv2.cvtColor(clahe, cv2.COLOR_GRAY2BGR))
|
| 76 |
|
| 77 |
-
#
|
| 78 |
if max(h, w) < 1600:
|
| 79 |
up = cv2.resize(bgr, (0, 0), fx=1.5, fy=1.5, interpolation=cv2.INTER_CUBIC)
|
| 80 |
out.append(up)
|
|
@@ -83,15 +79,14 @@ def _preprocess_candidates(bgr: np.ndarray) -> List[np.ndarray]:
|
|
| 83 |
|
| 84 |
def _norm_polygon(pts: Any, w: int, h: int) -> List[List[float]]:
|
| 85 |
"""
|
| 86 |
-
Normalize whatever the engine returns into 4
|
| 87 |
-
If fewer
|
| 88 |
"""
|
| 89 |
try:
|
| 90 |
p = np.array(pts, dtype=np.float32).reshape(-1, 2)
|
| 91 |
if p.shape[0] >= 4:
|
| 92 |
p = p[:4]
|
| 93 |
else:
|
| 94 |
-
# make a box
|
| 95 |
x1, y1 = p.min(axis=0)
|
| 96 |
x2, y2 = p.max(axis=0)
|
| 97 |
p = np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]], dtype=np.float32)
|
|
@@ -133,13 +128,12 @@ def _dedupe(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
| 133 |
# =========================
|
| 134 |
|
| 135 |
def _decode_zxing(bgr: np.ndarray) -> List[Dict[str, Any]]:
|
| 136 |
-
if not
|
| 137 |
return []
|
| 138 |
hits: List[Dict[str, Any]] = []
|
| 139 |
-
# ZXing works on gray or color; we'll try a couple of variants
|
| 140 |
for candidate in _preprocess_candidates(bgr):
|
| 141 |
try:
|
| 142 |
-
res = zxingcpp.read_barcodes(candidate) #
|
| 143 |
except Exception:
|
| 144 |
continue
|
| 145 |
for r in res or []:
|
|
@@ -161,12 +155,12 @@ def _decode_zxing(bgr: np.ndarray) -> List[Dict[str, Any]]:
|
|
| 161 |
"polygon": poly,
|
| 162 |
})
|
| 163 |
if hits:
|
| 164 |
-
break
|
| 165 |
return hits
|
| 166 |
|
| 167 |
|
| 168 |
def _decode_opencv(bgr: np.ndarray) -> List[Dict[str, Any]]:
|
| 169 |
-
if not
|
| 170 |
return []
|
| 171 |
det = cv2.barcode.BarcodeDetector()
|
| 172 |
hits: List[Dict[str, Any]] = []
|
|
@@ -207,7 +201,7 @@ def _pdf_extract_xobject_images(path: str, page_index: Optional[int] = None) ->
|
|
| 207 |
"""
|
| 208 |
Return (page, image_bgr) tuples for image XObjects extracted at native resolution.
|
| 209 |
"""
|
| 210 |
-
if not
|
| 211 |
return []
|
| 212 |
out: List[Tuple[int, np.ndarray]] = []
|
| 213 |
doc = fitz.open(path)
|
|
@@ -217,8 +211,7 @@ def _pdf_extract_xobject_images(path: str, page_index: Optional[int] = None) ->
|
|
| 217 |
for info in page.get_images(full=True):
|
| 218 |
xref = info[0]
|
| 219 |
pix = fitz.Pixmap(doc, xref)
|
| 220 |
-
#
|
| 221 |
-
if pix.n >= 4: # includes alpha or CMYK+alpha
|
| 222 |
pix = fitz.Pixmap(fitz.csRGB, pix)
|
| 223 |
pil = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
|
| 224 |
out.append((pno, _to_bgr(pil)))
|
|
@@ -228,9 +221,9 @@ def _pdf_extract_xobject_images(path: str, page_index: Optional[int] = None) ->
|
|
| 228 |
|
| 229 |
def _pdf_render_page(path: str, page: int, dpi: int) -> np.ndarray:
|
| 230 |
"""
|
| 231 |
-
Rasterize one page at the given DPI (for vector codes).
|
| 232 |
"""
|
| 233 |
-
if not
|
| 234 |
raise RuntimeError("PyMuPDF not available; cannot rasterize PDF.")
|
| 235 |
doc = fitz.open(path)
|
| 236 |
if page >= len(doc):
|
|
@@ -256,7 +249,8 @@ def _decode_image_path(path: str) -> List[Dict[str, Any]]:
|
|
| 256 |
|
| 257 |
def _decode_pdf_path(path: str, max_pages: int = 8, raster_dpis: Tuple[int, ...] = (400, 600, 900)) -> List[Dict[str, Any]]:
|
| 258 |
results: List[Dict[str, Any]] = []
|
| 259 |
-
|
|
|
|
| 260 |
for pno, img_bgr in _pdf_extract_xobject_images(path):
|
| 261 |
hits = _decode_any(img_bgr)
|
| 262 |
for h in hits:
|
|
@@ -265,9 +259,8 @@ def _decode_pdf_path(path: str, max_pages: int = 8, raster_dpis: Tuple[int, ...]
|
|
| 265 |
if results:
|
| 266 |
return _dedupe(results)
|
| 267 |
|
| 268 |
-
# 2)
|
| 269 |
-
if not
|
| 270 |
-
# No way to rasterize; return empty
|
| 271 |
return []
|
| 272 |
doc = fitz.open(path)
|
| 273 |
n = min(len(doc), max_pages)
|
|
@@ -303,7 +296,7 @@ def read_barcodes_from_path(path: str,
|
|
| 303 |
|
| 304 |
|
| 305 |
# =========================
|
| 306 |
-
# Optional
|
| 307 |
# =========================
|
| 308 |
|
| 309 |
def draw_barcodes(bgr: np.ndarray, detections: List[Dict[str, Any]]) -> np.ndarray:
|
|
@@ -315,3 +308,13 @@ def draw_barcodes(bgr: np.ndarray, detections: List[Dict[str, Any]]) -> np.ndarr
|
|
| 315 |
x, y = poly[0, 0, 0], poly[0, 0, 1]
|
| 316 |
cv2.putText(out, txt[:48], (x, max(15, y - 6)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 50, 255), 1, cv2.LINE_AA)
|
| 317 |
return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
Robust, HF-friendly barcode reader for images & PDFs.
|
| 3 |
|
| 4 |
+
Order of operations:
|
| 5 |
+
1) PDF -> extract embedded image XObjects (native pixels), decode (ZXing-CPP preferred)
|
| 6 |
+
2) If none found, rasterize the PDF at 400/600/900 DPI and decode
|
| 7 |
+
3) For plain images, decode directly
|
| 8 |
|
| 9 |
+
Outputs: list of dicts:
|
| 10 |
+
{engine, source, page, type, text, polygon: [[x,y]*4]}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
"""
|
| 12 |
|
| 13 |
from __future__ import annotations
|
|
|
|
| 20 |
import cv2
|
| 21 |
|
| 22 |
# ---------- Engines ----------
|
| 23 |
+
_HAS_ZXING = False
|
| 24 |
try:
|
| 25 |
import zxingcpp # pip install zxing-cpp
|
| 26 |
+
_HAS_ZXING = True
|
| 27 |
except Exception:
|
| 28 |
zxingcpp = None
|
| 29 |
+
_HAS_ZXING = False
|
| 30 |
|
| 31 |
+
_HAS_OCV_BARCODE = hasattr(cv2, "barcode") and hasattr(getattr(cv2, "barcode"), "BarcodeDetector")
|
| 32 |
|
| 33 |
# ---------- PDF (PyMuPDF) ----------
|
| 34 |
try:
|
| 35 |
import fitz # PyMuPDF
|
| 36 |
+
_HAS_PYMUPDF = True
|
| 37 |
except Exception:
|
| 38 |
fitz = None
|
| 39 |
+
_HAS_PYMUPDF = False
|
| 40 |
|
| 41 |
|
| 42 |
# =========================
|
|
|
|
| 52 |
|
| 53 |
def _preprocess_candidates(bgr: np.ndarray) -> List[np.ndarray]:
|
| 54 |
"""
|
| 55 |
+
Generate a tiny set of preprocess variants to improve robustness
|
| 56 |
+
without slowing HF Spaces too much.
|
| 57 |
"""
|
| 58 |
out = [bgr]
|
| 59 |
h, w = bgr.shape[:2]
|
| 60 |
|
| 61 |
+
# mild sharpen
|
| 62 |
k = np.array([[0, -1, 0],
|
| 63 |
[-1, 5, -1],
|
| 64 |
[0, -1, 0]], dtype=np.float32)
|
| 65 |
sharp = cv2.filter2D(bgr, -1, k)
|
| 66 |
out.append(sharp)
|
| 67 |
|
| 68 |
+
# CLAHE on gray (helps low-contrast/photographed codes)
|
| 69 |
g = _as_gray(bgr)
|
| 70 |
clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8)).apply(g)
|
| 71 |
out.append(cv2.cvtColor(clahe, cv2.COLOR_GRAY2BGR))
|
| 72 |
|
| 73 |
+
# slight upscale for tiny codes
|
| 74 |
if max(h, w) < 1600:
|
| 75 |
up = cv2.resize(bgr, (0, 0), fx=1.5, fy=1.5, interpolation=cv2.INTER_CUBIC)
|
| 76 |
out.append(up)
|
|
|
|
| 79 |
|
| 80 |
def _norm_polygon(pts: Any, w: int, h: int) -> List[List[float]]:
|
| 81 |
"""
|
| 82 |
+
Normalize whatever the engine returns into a 4-point polygon [[x,y], ...].
|
| 83 |
+
If fewer points are given, approximate with a bbox.
|
| 84 |
"""
|
| 85 |
try:
|
| 86 |
p = np.array(pts, dtype=np.float32).reshape(-1, 2)
|
| 87 |
if p.shape[0] >= 4:
|
| 88 |
p = p[:4]
|
| 89 |
else:
|
|
|
|
| 90 |
x1, y1 = p.min(axis=0)
|
| 91 |
x2, y2 = p.max(axis=0)
|
| 92 |
p = np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]], dtype=np.float32)
|
|
|
|
| 128 |
# =========================
|
| 129 |
|
| 130 |
def _decode_zxing(bgr: np.ndarray) -> List[Dict[str, Any]]:
|
| 131 |
+
if not _HAS_ZXING:
|
| 132 |
return []
|
| 133 |
hits: List[Dict[str, Any]] = []
|
|
|
|
| 134 |
for candidate in _preprocess_candidates(bgr):
|
| 135 |
try:
|
| 136 |
+
res = zxingcpp.read_barcodes(candidate) # list
|
| 137 |
except Exception:
|
| 138 |
continue
|
| 139 |
for r in res or []:
|
|
|
|
| 155 |
"polygon": poly,
|
| 156 |
})
|
| 157 |
if hits:
|
| 158 |
+
break
|
| 159 |
return hits
|
| 160 |
|
| 161 |
|
| 162 |
def _decode_opencv(bgr: np.ndarray) -> List[Dict[str, Any]]:
|
| 163 |
+
if not _HAS_OCV_BARCODE:
|
| 164 |
return []
|
| 165 |
det = cv2.barcode.BarcodeDetector()
|
| 166 |
hits: List[Dict[str, Any]] = []
|
|
|
|
| 201 |
"""
|
| 202 |
Return (page, image_bgr) tuples for image XObjects extracted at native resolution.
|
| 203 |
"""
|
| 204 |
+
if not _HAS_PYMUPDF:
|
| 205 |
return []
|
| 206 |
out: List[Tuple[int, np.ndarray]] = []
|
| 207 |
doc = fitz.open(path)
|
|
|
|
| 211 |
for info in page.get_images(full=True):
|
| 212 |
xref = info[0]
|
| 213 |
pix = fitz.Pixmap(doc, xref)
|
| 214 |
+
if pix.n >= 4: # RGBA or CMYK+alpha -> convert to RGB
|
|
|
|
| 215 |
pix = fitz.Pixmap(fitz.csRGB, pix)
|
| 216 |
pil = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
|
| 217 |
out.append((pno, _to_bgr(pil)))
|
|
|
|
| 221 |
|
| 222 |
def _pdf_render_page(path: str, page: int, dpi: int) -> np.ndarray:
|
| 223 |
"""
|
| 224 |
+
Rasterize one page at the given DPI (for vector-only codes).
|
| 225 |
"""
|
| 226 |
+
if not _HAS_PYMUPDF:
|
| 227 |
raise RuntimeError("PyMuPDF not available; cannot rasterize PDF.")
|
| 228 |
doc = fitz.open(path)
|
| 229 |
if page >= len(doc):
|
|
|
|
| 249 |
|
| 250 |
def _decode_pdf_path(path: str, max_pages: int = 8, raster_dpis: Tuple[int, ...] = (400, 600, 900)) -> List[Dict[str, Any]]:
|
| 251 |
results: List[Dict[str, Any]] = []
|
| 252 |
+
|
| 253 |
+
# 1) try embedded images first
|
| 254 |
for pno, img_bgr in _pdf_extract_xobject_images(path):
|
| 255 |
hits = _decode_any(img_bgr)
|
| 256 |
for h in hits:
|
|
|
|
| 259 |
if results:
|
| 260 |
return _dedupe(results)
|
| 261 |
|
| 262 |
+
# 2) rasterize a handful of pages at increasing DPI
|
| 263 |
+
if not _HAS_PYMUPDF:
|
|
|
|
| 264 |
return []
|
| 265 |
doc = fitz.open(path)
|
| 266 |
n = min(len(doc), max_pages)
|
|
|
|
| 296 |
|
| 297 |
|
| 298 |
# =========================
|
| 299 |
+
# Optional helpers for UI
|
| 300 |
# =========================
|
| 301 |
|
| 302 |
def draw_barcodes(bgr: np.ndarray, detections: List[Dict[str, Any]]) -> np.ndarray:
|
|
|
|
| 308 |
x, y = poly[0, 0, 0], poly[0, 0, 1]
|
| 309 |
cv2.putText(out, txt[:48], (x, max(15, y - 6)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 50, 255), 1, cv2.LINE_AA)
|
| 310 |
return out
|
| 311 |
+
|
| 312 |
+
def render_preview_bgr(path: str, page: int = 0, dpi: int = 220) -> np.ndarray:
|
| 313 |
+
"""
|
| 314 |
+
For UI preview only (small DPI). For PDFs, render one page; for images, just load.
|
| 315 |
+
"""
|
| 316 |
+
ext = os.path.splitext(path.lower())[1]
|
| 317 |
+
if ext == ".pdf":
|
| 318 |
+
return _pdf_render_page(path, page=page, dpi=dpi)
|
| 319 |
+
pil = Image.open(path).convert("RGB")
|
| 320 |
+
return _to_bgr(pil)
|
requirements.txt
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
-
numpy
|
| 2 |
-
pillow
|
| 3 |
-
pymupdf
|
| 4 |
opencv-contrib-python-headless==4.10.0.84
|
| 5 |
-
zxing-cpp
|
|
|
|
|
|
|
| 6 |
pdf2image
|
| 7 |
-
gradio
|
| 8 |
pytesseract
|
| 9 |
pyspellchecker
|
| 10 |
regex
|
|
|
|
| 1 |
+
numpy==1.26.4
|
| 2 |
+
pillow>=10.3
|
| 3 |
+
pymupdf>=1.24.9
|
| 4 |
opencv-contrib-python-headless==4.10.0.84
|
| 5 |
+
zxing-cpp==2.2.0
|
| 6 |
+
gradio>=4.44.0
|
| 7 |
+
qrcode>=7.4
|
| 8 |
pdf2image
|
|
|
|
| 9 |
pytesseract
|
| 10 |
pyspellchecker
|
| 11 |
regex
|