Spaces:
Sleeping
Sleeping
Yaz Hobooti
commited on
Commit
·
bd3b9d5
1
Parent(s):
601e8bb
Implement ChatGPT patches: remove HAS_BARCODE gate, enable ZXing-CPP barcode detection
Browse files
app.py
CHANGED
|
@@ -60,6 +60,15 @@ except Exception as e:
|
|
| 60 |
HAS_BARCODE = False
|
| 61 |
print(f"✗ Barcode reader import failed: {e}")
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
# -------------------- Core Data --------------------
|
| 64 |
@dataclass
|
| 65 |
class Box:
|
|
@@ -1131,70 +1140,8 @@ def compare_pdfs(file_a, file_b):
|
|
| 1131 |
print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
|
| 1132 |
|
| 1133 |
# Always attempt barcode scan. The PDF path uses ZXing-CPP / pyzbar / dmtx / cv2 if available.
|
| 1134 |
-
|
| 1135 |
-
|
| 1136 |
-
codes_b = read_barcodes_from_path(file_b.name, max_pages=8, raster_dpis=(400, 600, 900))
|
| 1137 |
-
|
| 1138 |
-
# Convert to old format for compatibility
|
| 1139 |
-
bar_a, info_a = [], []
|
| 1140 |
-
bar_b, info_b = [], []
|
| 1141 |
-
|
| 1142 |
-
for code in codes_a:
|
| 1143 |
-
if "error" not in code:
|
| 1144 |
-
# Create a simple box for visualization (center of polygon)
|
| 1145 |
-
if "polygon" in code:
|
| 1146 |
-
pts = np.array(code["polygon"])
|
| 1147 |
-
x1, y1 = pts.min(axis=0)
|
| 1148 |
-
x2, y2 = pts.max(axis=0)
|
| 1149 |
-
box = Box(y1=int(y1), x1=int(x1), y2=int(y2), x2=int(x2), area=int((x2-x1)*(y2-y1)))
|
| 1150 |
-
bar_a.append(box)
|
| 1151 |
-
info_a.append({
|
| 1152 |
-
"type": code.get("type", ""),
|
| 1153 |
-
"data": code.get("text", ""),
|
| 1154 |
-
"left": int(x1),
|
| 1155 |
-
"top": int(y1),
|
| 1156 |
-
"width": int(x2-x1),
|
| 1157 |
-
"height": int(y2-y1),
|
| 1158 |
-
"valid": True,
|
| 1159 |
-
"page": code.get("page", 0) + 1,
|
| 1160 |
-
"source": code.get("source", ""),
|
| 1161 |
-
"engine": code.get("engine", "")
|
| 1162 |
-
})
|
| 1163 |
-
|
| 1164 |
-
for code in codes_b:
|
| 1165 |
-
if "error" not in code:
|
| 1166 |
-
# Create a simple box for visualization (center of polygon)
|
| 1167 |
-
if "polygon" in code:
|
| 1168 |
-
pts = np.array(code["polygon"])
|
| 1169 |
-
x1, y1 = pts.min(axis=0)
|
| 1170 |
-
x2, y2 = pts.max(axis=0)
|
| 1171 |
-
box = Box(y1=int(y1), x1=int(x1), y2=int(y2), x2=int(x2), area=int((x2-x1)*(y2-y1)))
|
| 1172 |
-
bar_b.append(box)
|
| 1173 |
-
info_b.append({
|
| 1174 |
-
"type": code.get("type", ""),
|
| 1175 |
-
"data": code.get("text", ""),
|
| 1176 |
-
"left": int(x1),
|
| 1177 |
-
"top": int(y1),
|
| 1178 |
-
"width": int(x2-x1),
|
| 1179 |
-
"height": int(y2-y1),
|
| 1180 |
-
"valid": True,
|
| 1181 |
-
"page": code.get("page", 0) + 1,
|
| 1182 |
-
"source": code.get("source", ""),
|
| 1183 |
-
"engine": code.get("engine", "")
|
| 1184 |
-
})
|
| 1185 |
-
|
| 1186 |
-
# Debug: Print barcode detection results
|
| 1187 |
-
print(f"Barcode detection results - A: {len(bar_a)} codes, B: {len(bar_b)} codes")
|
| 1188 |
-
print(f"Raw codes_a: {len(codes_a)} items")
|
| 1189 |
-
print(f"Raw codes_b: {len(codes_b)} items")
|
| 1190 |
-
if codes_a:
|
| 1191 |
-
print(f"Sample code_a: {codes_a[0] if codes_a else 'None'}")
|
| 1192 |
-
if codes_b:
|
| 1193 |
-
print(f"Sample code_b: {codes_b[0] if codes_b else 'None'}")
|
| 1194 |
-
except Exception as e:
|
| 1195 |
-
print(f"Barcode detection error: {e}")
|
| 1196 |
-
bar_a, info_a = [], []
|
| 1197 |
-
bar_b, info_b = [], []
|
| 1198 |
|
| 1199 |
# Always enable CMYK analysis
|
| 1200 |
cmyk_entries = compute_cmyk_diffs(a, b, red_boxes)
|
|
@@ -1325,8 +1272,7 @@ def _binarize(pil_img: Image.Image) -> Image.Image:
|
|
| 1325 |
|
| 1326 |
def _decode_once(img: Image.Image):
|
| 1327 |
"""Single decode attempt with common barcode symbols"""
|
| 1328 |
-
if
|
| 1329 |
-
return []
|
| 1330 |
syms = [ZBarSymbol.QRCODE, ZBarSymbol.EAN13, ZBarSymbol.EAN8, ZBarSymbol.UPCA, ZBarSymbol.CODE128]
|
| 1331 |
return zbar_decode(img, symbols=syms)
|
| 1332 |
|
|
@@ -1344,8 +1290,8 @@ def debug_scan_pdf(pdf_path: str, outdir: str = "barcode_debug", max_pages=2):
|
|
| 1344 |
Usage:
|
| 1345 |
debug_scan_pdf("your.pdf", outdir="barcode_debug", max_pages=2)
|
| 1346 |
"""
|
| 1347 |
-
if not
|
| 1348 |
-
print("ERROR: Missing
|
| 1349 |
return
|
| 1350 |
|
| 1351 |
os.makedirs(outdir, exist_ok=True)
|
|
|
|
| 60 |
HAS_BARCODE = False
|
| 61 |
print(f"✗ Barcode reader import failed: {e}")
|
| 62 |
|
| 63 |
+
# Enable barcode detection if we have ZXing-CPP or pyzbar
|
| 64 |
+
if HAS_ZXING:
|
| 65 |
+
HAS_BARCODE = True
|
| 66 |
+
print("✓ Barcode detection enabled via ZXing-CPP")
|
| 67 |
+
elif HAS_BARCODE:
|
| 68 |
+
print("✓ Barcode detection enabled via pyzbar")
|
| 69 |
+
else:
|
| 70 |
+
print("✗ No barcode detection available")
|
| 71 |
+
|
| 72 |
# -------------------- Core Data --------------------
|
| 73 |
@dataclass
|
| 74 |
class Box:
|
|
|
|
| 1140 |
print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
|
| 1141 |
|
| 1142 |
# Always attempt barcode scan. The PDF path uses ZXing-CPP / pyzbar / dmtx / cv2 if available.
|
| 1143 |
+
bar_a, info_a = find_barcode_boxes_and_info_from_pdf(file_a.name, image_size=image_size)
|
| 1144 |
+
bar_b, info_b = find_barcode_boxes_and_info_from_pdf(file_b.name, image_size=image_size)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1145 |
|
| 1146 |
# Always enable CMYK analysis
|
| 1147 |
cmyk_entries = compute_cmyk_diffs(a, b, red_boxes)
|
|
|
|
| 1272 |
|
| 1273 |
def _decode_once(img: Image.Image):
|
| 1274 |
"""Single decode attempt with common barcode symbols"""
|
| 1275 |
+
# Only use pyzbar if available, otherwise rely on ZXing-CPP
|
|
|
|
| 1276 |
syms = [ZBarSymbol.QRCODE, ZBarSymbol.EAN13, ZBarSymbol.EAN8, ZBarSymbol.UPCA, ZBarSymbol.CODE128]
|
| 1277 |
return zbar_decode(img, symbols=syms)
|
| 1278 |
|
|
|
|
| 1290 |
Usage:
|
| 1291 |
debug_scan_pdf("your.pdf", outdir="barcode_debug", max_pages=2)
|
| 1292 |
"""
|
| 1293 |
+
if not HAS_PYMUPDF:
|
| 1294 |
+
print("ERROR: Missing PyMuPDF dependency")
|
| 1295 |
return
|
| 1296 |
|
| 1297 |
os.makedirs(outdir, exist_ok=True)
|