Yaz Hobooti committed on
Commit
bd3b9d5
·
1 Parent(s): 601e8bb

Implement ChatGPT patches: remove HAS_BARCODE gate, enable ZXing-CPP barcode detection

Browse files
Files changed (1) hide show
  1. app.py +14 -68
app.py CHANGED
@@ -60,6 +60,15 @@ except Exception as e:
60
  HAS_BARCODE = False
61
  print(f"✗ Barcode reader import failed: {e}")
62
 
 
 
 
 
 
 
 
 
 
63
  # -------------------- Core Data --------------------
64
  @dataclass
65
  class Box:
@@ -1131,70 +1140,8 @@ def compare_pdfs(file_a, file_b):
1131
  print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
1132
 
1133
  # Always attempt barcode scan. The PDF path uses ZXing-CPP / pyzbar / dmtx / cv2 if available.
1134
- try:
1135
- codes_a = read_barcodes_from_path(file_a.name, max_pages=8, raster_dpis=(400, 600, 900))
1136
- codes_b = read_barcodes_from_path(file_b.name, max_pages=8, raster_dpis=(400, 600, 900))
1137
-
1138
- # Convert to old format for compatibility
1139
- bar_a, info_a = [], []
1140
- bar_b, info_b = [], []
1141
-
1142
- for code in codes_a:
1143
- if "error" not in code:
1144
- # Create a simple box for visualization (center of polygon)
1145
- if "polygon" in code:
1146
- pts = np.array(code["polygon"])
1147
- x1, y1 = pts.min(axis=0)
1148
- x2, y2 = pts.max(axis=0)
1149
- box = Box(y1=int(y1), x1=int(x1), y2=int(y2), x2=int(x2), area=int((x2-x1)*(y2-y1)))
1150
- bar_a.append(box)
1151
- info_a.append({
1152
- "type": code.get("type", ""),
1153
- "data": code.get("text", ""),
1154
- "left": int(x1),
1155
- "top": int(y1),
1156
- "width": int(x2-x1),
1157
- "height": int(y2-y1),
1158
- "valid": True,
1159
- "page": code.get("page", 0) + 1,
1160
- "source": code.get("source", ""),
1161
- "engine": code.get("engine", "")
1162
- })
1163
-
1164
- for code in codes_b:
1165
- if "error" not in code:
1166
- # Create a simple box for visualization (center of polygon)
1167
- if "polygon" in code:
1168
- pts = np.array(code["polygon"])
1169
- x1, y1 = pts.min(axis=0)
1170
- x2, y2 = pts.max(axis=0)
1171
- box = Box(y1=int(y1), x1=int(x1), y2=int(y2), x2=int(x2), area=int((x2-x1)*(y2-y1)))
1172
- bar_b.append(box)
1173
- info_b.append({
1174
- "type": code.get("type", ""),
1175
- "data": code.get("text", ""),
1176
- "left": int(x1),
1177
- "top": int(y1),
1178
- "width": int(x2-x1),
1179
- "height": int(y2-y1),
1180
- "valid": True,
1181
- "page": code.get("page", 0) + 1,
1182
- "source": code.get("source", ""),
1183
- "engine": code.get("engine", "")
1184
- })
1185
-
1186
- # Debug: Print barcode detection results
1187
- print(f"Barcode detection results - A: {len(bar_a)} codes, B: {len(bar_b)} codes")
1188
- print(f"Raw codes_a: {len(codes_a)} items")
1189
- print(f"Raw codes_b: {len(codes_b)} items")
1190
- if codes_a:
1191
- print(f"Sample code_a: {codes_a[0] if codes_a else 'None'}")
1192
- if codes_b:
1193
- print(f"Sample code_b: {codes_b[0] if codes_b else 'None'}")
1194
- except Exception as e:
1195
- print(f"Barcode detection error: {e}")
1196
- bar_a, info_a = [], []
1197
- bar_b, info_b = [], []
1198
 
1199
  # Always enable CMYK analysis
1200
  cmyk_entries = compute_cmyk_diffs(a, b, red_boxes)
@@ -1325,8 +1272,7 @@ def _binarize(pil_img: Image.Image) -> Image.Image:
1325
 
1326
  def _decode_once(img: Image.Image):
1327
  """Single decode attempt with common barcode symbols"""
1328
- if not HAS_BARCODE:
1329
- return []
1330
  syms = [ZBarSymbol.QRCODE, ZBarSymbol.EAN13, ZBarSymbol.EAN8, ZBarSymbol.UPCA, ZBarSymbol.CODE128]
1331
  return zbar_decode(img, symbols=syms)
1332
 
@@ -1344,8 +1290,8 @@ def debug_scan_pdf(pdf_path: str, outdir: str = "barcode_debug", max_pages=2):
1344
  Usage:
1345
  debug_scan_pdf("your.pdf", outdir="barcode_debug", max_pages=2)
1346
  """
1347
- if not (HAS_BARCODE and HAS_PYMUPDF):
1348
- print("ERROR: Missing dependencies (pyzbar or PyMuPDF)")
1349
  return
1350
 
1351
  os.makedirs(outdir, exist_ok=True)
 
60
  HAS_BARCODE = False
61
  print(f"✗ Barcode reader import failed: {e}")
62
 
63
+ # Enable barcode detection if we have ZXing-CPP or pyzbar
64
+ if HAS_ZXING:
65
+ HAS_BARCODE = True
66
+ print("✓ Barcode detection enabled via ZXing-CPP")
67
+ elif HAS_BARCODE:
68
+ print("✓ Barcode detection enabled via pyzbar")
69
+ else:
70
+ print("✗ No barcode detection available")
71
+
72
  # -------------------- Core Data --------------------
73
  @dataclass
74
  class Box:
 
1140
  print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
1141
 
1142
  # Always attempt barcode scan. The PDF path uses ZXing-CPP / pyzbar / dmtx / cv2 if available.
1143
+ bar_a, info_a = find_barcode_boxes_and_info_from_pdf(file_a.name, image_size=image_size)
1144
+ bar_b, info_b = find_barcode_boxes_and_info_from_pdf(file_b.name, image_size=image_size)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1145
 
1146
  # Always enable CMYK analysis
1147
  cmyk_entries = compute_cmyk_diffs(a, b, red_boxes)
 
1272
 
1273
  def _decode_once(img: Image.Image):
1274
  """Single decode attempt with common barcode symbols"""
1275
+ # Only use pyzbar if available, otherwise rely on ZXing-CPP
 
1276
  syms = [ZBarSymbol.QRCODE, ZBarSymbol.EAN13, ZBarSymbol.EAN8, ZBarSymbol.UPCA, ZBarSymbol.CODE128]
1277
  return zbar_decode(img, symbols=syms)
1278
 
 
1290
  Usage:
1291
  debug_scan_pdf("your.pdf", outdir="barcode_debug", max_pages=2)
1292
  """
1293
+ if not HAS_PYMUPDF:
1294
+ print("ERROR: Missing PyMuPDF dependency")
1295
  return
1296
 
1297
  os.makedirs(outdir, exist_ok=True)