Yaz Hobooti committed on
Commit
7981251
·
1 Parent(s): a75a85f

Fix: revert to working version with proper indentation

Browse files
Files changed (1) hide show
  1. pdf_comparator.py +8 -74
pdf_comparator.py CHANGED
@@ -679,9 +679,6 @@ def find_misspell_boxes_from_text(
679
  img_height = image_size[1]
680
  if _is_in_excluded_bottom_area(box, img_height) and not _contains_validation_text(text):
681
  continue
682
- else:
683
- if _is_in_excluded_bottom_area(box, ph):
684
- continue
685
 
686
  boxes.append(box)
687
 
@@ -791,11 +788,7 @@ def find_misspell_boxes(
791
  continue
792
 
793
  # NOTE: adjust to match your Box constructor if needed
794
- b = Box(top, left, top + height, left + width, width * height)
795
- # Exclude bottom 115mm unless the text contains the validation phrase
796
- if _is_in_excluded_bottom_area(b, img.height) and not _contains_validation_text(raw):
797
- continue
798
- boxes.append(b)
799
 
800
  return boxes
801
 
@@ -985,11 +978,7 @@ def scan_pdf_barcodes(pdf_path: str, *, dpi_list=(900,1200), max_pages=10):
985
  pil=_pix_to_pil(pix)
986
  hits=_decode_variants(pil)
987
  for r in hits:
988
- b = Box(r["top"], r["left"], r["top"]+r["height"], r["left"]+r["width"], r["width"]*r["height"])
989
- # Exclude barcodes in the bottom 115mm of the page image
990
- if _is_in_excluded_bottom_area(b, pil.height):
991
- continue
992
- boxes.append(b)
993
  sym, payload = r["type"], r["data"]
994
  infos.append({**r, "valid": _validate(sym, payload), "page": page_idx+1, "source": f"embed:{ix+1}"})
995
  except Exception:
@@ -1005,10 +994,7 @@ def scan_pdf_barcodes(pdf_path: str, *, dpi_list=(900,1200), max_pages=10):
1005
  pil=_pix_to_pil(pix)
1006
  hits=_decode_variants(pil)
1007
  for r in hits:
1008
- b = Box(r["top"], r["left"], r["top"]+r["height"], r["left"]+r["width"], r["width"]*r["height"])
1009
- if _is_in_excluded_bottom_area(b, pil.height):
1010
- continue
1011
- boxes.append(b)
1012
  sym, payload = r["type"], r["data"]
1013
  infos.append({**r, "valid": _validate(sym, payload), "page": page_idx+1, "source": f"page@{dpi}dpi"})
1014
  if any(i["page"]==page_idx+1 for i in infos):
@@ -1314,64 +1300,12 @@ def debug_scan_pdf(pdf_path: str, outdir: str = "barcode_debug", max_pages=2):
1314
  print("Open the PNGs and zoom in to check bar width. If narrow bars are <2px at 600 DPI, you need 900-1200 DPI.")
1315
 
1316
  def find_barcode_boxes_and_info_from_pdf(pdf_path: str, image_size: Optional[Tuple[int, int]] = None, max_pages: int = 10):
1317
- """Detect barcodes from the original PDF and return boxes in the same
1318
- coordinate space as the combined display image.
1319
-
1320
- If image_size is provided (w,h of the vertically combined display image),
1321
- each page is rendered so its width matches w, then decoded. Box y-coordinates
1322
- are offset by the cumulative height of previous pages so that all boxes map
1323
- into the combined image space correctly.
1324
  """
1325
- boxes: List[Box] = []
1326
- infos: List[Dict[str, Any]] = []
1327
- try:
1328
- doc = fitz.open(pdf_path)
1329
- num_pages = min(len(doc), max_pages)
1330
- if num_pages == 0:
1331
- return [], []
1332
-
1333
- target_width = None
1334
- if image_size:
1335
- target_width = int(image_size[0])
1336
-
1337
- y_offset = 0
1338
- for page_idx in range(num_pages):
1339
- page = doc[page_idx]
1340
- # Compute scale so that rendered width matches target_width when provided
1341
- if target_width:
1342
- page_width_pts = float(page.rect.width) # points (72 dpi)
1343
- scale = max(1.0, target_width / page_width_pts)
1344
- else:
1345
- # fallback dpi ~600
1346
- scale = 600.0 / 72.0
1347
- try:
1348
- pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), colorspace=fitz.csGRAY, alpha=False)
1349
- except TypeError:
1350
- pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), alpha=False)
1351
- pil = _pix_to_pil(pix)
1352
- pw, ph = pil.size
1353
- hits = _decode_variants(pil)
1354
- for r in hits:
1355
- x1 = int(r.get("left", 0))
1356
- y1 = int(r.get("top", 0)) + y_offset
1357
- w = int(r.get("width", 0))
1358
- h = int(r.get("height", 0))
1359
- x2 = x1 + w
1360
- y2 = y1 + h
1361
- b = Box(y1, x1, y2, x2, w * h)
1362
- # Exclude bottom 115mm for combined image if we know full height; else per-page
1363
- if image_size and _is_in_excluded_bottom_area(b, image_size[1]):
1364
- continue
1365
- if not image_size and _is_in_excluded_bottom_area(b, ph):
1366
- continue
1367
- boxes.append(b)
1368
- sym, payload = r.get("type", ""), r.get("data", "")
1369
- infos.append({**r, "valid": _validate(sym, payload), "page": page_idx + 1, "source": f"page@scale{scale:.2f}"})
1370
- y_offset += ph
1371
- doc.close()
1372
- except Exception:
1373
- return [], []
1374
- return boxes, infos
1375
 
1376
  if __name__ == "__main__":
1377
  demo = create_demo()
 
679
  img_height = image_size[1]
680
  if _is_in_excluded_bottom_area(box, img_height) and not _contains_validation_text(text):
681
  continue
 
 
 
682
 
683
  boxes.append(box)
684
 
 
788
  continue
789
 
790
  # NOTE: adjust to match your Box constructor if needed
791
+ boxes.append(Box(top, left, top + height, left + width, width * height))
 
 
 
 
792
 
793
  return boxes
794
 
 
978
  pil=_pix_to_pil(pix)
979
  hits=_decode_variants(pil)
980
  for r in hits:
981
+ boxes.append(Box(r["top"], r["left"], r["top"]+r["height"], r["left"]+r["width"], r["width"]*r["height"]))
 
 
 
 
982
  sym, payload = r["type"], r["data"]
983
  infos.append({**r, "valid": _validate(sym, payload), "page": page_idx+1, "source": f"embed:{ix+1}"})
984
  except Exception:
 
994
  pil=_pix_to_pil(pix)
995
  hits=_decode_variants(pil)
996
  for r in hits:
997
+ boxes.append(Box(r["top"], r["left"], r["top"]+r["height"], r["left"]+r["width"], r["width"]*r["height"]))
 
 
 
998
  sym, payload = r["type"], r["data"]
999
  infos.append({**r, "valid": _validate(sym, payload), "page": page_idx+1, "source": f"page@{dpi}dpi"})
1000
  if any(i["page"]==page_idx+1 for i in infos):
 
1300
  print("Open the PNGs and zoom in to check bar width. If narrow bars are <2px at 600 DPI, you need 900-1200 DPI.")
1301
 
1302
  def find_barcode_boxes_and_info_from_pdf(pdf_path: str, image_size: Optional[Tuple[int, int]] = None, max_pages: int = 10):
1303
+ """Compatibility wrapper expected by callers.
1304
+ Delegates to scan_pdf_barcodes; image_size is unused here but
1305
+ kept to match previous signature.
1306
+ Returns (boxes, infos).
 
 
 
1307
  """
1308
+ return scan_pdf_barcodes(pdf_path, max_pages=max_pages)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1309
 
1310
  if __name__ == "__main__":
1311
  demo = create_demo()