Yaz Hobooti
committed on
Commit
·
b90d6b5
1
Parent(s):
35d19c1
Add missing wrapper find_barcode_boxes_and_info_from_pdf delegating to scan_pdf_barcodes
Browse files - pdf_comparator.py +14 -6
pdf_comparator.py
CHANGED
|
@@ -370,7 +370,7 @@ def _contains_validation_text(text: str) -> bool:
|
|
| 370 |
"""Check if text contains the validation text '50 Carroll'"""
|
| 371 |
return "50 Carroll" in text
|
| 372 |
|
| 373 |
-
def load_pdf_pages(path: str, dpi: int =
|
| 374 |
"""Load PDF pages as images with fallback options"""
|
| 375 |
if not _is_pdf(path):
|
| 376 |
return [Image.open(path).convert("RGB")]
|
|
@@ -664,7 +664,7 @@ def find_misspell_boxes_from_text(
|
|
| 664 |
y1 = int(bbox[1] * scale_y) + (page_num * img_height)
|
| 665 |
x2 = int(bbox[2] * scale_x)
|
| 666 |
y2 = int(bbox[3] * scale_y) + (page_num * img_height)
|
| 667 |
-
|
| 668 |
x1 = int(bbox[0])
|
| 669 |
y1 = int(bbox[1]) + (page_num * 1000)
|
| 670 |
x2 = int(bbox[2])
|
|
@@ -1027,8 +1027,8 @@ def compare_pdfs(file_a, file_b):
|
|
| 1027 |
return None, None, None, "❌ Please upload both PDF files to compare", [], []
|
| 1028 |
|
| 1029 |
# Load images with multiple pages support
|
| 1030 |
-
pages_a = load_pdf_pages(file_a.name, dpi=
|
| 1031 |
-
pages_b = load_pdf_pages(file_b.name, dpi=
|
| 1032 |
|
| 1033 |
# Combine pages into single images for comparison
|
| 1034 |
a = combine_pages_vertically(pages_a)
|
|
@@ -1125,7 +1125,7 @@ def create_demo():
|
|
| 1125 |
# ๐ Advanced PDF Comparison Tool
|
| 1126 |
|
| 1127 |
Upload two PDF files to get comprehensive analysis including:
|
| 1128 |
-
- **Multi-page PDF support** (up to
|
| 1129 |
- **Visual differences** with bounding boxes
|
| 1130 |
- **OCR and spell checking**
|
| 1131 |
- **Barcode/QR code detection**
|
|
@@ -1262,13 +1262,21 @@ def debug_scan_pdf(pdf_path: str, outdir: str = "barcode_debug", max_pages=2):
|
|
| 1262 |
rr = _decode_once(pil) or _decode_once(_binarize(pil))
|
| 1263 |
if rr:
|
| 1264 |
print(f" Embedded image {ix+1}: {[(r.type, r.data) for r in rr]}")
|
| 1265 |
-
|
| 1266 |
print(" Embedded image error:", e)
|
| 1267 |
|
| 1268 |
doc.close()
|
| 1269 |
print(f"\nDebug images saved to: {outdir}/")
|
| 1270 |
print("Open the PNGs and zoom in to check bar width. If narrow bars are <2px at 600 DPI, you need 900-1200 DPI.")
|
| 1271 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1272 |
if __name__ == "__main__":
|
| 1273 |
demo = create_demo()
|
| 1274 |
demo.launch(
|
|
|
|
| 370 |
"""Check if text contains the validation text '50 Carroll'"""
|
| 371 |
return "50 Carroll" in text
|
| 372 |
|
| 373 |
+
def load_pdf_pages(path: str, dpi: int = 600, max_pages: int = 15) -> List[Image.Image]:
|
| 374 |
"""Load PDF pages as images with fallback options"""
|
| 375 |
if not _is_pdf(path):
|
| 376 |
return [Image.open(path).convert("RGB")]
|
|
|
|
| 664 |
y1 = int(bbox[1] * scale_y) + (page_num * img_height)
|
| 665 |
x2 = int(bbox[2] * scale_x)
|
| 666 |
y2 = int(bbox[3] * scale_y) + (page_num * img_height)
|
| 667 |
+
else:
|
| 668 |
x1 = int(bbox[0])
|
| 669 |
y1 = int(bbox[1]) + (page_num * 1000)
|
| 670 |
x2 = int(bbox[2])
|
|
|
|
| 1027 |
return None, None, None, "❌ Please upload both PDF files to compare", [], []
|
| 1028 |
|
| 1029 |
# Load images with multiple pages support
|
| 1030 |
+
pages_a = load_pdf_pages(file_a.name, dpi=600, max_pages=15)
|
| 1031 |
+
pages_b = load_pdf_pages(file_b.name, dpi=600, max_pages=15)
|
| 1032 |
|
| 1033 |
# Combine pages into single images for comparison
|
| 1034 |
a = combine_pages_vertically(pages_a)
|
|
|
|
| 1125 |
# ๐ Advanced PDF Comparison Tool
|
| 1126 |
|
| 1127 |
Upload two PDF files to get comprehensive analysis including:
|
| 1128 |
+
- **Multi-page PDF support** (up to 15 pages per document)
|
| 1129 |
- **Visual differences** with bounding boxes
|
| 1130 |
- **OCR and spell checking**
|
| 1131 |
- **Barcode/QR code detection**
|
|
|
|
| 1262 |
rr = _decode_once(pil) or _decode_once(_binarize(pil))
|
| 1263 |
if rr:
|
| 1264 |
print(f" Embedded image {ix+1}: {[(r.type, r.data) for r in rr]}")
|
| 1265 |
+
except Exception as e:
|
| 1266 |
print(" Embedded image error:", e)
|
| 1267 |
|
| 1268 |
doc.close()
|
| 1269 |
print(f"\nDebug images saved to: {outdir}/")
|
| 1270 |
print("Open the PNGs and zoom in to check bar width. If narrow bars are <2px at 600 DPI, you need 900-1200 DPI.")
|
| 1271 |
|
| 1272 |
+
def find_barcode_boxes_and_info_from_pdf(pdf_path: str, image_size: Optional[Tuple[int, int]] = None, max_pages: int = 10):
|
| 1273 |
+
"""Compatibility wrapper expected by callers.
|
| 1274 |
+
Delegates to scan_pdf_barcodes; image_size is unused here but
|
| 1275 |
+
kept to match previous signature.
|
| 1276 |
+
Returns (boxes, infos).
|
| 1277 |
+
"""
|
| 1278 |
+
return scan_pdf_barcodes(pdf_path, max_pages=max_pages)
|
| 1279 |
+
|
| 1280 |
if __name__ == "__main__":
|
| 1281 |
demo = create_demo()
|
| 1282 |
demo.launch(
|