Yaz Hobooti
committed on
Commit
·
69863ad
1
Parent(s):
df4689f
Fix indentation around box creation and align except block in debug scanner; file compiles
Browse files - pdf_comparator.py +3 -2
pdf_comparator.py
CHANGED
|
@@ -658,13 +658,14 @@ def find_misspell_boxes_from_text(
|
|
| 658 |
# Calculate coordinates
|
| 659 |
if image_size:
|
| 660 |
img_width, img_height = image_size
|
|
|
|
| 661 |
scale_x = img_width / pdf_width
|
| 662 |
scale_y = img_height / pdf_height
|
| 663 |
x1 = int(bbox[0] * scale_x)
|
| 664 |
y1 = int(bbox[1] * scale_y) + (page_num * img_height)
|
| 665 |
x2 = int(bbox[2] * scale_x)
|
| 666 |
y2 = int(bbox[3] * scale_y) + (page_num * img_height)
|
| 667 |
-
|
| 668 |
x1 = int(bbox[0])
|
| 669 |
y1 = int(bbox[1]) + (page_num * 1000)
|
| 670 |
x2 = int(bbox[2])
|
|
@@ -1262,7 +1263,7 @@ def debug_scan_pdf(pdf_path: str, outdir: str = "barcode_debug", max_pages=2):
|
|
| 1262 |
rr = _decode_once(pil) or _decode_once(_binarize(pil))
|
| 1263 |
if rr:
|
| 1264 |
print(f" Embedded image {ix+1}: {[(r.type, r.data) for r in rr]}")
|
| 1265 |
-
|
| 1266 |
print(" Embedded image error:", e)
|
| 1267 |
|
| 1268 |
doc.close()
|
|
|
|
| 658 |
# Calculate coordinates
|
| 659 |
if image_size:
|
| 660 |
img_width, img_height = image_size
|
| 661 |
+
# Convert PDF coordinates to image coordinates
|
| 662 |
scale_x = img_width / pdf_width
|
| 663 |
scale_y = img_height / pdf_height
|
| 664 |
x1 = int(bbox[0] * scale_x)
|
| 665 |
y1 = int(bbox[1] * scale_y) + (page_num * img_height)
|
| 666 |
x2 = int(bbox[2] * scale_x)
|
| 667 |
y2 = int(bbox[3] * scale_y) + (page_num * img_height)
|
| 668 |
+
else:
|
| 669 |
x1 = int(bbox[0])
|
| 670 |
y1 = int(bbox[1]) + (page_num * 1000)
|
| 671 |
x2 = int(bbox[2])
|
|
|
|
| 1263 |
rr = _decode_once(pil) or _decode_once(_binarize(pil))
|
| 1264 |
if rr:
|
| 1265 |
print(f" Embedded image {ix+1}: {[(r.type, r.data) for r in rr]}")
|
| 1266 |
+
except Exception as e:
|
| 1267 |
print(" Embedded image error:", e)
|
| 1268 |
|
| 1269 |
doc.close()
|