Update Code_2_7.py
Browse files- Code_2_7.py +13 -0
Code_2_7.py
CHANGED
|
@@ -1753,7 +1753,20 @@ def remove_duplicate_annotations(pdf_path, threshold):
|
|
| 1753 |
return output_pdf_io.read()
|
| 1754 |
|
| 1755 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1756 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1757 |
def clean_annotations(annotations_data, threshold):
|
| 1758 |
"""
|
| 1759 |
Remove “nearby” duplicates from annotations_data,
|
|
|
|
| 1753 |
return output_pdf_io.read()
|
| 1754 |
|
| 1755 |
|
| 1756 |
+
def rect_distance(r1, r2):
    """Return the Euclidean distance between the centers of two rects.

    Each rect is read by index: entries 0 and 2 are averaged for the
    center's x coordinate, entries 1 and 3 for its y coordinate
    (presumably an (x_min, y_min, x_max, y_max) layout -- confirm
    against callers).

    If either rect is falsy (e.g. ``None`` or an empty sequence), the
    distance is reported as ``float('inf')`` so that a missing rect is
    never considered close to anything.
    """
    if not (r1 and r2):
        return float('inf')

    # Midpoint of each rect along both axes.
    center_a_x = (r1[0] + r1[2]) / 2
    center_a_y = (r1[1] + r1[3]) / 2
    center_b_x = (r2[0] + r2[2]) / 2
    center_b_y = (r2[1] + r2[3]) / 2

    delta_x = center_b_x - center_a_x
    delta_y = center_b_y - center_a_y
    return math.hypot(delta_x, delta_y)
|
| 1763 |
|
| 1764 |
+
def group_rect(verts):
    """Turn [[x, y], ...] into (x_min, y_min, x_max, y_max).

    Args:
        verts: A sequence of two-element ``[x, y]`` vertices, or a falsy
            value (``None`` or an empty sequence) when there are none.

    Returns:
        The bounding box ``(x_min, y_min, x_max, y_max)`` of the vertices,
        or ``None`` when ``verts`` is falsy.
    """
    # Guard before iterating: checking emptiness only after the
    # comprehensions would let ``None`` crash with a TypeError instead of
    # yielding the "no rect" result.
    if not verts:
        return None

    xs = [x for x, _ in verts]
    ys = [y for _, y in verts]
    return (min(xs), min(ys), max(xs), max(ys))
|
| 1769 |
+
|
| 1770 |
def clean_annotations(annotations_data, threshold):
|
| 1771 |
"""
|
| 1772 |
Remove “nearby” duplicates from annotations_data,
|