Spaces:
Sleeping
Sleeping
Update InitialMarkups.py
Browse files- InitialMarkups.py +42 -38
InitialMarkups.py
CHANGED
|
@@ -527,44 +527,41 @@ def print_tree_with_numbers(headers, indent=0):
|
|
| 527 |
f"(Level {header['level']}, p:{header['page']+1}, {size_info})")
|
| 528 |
print_tree_with_numbers(header["children"], indent + 1)
|
| 529 |
|
| 530 |
-
def highlight_boxes(doc, highlights):
|
| 531 |
-
for page_num, bbox in highlights.items():
|
| 532 |
|
|
|
|
|
|
|
| 533 |
page = doc.load_page(page_num)
|
| 534 |
page_width = page.rect.width
|
| 535 |
-
|
| 536 |
-
# Get
|
| 537 |
orig_rect = fitz.Rect(bbox)
|
| 538 |
-
rect_width = orig_rect.width
|
| 539 |
rect_height = orig_rect.height
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
)
|
| 567 |
-
annot1.update()
|
| 568 |
|
| 569 |
def get_leaf_headers_with_paths(listtoloop, path=None, output=None):
|
| 570 |
if path is None:
|
|
@@ -742,7 +739,10 @@ def extract_section_under_header(pdf_path):
|
|
| 742 |
if combined_line_norm and combined_line_norm in paths[-2]:
|
| 743 |
print(combined_line_norm)
|
| 744 |
headertoContinue2 = combined_line_norm
|
| 745 |
-
|
|
|
|
|
|
|
|
|
|
| 746 |
# Optimized header matching
|
| 747 |
existsfull = (
|
| 748 |
( combined_line_norm in allchildrenheaders_set or
|
|
@@ -839,7 +839,7 @@ def extract_section_under_header(pdf_path):
|
|
| 839 |
"Author": "ADR",
|
| 840 |
"Creation Date": formatted_time,
|
| 841 |
"Layer": "Initial",
|
| 842 |
-
"Code":
|
| 843 |
"head above 1": paths[-2],
|
| 844 |
"head above 2": paths[0]
|
| 845 |
}
|
|
@@ -935,7 +935,7 @@ def extract_section_under_header(pdf_path):
|
|
| 935 |
"Author": "ADR",
|
| 936 |
"Creation Date": formatted_time,
|
| 937 |
"Layer": "Initial",
|
| 938 |
-
"Code":
|
| 939 |
"head above 1": paths[-2],
|
| 940 |
"head above 2": paths[0]
|
| 941 |
}
|
|
@@ -976,7 +976,7 @@ def extract_section_under_header(pdf_path):
|
|
| 976 |
for page_num, bbox in current_bbox.items():
|
| 977 |
bbox[3] = last_y1s.get(page_num, bbox[3])
|
| 978 |
page_highlights[page_num] = bbox
|
| 979 |
-
highlight_boxes(docHighlights, page_highlights)
|
| 980 |
|
| 981 |
break_collecting = True
|
| 982 |
break
|
|
@@ -1012,7 +1012,11 @@ def extract_section_under_header(pdf_path):
|
|
| 1012 |
for page_num, bbox in current_bbox.items():
|
| 1013 |
bbox[3] = last_y1s.get(page_num, bbox[3])
|
| 1014 |
page_highlights[page_num] = bbox
|
| 1015 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1016 |
|
| 1017 |
# docHighlights.save("highlighted_output.pdf", garbage=4, deflate=True)
|
| 1018 |
|
|
|
|
| 527 |
f"(Level {header['level']}, p:{header['page']+1}, {size_info})")
|
| 528 |
print_tree_with_numbers(header["children"], indent + 1)
|
| 529 |
|
|
|
|
|
|
|
| 530 |
|
| 531 |
+
def highlight_boxes(doc, highlights, stringtowrite, fixed_width=500):
    """Draw a fixed-width highlight band and a text label on each listed page.

    For every ``page_num -> bbox`` entry in ``highlights`` the page is loaded
    from ``doc``. The vertical extent (y0/y1) of ``bbox`` is kept, while the
    horizontal extent is replaced by a band ``fixed_width`` wide that is
    centred on the page. A rectangle annotation (yellow stroke and fill,
    opacity 0.3) and a free-text annotation containing ``[stringtowrite]``
    (red, Helvetica, size 15, ``align=2``) are then added over that band.

    Args:
        doc: Document object exposing ``load_page(page_num)``.
        highlights: Mapping of page number to a bbox accepted by ``fitz.Rect``.
        stringtowrite: Label placed inside square brackets in the free-text
            annotation. It is formatted with an f-string, so non-string
            values are rendered via ``str()`` instead of raising.
        fixed_width: Width of the centred band. Nothing here clamps it to the
            page width, so a value wider than the page extends past the page
            edges.

    Returns:
        None. The annotations are added to the pages of ``doc``.
    """
    # Both values are identical for every page, so compute them once.
    label = f"[{stringtowrite}]"
    half_width = fixed_width / 2

    for page_num, bbox in highlights.items():
        page = doc.load_page(page_num)

        # Only the vertical coordinates of the original rect are reused.
        orig_rect = fitz.Rect(bbox)

        # Boxes that are 10 units wide or narrower are skipped entirely.
        if orig_rect.width > 10:
            # Centre the fixed-width band horizontally on the page.
            center_x = page.rect.width / 2
            new_rect = fitz.Rect(
                center_x - half_width,
                orig_rect.y0,
                center_x + half_width,
                orig_rect.y1,
            )

            # Semi-transparent yellow highlight rectangle.
            annot = page.add_rect_annot(new_rect)
            annot.set_colors(stroke=(1, 1, 0), fill=(1, 1, 0))
            annot.set_opacity(0.3)
            annot.update()

            # Label drawn in the same band; rotation follows the page so the
            # text stays upright on rotated pages.
            annot1 = page.add_freetext_annot(
                new_rect,
                label,
                fontsize=15,
                fontname='helv',
                text_color=(1, 0, 0),
                rotate=page.rotation,
                align=2,  # right alignment
            )
            annot1.update()
|
|
|
|
|
|
|
| 565 |
|
| 566 |
def get_leaf_headers_with_paths(listtoloop, path=None, output=None):
|
| 567 |
if path is None:
|
|
|
|
| 739 |
if combined_line_norm and combined_line_norm in paths[-2]:
|
| 740 |
print(combined_line_norm)
|
| 741 |
headertoContinue2 = combined_line_norm
|
| 742 |
+
if 'installation' in paths[-2].lower() or 'execution' in paths[-2].lower() or 'miscellaneous items' in paths[-2].lower() :
|
| 743 |
+
stringtowrite='Not to be billed'
|
| 744 |
+
else:
|
| 745 |
+
stringtowrite='To be billed'
|
| 746 |
# Optimized header matching
|
| 747 |
existsfull = (
|
| 748 |
( combined_line_norm in allchildrenheaders_set or
|
|
|
|
| 839 |
"Author": "ADR",
|
| 840 |
"Creation Date": formatted_time,
|
| 841 |
"Layer": "Initial",
|
| 842 |
+
"Code": stringtowrite,
|
| 843 |
"head above 1": paths[-2],
|
| 844 |
"head above 2": paths[0]
|
| 845 |
}
|
|
|
|
| 935 |
"Author": "ADR",
|
| 936 |
"Creation Date": formatted_time,
|
| 937 |
"Layer": "Initial",
|
| 938 |
+
"Code": stringtowrite,
|
| 939 |
"head above 1": paths[-2],
|
| 940 |
"head above 2": paths[0]
|
| 941 |
}
|
|
|
|
| 976 |
for page_num, bbox in current_bbox.items():
|
| 977 |
bbox[3] = last_y1s.get(page_num, bbox[3])
|
| 978 |
page_highlights[page_num] = bbox
|
| 979 |
+
highlight_boxes(docHighlights, page_highlights,stringtowrite)
|
| 980 |
|
| 981 |
break_collecting = True
|
| 982 |
break
|
|
|
|
| 1012 |
for page_num, bbox in current_bbox.items():
|
| 1013 |
bbox[3] = last_y1s.get(page_num, bbox[3])
|
| 1014 |
page_highlights[page_num] = bbox
|
| 1015 |
+
if 'installation' in paths[-2].lower() or 'execution' in paths[-2].lower() or 'miscellaneous items' in paths[-2].lower() :
|
| 1016 |
+
stringtowrite='Not to be billed'
|
| 1017 |
+
else:
|
| 1018 |
+
stringtowrite='To be billed'
|
| 1019 |
+
highlight_boxes(docHighlights, page_highlights,stringtowrite)
|
| 1020 |
|
| 1021 |
# docHighlights.save("highlighted_output.pdf", garbage=4, deflate=True)
|
| 1022 |
|