Spaces:
Paused
Paused
Update InitialMarkups.py
Browse files- InitialMarkups.py +14 -7
InitialMarkups.py
CHANGED
|
@@ -528,7 +528,7 @@ def print_tree_with_numbers(headers, indent=0):
|
|
| 528 |
print_tree_with_numbers(header["children"], indent + 1)
|
| 529 |
|
| 530 |
|
| 531 |
-
def highlight_boxes(doc, highlights, fixed_width=500): # Set your desired width here
|
| 532 |
for page_num, bbox in highlights.items():
|
| 533 |
page = doc.load_page(page_num)
|
| 534 |
page_width = page.rect.width
|
|
@@ -551,7 +551,7 @@ def highlight_boxes(doc, highlights, fixed_width=500): # Set your desired width
|
|
| 551 |
annot.update()
|
| 552 |
|
| 553 |
# Add right-aligned freetext annotation inside the fixed-width box
|
| 554 |
-
text =
|
| 555 |
annot1 = page.add_freetext_annot(
|
| 556 |
new_rect,
|
| 557 |
text,
|
|
@@ -739,7 +739,10 @@ def extract_section_under_header(pdf_path):
|
|
| 739 |
if combined_line_norm and combined_line_norm in paths[-2]:
|
| 740 |
print(combined_line_norm)
|
| 741 |
headertoContinue2 = combined_line_norm
|
| 742 |
-
|
|
|
|
|
|
|
|
|
|
| 743 |
# Optimized header matching
|
| 744 |
existsfull = (
|
| 745 |
( combined_line_norm in allchildrenheaders_set or
|
|
@@ -836,7 +839,7 @@ def extract_section_under_header(pdf_path):
|
|
| 836 |
"Author": "ADR",
|
| 837 |
"Creation Date": formatted_time,
|
| 838 |
"Layer": "Initial",
|
| 839 |
-
"Code":
|
| 840 |
"head above 1": paths[-2],
|
| 841 |
"head above 2": paths[0]
|
| 842 |
}
|
|
@@ -932,7 +935,7 @@ def extract_section_under_header(pdf_path):
|
|
| 932 |
"Author": "ADR",
|
| 933 |
"Creation Date": formatted_time,
|
| 934 |
"Layer": "Initial",
|
| 935 |
-
"Code":
|
| 936 |
"head above 1": paths[-2],
|
| 937 |
"head above 2": paths[0]
|
| 938 |
}
|
|
@@ -973,7 +976,7 @@ def extract_section_under_header(pdf_path):
|
|
| 973 |
for page_num, bbox in current_bbox.items():
|
| 974 |
bbox[3] = last_y1s.get(page_num, bbox[3])
|
| 975 |
page_highlights[page_num] = bbox
|
| 976 |
-
highlight_boxes(docHighlights, page_highlights)
|
| 977 |
|
| 978 |
break_collecting = True
|
| 979 |
break
|
|
@@ -1009,7 +1012,11 @@ def extract_section_under_header(pdf_path):
|
|
| 1009 |
for page_num, bbox in current_bbox.items():
|
| 1010 |
bbox[3] = last_y1s.get(page_num, bbox[3])
|
| 1011 |
page_highlights[page_num] = bbox
|
| 1012 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1013 |
|
| 1014 |
# docHighlights.save("highlighted_output.pdf", garbage=4, deflate=True)
|
| 1015 |
|
|
|
|
| 528 |
print_tree_with_numbers(header["children"], indent + 1)
|
| 529 |
|
| 530 |
|
| 531 |
+
def highlight_boxes(doc, highlights, stringtowrite, fixed_width=500): # Set your desired width here
|
| 532 |
for page_num, bbox in highlights.items():
|
| 533 |
page = doc.load_page(page_num)
|
| 534 |
page_width = page.rect.width
|
|
|
|
| 551 |
annot.update()
|
| 552 |
|
| 553 |
# Add right-aligned freetext annotation inside the fixed-width box
|
| 554 |
+
text = '['+stringtowrite +']'
|
| 555 |
annot1 = page.add_freetext_annot(
|
| 556 |
new_rect,
|
| 557 |
text,
|
|
|
|
| 739 |
if combined_line_norm and combined_line_norm in paths[-2]:
|
| 740 |
print(combined_line_norm)
|
| 741 |
headertoContinue2 = combined_line_norm
|
| 742 |
+
if 'installation' in paths[-2].lower() or 'execution' in paths[-2].lower() or 'miscellaneous items' in paths[-2].lower() :
|
| 743 |
+
stringtowrite='Not to be billed'
|
| 744 |
+
else:
|
| 745 |
+
stringtowrite='To be billed'
|
| 746 |
# Optimized header matching
|
| 747 |
existsfull = (
|
| 748 |
( combined_line_norm in allchildrenheaders_set or
|
|
|
|
| 839 |
"Author": "ADR",
|
| 840 |
"Creation Date": formatted_time,
|
| 841 |
"Layer": "Initial",
|
| 842 |
+
"Code": stringtowrite,
|
| 843 |
"head above 1": paths[-2],
|
| 844 |
"head above 2": paths[0]
|
| 845 |
}
|
|
|
|
| 935 |
"Author": "ADR",
|
| 936 |
"Creation Date": formatted_time,
|
| 937 |
"Layer": "Initial",
|
| 938 |
+
"Code": stringtowrite,
|
| 939 |
"head above 1": paths[-2],
|
| 940 |
"head above 2": paths[0]
|
| 941 |
}
|
|
|
|
| 976 |
for page_num, bbox in current_bbox.items():
|
| 977 |
bbox[3] = last_y1s.get(page_num, bbox[3])
|
| 978 |
page_highlights[page_num] = bbox
|
| 979 |
+
highlight_boxes(docHighlights, page_highlights,stringtowrite)
|
| 980 |
|
| 981 |
break_collecting = True
|
| 982 |
break
|
|
|
|
| 1012 |
for page_num, bbox in current_bbox.items():
|
| 1013 |
bbox[3] = last_y1s.get(page_num, bbox[3])
|
| 1014 |
page_highlights[page_num] = bbox
|
| 1015 |
+
if 'installation' in paths[-2].lower() or 'execution' in paths[-2].lower() or 'miscellaneous items' in paths[-2].lower() :
|
| 1016 |
+
stringtowrite='Not to be billed'
|
| 1017 |
+
else:
|
| 1018 |
+
stringtowrite='To be billed'
|
| 1019 |
+
highlight_boxes(docHighlights, page_highlights,stringtowrite)
|
| 1020 |
|
| 1021 |
# docHighlights.save("highlighted_output.pdf", garbage=4, deflate=True)
|
| 1022 |
|