Marthee committed on
Commit
523a76f
·
verified ·
1 Parent(s): 39e340e

Update InitialMarkups.py

Browse files
Files changed (1) hide show
  1. InitialMarkups.py +14 -7
InitialMarkups.py CHANGED
@@ -528,7 +528,7 @@ def print_tree_with_numbers(headers, indent=0):
528
  print_tree_with_numbers(header["children"], indent + 1)
529
 
530
 
531
- def highlight_boxes(doc, highlights, fixed_width=500): # Set your desired width here
532
  for page_num, bbox in highlights.items():
533
  page = doc.load_page(page_num)
534
  page_width = page.rect.width
@@ -551,7 +551,7 @@ def highlight_boxes(doc, highlights, fixed_width=500): # Set your desired width
551
  annot.update()
552
 
553
  # Add right-aligned freetext annotation inside the fixed-width box
554
- text = "[To be billed]"
555
  annot1 = page.add_freetext_annot(
556
  new_rect,
557
  text,
@@ -739,7 +739,10 @@ def extract_section_under_header(pdf_path):
739
  if combined_line_norm and combined_line_norm in paths[-2]:
740
  print(combined_line_norm)
741
  headertoContinue2 = combined_line_norm
742
-
 
 
 
743
  # Optimized header matching
744
  existsfull = (
745
  ( combined_line_norm in allchildrenheaders_set or
@@ -836,7 +839,7 @@ def extract_section_under_header(pdf_path):
836
  "Author": "ADR",
837
  "Creation Date": formatted_time,
838
  "Layer": "Initial",
839
- "Code": "to be added",
840
  "head above 1": paths[-2],
841
  "head above 2": paths[0]
842
  }
@@ -932,7 +935,7 @@ def extract_section_under_header(pdf_path):
932
  "Author": "ADR",
933
  "Creation Date": formatted_time,
934
  "Layer": "Initial",
935
- "Code": "to be added",
936
  "head above 1": paths[-2],
937
  "head above 2": paths[0]
938
  }
@@ -973,7 +976,7 @@ def extract_section_under_header(pdf_path):
973
  for page_num, bbox in current_bbox.items():
974
  bbox[3] = last_y1s.get(page_num, bbox[3])
975
  page_highlights[page_num] = bbox
976
- highlight_boxes(docHighlights, page_highlights)
977
 
978
  break_collecting = True
979
  break
@@ -1009,7 +1012,11 @@ def extract_section_under_header(pdf_path):
1009
  for page_num, bbox in current_bbox.items():
1010
  bbox[3] = last_y1s.get(page_num, bbox[3])
1011
  page_highlights[page_num] = bbox
1012
- highlight_boxes(docHighlights, page_highlights)
 
 
 
 
1013
 
1014
  # docHighlights.save("highlighted_output.pdf", garbage=4, deflate=True)
1015
 
 
528
  print_tree_with_numbers(header["children"], indent + 1)
529
 
530
 
531
+ def highlight_boxes(doc, highlights, stringtowrite, fixed_width=500): # Set your desired width here
532
  for page_num, bbox in highlights.items():
533
  page = doc.load_page(page_num)
534
  page_width = page.rect.width
 
551
  annot.update()
552
 
553
  # Add right-aligned freetext annotation inside the fixed-width box
554
+ text = '['+stringtowrite +']'
555
  annot1 = page.add_freetext_annot(
556
  new_rect,
557
  text,
 
739
  if combined_line_norm and combined_line_norm in paths[-2]:
740
  print(combined_line_norm)
741
  headertoContinue2 = combined_line_norm
742
+ if 'installation' in paths[-2].lower() or 'execution' in paths[-2].lower() or 'miscellaneous items' in paths[-2].lower() :
743
+ stringtowrite='Not to be billed'
744
+ else:
745
+ stringtowrite='To be billed'
746
  # Optimized header matching
747
  existsfull = (
748
  ( combined_line_norm in allchildrenheaders_set or
 
839
  "Author": "ADR",
840
  "Creation Date": formatted_time,
841
  "Layer": "Initial",
842
+ "Code": stringtowrite,
843
  "head above 1": paths[-2],
844
  "head above 2": paths[0]
845
  }
 
935
  "Author": "ADR",
936
  "Creation Date": formatted_time,
937
  "Layer": "Initial",
938
+ "Code": stringtowrite,
939
  "head above 1": paths[-2],
940
  "head above 2": paths[0]
941
  }
 
976
  for page_num, bbox in current_bbox.items():
977
  bbox[3] = last_y1s.get(page_num, bbox[3])
978
  page_highlights[page_num] = bbox
979
+ highlight_boxes(docHighlights, page_highlights,stringtowrite)
980
 
981
  break_collecting = True
982
  break
 
1012
  for page_num, bbox in current_bbox.items():
1013
  bbox[3] = last_y1s.get(page_num, bbox[3])
1014
  page_highlights[page_num] = bbox
1015
+ if 'installation' in paths[-2].lower() or 'execution' in paths[-2].lower() or 'miscellaneous items' in paths[-2].lower() :
1016
+ stringtowrite='Not to be billed'
1017
+ else:
1018
+ stringtowrite='To be billed'
1019
+ highlight_boxes(docHighlights, page_highlights,stringtowrite)
1020
 
1021
  # docHighlights.save("highlighted_output.pdf", garbage=4, deflate=True)
1022