Marthee committed on
Commit
e3d372c
·
verified ·
1 Parent(s): 03ac6aa

Update InitialMarkups.py

Browse files
Files changed (1) hide show
  1. InitialMarkups.py +42 -38
InitialMarkups.py CHANGED
@@ -527,44 +527,41 @@ def print_tree_with_numbers(headers, indent=0):
527
  f"(Level {header['level']}, p:{header['page']+1}, {size_info})")
528
  print_tree_with_numbers(header["children"], indent + 1)
529
 
530
- def highlight_boxes(doc, highlights):
531
- for page_num, bbox in highlights.items():
532
 
 
 
533
  page = doc.load_page(page_num)
534
  page_width = page.rect.width
535
- rect = fitz.Rect(bbox)
536
- # Get the original bounding box
537
  orig_rect = fitz.Rect(bbox)
538
- rect_width = orig_rect.width
539
  rect_height = orig_rect.height
540
- if rect_width>10:
541
- annot = page.add_rect_annot(rect)
542
-
543
- annot.set_colors(stroke=(1,1,0), fill=(1,1,0))
544
- annot.set_opacity(0.3)
545
- annot.update()
546
-
547
-
548
- # Calculate new x coordinates so the rect is centered on the page width
549
- center_x = page_width / 2
550
- new_x0 = center_x - rect_width / 2
551
- new_x1 = center_x + rect_width / 2
552
-
553
- # Create new rect centered on the page's x-center, preserving y-coordinates
554
- new_rect = fitz.Rect(new_x0, orig_rect.y0, new_x1, orig_rect.y1)
555
-
556
- # Add centered text annotation
557
- text = "[To be billed]"
558
- annot1 = page.add_freetext_annot(
559
- new_rect,
560
- text,
561
- fontsize=15,
562
- fontname='helv',
563
- text_color=(1, 0, 0),
564
- rotate=page.rotation,
565
- align=1 # centered alignment
566
- )
567
- annot1.update()
568
 
569
  def get_leaf_headers_with_paths(listtoloop, path=None, output=None):
570
  if path is None:
@@ -742,7 +739,10 @@ def extract_section_under_header(pdf_path):
742
  if combined_line_norm and combined_line_norm in paths[-2]:
743
  print(combined_line_norm)
744
  headertoContinue2 = combined_line_norm
745
-
 
 
 
746
  # Optimized header matching
747
  existsfull = (
748
  ( combined_line_norm in allchildrenheaders_set or
@@ -839,7 +839,7 @@ def extract_section_under_header(pdf_path):
839
  "Author": "ADR",
840
  "Creation Date": formatted_time,
841
  "Layer": "Initial",
842
- "Code": "to be added",
843
  "head above 1": paths[-2],
844
  "head above 2": paths[0]
845
  }
@@ -935,7 +935,7 @@ def extract_section_under_header(pdf_path):
935
  "Author": "ADR",
936
  "Creation Date": formatted_time,
937
  "Layer": "Initial",
938
- "Code": "to be added",
939
  "head above 1": paths[-2],
940
  "head above 2": paths[0]
941
  }
@@ -976,7 +976,7 @@ def extract_section_under_header(pdf_path):
976
  for page_num, bbox in current_bbox.items():
977
  bbox[3] = last_y1s.get(page_num, bbox[3])
978
  page_highlights[page_num] = bbox
979
- highlight_boxes(docHighlights, page_highlights)
980
 
981
  break_collecting = True
982
  break
@@ -1012,7 +1012,11 @@ def extract_section_under_header(pdf_path):
1012
  for page_num, bbox in current_bbox.items():
1013
  bbox[3] = last_y1s.get(page_num, bbox[3])
1014
  page_highlights[page_num] = bbox
1015
- highlight_boxes(docHighlights, page_highlights)
 
 
 
 
1016
 
1017
  # docHighlights.save("highlighted_output.pdf", garbage=4, deflate=True)
1018
 
 
527
  f"(Level {header['level']}, p:{header['page']+1}, {size_info})")
528
  print_tree_with_numbers(header["children"], indent + 1)
529
 
 
 
530
 
531
def highlight_boxes(doc, highlights, stringtowrite, fixed_width=500):
    """Draw a centred highlight band and a bracketed label on each page.

    For every ``page_num -> bbox`` entry in ``highlights`` whose bbox is
    wider than 10 units, a yellow rectangle annotation (opacity 0.3) is
    added. The rectangle keeps the bbox's vertical extent (y0/y1) but is
    ``fixed_width`` wide and centred horizontally on the page. A red,
    right-aligned free-text annotation reading ``[<stringtowrite>]`` is
    added over the same rectangle.

    Args:
        doc: Document object exposing ``load_page(page_num)``
            (presumably a PyMuPDF document -- confirm against the caller).
        highlights: Mapping of page number to a bbox accepted by
            ``fitz.Rect`` (x0, y0, x1, y1).
        stringtowrite: Label text, without the surrounding brackets.
        fixed_width: Width of the highlight band. It is clamped to the page
            width so the band never extends past the page edges.

    Returns:
        None. The annotations are added to ``doc`` in place.
    """
    # The label is the same for every page, so build it once.
    label = f"[{stringtowrite}]"

    for page_num, bbox in highlights.items():
        page = doc.load_page(page_num)
        page_width = page.rect.width

        # Only the vertical coordinates of the original bbox are reused.
        orig_rect = fitz.Rect(bbox)
        if orig_rect.width <= 10:
            # Too narrow to be a real section box; skip it.
            continue

        # Centre the band horizontally; never let it be wider than the page.
        band_width = min(fixed_width, page_width)
        center_x = page_width / 2
        new_rect = fitz.Rect(
            center_x - band_width / 2,
            orig_rect.y0,
            center_x + band_width / 2,
            orig_rect.y1,
        )

        # Semi-transparent yellow highlight rectangle.
        annot = page.add_rect_annot(new_rect)
        annot.set_colors(stroke=(1, 1, 0), fill=(1, 1, 0))
        annot.set_opacity(0.3)
        annot.update()

        # Right-aligned red label inside the same band.
        annot1 = page.add_freetext_annot(
            new_rect,
            label,
            fontsize=15,
            fontname='helv',
            text_color=(1, 0, 0),
            rotate=page.rotation,
            align=2,  # right alignment
        )
        annot1.update()
 
 
565
 
566
  def get_leaf_headers_with_paths(listtoloop, path=None, output=None):
567
  if path is None:
 
739
  if combined_line_norm and combined_line_norm in paths[-2]:
740
  print(combined_line_norm)
741
  headertoContinue2 = combined_line_norm
742
+ if 'installation' in paths[-2].lower() or 'execution' in paths[-2].lower() or 'miscellaneous items' in paths[-2].lower() :
743
+ stringtowrite='Not to be billed'
744
+ else:
745
+ stringtowrite='To be billed'
746
  # Optimized header matching
747
  existsfull = (
748
  ( combined_line_norm in allchildrenheaders_set or
 
839
  "Author": "ADR",
840
  "Creation Date": formatted_time,
841
  "Layer": "Initial",
842
+ "Code": stringtowrite,
843
  "head above 1": paths[-2],
844
  "head above 2": paths[0]
845
  }
 
935
  "Author": "ADR",
936
  "Creation Date": formatted_time,
937
  "Layer": "Initial",
938
+ "Code": stringtowrite,
939
  "head above 1": paths[-2],
940
  "head above 2": paths[0]
941
  }
 
976
  for page_num, bbox in current_bbox.items():
977
  bbox[3] = last_y1s.get(page_num, bbox[3])
978
  page_highlights[page_num] = bbox
979
+ highlight_boxes(docHighlights, page_highlights,stringtowrite)
980
 
981
  break_collecting = True
982
  break
 
1012
  for page_num, bbox in current_bbox.items():
1013
  bbox[3] = last_y1s.get(page_num, bbox[3])
1014
  page_highlights[page_num] = bbox
1015
+ if 'installation' in paths[-2].lower() or 'execution' in paths[-2].lower() or 'miscellaneous items' in paths[-2].lower() :
1016
+ stringtowrite='Not to be billed'
1017
+ else:
1018
+ stringtowrite='To be billed'
1019
+ highlight_boxes(docHighlights, page_highlights,stringtowrite)
1020
 
1021
  # docHighlights.save("highlighted_output.pdf", garbage=4, deflate=True)
1022