Marthee committed on
Commit
e3556e3
·
verified ·
1 Parent(s): 4c10f56

Update Doors_Schedule.py

Browse files
Files changed (1) hide show
  1. Doors_Schedule.py +615 -77
Doors_Schedule.py CHANGED
@@ -34,11 +34,8 @@ def calculate_midpoint(x1,y1,x2,y2):
34
  ym = int((y1 + y2) / 2)
35
  return (xm, ym)
36
 
37
- def read_text(input_pdf_path,pdf_content=0):
38
- if pdf_content:
39
- pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
40
- else:
41
- pdf_document = fitz.open('pdf',input_pdf_path)
42
 
43
  for page_num in range(pdf_document.page_count):
44
  page = pdf_document[page_num]
@@ -205,11 +202,8 @@ def get_column_index(col_matches):
205
  return idx
206
 
207
 
208
- def extract_tables(schedule,schedule_content=0):
209
- if schedule_content:
210
- doc = fitz.open(stream=schedule_content, filetype="pdf")
211
- else:
212
- doc = fitz.open("pdf",schedule)
213
  for page in doc:
214
  tabs = page.find_tables()
215
  dfs = []
@@ -471,17 +465,552 @@ def get_widths_bb_format(cleaned_width, kelma):
471
  widths.append(full_text)
472
  return widths
473
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
474
  import fitz # PyMuPDF
475
  import PyPDF2
476
  import io
477
  from PyPDF2.generic import TextStringObject # ✅ Required for setting string values
478
 
479
- def add_bluebeam_count_annotations(pdf_bytes, locations,pdf_content=0):
480
- if pdf_content:
481
- pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
482
- else:
483
- pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
484
- pdf_document = fitz.open("pdf", pdf_stream.read()) # Open PDF in memory
485
 
486
  page = pdf_document[0] # First page
487
  if len(locations[0]) == 3:
@@ -607,16 +1136,13 @@ def merge_pdf_bytes_list(pdfs):
607
 
608
  return output_stream.read()
609
 
610
- def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors,pdf_content=0):
611
  #Load original PDF
612
  # with open(input_pdf_path, "rb") as file:
613
  # original_pdf_bytes = file.read()
614
 
615
  #Add Bluebeam-compatible count annotations
616
- if pdf_content:
617
- annotated_pdf_bytes = add_bluebeam_count_annotations(input_pdf_path, locations,pdf_content)
618
- else:
619
- annotated_pdf_bytes = add_bluebeam_count_annotations(input_pdf_path, locations)
620
 
621
  #Modify author field using PyPDF2
622
  final_pdf_bytes = modify_author_in_pypdf2(annotated_pdf_bytes, new_authors)
@@ -625,69 +1151,81 @@ def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors,pdf_cont
625
  # with open(output_pdf_path, "wb") as file:
626
  # file.write(final_pdf_bytes)
627
 
628
- def mainRun(schedule, plan, searcharray,pdf_content=0,schedule_content=0):
629
  #print(type(plan))
630
- if pdf_content==0:
631
- eltype = type(plan)
632
- print(f"el type beta3 variable plan:: {eltype}")
633
- len_plan = len(plan)
634
- print(f"length of the plan's array is: {len_plan}")
635
- p1_type = type(plan[0])
636
- print(f"el mawgood fe p[0]: {p1_type}")
637
 
638
  user_input = get_user_input(searcharray)
639
- if schedule_content:
640
- dfs = extract_tables(schedule,schedule_content)
641
- else:
642
- dfs = extract_tables(schedule)
643
-
644
- selected_columns_new = get_selected_columns(dfs, user_input)
645
-
646
- kelma = get_st_op_pattern(selected_columns_new, user_input)
647
- col_dict = get_similar_colors(selected_columns_new)
648
- flattened_list = get_flattened_tuples_list(col_dict)
649
-
650
- pdfs = []
651
- for p in plan:
652
- print(f" p in plan is {type(p)}")
653
- print(p)
654
- if pdf_content:
655
- plan_texts = read_text(p,pdf_content)
656
- else:
657
- plan_texts = read_text(p)
658
- locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
659
- new_data = get_cleaned_data(locations)
660
- repeated_labels = get_repeated_labels(locations)
661
- if kelma == None:
662
- widths = get_width_info_tobeprinted(new_data)
663
- else:
664
- width_info_tobeprinted = get_width_info_tobeprinted(new_data)
665
- cleaned_width = get_cleaned_width(width_info_tobeprinted)
666
- widths = get_widths_bb_format(cleaned_width, kelma)
667
- if pdf_content:
668
- final_pdf_bytes = process_pdf(p, "final_output_width.pdf", new_data, widths,pdf_content)
669
- else:
670
- final_pdf_bytes = process_pdf(p, "final_output_width.pdf", new_data, widths)
671
- pdfs.append(final_pdf_bytes)
672
 
673
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
674
 
675
-
676
- #plan_texts = read_text(plan)
677
- #locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
678
- #new_data = get_cleaned_data(locations)
679
- #repeated_labels = get_repeated_labels(locations)
680
- #if kelma == None:
681
- # widths = get_width_info_tobeprinted(new_data)
682
- #else:
683
- # width_info_tobeprinted = get_width_info_tobeprinted(new_data)
684
- # cleaned_width = get_cleaned_width(width_info_tobeprinted)
685
- # widths = get_widths_bb_format(cleaned_width, kelma)
 
 
 
 
 
 
 
 
 
 
 
 
686
  if selected_columns_new.shape[1] == 2:
687
  widths = []
688
- #for j in range(len(locations)):
689
- # widths.append("Dimensions not found in schedule")
690
- #final_pdf_bytes= process_pdf(plan, "final_output_width.pdf", new_data, widths)
691
  merged_pdf = merge_pdf_bytes_list(pdfs)
692
  print(f"number of pges of merged_pdf is {len(merged_pdf)} and its type is {type(merged_pdf)}")
693
  not_found = []
 
34
  ym = int((y1 + y2) / 2)
35
  return (xm, ym)
36
 
37
+ def read_text(input_pdf_path):
38
+ pdf_document = fitz.open('pdf',input_pdf_path)
 
 
 
39
 
40
  for page_num in range(pdf_document.page_count):
41
  page = pdf_document[page_num]
 
202
  return idx
203
 
204
 
205
+ def extract_tables(schedule):
206
+ doc = fitz.open("pdf",schedule)
 
 
 
207
  for page in doc:
208
  tabs = page.find_tables()
209
  dfs = []
 
465
  widths.append(full_text)
466
  return widths
467
 
468
+
469
def get_secondary_info(dfs, user_patterns):
    """Select the schedule columns that carry the secondary information.

    Every table in ``dfs`` is searched for ``user_patterns`` with
    ``flexible_search`` and the hits are split by ``analyse_cell_columns``
    into cell matches and column-header matches.  When the number of matches
    equals the number of patterns, a helper chosen by the table's row count
    builds the result.

    Args:
        dfs: Sequence of tables extracted from the schedule.
        user_patterns: The secondary column names/patterns to look for.

    Returns:
        Whatever the last helper that ran returned, or ``None`` when no table
        qualified.
    """
    picked = None
    clmn_name = user_patterns

    for i, table in enumerate(dfs):
        appearance = flexible_search(table, user_patterns)
        cell_matches, col_matches = analyse_cell_columns(appearance)

        if len(cell_matches) == 0 and len(col_matches) == 0:
            print(f"this is df {i}, SEARCH IN ANOTHER DF")
            continue

        # Patterns found among the column headers.  No ``break`` here, so a
        # later table may still overwrite the result.
        if len(col_matches) == len(user_patterns):
            column_index_list = get_column_index(col_matches)
            print(f"this is df {i} mawgooda fel columns, check el df length 3ashan law el details fe table tany")
            print(column_index_list)
            # NOTE(review): a table with exactly 10 rows matches neither
            # check below and is silently ignored -- confirm this is intended.
            if len(table) < 10:
                # Short table: the details presumably live in another table.
                picked = details_in_another_table(clmn_name, column_index_list, table, dfs)
            if len(table) > 10:
                picked = generate_current_table_without_cropping(column_index_list, table)

        # Patterns found inside the table cells; the first hit ends the search.
        if len(cell_matches) == len(user_patterns):
            row_index_list, column_index_list = get_row_column_indices(cell_matches)
            print(f"this is df {i} mawgooda fel cells, check el df length 3ashan law el details fe table tany")

            if len(table) < 10:
                picked = details_in_another_table(clmn_name, column_index_list, table, dfs)
                break
            if len(table) > 10:
                print(f"this is df {i} call crop_rename_table(indices, clmn_name, clmn_idx,df)")
                picked = crop_rename_table(row_index_list, clmn_name, column_index_list, table)
                break

    return picked
518
+
519
def get_similar_colors_secondary(selected_columns_new, user_input):
    """Group schedule rows by ``door_type`` and give each type a random colour.

    Args:
        selected_columns_new: DataFrame with at least ``door_type`` and
            ``door_id`` columns, optionally ``structural_opening`` or
            ``width``/``height``, plus the secondary columns.
        user_input: List of user-supplied names; every non-``None`` entry
            from index 4 onwards is treated as an extra (secondary) field.

    Returns:
        A plain dict mapping each door type to a dict holding ``values``
        (door ids), ``color`` (an RGB tuple), optional ``widths``/``heights``
        lists and one list per extra field.
    """
    frame = selected_columns_new
    has_structural_opening = 'structural_opening' in frame.columns
    # Width/height lists are only kept when there is no structural opening
    # column and the frame has more than two columns.
    has_dimensions = (not has_structural_opening) and frame.shape[1] > 2

    # One random RGB triple per distinct door type, drawn in order of first
    # appearance.
    palette = {}
    for door_type in frame['door_type'].unique():
        palette[door_type] = (
            random.randint(0, 255),
            random.randint(0, 255),
            random.randint(0, 255),
        )

    # Only actual None values are excluded; empty-string names are allowed.
    extras = [name for name in user_input[4:] if name is not None]

    def _blank_entry():
        entry = {'values': [], 'color': None}
        if has_structural_opening or has_dimensions:
            entry['widths'] = []
        if has_dimensions:
            entry['heights'] = []
        for name in extras:
            entry[name] = []
        return entry

    grouped = defaultdict(_blank_entry)

    for _, record in frame.iterrows():
        door_type = record['door_type']
        grouped[door_type]['values'].append(record['door_id'])

        if has_structural_opening:
            grouped[door_type]['widths'].append(record['structural_opening'])
        elif has_dimensions:
            grouped[door_type]['widths'].append(record.get('width', 0))
            grouped[door_type]['heights'].append(record.get('height', 0))

        for name in extras:
            grouped[door_type][name].append(record.get(name, None))

        grouped[door_type]['color'] = palette[door_type]

    return dict(grouped)
561
+
562
def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
    """Split cleaned location tuples into printable size text and secondary values.

    Each entry of ``new_data`` is laid out as
    ``(coords, label, [width, [height]], color, acoustic[, fire])`` where the
    number of size fields is ``len(main_info) - 2`` and the number of
    secondary fields is ``len(secondary_info)``.

    Args:
        new_data: Iterable of location tuples in the layout above.
        main_info: Main search fields; only its length (2, 3 or 4) is used.
        secondary_info: Secondary search fields; only its length (1 or 2)
            is used.

    Returns:
        ``(width_info_tobeprinted, secondary_info_tobeprinted)``.  The first
        list is empty for 2 main fields, holds the raw width for 3, and a
        ``"<w> mm wide x <h> mm high"`` string for 4.  The second list holds
        the single secondary value, or an ``(acoustic, fire)`` tuple when two
        secondary fields are requested.  Both lists are empty for any other
        combination of lengths.

    Raises:
        ValueError: If an entry does not have the expected number of fields,
            or a width/height cannot be parsed as a number.
    """
    width_info_tobeprinted = []
    secondary_info_tobeprinted = []

    n_main = len(main_info)
    n_secondary = len(secondary_info)
    if n_main not in (2, 3, 4) or n_secondary not in (1, 2):
        return width_info_tobeprinted, secondary_info_tobeprinted

    # coords + label + size fields + color + secondary fields
    expected_len = n_main + 1 + n_secondary

    for entry in new_data:
        if len(entry) != expected_len:
            raise ValueError(
                f"expected {expected_len} fields per location entry, got {len(entry)}"
            )

        if n_main == 3:
            width_info_tobeprinted.append(entry[2])
        elif n_main == 4:
            w = _dimension_to_int(entry[2])
            h = _dimension_to_int(entry[3])
            width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")

        # The secondary values sit right after the color field.
        secondary = tuple(entry[n_main + 1:])
        secondary_info_tobeprinted.append(
            secondary[0] if n_secondary == 1 else secondary
        )

    return width_info_tobeprinted, secondary_info_tobeprinted


def _dimension_to_int(value):
    """Parse a dimension such as ``"1,200.0"`` (or a plain number) to an int."""
    # str() lets numeric cells through; re.sub alone rejects non-strings.
    return int(float(re.sub(r",", "", str(value))))
605
+
606
def get_flattened_tuples_list_SECONDARY(col_dict):
    """Flatten the per-door-type dictionary into one list of row tuples.

    For every door type the parallel lists in its dictionary are zipped into
    tuples laid out as ``(value, [width, [height]], color, <secondary...>)``.
    ``width`` is included when the dictionary has ``'widths'`` and ``height``
    only when it has both ``'widths'`` and ``'heights'``.

    The secondary part holds one value per secondary column actually present
    in the dictionary: the first key containing "acoustic" and the first key
    containing "fire" (case-insensitive), in that order.  Emitting only the
    columns that exist keeps the tuple length in step with the consumers that
    unpack a single secondary value when one secondary field was requested.
    When neither key is present the previous behaviour is kept and two
    ``None`` placeholders are emitted.

    Args:
        col_dict: Mapping of door type to a dict with ``'values'``,
            ``'color'``, optional ``'widths'``/``'heights'`` and the
            secondary columns.

    Returns:
        A flat list of tuples, door types in ``col_dict`` order.
    """
    flattened_list = []

    for values_dict in col_dict.values():
        labels = values_dict['values']

        acoustic_key = next((k for k in values_dict if 'acoustic' in k.lower()), None)
        fire_key = next((k for k in values_dict if 'fire' in k.lower()), None)

        secondary_columns = [
            values_dict[key] for key in (acoustic_key, fire_key) if key is not None
        ]
        if not secondary_columns:
            # Legacy fallback: no secondary column at all -> (None, None).
            secondary_columns = [[None] * len(labels), [None] * len(labels)]

        main_columns = [labels]
        if 'widths' in values_dict:
            main_columns.append(values_dict['widths'])
            if 'heights' in values_dict:
                main_columns.append(values_dict['heights'])

        color = values_dict["color"]
        n_main = len(main_columns)
        for row in zip(*main_columns, *secondary_columns):
            # The color goes between the main fields and the secondary ones.
            flattened_list.append(row[:n_main] + (color,) + row[n_main:])

    return flattened_list
651
+
652
def get_word_locations_plan_secondary(flattened_list, plan_texts, main_info, secondary_info):
    """Look up every schedule label on the plan, keeping its secondary data.

    Used when secondary information is present.  Each item of
    ``flattened_list`` is laid out as
    ``(label, [width, [height]], color, acoustic[, fire])``; its length must
    equal ``len(main_info) + len(secondary_info)``.

    Args:
        flattened_list: Row tuples in the layout above.
        plan_texts: Text extracted from the plan, passed to
            ``find_text_in_plan``.
        main_info: Main search fields; only its length (2, 3 or 4) is used.
        secondary_info: Secondary search fields; only its length (1 or 2)
            is used.

    Returns:
        ``(locations, not_found)``.  ``locations`` holds one tuple per item:
        the location returned by ``find_text_in_plan`` followed by the item's
        own fields.  ``not_found`` lists the labels whose location was empty.
        Both are empty for any other combination of lengths.

    Raises:
        ValueError: If an item does not have the expected number of fields.
    """
    locations = []
    not_found = []
    len_main = len(main_info)            # 3 or 4, sometimes 2
    len_secondary = len(secondary_info)  # 2 or 1

    if len_main not in (2, 3, 4) or len_secondary not in (1, 2):
        return locations, not_found

    expected_len = len_main + len_secondary

    for item in flattened_list:
        if len(item) != expected_len:
            raise ValueError(
                f"expected {expected_len} fields per flattened item, got {len(item)}"
            )
        lbl = item[0]
        # Only the first of the three returned values is needed here.
        location, _, _ = find_text_in_plan(lbl, plan_texts)
        if len(location) == 0:
            not_found.append(lbl)
        # Labels that were not found are still recorded, with an empty location.
        locations.append((location,) + tuple(item))

    return locations, not_found
705
+
706
+ #SECONDARY
707
def get_cleaned_data_secondary(locations, main_info, secondary_info):
    """Reduce every location entry to a single coordinate.

    Each entry of ``locations`` is laid out as
    ``(coords, label, [width, [height]], color, acoustic[, fire])``.  When a
    label was found at several coordinates, successive entries carrying that
    label take the coordinates in round-robin order.  Entries with exactly
    one coordinate are kept unchanged and entries with no coordinates are
    dropped.

    Args:
        locations: Iterable of location tuples in the layout above.
        main_info: Main search fields; only its length (2, 3 or 4) is used.
        secondary_info: Secondary search fields; only its length (1 or 2)
            is used.

    Returns:
        A list of tuples with the same layout as the input, each with at most
        one coordinate.  Empty for any other combination of lengths.

    Raises:
        ValueError: If an entry does not have the expected number of fields.
    """
    processed = defaultdict(int)
    new_data = []

    n_main = len(main_info)
    n_secondary = len(secondary_info)
    if n_main not in (2, 3, 4) or n_secondary not in (1, 2):
        return new_data

    # coords + label + size fields + color + secondary fields
    expected_len = n_main + 1 + n_secondary

    for entry in locations:
        if len(entry) != expected_len:
            raise ValueError(
                f"expected {expected_len} fields per location entry, got {len(entry)}"
            )
        coords, label = entry[0], entry[1]
        rest = tuple(entry[1:])

        if len(coords) > 1:
            index = processed[label] % len(coords)  # round-robin indexing
            new_data.append(([coords[index]],) + rest)
            processed[label] += 1  # next entry with this label takes the next coordinate
        elif len(coords) == 1:
            new_data.append((coords,) + rest)
        # Entries with no coordinates are dropped.

    return new_data
773
+
774
+ from collections import defaultdict
775
+
776
def get_cleaned_data_gpt(locations):
    """Reduce every location entry to a single coordinate, whatever its width.

    Each entry is a tuple whose first item is the coordinate list and whose
    second item is the label; any further items are carried over untouched.
    When a label has several coordinates, successive entries with that label
    take them in round-robin order.  Entries with one or zero coordinates
    keep their coordinate list as is.

    Args:
        locations: Iterable of tuples ``(coords, label, ...)``.

    Returns:
        A list of tuples with the same trailing items as the input.
    """
    seen = defaultdict(int)
    cleaned = []

    for entry in locations:
        coords, label = entry[0], entry[1]
        has_many = len(coords) > 1

        if has_many:
            chosen = [coords[seen[label] % len(coords)]]
        else:
            chosen = coords
        # The counter only advances for labels with several coordinates.
        seen[label] += int(has_many)

        cleaned.append((chosen,) + entry[1:])

    return cleaned
792
+
793
def get_secondary_tobeprinted_clean(selected_secondary_info, secondary_tobeprinted, secondary_info):
    """Turn raw secondary values into the text printed on the annotations.

    With one secondary field the label ("acoustic rating" / "fire rating") is
    chosen from the column names of ``selected_secondary_info``.  The match
    is case-insensitive so that headers such as ``"FIRE RATING"`` are
    recognised the same way ``get_flattened_tuples_list_SECONDARY`` finds
    them.  With two secondary fields every value is an ``(acoustic, fire)``
    pair and both ratings are printed.

    Args:
        selected_secondary_info: Object exposing ``.columns`` (the selected
            secondary table); only read when one secondary field is used.
        secondary_tobeprinted: Raw values, or ``(acoustic, fire)`` pairs.
        secondary_info: Secondary search fields; only its length is used.

    Returns:
        A list of formatted strings; empty when ``secondary_info`` has
        neither one nor two entries or no column name matches.
    """
    secondary_printed_clean = []

    if len(secondary_info) == 1:
        column_names = [str(col).lower() for col in selected_secondary_info.columns]
        if any('acoustic' in name for name in column_names):
            secondary_printed_clean.extend(
                f"acoustic rating: {acous}" for acous in secondary_tobeprinted
            )
        if any('fire' in name for name in column_names):
            secondary_printed_clean.extend(
                f"fire rating: {fire}" for fire in secondary_tobeprinted
            )
    elif len(secondary_info) == 2:
        for acous, fire in secondary_tobeprinted:
            new_text = f"fire rating: {fire}; acoustic rating: {acous}"
            secondary_printed_clean.append(new_text)
            print(new_text)

    return secondary_printed_clean
810
+
811
def mix_width_secondary(widths, secondary_printed_clean):
    """Join each width text with the secondary text at the same position.

    Args:
        widths: Width/size strings; its length decides how many results
            are produced.
        secondary_printed_clean: Secondary strings, indexed in parallel.

    Returns:
        A list of ``"<width>; <secondary>"`` strings, one per width.

    Raises:
        IndexError: If ``secondary_printed_clean`` is shorter than ``widths``.
    """
    return [
        f"{width}; {secondary_printed_clean[position]}"
        for position, width in enumerate(widths)
    ]
817
+
818
def add_bluebeam_count_annotations_secondary(pdf_bytes, locations, main_info, secondary_info):
    """Draw one circle annotation per located label on the first PDF page.

    Each entry of ``locations`` is laid out as
    ``(coords, label, [width, [height]], color, acoustic[, fire])``.  Only
    the coordinates, the label and the color are used for drawing; the color
    is an RGB triple in the 0-255 range and is scaled to 0-1.

    Args:
        pdf_bytes: The PDF to annotate, as bytes.
        locations: Iterable of location tuples in the layout above.
        main_info: Main search fields; only its length (2, 3 or 4) is used.
        secondary_info: Secondary search fields; only its length (1 or 2)
            is used.

    Returns:
        The PDF as bytes.  No annotation is added for any other combination
        of lengths.

    Raises:
        ValueError: If an entry does not have the expected number of fields.
    """
    pdf_stream = io.BytesIO(pdf_bytes)  # Load PDF from bytes
    pdf_document = fitz.open("pdf", pdf_stream.read())  # Open PDF in memory

    try:
        page = pdf_document[0]  # First page only

        n_main = len(main_info)
        n_secondary = len(secondary_info)
        if n_main in (2, 3, 4) and n_secondary in (1, 2):
            # coords + label + size fields + color + secondary fields
            expected_len = n_main + 1 + n_secondary
            for loc in locations:
                if len(loc) != expected_len:
                    raise ValueError(
                        f"expected {expected_len} fields per location entry, got {len(loc)}"
                    )
                coor, lbl = loc[0], loc[1]
                clr = loc[n_main]  # the color follows the size fields
                stroke = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
                for cor in coor:
                    _add_count_circle(page, cor, lbl, stroke)

        # Save the modified PDF to memory instead of a file.
        output_stream = io.BytesIO()
        pdf_document.save(output_stream)
    finally:
        # Release the document even when drawing or saving fails.
        pdf_document.close()

    return output_stream.getvalue()  # Return the modified PDF as bytes


def _add_count_circle(page, cor, lbl, stroke):
    """Add one small white-filled circle annotation centred on ``cor``."""
    annot = page.add_circle_annot(
        fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10)  # Small circle
    )

    annot.set_colors(stroke=stroke, fill=(1, 1, 1))  # Stroke color, white fill
    annot.set_border(width=2)  # Border thickness
    annot.set_opacity(1)  # Fully visible

    # Intended to set the properties Bluebeam reads for Count markups.
    # NOTE(review): PyMuPDF's Annot.set_info appears to take
    # (info, content, title, ..., subject) positionally, so these calls may
    # be setting the annotation *content* rather than name/subject/title --
    # confirm against the PyMuPDF documentation before changing.
    annot.set_info("name", lbl)
    annot.set_info("subject", "Count")
    annot.set_info("title", lbl)
    annot.update()  # Apply changes
957
def get_user_input(user_words):
    """Return the first element of every item in ``user_words``.

    Args:
        user_words: Iterable of indexable items (e.g. lists or strings).

    Returns:
        A list holding ``item[0]`` for each item, in order.
    """
    return [item[0] for item in user_words]
962
+
963
def modify_author_in_pypdf2(pdf_bytes, new_authors):
    """Rewrite the ``/T`` entry of every annotation with the given texts.

    Annotations are visited page by page; the n-th annotation receives the
    n-th entry of ``new_authors``.  Once the list is used up, every remaining
    annotation receives its last entry.  With an empty list no annotation is
    changed.

    Args:
        pdf_bytes: The PDF to modify, as bytes.
        new_authors: Sequence of strings to assign, in annotation order.

    Returns:
        The rewritten PDF as bytes.
    """
    reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
    writer = PyPDF2.PdfWriter()

    next_author = 0  # position of the next unused entry in new_authors

    for page in reader.pages:
        if "/Annots" in page:  # only pages that carry annotations
            for annot_ref in page["/Annots"]:
                annot_obj = annot_ref.get_object()
                if len(new_authors) == 0:
                    break  # nothing to assign on this page

                if next_author < len(new_authors):
                    author = new_authors[next_author]
                    next_author += 1
                else:
                    # List exhausted: keep reusing the last entry.
                    author = new_authors[-1]
                annot_obj.update({"/T": TextStringObject(author)})

        writer.add_page(page)

    # Serialise the modified PDF to memory.
    buffer = io.BytesIO()
    writer.write(buffer)
    buffer.seek(0)

    return buffer.read()
993
+
994
+ # return output_stream.getvalue() # Return modified PDF as bytes
995
+
996
def process_pdf_secondary(input_pdf_path, output_pdf_path, locations, new_authors, main_info, secondary_info):
    """Annotate a plan and then write the annotation texts into it.

    Args:
        input_pdf_path: Forwarded as the ``pdf_bytes`` argument of
            ``add_bluebeam_count_annotations_secondary``.
        output_pdf_path: Not used by this function.
        locations: Location tuples to annotate.
        new_authors: Texts assigned to the annotations by
            ``modify_author_in_pypdf2``.
        main_info: Main search fields, forwarded to the annotation step.
        secondary_info: Secondary search fields, forwarded likewise.

    Returns:
        The bytes returned by ``modify_author_in_pypdf2``.
    """
    # Step 1: add the count annotations.
    with_annotations = add_bluebeam_count_annotations_secondary(
        input_pdf_path,
        locations,
        main_info,
        secondary_info,
    )
    # Step 2: rewrite the author field of each annotation using PyPDF2.
    return modify_author_in_pypdf2(with_annotations, new_authors)
1005
+
1006
  import fitz # PyMuPDF
1007
  import PyPDF2
1008
  import io
1009
  from PyPDF2.generic import TextStringObject # ✅ Required for setting string values
1010
 
1011
+ def add_bluebeam_count_annotations(pdf_bytes, locations):
1012
+ pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
1013
+ pdf_document = fitz.open("pdf", pdf_stream.read()) # Open PDF in memory
 
 
 
1014
 
1015
  page = pdf_document[0] # First page
1016
  if len(locations[0]) == 3:
 
1136
 
1137
  return output_stream.read()
1138
 
1139
+ def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
1140
  #Load original PDF
1141
  # with open(input_pdf_path, "rb") as file:
1142
  # original_pdf_bytes = file.read()
1143
 
1144
  #Add Bluebeam-compatible count annotations
1145
+ annotated_pdf_bytes = add_bluebeam_count_annotations(input_pdf_path, locations)
 
 
 
1146
 
1147
  #Modify author field using PyPDF2
1148
  final_pdf_bytes = modify_author_in_pypdf2(annotated_pdf_bytes, new_authors)
 
1151
  # with open(output_pdf_path, "wb") as file:
1152
  # file.write(final_pdf_bytes)
1153
 
1154
+ def mainRun(schedule, plan, searcharray):
1155
  #print(type(plan))
1156
+ eltype = type(plan)
1157
+ print(f"el type beta3 variable plan:: {eltype}")
1158
+ len_plan = len(plan)
1159
+ print(f"length of the plan's array is: {len_plan}")
1160
+ p1_type = type(plan[0])
1161
+ print(f"el mawgood fe p[0]: {p1_type}")
 
1162
 
1163
  user_input = get_user_input(searcharray)
1164
+ secondary_info_presence = False
1165
+ if len(user_input) > 4:
1166
+ secondary_info_presence = True
1167
+ secondary_info = user_input[4:]
1168
+ if not user_input[3]:
1169
+ main_info = user_input[:3]
1170
+ elif len(user_input) > 4:
1171
+ main_info = user_input[:4]
1172
+
1173
+
1174
+ dfs = extract_tables(schedule)
1175
+
1176
+ if secondary_info_presence:
1177
+ selected_columns_new = get_selected_columns(dfs, main_info)
1178
+ selected_secondary_info = get_secondary_info(dfs, secondary_info)
1179
+ selected_columns_combined = pd.concat([selected_columns_new, selected_secondary_info], axis=1)
1180
+ kelma = get_st_op_pattern(selected_columns_new, user_input)
1181
+ col_dict = get_similar_colors_secondary(selected_columns_combined, user_input)
1182
+ flattened_list2 = get_flattened_tuples_list_SECONDARY(col_dict)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1183
 
1184
+ pdfs = []
1185
+ for p in plan:
1186
+ plan_texts = read_text(p)
1187
+ locations, not_found = get_word_locations_plan_secondary(flattened_list2,plan_texts, main_info, secondary_info)
1188
+ new_data3 = get_cleaned_data_secondary(locations,main_info,secondary_info)
1189
+ repeated_labels = get_repeated_labels(locations)
1190
+ if kelma == None:
1191
+ #widths = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1192
+ widths, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1193
+ else:
1194
+ width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1195
+ cleaned_width = get_cleaned_width(width_info_tobeprinted)
1196
+ widths = get_widths_bb_format(cleaned_width, kelma)
1197
+ secondary_printed_clean = get_secondary_tobeprinted_clean(selected_secondary_info, secondary_tobeprinted, secondary_info)
1198
+ all_print = mix_width_secondary(widths, secondary_printed_clean)
1199
+ final_pdf_bytes = process_pdf_secondary(p, "final_output_multiple_input_new2.pdf", new_data3, all_print, main_info, secondary_info)
1200
+ pdfs.append(final_pdf_bytes)
1201
+
1202
 
1203
+ else:
1204
+ selected_columns_new = get_selected_columns(dfs, user_input)
1205
+ kelma = get_st_op_pattern(selected_columns_new, user_input)
1206
+ col_dict = get_similar_colors(selected_columns_new)
1207
+ flattened_list = get_flattened_tuples_list(col_dict)
1208
+
1209
+ pdfs = []
1210
+ for p in plan:
1211
+ print(f" p in plan is {type(p)}")
1212
+ print(p)
1213
+ plan_texts = read_text(p)
1214
+ locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
1215
+ new_data = get_cleaned_data(locations)
1216
+ repeated_labels = get_repeated_labels(locations)
1217
+ if kelma == None:
1218
+ widths = get_width_info_tobeprinted(new_data)
1219
+ else:
1220
+ width_info_tobeprinted = get_width_info_tobeprinted(new_data)
1221
+ cleaned_width = get_cleaned_width(width_info_tobeprinted)
1222
+ widths = get_widths_bb_format(cleaned_width, kelma)
1223
+ final_pdf_bytes = process_pdf(p, "final_output_width.pdf", new_data, widths)
1224
+ pdfs.append(final_pdf_bytes)
1225
+
1226
  if selected_columns_new.shape[1] == 2:
1227
  widths = []
1228
+
 
 
1229
  merged_pdf = merge_pdf_bytes_list(pdfs)
1230
  print(f"number of pges of merged_pdf is {len(merged_pdf)} and its type is {type(merged_pdf)}")
1231
  not_found = []