Marthee committed on
Commit
c1bc11f
·
verified ·
1 Parent(s): 3690e7c

Update Doors_Schedule.py

Browse files
Files changed (1) hide show
  1. Doors_Schedule.py +496 -75
Doors_Schedule.py CHANGED
@@ -1,6 +1,7 @@
1
  from collections import defaultdict
2
  import pandas as pd
3
  import random
 
4
  import re
5
  import io
6
  import pypdfium2 as pdfium
@@ -20,6 +21,14 @@ import PyPDF2
20
  import io
21
  from PyPDF2.generic import TextStringObject # ✅ Required for setting string values
22
  from PyPDF2 import PdfReader, PdfWriter
 
 
 
 
 
 
 
 
23
 
24
 
25
  def convert2img(path):
@@ -378,7 +387,8 @@ def get_selected_columns_all(dfs, user_patterns):
378
  #if len(dfs[i]) <2:
379
  #selected_columns_new = details_in_another_table(clmn_name, clmn_idx, dfs[i], dfs)
380
  selected_columns_new = details_in_another_table(clmn_name, column_index_list, dfs[i], dfs)
381
- selected_columns_new = crop_rename_table(row_index_list, clmn_name, column_index_list,dfs[i])
 
382
 
383
  break
384
  #other_matches = details_in_another_table_mod(clmn_name, clmn_idx, dfs[i], dfs)
@@ -498,17 +508,26 @@ def get_width_info_tobeprinted(new_data):
498
  for _,_,_, w,h in new_data:
499
  w = re.sub(r",", "", w)
500
  h = re.sub(r",", "", h)
501
- if float(w).is_integer():
502
- w = int(float(w))
503
- else:
504
  w = w
505
- if float(h).is_integer():
506
- h = int(float(h))
507
  else:
 
 
 
 
 
 
508
  h = h
 
 
 
 
 
509
  width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
510
  return width_info_tobeprinted
511
-
512
  def clean_dimensions(text):
513
  # Remove commas and "mm"
514
  text = re.sub(r'[,\s]*mm', '', text) # Remove "mm" with optional spaces or commas before it
@@ -540,7 +559,7 @@ def get_widths_bb_format(cleaned_width, kelma):
540
  return widths
541
 
542
 
543
- def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
544
  width_info_tobeprinted = []
545
  secondary_info_tobeprinted = []
546
 
@@ -582,20 +601,101 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
582
 
583
  if len(main_info) == 4 and len(secondary_info) == 2:
584
  for coords, label, width, height, acous, fire, color in new_data:
 
 
585
  w = re.sub(r",", "", width)
586
  h = re.sub(r",", "", height)
587
- if float(w).is_integer():
588
- w = int(float(w))
589
- else:
590
  w = w
591
- if float(h).is_integer():
592
- h = int(float(h))
593
  else:
 
 
 
 
 
594
  h = h
 
 
 
 
 
595
  width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
596
  secondary_info_tobeprinted.append((acous, fire))
597
  return width_info_tobeprinted, secondary_info_tobeprinted
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
598
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
599
  def get_word_locations_plan_secondary(flattened_list, plan_texts, main_info, secondary_info):
600
  #hena fe 7alet en keda keda fe secondary information
601
  locations = []
@@ -795,7 +895,22 @@ def mix_width_secondary(widths, secondary_printed_clean):
795
  all_print.append(newest_text)
796
  return all_print
797
 
798
- def add_bluebeam_count_annotations_secondary(pdf_bytes, locations, main_info, secondary_info):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
799
  pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
800
  pdf_document = fitz.open("pdf", pdf_stream.read()) # Open PDF in memory
801
 
@@ -937,7 +1052,6 @@ def add_bluebeam_count_annotations_secondary(pdf_bytes, locations, main_info, se
937
 
938
  return output_stream.getvalue() # Return the modified PDF as bytes
939
 
940
-
941
  def modify_author_in_pypdf2(pdf_bytes, new_authors):
942
  pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
943
  reader = PyPDF2.PdfReader(pdf_stream)
@@ -969,10 +1083,6 @@ def modify_author_in_pypdf2(pdf_bytes, new_authors):
969
 
970
  return output_stream.read()
971
 
972
-
973
-
974
-
975
-
976
  def add_bluebeam_count_annotations(pdf_bytes, locations):
977
  pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
978
  pdf_document = fitz.open("pdf", pdf_stream.read()) # Open PDF in memory
@@ -1047,8 +1157,6 @@ def add_bluebeam_count_annotations(pdf_bytes, locations):
1047
  pdf_document.close()
1048
 
1049
  return output_stream.getvalue() # Return the modified PDF as bytes
1050
-
1051
-
1052
 
1053
  def modify_author_in_pypdf2(pdf_bytes, new_authors):
1054
  pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
@@ -1081,8 +1189,6 @@ def modify_author_in_pypdf2(pdf_bytes, new_authors):
1081
 
1082
  return output_stream.read()
1083
 
1084
-
1085
-
1086
  def merge_pdf_bytes_list(pdfs):
1087
  writer = PdfWriter()
1088
 
@@ -1098,7 +1204,6 @@ def merge_pdf_bytes_list(pdfs):
1098
 
1099
  return output_stream.read()
1100
 
1101
-
1102
  def process_pdf_secondary(input_pdf_path, output_pdf_path, locations, new_authors, main_info, secondary_info):
1103
 
1104
  if isinstance(input_pdf_path, bytes):
@@ -1115,7 +1220,6 @@ def process_pdf_secondary(input_pdf_path, output_pdf_path, locations, new_author
1115
 
1116
  return final_pdf_bytes
1117
 
1118
-
1119
  def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
1120
  #Load original PDF
1121
  if isinstance(input_pdf_path, bytes):
@@ -1130,6 +1234,312 @@ def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
1130
  #Modify author field using PyPDF2
1131
  final_pdf_bytes = modify_author_in_pypdf2(annotated_pdf_bytes, new_authors)
1132
  return final_pdf_bytes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1133
 
1134
  def mainRun(schedule, plan, searcharray):
1135
  print("mainRun is RUNNING")
@@ -1145,23 +1555,26 @@ def mainRun(schedule, plan, searcharray):
1145
  print(f"search array: {searcharray}")
1146
 
1147
  dfs = extract_tables(schedule)
1148
-
1149
- pdfs = []
 
 
 
 
 
 
1150
  for p in plan:
 
 
1151
  pdf_document = fitz.open("pdf", p)
1152
  # Get the first page (0-indexed)
1153
  page = pdf_document[0]
1154
  rect = page.rect # Rectangle: contains x0, y0, x1, y1
1155
 
1156
- width_pdf = rect.width # or: width = rect.x1 - rect.x0
1157
- height_pdf = rect.height # or: height = rect.y1 - rect.y0
1158
-
1159
- print(f"plan width: {width_pdf}")
1160
- print(f"plan height: {height_pdf}")
1161
-
1162
- all_new_data = []
1163
- all_widths = []
1164
- pdf_outputs = []
1165
 
1166
  for j in range(len(searcharray)):
1167
  user_input = searcharray[j]
@@ -1181,6 +1594,16 @@ def mainRun(schedule, plan, searcharray):
1181
  print("mafeesh secondary information")
1182
 
1183
  selected_columns_combined = get_selected_columns_all(dfs, user_input)
 
 
 
 
 
 
 
 
 
 
1184
  kelma = get_st_op_pattern(selected_columns_combined, user_input)
1185
  col_dict = get_similar_colors_all(selected_columns_combined)
1186
  flattened_list = get_flattened_tuples_list_all(col_dict)
@@ -1200,31 +1623,22 @@ def mainRun(schedule, plan, searcharray):
1200
  width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1201
  cleaned_width = get_cleaned_width(width_info_tobeprinted)
1202
  widths = get_widths_bb_format(cleaned_width, kelma)
1203
- #Handling schedules without dimensions (width and height)
1204
- if selected_columns_combined.shape[1] == 2:
1205
- widths = []
1206
 
1207
  secondary_printed_clean = get_secondary_tobeprinted_clean(selected_columns_combined, secondary_tobeprinted, secondary_info)
1208
  all_print = mix_width_secondary(widths, secondary_printed_clean)
1209
-
1210
- #Single page annotation
1211
- all_widths.append(all_print)
1212
-
1213
- #flat_list_new_data = [item for sublist in all_new_data for item in sublist]
1214
- #flat_list_widths = [item for sublist in all_widths for item in sublist]
1215
-
1216
- if pdf_outputs:
1217
- final_pdf_bytes = process_pdf_secondary(pdf_outputs[j-1], "final_output_multiple_input_new2.pdf", all_new_data[j], all_widths[j], main_info, secondary_info)
1218
- pdf_outputs.append(final_pdf_bytes)
1219
- else:
1220
- final_pdf_bytes = process_pdf_secondary(p, "final_output_multiple_input_new2.pdf", all_new_data[j], all_widths[j], main_info, secondary_info)
1221
- pdf_outputs.append(final_pdf_bytes)
1222
 
1223
  else:
1224
  locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
1225
  new_data = get_cleaned_data(locations)
1226
- #Single page annotation
1227
- all_new_data.append(new_data)
1228
  repeated_labels = get_repeated_labels(locations)
1229
  if kelma == None:
1230
  widths = get_width_info_tobeprinted(new_data)
@@ -1232,30 +1646,36 @@ def mainRun(schedule, plan, searcharray):
1232
  width_info_tobeprinted = get_width_info_tobeprinted(new_data)
1233
  cleaned_width = get_cleaned_width(width_info_tobeprinted)
1234
  widths = get_widths_bb_format(cleaned_width, kelma)
1235
-
1236
- #Handling schedules without dimensions (width and height)
1237
- if selected_columns_combined.shape[1] == 2:
1238
- widths = []
1239
-
1240
- #Single page annotation
1241
- all_widths.append(widths)
1242
 
1243
- flat_list_new_data = [item for sublist in all_new_data for item in sublist]
1244
- flat_list_widths = [item for sublist in all_widths for item in sublist]
1245
-
1246
- if pdf_outputs:
1247
- final_pdf_bytes = process_pdf(pdf_outputs[j-1], "final_output_width_trial.pdf", all_new_data[j], all_widths[j])
1248
- pdf_outputs.append(final_pdf_bytes)
1249
- else:
1250
- final_pdf_bytes = process_pdf(p, "final_output_width_trial.pdf", all_new_data[j], all_widths[j])
1251
- pdf_outputs.append(final_pdf_bytes)
1252
-
 
 
 
 
 
 
 
 
 
 
 
1253
 
1254
- pdfs.append(final_pdf_bytes)
1255
- merged_pdf = merge_pdf_bytes_list(pdfs)
1256
- print(f"number of pges of merged_pdf is {len(merged_pdf)} and its type is {type(merged_pdf)}")
1257
 
1258
- not_found = []
1259
  doc2 =fitz.open('pdf',merged_pdf)
1260
  len_doc2 = len(doc2)
1261
  print(f"number of pges of doc2 is {len_doc2} and its type is {type(doc2)}")
@@ -1286,3 +1706,4 @@ def mainRun(schedule, plan, searcharray):
1286
  x,y,z=int(annot_color.get(v)[0]*255),int(annot_color.get(v)[1]*255),int(annot_color.get(v)[2]*255)
1287
  list1.loc[len(list1)] =[annot.info['content'],annot.info['id'],annot.info['subject'],[x,y,z]]
1288
  return annotatedimg, doc2 , list1, repeated_labels , not_found
 
 
1
  from collections import defaultdict
2
  import pandas as pd
3
  import random
4
+ import math
5
  import re
6
  import io
7
  import pypdfium2 as pdfium
 
21
  import io
22
  from PyPDF2.generic import TextStringObject # ✅ Required for setting string values
23
  from PyPDF2 import PdfReader, PdfWriter
24
+ import zlib
25
+ import base64
26
+ import datetime
27
+ import uuid
28
+ from xml.etree.ElementTree import Element, SubElement, tostring, ElementTree
29
+ from xml.dom.minidom import parseString
30
+ from collections import defaultdict
31
+ from xml.etree.ElementTree import Element, SubElement, tostring
32
 
33
 
34
  def convert2img(path):
 
387
  #if len(dfs[i]) <2:
388
  #selected_columns_new = details_in_another_table(clmn_name, clmn_idx, dfs[i], dfs)
389
  selected_columns_new = details_in_another_table(clmn_name, column_index_list, dfs[i], dfs)
390
+ selected_columns_new2 = crop_rename_table(row_index_list, clmn_name, column_index_list,dfs[i])
391
+ selected_columns_new = pd.concat([selected_columns_new, selected_columns_new2], axis=1)
392
 
393
  break
394
  #other_matches = details_in_another_table_mod(clmn_name, clmn_idx, dfs[i], dfs)
 
508
  for _,_,_, w,h in new_data:
509
  w = re.sub(r",", "", w)
510
  h = re.sub(r",", "", h)
511
+
512
+ #if w == "N/A":
513
+ if w.isalpha():
514
  w = w
 
 
515
  else:
516
+ if float(w).is_integer():
517
+ w = int(float(w))
518
+ else:
519
+ w = w
520
+ #if h == "N/A":
521
+ if h.isalpha():
522
  h = h
523
+ else:
524
+ if float(h).is_integer():
525
+ h = int(float(h))
526
+ else:
527
+ h = h
528
  width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
529
  return width_info_tobeprinted
530
+
531
  def clean_dimensions(text):
532
  # Remove commas and "mm"
533
  text = re.sub(r'[,\s]*mm', '', text) # Remove "mm" with optional spaces or commas before it
 
559
  return widths
560
 
561
 
562
+ '''def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
563
  width_info_tobeprinted = []
564
  secondary_info_tobeprinted = []
565
 
 
601
 
602
  if len(main_info) == 4 and len(secondary_info) == 2:
603
  for coords, label, width, height, acous, fire, color in new_data:
604
+ print(type(width))
605
+ print(type(height))
606
  w = re.sub(r",", "", width)
607
  h = re.sub(r",", "", height)
608
+ if w == "N/A":
 
 
609
  w = w
 
 
610
  else:
611
+ if float(w).is_integer():
612
+ w = int(float(w))
613
+ else:
614
+ w = w
615
+ if h == "N/A":
616
  h = h
617
+ else:
618
+ if float(h).is_integer():
619
+ h = int(float(h))
620
+ else:
621
+ h = h
622
  width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
623
  secondary_info_tobeprinted.append((acous, fire))
624
  return width_info_tobeprinted, secondary_info_tobeprinted
625
+ '''
626
+
627
def _format_dimension(value):
    """Normalise one schedule dimension for printing.

    Thousands separators are removed and whole numbers lose their decimal
    part ("2100.0" -> 2100). Text that is not a number (for example "N/A"
    or an empty cell) is returned unchanged instead of raising.
    """
    text = str(value).replace(",", "")
    try:
        number = float(text)
    except ValueError:
        # "N/A", "", "TBC", ... -> keep the schedule text as it is.
        return text
    return int(number) if number.is_integer() else text


def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
    """Split located schedule rows into dimension text and secondary info.

    The tuple layout of every row in ``new_data`` is selected by the lengths
    of ``main_info`` and ``secondary_info``:

    ========  =============  =============================================
    main len  secondary len  row layout
    ========  =============  =============================================
    2         1              (coords, label, acous, color)
    2         2              (coords, label, acous, fire, color)
    3         1              (coords, label, width, acous, color)
    3         2              (coords, label, width, acous, fire, color)
    4         1              (coords, label, width, height, acous, color)
    4         2              (coords, label, width, height, acous, fire, color)
    ========  =============  =============================================

    Args:
        new_data: iterable of row tuples in the layout described above.
        main_info: only its length is used.
        secondary_info: only its length is used.

    Returns:
        ``(width_info_tobeprinted, secondary_info_tobeprinted)``. With three
        main columns the width is passed through untouched; with four it is
        rendered as ``"<w> mm wide x <h> mm high"``. Secondary entries are a
        single value when there is one secondary column and an
        ``(acous, fire)`` tuple when there are two. Any other length
        combination yields two empty lists.
    """
    width_info_tobeprinted = []
    secondary_info_tobeprinted = []
    layout = (len(main_info), len(secondary_info))

    if layout == (2, 1):
        for _coords, _label, acous, _color in new_data:
            secondary_info_tobeprinted.append(acous)

    elif layout == (2, 2):
        for _coords, _label, acous, fire, _color in new_data:
            secondary_info_tobeprinted.append((acous, fire))

    elif layout == (3, 1):
        for _coords, _label, width, acous, _color in new_data:
            width_info_tobeprinted.append(width)
            secondary_info_tobeprinted.append(acous)

    elif layout == (3, 2):
        for _coords, _label, width, acous, fire, _color in new_data:
            width_info_tobeprinted.append(width)
            secondary_info_tobeprinted.append((acous, fire))

    elif layout == (4, 1):
        for _coords, _label, width, height, acous, _color in new_data:
            w = _format_dimension(width)
            h = _format_dimension(height)
            width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
            # Only one secondary column exists in this layout, so there is no
            # fire value to pair with (the previous code referenced an
            # unbound ``fire`` here and raised NameError).
            secondary_info_tobeprinted.append(acous)

    elif layout == (4, 2):
        for _coords, _label, width, height, acous, fire, _color in new_data:
            w = _format_dimension(width)
            h = _format_dimension(height)
            width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
            secondary_info_tobeprinted.append((acous, fire))

    return width_info_tobeprinted, secondary_info_tobeprinted
698
+
699
  def get_word_locations_plan_secondary(flattened_list, plan_texts, main_info, secondary_info):
700
  #hena fe 7alet en keda keda fe secondary information
701
  locations = []
 
895
  all_print.append(newest_text)
896
  return all_print
897
 
898
def merge_pdf_bytes_list(pdfs):
    """Concatenate several in-memory PDFs and return the result as bytes.

    Every page of every document in ``pdfs`` is appended to one writer, in
    the order the documents are given.
    """
    merged = PdfWriter()
    for raw_pdf in pdfs:
        source = PdfReader(io.BytesIO(raw_pdf))
        for pdf_page in source.pages:
            merged.add_page(pdf_page)

    buffer = io.BytesIO()
    merged.write(buffer)
    # getvalue() returns the whole buffer regardless of the cursor position.
    return buffer.getvalue()
912
+
913
+ '''def add_bluebeam_count_annotations_secondary(pdf_bytes, locations, main_info, secondary_info):
914
  pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
915
  pdf_document = fitz.open("pdf", pdf_stream.read()) # Open PDF in memory
916
 
 
1052
 
1053
  return output_stream.getvalue() # Return the modified PDF as bytes
1054
 
 
1055
  def modify_author_in_pypdf2(pdf_bytes, new_authors):
1056
  pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
1057
  reader = PyPDF2.PdfReader(pdf_stream)
 
1083
 
1084
  return output_stream.read()
1085
 
 
 
 
 
1086
  def add_bluebeam_count_annotations(pdf_bytes, locations):
1087
  pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
1088
  pdf_document = fitz.open("pdf", pdf_stream.read()) # Open PDF in memory
 
1157
  pdf_document.close()
1158
 
1159
  return output_stream.getvalue() # Return the modified PDF as bytes
 
 
1160
 
1161
  def modify_author_in_pypdf2(pdf_bytes, new_authors):
1162
  pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
 
1189
 
1190
  return output_stream.read()
1191
 
 
 
1192
  def merge_pdf_bytes_list(pdfs):
1193
  writer = PdfWriter()
1194
 
 
1204
 
1205
  return output_stream.read()
1206
 
 
1207
  def process_pdf_secondary(input_pdf_path, output_pdf_path, locations, new_authors, main_info, secondary_info):
1208
 
1209
  if isinstance(input_pdf_path, bytes):
 
1220
 
1221
  return final_pdf_bytes
1222
 
 
1223
  def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
1224
  #Load original PDF
1225
  if isinstance(input_pdf_path, bytes):
 
1234
  #Modify author field using PyPDF2
1235
  final_pdf_bytes = modify_author_in_pypdf2(annotated_pdf_bytes, new_authors)
1236
  return final_pdf_bytes
1237
+ '''
1238
+
1239
def calculate_bounding_rect_count(vertices, padding):
    """Return ``[xmin, ymin, xmax, ymax]`` of a square around the first vertex.

    Only ``vertices[0]`` is read; the square reaches ``padding`` units from
    that point in every direction.
    """
    centre_x, centre_y = vertices[0]
    return [
        centre_x - padding,
        centre_y - padding,
        centre_x + padding,
        centre_y + padding,
    ]
1246
+
1247
def rgb_string_to_hex(rgb_string):
    """Convert a normalised ``"r g b"`` string into an ``#RRGGBB`` colour.

    Args:
        rgb_string: three whitespace-separated numbers, each nominally in
            the range 0..1 (for example ``"1.0 0.0 0.5"``).

    Returns:
        Upper-case hex colour string such as ``"#FF007F"``.

    Raises:
        ValueError: if the string does not contain exactly three numbers.
    """
    r, g, b = map(float, rgb_string.strip().split())

    def to_byte(component):
        # The inputs are produced as ``value / 255``; multiplying back can
        # land a hair below the original integer, so nudge before truncating.
        # Clamping keeps out-of-range components from producing a malformed
        # (negative or three-digit) hex pair.
        return max(0, min(255, int(component * 255 + 1e-9)))

    return '#{:02X}{:02X}{:02X}'.format(to_byte(r), to_byte(g), to_byte(b))
1250
+
1251
+
1252
+
1253
+
1254
+
1255
+
1256
+
1257
+
1258
+
1259
# Build one <Annotation> XML element describing a Bluebeam count-measurement markup.
#
# What the code below does, step by step:
#   * takes a UTC timestamp and formats it twice: a PDF-style date ("D:...+00'00'") used
#     inside the raw dictionary, and an ISO string + 'Z' used for the XML date elements;
#   * creates an id of the form "fitz-XXXX" from four hex digits of a random UUID;
#   * flattens `vertices` to a space-separated list of numbers with 4 decimals;
#   * wraps the `custom_data` value of every name in `column_order` in parentheses
#     (missing names become "()") and concatenates them for /BSIColumnData;
#   * computes /Rect from the first vertex with a fixed padding of 10
#     (calculate_bounding_rect_count);
#   * fills the raw annotation dictionary text, zlib-compresses it and stores it
#     hex-encoded (base16, lower case) in <Raw> -- note the variable is named
#     `base64_raw` although the encoding is hexadecimal;
#   * returns the Element; <Page> is created empty here.
#
# Parameters: `color` is passed to rgb_string_to_hex, so it must be three
# whitespace-separated numbers; `countstyle`, `countsize`, `height`, `width`, `label`,
# `author` and `area_text` are interpolated verbatim into the raw dictionary text.
# NOTE(review): nothing escapes ')' or '\' in the interpolated strings -- presumably the
# callers only pass plain labels; confirm before feeding arbitrary schedule text.
# NOTE(review): the values in `custom_data` are assigned to Element.text, so they are
# presumably expected to be strings -- confirm against the callers.
+ def generate_annotation_xml_block_count(vertices, area_text, author, custom_data: dict, column_order: list, index: int,
1260
+ label: str = '',height:str='',width:str='',
1261
+ color:str='',countstyle:str='',countsize:str=''):
1262
+ now = datetime.datetime.utcnow()
1263
+ mod_date = now.strftime("D:%Y%m%d%H%M%S+00'00'")
1264
+ creation_date = now.isoformat() + 'Z'
1265
+ id_str = "fitz-" + uuid.uuid4().hex[:4].upper()
1266
+
1267
+ vert_str = ' '.join([f'{x:.4f}' for point in vertices for x in point])
1268
+ ordered_column_values = [f'({custom_data.get(col, "")})' for col in column_order]
1269
+ bsi_column_data = ''.join(ordered_column_values)
1270
+
1271
+ type_internal= 'Bluebeam.PDF.Annotations.AnnotationMeasureCount'
1272
+ subject ='Count Measurement'
1273
+ padding=10
1274
+ rectvertices=calculate_bounding_rect_count(vertices,padding)
1275
+ bbmeasure = '''<</Type/Measure
1276
+ /Subtype/RL
1277
+ /R(1 mm = 1 mm)
1278
+ /X[<</Type/NumberFormat/U(mm)/C 0.3527778/D 100/SS()>>]
1279
+ /D[<</Type/NumberFormat/U(mm)/C 1/D 100/SS()>>]
1280
+ /A[<</Type/NumberFormat/U(sq mm)/C 1/D 100/FD true/SS()>>]
1281
+ /T[<</Type/NumberFormat/U(\\260)/C 1/D 100/FD true/PS()/SS()>>]
1282
+ /V[<</Type/NumberFormat/U(cu mm)/C 1/D 100/FD true/SS()>>]
1283
+ /TargetUnitConversion 0.3527778>>'''
1284
+
1285
+ raw_text = f'''<<
1286
+ /Version 1
1287
+ /DS(font: Helvetica 12pt; text-align:center; line-height:13.8pt; color:#FF0000)
1288
+ /CountStyle{countstyle}
1289
+ /CountScale {countsize}
1290
+ /MeasurementTypes 128
1291
+ /BBMeasure{bbmeasure}
1292
+ /NumCounts {area_text}
1293
+ /AP<</N/BBObjPtr_{uuid.uuid4().hex.upper()}>>
1294
+ /IT/PolygonCount
1295
+ /Vertices[{vert_str}]
1296
+ /IC[{color}]
1297
+ /T({author})
1298
+ /CreationDate({mod_date})
1299
+ /BSIColumnData[{bsi_column_data}]
1300
+ /RC(<?xml version="1.0"?><body xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/" xfa:contentType="text/html" xfa:APIVersion="BluebeamPDFRevu:2018" xfa:spec="2.2.0" style="font:Helvetica 12pt; text-align:center; line-height:13.8pt; color:#FF0000" xmlns="http://www.w3.org/1999/xhtml"><p>{area_text}</p></body>)
1301
+ /Label({label})
1302
+ /Height {height}
1303
+ /Width {width}
1304
+ /Subj({subject})
1305
+ /NM({id_str})
1306
+ /Subtype/Polygon
1307
+ /Rect[{rectvertices[0]} {rectvertices[1]} {rectvertices[2]} {rectvertices[3]}]
1308
+ /Contents({area_text})
1309
+ /F 4
1310
+ /C[{color}]
1311
+ /BS<</Type/Border/W 0/S/S>>
1312
+ /M({mod_date})
1313
+ >>'''.encode('utf-8')
1314
+
1315
+ compressed = zlib.compress(raw_text)
1316
+ base64_raw = base64.b16encode(compressed).lower().decode()
1317
+
1318
+ annotation = Element('Annotation')
1319
+ SubElement(annotation, 'Page') ############## newline #####################
1320
+ SubElement(annotation, 'Contents').text = area_text
1321
+ SubElement(annotation, 'ModDate').text = creation_date
1322
+ SubElement(annotation, 'Color').text = rgb_string_to_hex(color) ############## newline #####################
1323
+ SubElement(annotation, 'Type').text = 'Polygon'
1324
+ SubElement(annotation, 'ID').text = id_str
1325
+ SubElement(annotation, 'TypeInternal').text = type_internal
1326
+ SubElement(annotation, 'Raw').text = base64_raw
1327
+ SubElement(annotation, 'Index').text = str(index)
1328
+
1329
+ custom = SubElement(annotation, 'Custom')
1330
+ for key, value in custom_data.items():
1331
+ SubElement(custom, key).text = value
1332
+
1333
+ SubElement(annotation, 'Subject').text = subject
1334
+ SubElement(annotation, 'CreationDate').text = creation_date
1335
+ SubElement(annotation, 'Author').text = author
1336
+ SubElement(annotation, 'Label').text = label
1337
+ SubElement(annotation, 'Height').text = height
1338
+ SubElement(annotation, 'Width').text = width
1339
+
1340
+
1341
+ return annotation
1342
+
1343
+
1344
def save_multiple_annotations_count_bax(annotations, output_path, column_order, pdfWidth, pdfHeight, num_pages):
    """Write a markup XML document with one <Page> per PDF page.

    Args:
        annotations: list of dicts. Keys read here: ``vertices``, ``text``,
            ``author`` and ``custom_data`` (required); ``page`` (1-based,
            defaults to 1), ``label``, ``height``, ``width``, ``color``,
            ``countstyle`` and ``countsize`` (optional).
        output_path: file the pretty-printed XML is written to (UTF-8).
        column_order: custom column names, forwarded to the annotation builder.
        pdfWidth, pdfHeight: written as the Width/Height of every page.
        num_pages: number of <Page> elements to emit; annotations whose
            ``page`` is outside ``1..num_pages`` are not written.

    Returns:
        The pretty-printed XML string that was written to ``output_path``.
    """
    # Bucket the annotations by their 1-based page number.
    grouped = defaultdict(list)
    for entry in annotations:
        grouped[entry.get('page', 1)].append(entry)

    root = Element('Document', Version='1')
    for page_number in range(1, num_pages + 1):
        page_node = SubElement(root, 'Page', Index=str(page_number - 1))
        page_fields = (('Label', page_number), ('Width', pdfWidth), ('Height', pdfHeight))
        for tag, value in page_fields:
            SubElement(page_node, tag).text = str(value)

        # Pages without annotations still get their (empty) <Page> element.
        for position, entry in enumerate(grouped.get(page_number, [])):
            block = generate_annotation_xml_block_count(
                vertices=entry['vertices'],
                area_text=entry['text'],
                author=entry['author'],
                custom_data=entry['custom_data'],
                column_order=column_order,
                index=position,
                label=entry.get('label', 'label1'),
                height=entry.get('height', '123'),
                width=entry.get('width', '123'),
                color=entry.get('color', ''),
                countstyle=entry.get('countstyle', ''),
                countsize=entry.get('countsize', ''),
            )
            block.find('Page').text = str(page_number)
            page_node.append(block)

    serialized = tostring(root)
    pretty_xml = parseString(serialized).toprettyxml(indent=" ")
    with open(output_path, 'w', encoding='utf-8') as handle:
        handle.write(pretty_xml)

    print(f"Saved {len(annotations)} annotations to {output_path}")
    return pretty_xml
1394
+
1395
+
1396
# Count-marker shapes keyed by a friendly name, e.g. CountStyles['Circle'];
# each value is the shape name prefixed with '/'.
CountStyles = {
    shape: '/' + shape
    for shape in ('Circle', 'Diamond', 'Triangle', 'Square', 'Checkmark')
}
1404
+
1405
def convert_to_bytes(input_pdf_path):
    """Read the file at ``input_pdf_path`` in binary mode and return its bytes."""
    with open(input_pdf_path, "rb") as pdf_file:
        return pdf_file.read()
1409
+
1410
def mirrored_points(x, y, height_plan):
    """Flip a point vertically inside a page of height ``height_plan``.

    Returns a one-element list ``[[x, height_plan - y]]`` so it can be used
    directly as an annotation's ``vertices`` value.
    """
    return [[x, height_plan - y]]
1415
+
1416
+ # Modified to adjust mirrored points
1417
def create_bb_bax_secondary(new_data, widthat, heightat, secondary_tobeprinted, CountStyles, input_user_clmn_names, page_number, height_plan):
    """Build count-annotation dicts for rows that carry secondary information.

    Args:
        new_data: located rows; ``row[0][0]`` holds the (x, y) anchor,
            ``row[1]`` the label and the last item the (r, g, b) colour
            with components on a 0..255 scale.
        widthat, heightat: per-row width / height strings, indexed like
            ``new_data``.
        secondary_tobeprinted: per-row secondary values; items ``[0]`` and
            ``[1]`` are stored as FireRating and AcousticRating.
            NOTE(review): presumably each entry is a 2-item sequence in that
            order -- confirm against the producer of this list.
        CountStyles: mapping of count-style names; ``'Circle'`` is used.
        input_user_clmn_names: user column names; entries 4 and 5 are the
            two secondary columns. A row is emitted only when at least one
            of them is set.
        page_number: 1-based page the annotations belong to.
        height_plan: page height, used to flip the y coordinate.

    Returns:
        List of annotation dicts.
    """
    bax_annotations = []
    for i, row in enumerate(new_data):
        r, g, b = row[-1]  # colour is always the last item of the row
        # Colour string normalised to 0..1 per channel: "R G B".
        color = ' '.join(str(float(channel / 255)) for channel in (r, g, b))
        vertix = mirrored_points(row[0][0][0], row[0][0][1], height_plan)

        # The original code had three branches (both / only [4] / only [5])
        # that built exactly the same dict; nothing is emitted when neither
        # secondary column was requested.
        if not (input_user_clmn_names[4] or input_user_clmn_names[5]):
            continue

        bax_annotations.append({
            'vertices': vertix,
            'text': '1',  # number of counts represented by one marker
            'author': 'ADR',
            # custom columns as {column name: text}
            'custom_data': {
                'FireRating': secondary_tobeprinted[i][0],
                'AcousticRating': secondary_tobeprinted[i][1],
                'Height_': heightat[i],
                'Width_': widthat[i],
            },
            'label': row[1],
            # Capitalised keys are kept for any existing reader ...
            'Height': heightat[i],
            'Width': widthat[i],
            # ... but the XML writer and create_bb_bax use lowercase keys;
            # without them the writer falls back to its '123' placeholder.
            'height': heightat[i],
            'width': widthat[i],
            'page': page_number,
            'color': color,
            'countstyle': CountStyles['Circle'],
            'countsize': '0.8',  # size of the count icon
        })

    return bax_annotations
1472
+
1473
+ # Modified to adjust mirrored points
1474
def create_bb_bax(new_data, widthat, heightat, CountStyles, page_number, height_plan):
    """Build count-annotation dicts for rows without secondary information.

    For every row of ``new_data`` the anchor ``row[0][0]`` is flipped
    vertically with ``mirrored_points``, the colour ``row[2]`` is divided by
    255 per channel and joined as "R G B", and the width / height with the
    same index are attached. Fire and acoustic ratings are fixed to 'N/A'.
    """
    annotations = []
    for idx, entry in enumerate(new_data):
        red, green, blue = entry[2]
        colour = ' '.join(str(float(channel / 255)) for channel in (red, green, blue))
        anchor = mirrored_points(entry[0][0][0], entry[0][0][1], height_plan)

        custom_columns = {
            'FireRating': 'N/A',
            'AcousticRating': 'N/A',
            'Height_': heightat[idx],
            'Width_': widthat[idx],
        }
        annotations.append({
            'vertices': anchor,
            'text': '1',  # one count per marker
            'author': 'ADR',
            'custom_data': custom_columns,
            'label': entry[1],
            'height': heightat[idx],
            'width': widthat[idx],
            'page': page_number,
            'color': colour,
            'countstyle': CountStyles['Circle'],
            'countsize': '0.8',  # size of the count icon
        })

    return annotations
1499
+
1500
+ #Handle missing widths or heights in some rows
1501
# "<width> mm wide x <height> mm high", where either value may be a
# non-numeric placeholder such as "N/A".
_DIMENSION_LAYOUT = re.compile(r'(\S+?)\s*mm wide x\s*(\S+?)\s*mm high')
# Plain integer or decimal number.
_DIMENSION_NUMBER = re.compile(r'\d+(?:\.\d+)?')


def generate_separate_dimensions(widths):
    """Split "W mm wide x H mm high" strings into width and height lists.

    Each dimension is validated on its own: a numeric value is returned as
    the matched text, anything else becomes "N/A". A row where only one of
    the two values is missing therefore keeps the valid one (previously both
    were discarded). Strings that do not follow the layout at all yield
    "N/A" for both.

    Args:
        widths: iterable of dimension strings.

    Returns:
        ``(widthat, heightat)`` -- two lists of strings, same length and
        order as ``widths``.
    """
    widthat = []
    heightat = []
    for s in widths:
        match = _DIMENSION_LAYOUT.match(s)
        if match:
            width, height = (
                value if _DIMENSION_NUMBER.fullmatch(value) else "N/A"
                for value in match.groups()
            )
        else:
            width = height = "N/A"
        widthat.append(width)
        heightat.append(height)
    return widthat, heightat
1517
+
1518
def generate_bluebeam_columns_raw(column_names):
    """Serialize custom column names as a BluebeamUserDefinedColumns XML string.

    Every name becomes a ``BSIColumnItem`` (Subtype "Text") whose Index and
    DisplayOrder are its position in ``column_names``; Deleted and Multiline
    are always "False". The result carries no XML declaration.
    """
    root = Element("BluebeamUserDefinedColumns")

    for position, column_name in enumerate(column_names):
        order = str(position)
        item = SubElement(root, "BSIColumnItem", Index=order, Subtype="Text")
        children = (
            ("Name", column_name),
            ("DisplayOrder", order),
            ("Deleted", "False"),
            ("Multiline", "False"),
        )
        for tag, text in children:
            SubElement(item, tag).text = text

    # encoding="unicode" makes tostring return str instead of bytes.
    return tostring(root, encoding="unicode", method="xml")
1533
+
1534
+
1535
+ # Example usage
1536
+ # column_names = ["mycustomcolumn"]
1537
+ #column_xml = generate_bluebeam_columns_raw(column_order)
1538
+
1539
+ #with open("count_type_Windows.xml", "w", encoding="utf-8") as f:
1540
+ # f.write(column_xml)
1541
+
1542
+ #print(column_xml)
1543
 
1544
  def mainRun(schedule, plan, searcharray):
1545
  print("mainRun is RUNNING")
 
1555
  print(f"search array: {searcharray}")
1556
 
1557
  dfs = extract_tables(schedule)
1558
+ pdf_widths = []
1559
+ pdf_heights = []
1560
+ pdfs_count_type = []
1561
+
1562
+ annotation_counter = 0
1563
+ page_number = 0
1564
+ bax_annotations_all_inputs = [] #for the same plan
1565
+ #pdfs = []
1566
  for p in plan:
1567
+ annotation_counter +=1
1568
+ page_number +=1
1569
  pdf_document = fitz.open("pdf", p)
1570
  # Get the first page (0-indexed)
1571
  page = pdf_document[0]
1572
  rect = page.rect # Rectangle: contains x0, y0, x1, y1
1573
 
1574
+ width_plan = page.cropbox.width # or: width = rect.x1 - rect.x0
1575
+ height_plan = page.cropbox.height # or: height = rect.y1 - rect.y0
1576
+ width_plan = math.ceil(width_plan)
1577
+ height_plan = math.ceil(height_plan)
 
 
 
 
 
1578
 
1579
  for j in range(len(searcharray)):
1580
  user_input = searcharray[j]
 
1594
  print("mafeesh secondary information")
1595
 
1596
  selected_columns_combined = get_selected_columns_all(dfs, user_input)
1597
+ if selected_columns_combined is None:
1598
+ dfs_normal = extract_tables(schedule)
1599
+ column_indices = get_column_indices_from_dfs_normal(dfs_normal, user_input)
1600
+ if len(dfs) == 1:
1601
+ selected_columns_combined = get_selected_columns_by_index(dfs[0], column_indices)
1602
+ if len(dfs) > 1:
1603
+ index_df = get_df_index(dfs, input_user_clmn_names)
1604
+ selected_columns_combined = get_selected_columns_by_index(dfs[index_df], column_indices)
1605
+ selected_columns_combined = selected_columns_combined.applymap(lambda x: 'N/A' if isinstance(x, str) and x.strip() == '' else x)
1606
+ selected_columns_combined = selected_columns_combined.fillna('N/A')
1607
  kelma = get_st_op_pattern(selected_columns_combined, user_input)
1608
  col_dict = get_similar_colors_all(selected_columns_combined)
1609
  flattened_list = get_flattened_tuples_list_all(col_dict)
 
1623
  width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1624
  cleaned_width = get_cleaned_width(width_info_tobeprinted)
1625
  widths = get_widths_bb_format(cleaned_width, kelma)
1626
+
 
 
1627
 
1628
  secondary_printed_clean = get_secondary_tobeprinted_clean(selected_columns_combined, secondary_tobeprinted, secondary_info)
1629
  all_print = mix_width_secondary(widths, secondary_printed_clean)
1630
+
1631
+ #Count type annotation
1632
+ widht_count, height_count = generate_separate_dimensions(widths)
1633
+ bax = create_bb_bax_secondary(new_data3, widht_count, height_count, secondary_tobeprinted, CountStyles, user_input, page_number, height_plan)
1634
+ bax_annotations_all_inputs.append(bax)
1635
+
 
 
 
 
 
 
 
1636
 
1637
  else:
1638
  locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
1639
  new_data = get_cleaned_data(locations)
1640
+ if len(new_data) == 0:
1641
+ continue
1642
  repeated_labels = get_repeated_labels(locations)
1643
  if kelma == None:
1644
  widths = get_width_info_tobeprinted(new_data)
 
1646
  width_info_tobeprinted = get_width_info_tobeprinted(new_data)
1647
  cleaned_width = get_cleaned_width(width_info_tobeprinted)
1648
  widths = get_widths_bb_format(cleaned_width, kelma)
1649
+ #count type annotation
1650
+ widht_count, height_count = generate_separate_dimensions(widths)
1651
+ bax = create_bb_bax(new_data, widht_count, height_count, CountStyles, page_number, height_plan)
1652
+ bax_annotations_all_inputs.append(bax)
 
 
 
1653
 
1654
+ # if it is not byte type
1655
+ #pdfs_count_type.append(convert_to_bytes(p))
1656
+ pdfs_count_type.append(p)
1657
+ pdf_widths.append(width_plan)
1658
+ pdf_heights.append(height_plan)
1659
+ merged_pdf = merge_pdf_bytes_list(pdfs_count_type)
1660
+ print(f"number of pges of merged_pdf is {len(merged_pdf)} and its type is {type(merged_pdf)}")
1661
+
1662
+ bax_annotation = []
1663
+ for bax_ann in bax_annotations_all_inputs:
1664
+ bax_annotation.extend(bax_ann)
1665
+
1666
+ column_order = ['FireRating', 'AcousticRating', 'Height_', 'Width_']
1667
+
1668
+
1669
+
1670
+ pretty_xml = save_multiple_annotations_count_bax(bax_annotation, 'count_type_Windows.bax', column_order,pdf_widths,pdf_heights,page_number)
1671
+ column_xml = generate_bluebeam_columns_raw(column_order)
1672
+
1673
+
1674
+ ##### SHOULD return pretty_xml, column_xml, merged_pdf
1675
 
1676
+ return pretty_xml, column_xml, merged_pdf
 
 
1677
 
1678
+ ''' not_found = []
1679
  doc2 =fitz.open('pdf',merged_pdf)
1680
  len_doc2 = len(doc2)
1681
  print(f"number of pges of doc2 is {len_doc2} and its type is {type(doc2)}")
 
1706
  x,y,z=int(annot_color.get(v)[0]*255),int(annot_color.get(v)[1]*255),int(annot_color.get(v)[2]*255)
1707
  list1.loc[len(list1)] =[annot.info['content'],annot.info['id'],annot.info['subject'],[x,y,z]]
1708
  return annotatedimg, doc2 , list1, repeated_labels , not_found
1709
+ '''