Marthee committed on
Commit
b36adba
·
verified ·
1 Parent(s): 4e64944

Update Doors_Schedule.py

Browse files
Files changed (1) hide show
  1. Doors_Schedule.py +246 -530
Doors_Schedule.py CHANGED
@@ -1,4 +1,5 @@
1
  from collections import defaultdict
 
2
  import pandas as pd
3
  import random
4
  import math
@@ -116,7 +117,6 @@ def generate_current_table_without_cropping(clm_idx, clmn_name, df):
116
  return selected_df
117
 
118
 
119
-
120
  def crop_rename_table(indices, clmn_name, clmn_idx,df):
121
  #crop_at = (max(set(indices), key=indices.count)) + 1
122
  crop_at = max(indices) + 1
@@ -292,8 +292,10 @@ def get_column_name(user_input_m):
292
  # fixed column names
293
  fixed_list = ["door_id", "door_type", "width", "height"]
294
  for i in range(len(empty_indices)):
295
- if empty_indices[i] == 3:
296
- fixed_list[2] = "structural_opening"
 
 
297
  fixed_list[empty_indices[i]] = ""
298
 
299
  #finalize the column name structure
@@ -455,8 +457,18 @@ def find_text_in_plan(label, x):
455
  return substring_coordinates, words, point_list
456
 
457
 
458
- def get_selected_columns_by_index(df, column_index_list):
459
  selected_df = df.iloc[:, column_index_list]
 
 
 
 
 
 
 
 
 
 
460
  return selected_df
461
 
462
  ## Get the column indices from extract_tables(schedule)
@@ -478,9 +490,10 @@ def get_column_indices_from_dfs_normal(dfs, user_patterns):
478
  cell_columns_appearance = flexible_search(dfs[i], non_empty_info)
479
  cell_matches, col_matches = analyse_cell_columns(cell_columns_appearance)
480
 
481
- if len(cell_matches) == 0 and len(col_matches) == 0:
482
  continue
483
-
 
484
  else:
485
  #IN COLUMNS
486
  if len(col_matches) == len(non_empty_info):
@@ -498,6 +511,73 @@ def get_column_indices_from_dfs_normal(dfs, user_patterns):
498
 
499
  return column_index_list
500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
  # get the index of dataframe that has the maximum column matches in the dfs from model table detection
502
  def get_df_index(dfs, user_patterns):
503
  df_matches = []
@@ -615,6 +695,9 @@ def get_cleaned_data(locations):
615
  # if the value is fractional (e.g. 0.5) it is written as-is; otherwise the decimal point is removed
616
  def get_width_info_tobeprinted(new_data):
617
  width_info_tobeprinted = []
 
 
 
618
  if len(new_data[0]) == 4:
619
  for _,_,_, w in new_data:
620
  #w = re.sub(r",", "", w)
@@ -626,7 +709,8 @@ def get_width_info_tobeprinted(new_data):
626
  h = re.sub(r",", "", h)
627
 
628
  #if w == "N/A":
629
- if w.isalpha():
 
630
  w = w
631
  else:
632
  if float(w).is_integer():
@@ -634,7 +718,8 @@ def get_width_info_tobeprinted(new_data):
634
  else:
635
  w = w
636
  #if h == "N/A":
637
- if h.isalpha():
 
638
  h = h
639
  else:
640
  if float(h).is_integer():
@@ -675,70 +760,14 @@ def get_widths_bb_format(cleaned_width, kelma):
675
  return widths
676
 
677
 
678
- '''def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
679
- width_info_tobeprinted = []
680
- secondary_info_tobeprinted = []
681
-
682
- if len(main_info) == 2 and len(secondary_info) == 1:
683
- for coords, label, acous, color in new_data:
684
- secondary_info_tobeprinted.append(acous)
685
-
686
-
687
- if len(main_info) == 2 and len(secondary_info) == 2:
688
- for coords, label, acous, fire, color in new_data:
689
- secondary_info_tobeprinted.append((acous, fire))
690
-
691
- if len(main_info) == 3 and len(secondary_info) == 1:
692
- for coords, label, width, acous, color in new_data:
693
- width_info_tobeprinted.append(width)
694
- secondary_info_tobeprinted.append(acous)
695
-
696
-
697
- if len(main_info) == 3 and len(secondary_info) == 2:
698
- for coords, label, width, acous, fire, color in new_data:
699
- width_info_tobeprinted.append(width)
700
- secondary_info_tobeprinted.append((acous, fire))
701
-
702
- if len(main_info) == 4 and len(secondary_info) == 1:
703
- for coords, label, width, height, acous, color in new_data:
704
- w = re.sub(r",", "", width)
705
- h = re.sub(r",", "", height)
706
- if float(w).is_integer():
707
- w = int(float(w))
708
- else:
709
- w = w
710
- if float(h).is_integer():
711
- h = int(float(h))
712
- else:
713
- h = h
714
- width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
715
- secondary_info_tobeprinted.append(acous)
716
 
 
 
 
 
 
 
717
 
718
- if len(main_info) == 4 and len(secondary_info) == 2:
719
- for coords, label, width, height, acous, fire, color in new_data:
720
- print(type(width))
721
- print(type(height))
722
- w = re.sub(r",", "", width)
723
- h = re.sub(r",", "", height)
724
- if w == "N/A":
725
- w = w
726
- else:
727
- if float(w).is_integer():
728
- w = int(float(w))
729
- else:
730
- w = w
731
- if h == "N/A":
732
- h = h
733
- else:
734
- if float(h).is_integer():
735
- h = int(float(h))
736
- else:
737
- h = h
738
- width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
739
- secondary_info_tobeprinted.append((acous, fire))
740
- return width_info_tobeprinted, secondary_info_tobeprinted
741
- '''
742
 
743
  def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
744
  width_info_tobeprinted = []
@@ -747,11 +776,13 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
747
  if len(main_info) == 2 and len(secondary_info) == 1:
748
  for coords, label, acous, color in new_data:
749
  secondary_info_tobeprinted.append(acous)
 
750
 
751
 
752
  if len(main_info) == 2 and len(secondary_info) == 2:
753
  for coords, label, acous, fire, color in new_data:
754
  secondary_info_tobeprinted.append((acous, fire))
 
755
 
756
  if len(main_info) == 3 and len(secondary_info) == 1:
757
  for coords, label, width, acous, color in new_data:
@@ -768,7 +799,8 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
768
  for coords, label, width, height, acous, color in new_data:
769
  w = re.sub(r",", "", width)
770
  h = re.sub(r",", "", height)
771
- if w.isalpha():
 
772
  w = w
773
  else:
774
  if float(w).is_integer():
@@ -776,7 +808,8 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
776
  else:
777
  w = w
778
  #if h == "N/A":
779
- if h.isalpha():
 
780
  h = h
781
  else:
782
  if float(h).is_integer():
@@ -784,7 +817,7 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
784
  else:
785
  h = h
786
  width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
787
- secondary_info_tobeprinted.append((acous, fire))
788
 
789
  if len(main_info) == 4 and len(secondary_info) == 2:
790
  for coords, label, width, height, acous, fire, color in new_data:
@@ -793,7 +826,8 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
793
  w = re.sub(r",", "", width)
794
  h = re.sub(r",", "", height)
795
  #if w == "N/A":
796
- if w.isalpha():
 
797
  w = w
798
  else:
799
  if float(w).is_integer():
@@ -801,7 +835,8 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
801
  else:
802
  w = w
803
  #if h == "N/A":
804
- if h.isalpha():
 
805
  h = h
806
  else:
807
  if float(h).is_integer():
@@ -916,6 +951,13 @@ def get_flattened_tuples_list_all(col_dict):
916
 
917
  return flattened_list
918
 
 
 
 
 
 
 
 
919
 
920
  #SECONDARY
921
  def get_cleaned_data_secondary(locations, main_info, secondary_info):
@@ -985,31 +1027,6 @@ def get_cleaned_data_secondary(locations, main_info, secondary_info):
985
 
986
  return new_data
987
 
988
- def get_secondary_tobeprinted_clean(selected_secondary_info, secondary_tobeprinted, secondary_info):
989
- secondary_printed_clean = []
990
- if len(secondary_info) == 1:
991
- if any('acoustic' in col for col in selected_secondary_info.columns):
992
- for acous in secondary_tobeprinted:
993
- new_text = f"acoustic rating: {acous};"
994
- secondary_printed_clean.append(new_text)
995
- if any('fire' in col for col in selected_secondary_info.columns):
996
- for fire in secondary_tobeprinted:
997
- new_text = f"fire rating: {fire};"
998
- secondary_printed_clean.append(new_text)
999
- if len(secondary_info) == 2:
1000
- for fire, acous in secondary_tobeprinted:
1001
- new_text = f"fire rating: {fire}; acoustic rating: {acous};"
1002
- secondary_printed_clean.append(new_text)
1003
- print(new_text)
1004
- return secondary_printed_clean
1005
-
1006
-
1007
- def mix_width_secondary(widths, secondary_printed_clean):
1008
- all_print = []
1009
- for i in range(len(widths)):
1010
- newest_text = f"{widths[i]}; {secondary_printed_clean[i]}"
1011
- all_print.append(newest_text)
1012
- return all_print
1013
 
1014
  def merge_pdf_bytes_list(pdfs):
1015
  writer = PdfWriter()
@@ -1026,331 +1043,6 @@ def merge_pdf_bytes_list(pdfs):
1026
 
1027
  return output_stream.read()
1028
 
1029
- '''def add_bluebeam_count_annotations_secondary(pdf_bytes, locations, main_info, secondary_info):
1030
- pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
1031
- pdf_document = fitz.open("pdf", pdf_stream.read()) # Open PDF in memory
1032
-
1033
- page = pdf_document[0] # First page
1034
- if len(main_info) == 2 and len(secondary_info) == 1:
1035
- for loc in locations:
1036
- coor, lbl, acous, clr = loc
1037
- clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
1038
- for cor in coor:
1039
- #Create a Circle annotation (Count Markup)
1040
- annot = page.add_circle_annot(
1041
- fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
1042
- )
1043
-
1044
- #Assign required Bluebeam metadata
1045
- annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
1046
- annot.set_border(width=2) # Border thickness
1047
- annot.set_opacity(1) # Fully visible
1048
-
1049
- #Set annotation properties for Bluebeam Count detection
1050
- annot.set_info("name", lbl) # Unique name for each count
1051
- annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
1052
- annot.set_info("title", lbl) # Optional
1053
- annot.update() # Apply changes
1054
-
1055
- if len(main_info) == 2 and len(secondary_info) == 2:
1056
- for loc in locations:
1057
- coor, lbl, acous, fire, clr = loc
1058
- clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
1059
- for cor in coor:
1060
- #Create a Circle annotation (Count Markup)
1061
- annot = page.add_circle_annot(
1062
- fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
1063
- )
1064
-
1065
- #Assign required Bluebeam metadata
1066
- annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
1067
- annot.set_border(width=2) # Border thickness
1068
- annot.set_opacity(1) # Fully visible
1069
-
1070
- #Set annotation properties for Bluebeam Count detection
1071
- annot.set_info("name", lbl) # Unique name for each count
1072
- annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
1073
- annot.set_info("title", lbl) # Optional
1074
- annot.update() # Apply changes
1075
-
1076
- if len(main_info) == 3 and len(secondary_info) == 1:
1077
- for loc in locations:
1078
- if len(loc) != 5:
1079
- continue
1080
- coor, lbl, w, acous, clr = loc
1081
- clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
1082
- for cor in coor:
1083
- #Create a Circle annotation (Count Markup)
1084
- annot = page.add_circle_annot(
1085
- fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
1086
- )
1087
-
1088
- #Assign required Bluebeam metadata
1089
- annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
1090
- annot.set_border(width=2) # Border thickness
1091
- annot.set_opacity(1) # Fully visible
1092
-
1093
- #Set annotation properties for Bluebeam Count detection
1094
- annot.set_info("name", lbl) # Unique name for each count
1095
- annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
1096
- annot.set_info("title", lbl) # Optional
1097
- annot.update() # Apply changes
1098
-
1099
- if len(main_info) == 3 and len(secondary_info) == 2:
1100
- for loc in locations:
1101
- coor, lbl, w, acous, fire, clr = loc
1102
- clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
1103
- for cor in coor:
1104
- #Create a Circle annotation (Count Markup)
1105
- annot = page.add_circle_annot(
1106
- fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
1107
- )
1108
-
1109
- #Assign required Bluebeam metadata
1110
- annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
1111
- annot.set_border(width=2) # Border thickness
1112
- annot.set_opacity(1) # Fully visible
1113
-
1114
- #Set annotation properties for Bluebeam Count detection
1115
- annot.set_info("name", lbl) # Unique name for each count
1116
- annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
1117
- annot.set_info("title", lbl) # Optional
1118
- annot.update() # Apply changes
1119
-
1120
- if len(main_info) == 4 and len(secondary_info) == 1:
1121
- for loc in locations:
1122
- coor, lbl, w, h, acous, clr = loc
1123
- clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
1124
- for cor in coor:
1125
- #Create a Circle annotation (Count Markup)
1126
- annot = page.add_circle_annot(
1127
- fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
1128
- )
1129
-
1130
- #Assign required Bluebeam metadata
1131
- annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
1132
- annot.set_border(width=2) # Border thickness
1133
- annot.set_opacity(1) # Fully visible
1134
-
1135
- #Set annotation properties for Bluebeam Count detection
1136
- annot.set_info("name", lbl) # Unique name for each count
1137
- annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
1138
- annot.set_info("title", lbl) # Optional
1139
- annot.update() # Apply changes
1140
-
1141
- if len(main_info) == 4 and len(secondary_info) == 2:
1142
- for loc in locations:
1143
- coor, lbl, w, h, acous, fire, clr = loc
1144
- clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
1145
- for cor in coor:
1146
- #Create a Circle annotation (Count Markup)
1147
- annot = page.add_circle_annot(
1148
- fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
1149
- )
1150
-
1151
- #Assign required Bluebeam metadata
1152
- annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
1153
- annot.set_border(width=2) # Border thickness
1154
- annot.set_opacity(1) # Fully visible
1155
-
1156
- #Set annotation properties for Bluebeam Count detection
1157
- annot.set_info("name", lbl) # Unique name for each count
1158
- annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
1159
- annot.set_info("title", lbl) # Optional
1160
- annot.update() # Apply changes
1161
-
1162
-
1163
-
1164
- #Save modified PDF to a variable instead of a file
1165
- output_stream = io.BytesIO()
1166
- pdf_document.save(output_stream)
1167
- pdf_document.close()
1168
-
1169
- return output_stream.getvalue() # Return the modified PDF as bytes
1170
-
1171
- def modify_author_in_pypdf2(pdf_bytes, new_authors):
1172
- pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
1173
- reader = PyPDF2.PdfReader(pdf_stream)
1174
- writer = PyPDF2.PdfWriter()
1175
-
1176
- author_index = 0 # Track author assignment
1177
-
1178
- for page in reader.pages:
1179
- if "/Annots" in page: #Check if annotations exist
1180
- for annot in page["/Annots"]:
1181
- annot_obj = annot.get_object()
1182
- # Assign each annotation a unique author
1183
- if len(new_authors) == 0:
1184
- break
1185
- if author_index < len(new_authors):
1186
- annot_obj.update({"/T": TextStringObject(new_authors[author_index])})#Convert to PdfString
1187
- author_index += 1 # Move to next author
1188
-
1189
- # If authors list is exhausted, keep the last one
1190
- else:
1191
- annot_obj.update({"/T": TextStringObject(new_authors[-1])})
1192
-
1193
- writer.add_page(page)
1194
-
1195
- #Save the modified PDF to a variable
1196
- output_stream = io.BytesIO()
1197
- writer.write(output_stream)
1198
- output_stream.seek(0)
1199
-
1200
- return output_stream.read()
1201
-
1202
- def add_bluebeam_count_annotations(pdf_bytes, locations):
1203
- pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
1204
- pdf_document = fitz.open("pdf", pdf_stream.read()) # Open PDF in memory
1205
-
1206
- page = pdf_document[0] # First page
1207
- print(f"length of locations 0 from not sec presence: {len(locations[0])}")
1208
-
1209
- for loc in locations:
1210
-
1211
- if len(loc) == 3:
1212
- coor, lbl, clr = loc
1213
- clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
1214
- for cor in coor:
1215
- #Create a Circle annotation (Count Markup)
1216
- annot = page.add_circle_annot(
1217
- fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
1218
- )
1219
-
1220
- #Assign required Bluebeam metadata
1221
- annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
1222
- annot.set_border(width=2) # Border thickness
1223
- annot.set_opacity(1) # Fully visible
1224
-
1225
- #Set annotation properties for Bluebeam Count detection
1226
- annot.set_info("name", lbl) # Unique name for each count
1227
- annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
1228
- annot.set_info("title", lbl) # Optional
1229
- annot.update() # Apply changes
1230
- if len(loc) == 4:
1231
- coor, lbl, clr,w = loc
1232
- clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
1233
- for cor in coor:
1234
- #Create a Circle annotation (Count Markup)
1235
- annot = page.add_circle_annot(
1236
- fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
1237
- )
1238
-
1239
- #Assign required Bluebeam metadata
1240
- annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
1241
- annot.set_border(width=2) # Border thickness
1242
- annot.set_opacity(1) # Fully visible
1243
-
1244
- #Set annotation properties for Bluebeam Count detection
1245
- annot.set_info("name", lbl) # Unique name for each count
1246
- annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
1247
- annot.set_info("title", lbl) # Optional
1248
- annot.update() # Apply changes
1249
-
1250
- if len(loc) == 5:
1251
- coor, lbl, clr,w,h = loc
1252
- clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
1253
- for cor in coor:
1254
- #Create a Circle annotation (Count Markup)
1255
- annot = page.add_circle_annot(
1256
- fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
1257
- )
1258
-
1259
- #Assign required Bluebeam metadata
1260
- annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
1261
- annot.set_border(width=2) # Border thickness
1262
- annot.set_opacity(1) # Fully visible
1263
-
1264
- #Set annotation properties for Bluebeam Count detection
1265
- annot.set_info("name", lbl) # Unique name for each count
1266
- annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
1267
- annot.set_info("title", lbl) # Optional
1268
- annot.update() # Apply changes
1269
-
1270
- #Save modified PDF to a variable instead of a file
1271
- output_stream = io.BytesIO()
1272
- pdf_document.save(output_stream)
1273
- pdf_document.close()
1274
-
1275
- return output_stream.getvalue() # Return the modified PDF as bytes
1276
-
1277
- def modify_author_in_pypdf2(pdf_bytes, new_authors):
1278
- pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
1279
- reader = PyPDF2.PdfReader(pdf_stream)
1280
- writer = PyPDF2.PdfWriter()
1281
-
1282
- author_index = 0 # Track author assignment
1283
-
1284
- for page in reader.pages:
1285
- if "/Annots" in page: #Check if annotations exist
1286
- for annot in page["/Annots"]:
1287
- annot_obj = annot.get_object()
1288
- # Assign each annotation a unique author
1289
- if len(new_authors) == 0:
1290
- break
1291
- if author_index < len(new_authors):
1292
- annot_obj.update({"/T": TextStringObject(new_authors[author_index])})#Convert to PdfString
1293
- author_index += 1 # Move to next author
1294
-
1295
- # If authors list is exhausted, keep the last one
1296
- else:
1297
- annot_obj.update({"/T": TextStringObject(new_authors[-1])})
1298
-
1299
- writer.add_page(page)
1300
-
1301
- #Save the modified PDF to a variable
1302
- output_stream = io.BytesIO()
1303
- writer.write(output_stream)
1304
- output_stream.seek(0)
1305
-
1306
- return output_stream.read()
1307
-
1308
- def merge_pdf_bytes_list(pdfs):
1309
- writer = PdfWriter()
1310
-
1311
- for pdf_bytes in pdfs:
1312
- pdf_stream = io.BytesIO(pdf_bytes)
1313
- reader = PdfReader(pdf_stream)
1314
- for page in reader.pages:
1315
- writer.add_page(page)
1316
-
1317
- output_stream = io.BytesIO()
1318
- writer.write(output_stream)
1319
- output_stream.seek(0)
1320
-
1321
- return output_stream.read()
1322
-
1323
- def process_pdf_secondary(input_pdf_path, output_pdf_path, locations, new_authors, main_info, secondary_info):
1324
-
1325
- if isinstance(input_pdf_path, bytes):
1326
- original_pdf_bytes = input_pdf_path
1327
- else:
1328
- with open(input_pdf_path, "rb") as file:
1329
- original_pdf_bytes = file.read()
1330
-
1331
- #Add Bluebeam-compatible count annotations
1332
- annotated_pdf_bytes = add_bluebeam_count_annotations_secondary(original_pdf_bytes, locations, main_info, secondary_info)
1333
-
1334
- #Modify author field using PyPDF2
1335
- final_pdf_bytes = modify_author_in_pypdf2(annotated_pdf_bytes, new_authors)
1336
-
1337
- return final_pdf_bytes
1338
-
1339
- def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
1340
- #Load original PDF
1341
- if isinstance(input_pdf_path, bytes):
1342
- original_pdf_bytes = input_pdf_path
1343
- else:
1344
- with open(input_pdf_path, "rb") as file:
1345
- original_pdf_bytes = file.read()
1346
-
1347
- #Add Bluebeam-compatible count annotations
1348
- annotated_pdf_bytes = add_bluebeam_count_annotations(original_pdf_bytes, locations)
1349
-
1350
- #Modify author field using PyPDF2
1351
- final_pdf_bytes = modify_author_in_pypdf2(annotated_pdf_bytes, new_authors)
1352
- return final_pdf_bytes
1353
- '''
1354
 
1355
  def calculate_bounding_rect_count(vertices,padding):
1356
  x, y = vertices[0]
@@ -1365,13 +1057,6 @@ def rgb_string_to_hex(rgb_string):
1365
  return '#{:02X}{:02X}{:02X}'.format(int(r * 255), int(g * 255), int(b * 255))
1366
 
1367
 
1368
-
1369
-
1370
-
1371
-
1372
-
1373
-
1374
-
1375
  def generate_annotation_xml_block_count(vertices, area_text, author, custom_data: dict, column_order: list, index: int,
1376
  label: str = '',height:str='',width:str='',
1377
  color:str='',countstyle:str='',countsize:str=''):
@@ -1528,6 +1213,17 @@ def mirrored_points(x, y, height_plan):
1528
  mirrored = []
1529
  mirrored.append([x, height_plan - y])
1530
  return mirrored
 
 
 
 
 
 
 
 
 
 
 
1531
 
1532
  # Modified to adjust mirrored points
1533
  def create_bb_bax_secondary(new_data, widthat, heightat, secondary_tobeprinted, CountStyles, input_user_clmn_names, page_number, height_plan):
@@ -1537,7 +1233,8 @@ def create_bb_bax_secondary(new_data, widthat, heightat, secondary_tobeprinted,
1537
  R = str(float(r/255))
1538
  G = str(float(g/255))
1539
  B = str(float(b/255))
1540
- vertix = mirrored_points(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
 
1541
  if input_user_clmn_names[4] and input_user_clmn_names[5]:
1542
  bax_annotations.append({
1543
  'vertices': vertix,
@@ -1559,7 +1256,7 @@ def create_bb_bax_secondary(new_data, widthat, heightat, secondary_tobeprinted,
1559
  'vertices': vertix,
1560
  'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
1561
  'author': 'ADR',
1562
- 'custom_data': {'FireRating': secondary_tobeprinted[i][0], 'AcousticRating': secondary_tobeprinted[i][1], 'Height_': heightat[i],'Width_': widthat[i]} , #identify custom colums here as( Column name: Text to add )
1563
  'label': new_data[i][1], #change label to whatever u want
1564
  'Height': heightat[i], #for tameem to change - i added any values'
1565
  'Width':widthat[i],
@@ -1573,7 +1270,7 @@ def create_bb_bax_secondary(new_data, widthat, heightat, secondary_tobeprinted,
1573
  'vertices': vertix,
1574
  'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
1575
  'author': 'ADR',
1576
- 'custom_data': {'FireRating': secondary_tobeprinted[i][0], 'AcousticRating': secondary_tobeprinted[i][1], 'Height_': heightat[i],'Width_': widthat[i]} , #identify custom colums here as( Column name: Text to add )
1577
  'label': new_data[i][1], #change label to whatever u want
1578
  'Height': heightat[i], #for tameem to change - i added any values'
1579
  'Width':widthat[i],
@@ -1596,7 +1293,8 @@ def create_bb_bax(new_data, widthat, heightat, CountStyles, page_number, height_
1596
  G = str(float(g/255))
1597
  B = str(float(b/255))
1598
 
1599
- vertix = mirrored_points(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
 
1600
  bax_annotations.append({
1601
  'vertices': vertix,
1602
  'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
@@ -1613,6 +1311,7 @@ def create_bb_bax(new_data, widthat, heightat, CountStyles, page_number, height_
1613
 
1614
  return bax_annotations
1615
 
 
1616
  #Handle missing widths or heights in some rows
1617
  def generate_separate_dimensions(widths):
1618
  widthat = []
@@ -1648,14 +1347,6 @@ def generate_bluebeam_columns_raw(column_names):
1648
  return tostring(root, encoding="unicode", method="xml")
1649
 
1650
 
1651
- # Example usage
1652
- # column_names = ["mycustomcolumn"]
1653
- #column_xml = generate_bluebeam_columns_raw(column_order)
1654
-
1655
- #with open("count_type_Windows.xml", "w", encoding="utf-8") as f:
1656
- # f.write(column_xml)
1657
-
1658
- #print(column_xml)
1659
 
1660
  def mainRun(schedule, plan, searcharray):
1661
  print("mainRun is RUNNING")
@@ -1668,11 +1359,12 @@ def mainRun(schedule, plan, searcharray):
1668
  p1_type = type(plan[0])
1669
  print(f"el mawgood fe p[0]: {p1_type}")
1670
 
1671
- print(f"search array: {searcharray}")
1672
-
1673
  #dfs = extract_tables(schedule)
1674
  print(f"type of schedule: {type(schedule)}")
1675
- dfs = extract_tables_model(schedule)
 
1676
  pdf_widths = []
1677
  pdf_heights = []
1678
  pdfs_count_type = []
@@ -1681,6 +1373,9 @@ def mainRun(schedule, plan, searcharray):
1681
  page_number = 0
1682
  bax_annotations_all_inputs = [] #for the same plan
1683
  #pdfs = []
 
 
 
1684
  for p in plan:
1685
  annotation_counter +=1
1686
  page_number +=1
@@ -1691,82 +1386,98 @@ def mainRun(schedule, plan, searcharray):
1691
 
1692
  width_plan = page.cropbox.width # or: width = rect.x1 - rect.x0
1693
  height_plan = page.cropbox.height # or: height = rect.y1 - rect.y0
1694
- width_plan = math.ceil(width_plan)
1695
- height_plan = math.ceil(height_plan)
1696
-
1697
- for j in range(len(searcharray)):
1698
- user_input = searcharray[j]
1699
-
1700
- secondary_presence = False
1701
- if user_input[4] or user_input[5]:
1702
- secondary_presence = True
1703
- main_info_, secondary_info_ = separate_main_secondary(user_input)
1704
- main_info = [item for item in main_info_ if item]
1705
- secondary_info = [item for item in secondary_info_ if item]
1706
- print("feh secondary information")
1707
- if user_input[4]:
1708
- print("Fire rate mawgooda")
1709
- if user_input[5]:
1710
- print("Acoustic Rate mawgooda")
1711
- else:
1712
- print("mafeesh secondary information")
1713
-
1714
- selected_columns_combined = get_selected_columns_all(dfs, user_input)
1715
- if selected_columns_combined is None:
1716
- dfs_normal = extract_tables(schedule)
1717
- column_indices = get_column_indices_from_dfs_normal(dfs_normal, user_input)
1718
- if len(dfs) == 1:
1719
- selected_columns_combined = get_selected_columns_by_index(dfs[0], column_indices)
1720
- if len(dfs) > 1:
1721
- index_df = get_df_index(dfs, input_user_clmn_names)
1722
- selected_columns_combined = get_selected_columns_by_index(dfs[index_df], column_indices)
1723
- selected_columns_combined = selected_columns_combined.applymap(lambda x: 'N/A' if isinstance(x, str) and x.strip() == '' else x)
1724
- selected_columns_combined = selected_columns_combined.fillna('N/A')
1725
- kelma = get_st_op_pattern(selected_columns_combined, user_input)
1726
- col_dict = get_similar_colors_all(selected_columns_combined)
1727
- flattened_list = get_flattened_tuples_list_all(col_dict)
1728
- plan_texts = read_text(p)
1729
-
1730
- if secondary_presence:
1731
- plan_texts = read_text(p)
1732
- locations, not_found = get_word_locations_plan_secondary(flattened_list,plan_texts, main_info, secondary_info)
1733
- new_data3 = get_cleaned_data_secondary(locations,main_info,secondary_info)
1734
-
1735
- repeated_labels = get_repeated_labels(locations)
1736
- if kelma == None:
1737
- widths, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1738
- else:
1739
- width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1740
- cleaned_width = get_cleaned_width(width_info_tobeprinted)
1741
- widths = get_widths_bb_format(cleaned_width, kelma)
1742
-
1743
-
1744
- secondary_printed_clean = get_secondary_tobeprinted_clean(selected_columns_combined, secondary_tobeprinted, secondary_info)
1745
- all_print = mix_width_secondary(widths, secondary_printed_clean)
1746
-
1747
- #Count type annotation
1748
- widht_count, height_count = generate_separate_dimensions(widths)
1749
- bax = create_bb_bax_secondary(new_data3, widht_count, height_count, secondary_tobeprinted, CountStyles, user_input, page_number, height_plan)
1750
- bax_annotations_all_inputs.append(bax)
1751
-
1752
-
1753
- else:
1754
- locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
1755
- new_data = get_cleaned_data(locations)
1756
- if len(new_data) == 0:
1757
- continue
1758
- repeated_labels = get_repeated_labels(locations)
1759
- if kelma == None:
1760
- widths = get_width_info_tobeprinted(new_data)
1761
- else:
1762
- width_info_tobeprinted = get_width_info_tobeprinted(new_data)
1763
- cleaned_width = get_cleaned_width(width_info_tobeprinted)
1764
- widths = get_widths_bb_format(cleaned_width, kelma)
1765
- #count type annotation
1766
- widht_count, height_count = generate_separate_dimensions(widths)
1767
- bax = create_bb_bax(new_data, widht_count, height_count, CountStyles, page_number, height_plan)
1768
- bax_annotations_all_inputs.append(bax)
1769
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1770
  # if it is not byte type
1771
  #pdfs_count_type.append(convert_to_bytes(p))
1772
  pdfs_count_type.append(p)
@@ -1781,14 +1492,21 @@ def mainRun(schedule, plan, searcharray):
1781
 
1782
  column_order = ['FireRating', 'AcousticRating', 'Height_', 'Width_']
1783
 
1784
-
1785
-
 
 
 
 
 
 
 
1786
  pretty_xml = save_multiple_annotations_count_bax(bax_annotation, 'count_type_Windows.bax', column_order,pdf_widths,pdf_heights,page_number)
1787
  column_xml = generate_bluebeam_columns_raw(column_order)
1788
 
1789
-
1790
  ##### SHOULD return pretty_xml, column_xml, merged_pdf
1791
- not_found = []
1792
  annotatedimgs=[]
1793
  doc2 =fitz.open('pdf',merged_pdf)
1794
  len_doc2 = len(doc2)
@@ -1816,6 +1534,4 @@ def mainRun(schedule, plan, searcharray):
1816
  v='stroke'
1817
  x,y,z=int(annot_color.get(v)[0]*255),int(annot_color.get(v)[1]*255),int(annot_color.get(v)[2]*255)
1818
  list1.loc[len(list1)] =[annot.info['content'],annot.info['id'],annot.info['subject'],[x,y,z]]
1819
- return annotatedimgs, doc2 , list1, repeated_labels , not_found, pretty_xml, column_xml
1820
-
1821
- # return annotatedimg, doc2 , list1, repeated_labels , not_found
 
1
  from collections import defaultdict
2
+ from collections import Counter
3
  import pandas as pd
4
  import random
5
  import math
 
117
  return selected_df
118
 
119
 
 
120
  def crop_rename_table(indices, clmn_name, clmn_idx,df):
121
  #crop_at = (max(set(indices), key=indices.count)) + 1
122
  crop_at = max(indices) + 1
 
292
  # fixed column names
293
  fixed_list = ["door_id", "door_type", "width", "height"]
294
  for i in range(len(empty_indices)):
295
+ if empty_indices[i] == 3 and empty_indices[i - 1] == 2:
296
+ fixed_list[2] = ""
297
+ if empty_indices[i] == 3 and not empty_indices[i - 1] == 2:
298
+ fixed_list[2] = "structural_opening"
299
  fixed_list[empty_indices[i]] = ""
300
 
301
  #finalize the column name structure
 
457
  return substring_coordinates, words, point_list
458
 
459
 
460
def get_selected_columns_by_index(df, column_index_list, user_patterns):
    """Select columns of ``df`` by position and rename them from the user patterns.

    Args:
        df: DataFrame holding the schedule table.
        column_index_list: positional column indices passed to ``df.iloc``.
        user_patterns: the user's search entries; they are split with
            ``separate_main_secondary`` and turned into column names with
            ``get_column_name`` / ``get_column_name_secondary``.

    Returns:
        The positional selection of ``df`` with its columns renamed to the
        main names followed by the secondary names.

    NOTE(review): the rename presumably fails if the number of generated
    names differs from the number of selected columns -- confirm with callers.
    """
    picked = df.iloc[:, column_index_list]

    # Rename columns to match the structure of the clr_dictionary
    main_patterns, secondary_patterns = separate_main_secondary(user_patterns)
    main_names = get_column_name(main_patterns)
    secondary_names = get_column_name_secondary(secondary_patterns)
    clmn_name = main_names + secondary_names

    print(f"clmn_name from the function el 3amla moshkela: {clmn_name}")
    picked.columns = clmn_name

    return picked
473
 
474
  ## Get the column indices from extract_tables(schedule)
 
490
  cell_columns_appearance = flexible_search(dfs[i], non_empty_info)
491
  cell_matches, col_matches = analyse_cell_columns(cell_columns_appearance)
492
 
493
+ if len(cell_matches) == 0 and len(col_matches) == 0 and i < len(dfs) - 1:
494
  continue
495
+ elif len(cell_matches) == 0 and len(col_matches) == 0:
496
+ column_index_list = None
497
  else:
498
  #IN COLUMNS
499
  if len(col_matches) == len(non_empty_info):
 
511
 
512
  return column_index_list
513
 
514
+
515
+
516
def find_missing_columns(complete_list, non_complete_list):
    """Return the entries of ``complete_list`` that are absent from ``non_complete_list``.

    Entries are compared after normalisation: all whitespace and any round,
    square or curly brackets are removed and the text is lower-cased.
    Non-string values normalise to the empty string.

    Args:
        complete_list: the entries that are expected to be present.
        non_complete_list: the entries that were actually found.

    Returns:
        The original (un-normalised) entries of ``complete_list`` that have no
        normalised match, in their original order. Falsy entries (for example
        the empty placeholders of the fixed-fields input) are never reported.
    """

    def normalize_text(text):
        # Non-strings (None, NaN, numbers) cannot be matched textually.
        if not isinstance(text, str):
            return ""
        # Strip whitespace and brackets of any type in a single pass.
        return re.sub(r'[\s()\[\]{}]+', '', text).lower()

    # Normalise the found entries once; a set gives O(1) membership tests.
    found = {normalize_text(item) for item in non_complete_list}

    return [
        item
        for item in complete_list
        if item and normalize_text(item) not in found
    ]
551
+
552
+ # Returns the columns the code failed to locate on the schedule
553
def check_missing(dfs, user_patterns):
    """Report which user patterns could not be located in any schedule table.

    Each table in ``dfs`` is searched with ``flexible_search`` for the
    non-empty user patterns. The table with the most cell matches supplies the
    list of found words, which is then compared against ``user_patterns`` with
    ``find_missing_columns``.

    Args:
        dfs: the DataFrames extracted from the schedule.
        user_patterns: the user's search entries (main and secondary fields).

    Returns:
        Whatever ``find_missing_columns`` returns for the patterns versus the
        found words. When ``dfs`` is empty nothing was found, so every
        non-empty pattern is reported instead of raising ``ValueError``.
    """
    # The pattern split does not depend on the table, so do it once.
    main_info, secondary_info = separate_main_secondary(user_patterns)
    non_empty_info = (
        [item for item in main_info if item]
        + [item for item in secondary_info if item]
    )

    all_words = []
    for df in dfs:
        cell_columns_appearance = flexible_search(df, non_empty_info)
        cell_matches, _col_matches = analyse_cell_columns(cell_columns_appearance)
        all_words.append([df.iloc[row, col] for row, col in cell_matches])

    # default=[] covers an empty ``dfs`` (max() of an empty list raises).
    found_words = max(all_words, key=len, default=[])
    print(found_words)

    return find_missing_columns(user_patterns, found_words)
580
+
581
  # get the index of dataframe that has the maximum column matches in the dfs from model table detection
582
  def get_df_index(dfs, user_patterns):
583
  df_matches = []
 
695
  # If the value has a fraction (e.g. 0.5) keep it as written; otherwise drop the decimal point
696
  def get_width_info_tobeprinted(new_data):
697
  width_info_tobeprinted = []
698
+ if len(new_data[0]) < 4:
699
+ for _,_,_, in new_data:
700
+ width_info_tobeprinted.append("N/A mm wide x N/A mm high")
701
  if len(new_data[0]) == 4:
702
  for _,_,_, w in new_data:
703
  #w = re.sub(r",", "", w)
 
709
  h = re.sub(r",", "", h)
710
 
711
  #if w == "N/A":
712
+ #if w.isalpha():
713
+ if is_not_number(w):
714
  w = w
715
  else:
716
  if float(w).is_integer():
 
718
  else:
719
  w = w
720
  #if h == "N/A":
721
+ #if h.isalpha():
722
+ if is_not_number(h):
723
  h = h
724
  else:
725
  if float(h).is_integer():
 
760
  return widths
761
 
762
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
763
 
764
def is_not_number(s: str) -> bool:
    """Return ``True`` when ``s`` cannot be parsed by ``float()``.

    Anything ``float()`` accepts counts as a number: integers, decimals,
    scientific notation, and also the spellings ``"nan"`` and ``"inf"``.
    Values that are not strings or numbers at all (for example ``None``) are
    reported as "not a number" instead of raising ``TypeError``.

    Args:
        s: the cell text to test.

    Returns:
        ``False`` if ``float(s)`` succeeds, ``True`` otherwise.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: unparsable text; TypeError: unsupported type such as None.
        return True
    return False
770
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
771
 
772
  def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
773
  width_info_tobeprinted = []
 
776
  if len(main_info) == 2 and len(secondary_info) == 1:
777
  for coords, label, acous, color in new_data:
778
  secondary_info_tobeprinted.append(acous)
779
+ width_info_tobeprinted.append("N/A mm wide x N/A mm high")
780
 
781
 
782
  if len(main_info) == 2 and len(secondary_info) == 2:
783
  for coords, label, acous, fire, color in new_data:
784
  secondary_info_tobeprinted.append((acous, fire))
785
+ width_info_tobeprinted.append("N/A mm wide x N/A mm high")
786
 
787
  if len(main_info) == 3 and len(secondary_info) == 1:
788
  for coords, label, width, acous, color in new_data:
 
799
  for coords, label, width, height, acous, color in new_data:
800
  w = re.sub(r",", "", width)
801
  h = re.sub(r",", "", height)
802
+ #if w.isalpha():
803
+ if is_not_number(w):
804
  w = w
805
  else:
806
  if float(w).is_integer():
 
808
  else:
809
  w = w
810
  #if h == "N/A":
811
+ #if h.isalpha():
812
+ if is_not_number(h):
813
  h = h
814
  else:
815
  if float(h).is_integer():
 
817
  else:
818
  h = h
819
  width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
820
+ secondary_info_tobeprinted.append((acous))
821
 
822
  if len(main_info) == 4 and len(secondary_info) == 2:
823
  for coords, label, width, height, acous, fire, color in new_data:
 
826
  w = re.sub(r",", "", width)
827
  h = re.sub(r",", "", height)
828
  #if w == "N/A":
829
+ #if w.isalpha():
830
+ if is_not_number(w):
831
  w = w
832
  else:
833
  if float(w).is_integer():
 
835
  else:
836
  w = w
837
  #if h == "N/A":
838
+ #if h.isalpha():
839
+ if is_not_number(h):
840
  h = h
841
  else:
842
  if float(h).is_integer():
 
951
 
952
  return flattened_list
953
 
954
def get_flattened_tuples_list_no_doortype(selected_columns, color=(0, 0, 255)):
    """Turn each row of ``selected_columns`` into a tuple ending with a colour.

    Args:
        selected_columns: DataFrame whose rows are converted with
            ``itertuples(name=None, index=False)``.
        color: the tuple appended as the last element of every row tuple.
            Defaults to ``(0, 0, 255)``, the value that was previously
            hard-coded.

    Returns:
        A list with one plain tuple per row: the row's cell values followed
        by ``color``.
    """
    return [
        row + (color,)
        for row in selected_columns.itertuples(name=None, index=False)
    ]
961
 
962
  #SECONDARY
963
  def get_cleaned_data_secondary(locations, main_info, secondary_info):
 
1027
 
1028
  return new_data
1029
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1030
 
1031
  def merge_pdf_bytes_list(pdfs):
1032
  writer = PdfWriter()
 
1043
 
1044
  return output_stream.read()
1045
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1046
 
1047
  def calculate_bounding_rect_count(vertices,padding):
1048
  x, y = vertices[0]
 
1057
  return '#{:02X}{:02X}{:02X}'.format(int(r * 255), int(g * 255), int(b * 255))
1058
 
1059
 
 
 
 
 
 
 
 
1060
  def generate_annotation_xml_block_count(vertices, area_text, author, custom_data: dict, column_order: list, index: int,
1061
  label: str = '',height:str='',width:str='',
1062
  color:str='',countstyle:str='',countsize:str=''):
 
1213
  mirrored = []
1214
  mirrored.append([x, height_plan - y])
1215
  return mirrored
1216
def point_mupdf_to_pdf(x, y, page):
    """Flip a point vertically within the page and offset it by the mediabox origin.

    Args:
        x: horizontal coordinate of the point.
        y: vertical coordinate of the point.
        page: object exposing ``rect.height`` and ``mediabox.x0`` /
            ``mediabox.y0`` (presumably a PyMuPDF page -- confirm with callers).

    Returns:
        A one-element list ``[[pdf_x, pdf_y]]`` where
        ``pdf_x = mediabox.x0 + x`` and
        ``pdf_y = mediabox.y0 + (rect.height - y)``.

    NOTE(review): the height comes from ``page.rect`` while the offsets come
    from ``page.mediabox``; verify this is intended for pages whose two boxes
    differ.
    """
    page_rect, media = page.rect, page.mediabox
    page_height = float(page_rect.height)

    # Shift by the mediabox origin and mirror y around the page height.
    return [[media.x0 + x, media.y0 + (page_height - y)]]
1227
 
1228
  # Modified to adjust mirrored points
1229
  def create_bb_bax_secondary(new_data, widthat, heightat, secondary_tobeprinted, CountStyles, input_user_clmn_names, page_number, height_plan):
 
1233
  R = str(float(r/255))
1234
  G = str(float(g/255))
1235
  B = str(float(b/255))
1236
+ #vertix = mirrored_points(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
1237
+ vertix = point_mupdf_to_pdf(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
1238
  if input_user_clmn_names[4] and input_user_clmn_names[5]:
1239
  bax_annotations.append({
1240
  'vertices': vertix,
 
1256
  'vertices': vertix,
1257
  'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
1258
  'author': 'ADR',
1259
+ 'custom_data': {'FireRating': secondary_tobeprinted[i], 'AcousticRating': 'N/A', 'Height_': heightat[i],'Width_': widthat[i]} , #identify custom colums here as( Column name: Text to add )
1260
  'label': new_data[i][1], #change label to whatever u want
1261
  'Height': heightat[i], #for tameem to change - i added any values'
1262
  'Width':widthat[i],
 
1270
  'vertices': vertix,
1271
  'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
1272
  'author': 'ADR',
1273
+ 'custom_data': {'FireRating': 'N/A', 'AcousticRating': secondary_tobeprinted[i], 'Height_': heightat[i],'Width_': widthat[i]} , #identify custom colums here as( Column name: Text to add )
1274
  'label': new_data[i][1], #change label to whatever u want
1275
  'Height': heightat[i], #for tameem to change - i added any values'
1276
  'Width':widthat[i],
 
1293
  G = str(float(g/255))
1294
  B = str(float(b/255))
1295
 
1296
+ #vertix = mirrored_points(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
1297
+ vertix = point_mupdf_to_pdf(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
1298
  bax_annotations.append({
1299
  'vertices': vertix,
1300
  'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
 
1311
 
1312
  return bax_annotations
1313
 
1314
+
1315
  #Handle missing widths or heights in some rows
1316
  def generate_separate_dimensions(widths):
1317
  widthat = []
 
1347
  return tostring(root, encoding="unicode", method="xml")
1348
 
1349
 
 
 
 
 
 
 
 
 
1350
 
1351
  def mainRun(schedule, plan, searcharray):
1352
  print("mainRun is RUNNING")
 
1359
  p1_type = type(plan[0])
1360
  print(f"el mawgood fe p[0]: {p1_type}")
1361
 
1362
+
1363
+ print(f"length of search array: {len(searcharray)}")
1364
  #dfs = extract_tables(schedule)
1365
  print(f"type of schedule: {type(schedule)}")
1366
+ print(f"length of schedules: {len(schedule)}")
1367
+
1368
  pdf_widths = []
1369
  pdf_heights = []
1370
  pdfs_count_type = []
 
1373
  page_number = 0
1374
  bax_annotations_all_inputs = [] #for the same plan
1375
  #pdfs = []
1376
+ not_found_list = []
1377
+ repeated_labels_list = []
1378
+ missings = []
1379
  for p in plan:
1380
  annotation_counter +=1
1381
  page_number +=1
 
1386
 
1387
  width_plan = page.cropbox.width # or: width = rect.x1 - rect.x0
1388
  height_plan = page.cropbox.height # or: height = rect.y1 - rect.y0
1389
+ #width_plan = math.ceil(width_plan)
1390
+ #height_plan = math.ceil(height_plan)
1391
+ for k in range(len(schedule)):
1392
+ dfs = extract_tables_model(schedule[k])
1393
+ user_input_this_schedule = searcharray[k]
1394
+ for j in range(len(user_input_this_schedule)):
1395
+ user_input = user_input_this_schedule[j]
1396
+
1397
+ secondary_presence = False
1398
+ if user_input[4] or user_input[5]:
1399
+ secondary_presence = True
1400
+ main_info_, secondary_info_ = separate_main_secondary(user_input)
1401
+ main_info = [item for item in main_info_ if item]
1402
+ secondary_info = [item for item in secondary_info_ if item]
1403
+ print("feh secondary information")
1404
+ if user_input[4]:
1405
+ print("Fire rate mawgooda")
1406
+ if user_input[5]:
1407
+ print("Acoustic Rate mawgooda")
1408
+ else:
1409
+ print("mafeesh secondary information")
1410
+
1411
+ selected_columns_combined = get_selected_columns_all(dfs, user_input)
1412
+ if selected_columns_combined is None:
1413
+ dfs_normal = extract_tables(schedule[k])
1414
+ column_indices = get_column_indices_from_dfs_normal(dfs_normal, user_input)
1415
+ if column_indices is None:
1416
+ missing_clmns = check_missing(dfs, user_input)
1417
+ missing_message = f"{missing_clmns} can't be extracted from table input {j+1} in schedule {k+1}"
1418
+ missings.append(missing_message)
1419
+
1420
+ continue # continue to the next user input
1421
+ if len(dfs) == 1:
1422
+ selected_columns_combined = get_selected_columns_by_index(dfs[0], column_indices, user_input)
1423
+ if len(dfs) > 1:
1424
+ index_df = get_df_index(dfs, user_input)
1425
+ selected_columns_combined = get_selected_columns_by_index(dfs[index_df], column_indices, user_input)
1426
+ selected_columns_combined = selected_columns_combined.applymap(lambda x: 'N/A' if isinstance(x, str) and x.strip() == '' else x)
1427
+ selected_columns_combined = selected_columns_combined.fillna('N/A')
1428
+ selected_columns_combined = selected_columns_combined.replace(r'(?i)\bn/a\b', 'N/A', regex=True)
1429
+ kelma = get_st_op_pattern(selected_columns_combined, user_input)
1430
+ if "door_type" in selected_columns_combined.columns:
1431
+ col_dict = get_similar_colors_all(selected_columns_combined)
1432
+ flattened_list = get_flattened_tuples_list_all(col_dict)
1433
+ else:
1434
+ if secondary_presence:
1435
+ main_info = main_info + [""]
1436
+ flattened_list = get_flattened_tuples_list_no_doortype(selected_columns_combined)
1437
+ plan_texts = read_text(p)
1438
+
1439
+ if secondary_presence:
1440
+ locations, not_found = get_word_locations_plan_secondary(flattened_list,plan_texts, main_info, secondary_info)
1441
+ not_found_list.append(not_found)
1442
+ new_data3 = get_cleaned_data_secondary(locations,main_info,secondary_info)
1443
+
1444
+ repeated_labels = get_repeated_labels(locations)
1445
+ repeated_labels = list(repeated_labels)
1446
+ repeated_labels_list.append(repeated_labels)
1447
+ if kelma == None:
1448
+ widths, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1449
+ else:
1450
+ width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1451
+ cleaned_width = get_cleaned_width(width_info_tobeprinted)
1452
+ widths = get_widths_bb_format(cleaned_width, kelma)
1453
+
1454
+
1455
+ #Count type annotation
1456
+ widht_count, height_count = generate_separate_dimensions(widths)
1457
+ bax = create_bb_bax_secondary(new_data3, widht_count, height_count, secondary_tobeprinted, CountStyles, user_input, page_number, page)
1458
+ bax_annotations_all_inputs.append(bax)
1459
+
1460
+
1461
+ else:
1462
+ locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
1463
+ not_found_list.append(not_found)
1464
+ new_data = get_cleaned_data(locations)
1465
+ if len(new_data) == 0:
1466
+ continue
1467
+ repeated_labels = get_repeated_labels(locations)
1468
+ repeated_labels = list(repeated_labels)
1469
+ repeated_labels_list.append(repeated_labels)
1470
+ if kelma == None:
1471
+ widths = get_width_info_tobeprinted(new_data)
1472
+ else:
1473
+ width_info_tobeprinted = get_width_info_tobeprinted(new_data)
1474
+ cleaned_width = get_cleaned_width(width_info_tobeprinted)
1475
+ widths = get_widths_bb_format(cleaned_width, kelma)
1476
+ #count type annotation
1477
+ widht_count, height_count = generate_separate_dimensions(widths)
1478
+ bax = create_bb_bax(new_data, widht_count, height_count, CountStyles, page_number, page)
1479
+ bax_annotations_all_inputs.append(bax)
1480
+
1481
  # if it is not byte type
1482
  #pdfs_count_type.append(convert_to_bytes(p))
1483
  pdfs_count_type.append(p)
 
1492
 
1493
  column_order = ['FireRating', 'AcousticRating', 'Height_', 'Width_']
1494
 
1495
+ ## Getting the not found in all plans
1496
+ flattened_not_found_list = [item for sublist in not_found_list for item in sublist]
1497
+ counts_not_found = Counter(flattened_not_found_list)
1498
+ not_found_any_plan = []
1499
+ for key, value in counts_not_found.items():
1500
+ if value == len(pdfs_count_type):
1501
+ not_found_any_plan.append(key)
1502
+
1503
+ flattened_repeated_labels_list = [item for sublist in repeated_labels_list for item in sublist]
1504
  pretty_xml = save_multiple_annotations_count_bax(bax_annotation, 'count_type_Windows.bax', column_order,pdf_widths,pdf_heights,page_number)
1505
  column_xml = generate_bluebeam_columns_raw(column_order)
1506
 
1507
+ repeated_labels = flattened_repeated_labels_list
1508
  ##### SHOULD return pretty_xml, column_xml, merged_pdf
1509
+ not_found = not_found_any_plan
1510
  annotatedimgs=[]
1511
  doc2 =fitz.open('pdf',merged_pdf)
1512
  len_doc2 = len(doc2)
 
1534
  v='stroke'
1535
  x,y,z=int(annot_color.get(v)[0]*255),int(annot_color.get(v)[1]*255),int(annot_color.get(v)[2]*255)
1536
  list1.loc[len(list1)] =[annot.info['content'],annot.info['id'],annot.info['subject'],[x,y,z]]
1537
+ return annotatedimgs, doc2 , list1, repeated_labels , not_found, pretty_xml, column_xml