Marthee commited on
Commit
e8a0a36
·
verified ·
1 Parent(s): 5f1c29c

Update Doors_Schedule.py

Browse files
Files changed (1) hide show
  1. Doors_Schedule.py +500 -120
Doors_Schedule.py CHANGED
@@ -92,9 +92,6 @@ def flexible_search(df, search_terms):
92
  return results
93
 
94
 
95
-
96
-
97
-
98
  def generate_current_table_without_cropping(clm_idx, clmn_name, df):
99
  selected_df = df.iloc[:, clm_idx]
100
  print("hello I generated the selected columns table without cropping")
@@ -264,10 +261,131 @@ def get_selected_columns(dfs, user_patterns):
264
 
265
 
266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  # 3ayz akhaleehaa te search fel selected_columns column names nafsaha
268
  # 7ab2a 3ayz a3raf bardo maktooba ezay fel df el 7a2e2ya (akeed za ma el user medakhalha bezabt)
269
  def get_st_op_pattern(selected_columns, user_input):
270
- target = 'structural opening'
271
  if target in selected_columns.columns:
272
  name = user_input[2]
273
  return name
@@ -455,6 +573,7 @@ def get_cleaned_width(width_info_tobeprinted):
455
  cleaned_width.append(clean_dimensions(w))
456
  return cleaned_width
457
 
 
458
  def get_widths_bb_format(cleaned_width, kelma):
459
  pattern = r"\bW(?:idth)?\s*[×x]\s*H(?:eight)?\b"
460
  match = re.search(pattern, kelma)
@@ -566,7 +685,7 @@ def get_similar_colors_secondary(selected_columns_new, user_input):
566
 
567
  return dict(col_dict)
568
 
569
- def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
570
  width_info_tobeprinted = []
571
  secondary_info_tobeprinted = []
572
 
@@ -608,6 +727,50 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
608
  h = int(float(h))
609
  width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
610
  secondary_info_tobeprinted.append((acous, fire))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
611
  return width_info_tobeprinted, secondary_info_tobeprinted
612
 
613
  def get_flattened_tuples_list_SECONDARY(col_dict):
@@ -710,6 +873,60 @@ def get_word_locations_plan_secondary(flattened_list, plan_texts, main_info, sec
710
  locations.append((location, lbl, w, h, clr,acoustic))
711
  return locations, not_found
712
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
713
  #SECONDARY
714
  def get_cleaned_data_secondary(locations, main_info, secondary_info):
715
  processed = defaultdict(int)
@@ -737,7 +954,7 @@ def get_cleaned_data_secondary(locations, main_info, secondary_info):
737
 
738
 
739
  if len(main_info) == 3 and len(secondary_info) == 1:
740
- for coords, label, widht, color, acous in locations:
741
  if len(coords)>1:
742
  index = processed[label] % len(coords) # Round-robin indexing
743
  new_coord = [coords[index]] # Pick the correct coordinate
@@ -797,7 +1014,7 @@ def get_cleaned_data_gpt(locations):
797
 
798
  return new_data
799
 
800
- def get_secondary_tobeprinted_clean(selected_secondary_info, secondary_tobeprinted, secondary_info):
801
  secondary_printed_clean = []
802
  if len(secondary_info) == 1:
803
  if any('Acoustic' in col for col in selected_secondary_info.columns):
@@ -813,8 +1030,28 @@ def get_secondary_tobeprinted_clean(selected_secondary_info, secondary_tobeprint
813
  new_text = f"fire rating: {fire}; acoustic rating: {acous}"
814
  secondary_printed_clean.append(new_text)
815
  print(new_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
816
  return secondary_printed_clean
817
 
 
818
  def mix_width_secondary(widths, secondary_printed_clean):
819
  all_print = []
820
  for i in range(len(widths)):
@@ -829,7 +1066,7 @@ def add_bluebeam_count_annotations_secondary(pdf_bytes, locations, main_info, se
829
  page = pdf_document[0] # First page
830
  if len(main_info) == 2 and len(secondary_info) == 1:
831
  for loc in locations:
832
- coor, lbl, clr, acous = loc
833
  clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
834
  for cor in coor:
835
  #Create a Circle annotation (Count Markup)
@@ -850,7 +1087,7 @@ def add_bluebeam_count_annotations_secondary(pdf_bytes, locations, main_info, se
850
 
851
  if len(main_info) == 2 and len(secondary_info) == 2:
852
  for loc in locations:
853
- coor, lbl, clr, acous, fire = loc
854
  clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
855
  for cor in coor:
856
  #Create a Circle annotation (Count Markup)
@@ -871,7 +1108,9 @@ def add_bluebeam_count_annotations_secondary(pdf_bytes, locations, main_info, se
871
 
872
  if len(main_info) == 3 and len(secondary_info) == 1:
873
  for loc in locations:
874
- coor, lbl, w, clr, acous = loc
 
 
875
  clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
876
  for cor in coor:
877
  #Create a Circle annotation (Count Markup)
@@ -892,7 +1131,7 @@ def add_bluebeam_count_annotations_secondary(pdf_bytes, locations, main_info, se
892
 
893
  if len(main_info) == 3 and len(secondary_info) == 2:
894
  for loc in locations:
895
- coor, lbl, w, clr, acous, fire = loc
896
  clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
897
  for cor in coor:
898
  #Create a Circle annotation (Count Markup)
@@ -913,7 +1152,7 @@ def add_bluebeam_count_annotations_secondary(pdf_bytes, locations, main_info, se
913
 
914
  if len(main_info) == 4 and len(secondary_info) == 1:
915
  for loc in locations:
916
- coor, lbl, w, h, clr, acous = loc
917
  clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
918
  for cor in coor:
919
  #Create a Circle annotation (Count Markup)
@@ -934,7 +1173,7 @@ def add_bluebeam_count_annotations_secondary(pdf_bytes, locations, main_info, se
934
 
935
  if len(main_info) == 4 and len(secondary_info) == 2:
936
  for loc in locations:
937
- coor, lbl, w, h, clr, acous, fire = loc
938
  clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
939
  for cor in coor:
940
  #Create a Circle annotation (Count Markup)
@@ -961,6 +1200,7 @@ def add_bluebeam_count_annotations_secondary(pdf_bytes, locations, main_info, se
961
  pdf_document.close()
962
 
963
  return output_stream.getvalue() # Return the modified PDF as bytes
 
964
  def get_user_input(user_words):
965
  user_input = []
966
  for item in user_words:
@@ -1015,7 +1255,7 @@ import PyPDF2
1015
  import io
1016
  from PyPDF2.generic import TextStringObject # ✅ Required for setting string values
1017
 
1018
- def add_bluebeam_count_annotations(pdf_bytes, locations):
1019
  pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
1020
  pdf_document = fitz.open("pdf", pdf_stream.read()) # Open PDF in memory
1021
 
@@ -1087,6 +1327,83 @@ def add_bluebeam_count_annotations(pdf_bytes, locations):
1087
  pdf_document.close()
1088
 
1089
  return output_stream.getvalue() # Return the modified PDF as bytes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1090
  def get_user_input(user_words):
1091
  user_input = []
1092
  for item in user_words:
@@ -1157,115 +1474,178 @@ def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
1157
  # #Save the final modified PDF to disk
1158
  # with open(output_pdf_path, "wb") as file:
1159
  # file.write(final_pdf_bytes)
1160
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1161
  def mainRun(schedule, plan, searcharray):
1162
- #print(type(plan))
1163
- eltype = type(plan)
1164
- print(f"el type beta3 variable plan:: {eltype}")
1165
- len_plan = len(plan)
1166
- print(f"length of the plan's array is: {len_plan}")
1167
- p1_type = type(plan[0])
1168
- print(f"el mawgood fe p[0]: {p1_type}")
1169
-
1170
- user_input = get_user_input(searcharray)
1171
- secondary_info_presence = False
1172
- if len(user_input) > 4:
1173
- secondary_info_presence = True
1174
- secondary_info = user_input[4:]
1175
- if not user_input[3]:
1176
- main_info = user_input[:3]
1177
- elif len(user_input) > 4:
1178
- main_info = user_input[:4]
1179
-
1180
-
1181
- dfs = extract_tables(schedule)
1182
-
1183
- if secondary_info_presence:
1184
- selected_columns_new = get_selected_columns(dfs, main_info)
1185
- selected_secondary_info = get_secondary_info(dfs, secondary_info)
1186
- selected_secondary_info = selected_secondary_info.applymap(lambda x: 'N/A' if isinstance(x, str) and x.strip() == '' else x)
1187
- selected_columns_combined = pd.concat([selected_columns_new, selected_secondary_info], axis=1)
1188
- kelma = get_st_op_pattern(selected_columns_new, user_input)
1189
- col_dict = get_similar_colors_secondary(selected_columns_combined, user_input)
1190
- flattened_list2 = get_flattened_tuples_list_SECONDARY(col_dict)
1191
 
1192
- pdfs = []
1193
- for p in plan:
1194
- plan_texts = read_text(p)
1195
- locations, not_found = get_word_locations_plan_secondary(flattened_list2,plan_texts, main_info, secondary_info)
1196
- new_data3 = get_cleaned_data_secondary(locations,main_info,secondary_info)
1197
- repeated_labels = get_repeated_labels(locations)
1198
- if kelma == None:
1199
- #widths = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1200
- widths, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1201
- else:
1202
- width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1203
- cleaned_width = get_cleaned_width(width_info_tobeprinted)
1204
- widths = get_widths_bb_format(cleaned_width, kelma)
1205
- secondary_printed_clean = get_secondary_tobeprinted_clean(selected_secondary_info, secondary_tobeprinted, secondary_info)
1206
- all_print = mix_width_secondary(widths, secondary_printed_clean)
1207
- final_pdf_bytes = process_pdf_secondary(p, "final_output_multiple_input_new2.pdf", new_data3, all_print, main_info, secondary_info)
1208
- pdfs.append(final_pdf_bytes)
1209
-
1210
 
1211
- else:
1212
- selected_columns_new = get_selected_columns(dfs, user_input)
1213
- kelma = get_st_op_pattern(selected_columns_new, user_input)
1214
- col_dict = get_similar_colors(selected_columns_new)
1215
- flattened_list = get_flattened_tuples_list(col_dict)
1216
-
1217
- pdfs = []
1218
- for p in plan:
1219
- print(f" p in plan is {type(p)}")
1220
- print(p)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1221
  plan_texts = read_text(p)
1222
- locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
1223
- new_data = get_cleaned_data(locations)
1224
- repeated_labels = get_repeated_labels(locations)
1225
- if kelma == None:
1226
- widths = get_width_info_tobeprinted(new_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1227
  else:
1228
- width_info_tobeprinted = get_width_info_tobeprinted(new_data)
1229
- cleaned_width = get_cleaned_width(width_info_tobeprinted)
1230
- widths = get_widths_bb_format(cleaned_width, kelma)
1231
- final_pdf_bytes = process_pdf(p, "final_output_width.pdf", new_data, widths)
1232
- pdfs.append(final_pdf_bytes)
1233
-
1234
- if selected_columns_new.shape[1] == 2:
1235
- widths = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1236
 
1237
- merged_pdf = merge_pdf_bytes_list(pdfs)
1238
- print(f"number of pges of merged_pdf is {len(merged_pdf)} and its type is {type(merged_pdf)}")
1239
- not_found = []
1240
- doc2 =fitz.open('pdf',merged_pdf)
1241
- len_doc2 = len(doc2)
1242
- print(f"number of pges of doc2 is {len_doc2} and its type is {type(doc2)}")
1243
- page=doc2[0]
1244
- pix = page.get_pixmap() # render page to an image
1245
- pl=Image.frombytes('RGB', [pix.width,pix.height],pix.samples)
1246
- img=np.array(pl)
1247
- annotatedimg = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
1248
-
1249
-
1250
- list1=pd.DataFrame(columns=['content', 'id', 'subject','color'])
1251
-
1252
- # for page in doc:
1253
- for page in doc2:
1254
- # Iterate through annotations on the page
1255
- for annot in page.annots():
1256
- # Get the color of the annotation
1257
- annot_color = annot.colors
1258
- if annot_color is not None:
1259
- # annot_color is a dictionary with 'stroke' and 'fill' keys
1260
- stroke_color = annot_color.get('stroke') # Border color
1261
- fill_color = annot_color.get('fill') # Fill color
1262
- if fill_color:
1263
- v='fill'
1264
- # print('fill')
1265
- if stroke_color:
1266
- v='stroke'
1267
- x,y,z=int(annot_color.get(v)[0]*255),int(annot_color.get(v)[1]*255),int(annot_color.get(v)[2]*255)
1268
- list1.loc[len(list1)] =[annot.info['content'],annot.info['id'],annot.info['subject'],[x,y,z]]
1269
- return annotatedimg, doc2 , list1, repeated_labels , not_found
1270
-
1271
-
 
92
  return results
93
 
94
 
 
 
 
95
  def generate_current_table_without_cropping(clm_idx, clmn_name, df):
96
  selected_df = df.iloc[:, clm_idx]
97
  print("hello I generated the selected columns table without cropping")
 
261
 
262
 
263
 
264
def separate_main_secondary(input_user_clmn_names):
    """Split the user's column patterns into a main and a secondary group.

    Args:
        input_user_clmn_names: Sliceable sequence of user-supplied patterns.

    Returns:
        A ``(main_info, secondary_info)`` pair: the first four entries and
        everything from index 4 onward.
    """
    split_at = 4
    return input_user_clmn_names[:split_at], input_user_clmn_names[split_at:]
268
+
269
+
270
+ # take main info
271
def get_column_name(user_input_m):
    """Return the standard names for the main columns the user filled in.

    Positions map to ``door_id``, ``door_type``, ``width`` and ``height``.
    An empty entry drops the name at that position. When the height entry
    (index 3) is empty but the width entry is present, the width slot is
    reported as ``structural_opening`` instead.

    Emptiness is tested by truthiness, the same test the caller uses to
    build its list of non-empty patterns, so both lists have equal length.

    Args:
        user_input_m: The main patterns (normally four entries).

    Returns:
        List of standard column names, in positional order.
    """
    fixed_list = ["door_id", "door_type", "width", "height"]
    empty_indices = {i for i, v in enumerate(user_input_m) if not v}

    # Only relabel the width slot when it survives; otherwise a blank width
    # plus blank height would wrongly yield a "structural_opening" column.
    if 3 in empty_indices and 2 not in empty_indices:
        fixed_list[2] = "structural_opening"

    return [name for i, name in enumerate(fixed_list) if i not in empty_indices]
286
+
287
+ # take secondary info
288
def get_column_name_secondary(user_input_m):
    """Return the standard names for the secondary columns the user filled in.

    Position 0 maps to ``fire_rate`` and position 1 to ``acoustic_rate``.
    An empty entry drops the name at that position. Entries beyond the
    second are ignored rather than indexing past the end of the name list.

    Emptiness is tested by truthiness, the same test the caller uses to
    build its list of non-empty patterns.

    Args:
        user_input_m: The secondary patterns.

    Returns:
        List of standard column names, in positional order.
    """
    fixed_list = ["fire_rate", "acoustic_rate"]
    empty_indices = {i for i, v in enumerate(user_input_m) if not v}
    return [name for i, name in enumerate(fixed_list) if i not in empty_indices]
301
+
302
+
303
+ #handling both main and secondary info together in one table
304
def get_selected_columns_all(dfs, user_patterns):
    """Find the requested main and secondary columns in the extracted tables.

    Each table in ``dfs`` is searched for every non-empty user pattern.
    A table where all patterns match column headers is used via
    ``details_in_another_table`` (fewer than 10 rows) or
    ``generate_current_table_without_cropping`` (more than 10 rows).
    A table where all patterns match cell values is cropped and renamed
    with ``crop_rename_table``, and the search stops there.

    Args:
        dfs: Sequence of tables extracted from the schedule.
        user_patterns: The user's patterns, main entries first and secondary
            entries from index 4 onward.

    Returns:
        The selected table, or ``None`` when no table matched.
    """
    # These depend only on user_patterns, so compute them once.
    main_info, secondary_info = separate_main_secondary(user_patterns)
    clmn_name = get_column_name(main_info) + get_column_name_secondary(secondary_info)
    non_empty_info = (
        [item for item in main_info if item]
        + [item for item in secondary_info if item]
    )

    selected_columns_new = None

    for i, df in enumerate(dfs):
        print(f"clmn name: {clmn_name}")
        print(f"non-empty info: {non_empty_info}")

        cell_columns_appearance = flexible_search(df, non_empty_info)
        cell_matches, col_matches = analyse_cell_columns(cell_columns_appearance)

        print(f"length of cell_matches: {len(cell_matches)}")
        print(f"cell_matches: {cell_matches}")
        print(clmn_name)

        if len(cell_matches) == 0 and len(col_matches) == 0:
            print(f"this is df {i}, SEARCH IN ANOTHER DF")
            continue

        # Patterns found in the column headers.
        if len(col_matches) == len(non_empty_info):
            column_index_list = get_column_index(col_matches)
            print(f"this is df {i} mawgooda fel columns, check el df length 3ashan law el details fe table tany")
            print(column_index_list)
            # NOTE(review): a table with exactly 10 rows matches neither
            # branch below; confirm which side should own that case.
            if len(df) < 10:
                # Short table: the details live in another table.
                selected_columns_new = details_in_another_table(clmn_name, column_index_list, df, dfs)
            if len(df) > 10:
                # The helper takes (clm_idx, clmn_name, df); the names list
                # was previously omitted, which raised TypeError.
                selected_columns_new = generate_current_table_without_cropping(
                    column_index_list, clmn_name, df
                )

        # Patterns found inside the cells.
        if len(cell_matches) == len(non_empty_info):
            row_index_list, column_index_list = get_row_column_indices(cell_matches)
            print(f"this is df {i} mawgooda fel cells, check el df length 3ashan law el details fe table tany")
            # NOTE(review): this result is overwritten on the next line; the
            # call is kept in case the helper is relied on for side effects.
            selected_columns_new = details_in_another_table(clmn_name, column_index_list, df, dfs)
            selected_columns_new = crop_rename_table(row_index_list, clmn_name, column_index_list, df)
            break

    return selected_columns_new
383
+
384
+
385
  # Goal: make this search the column names of selected_columns themselves
386
  # I will also need to know how it is written in the actual df (presumably exactly as the user entered it)
387
  def get_st_op_pattern(selected_columns, user_input):
388
+ target = 'structural_opening'
389
  if target in selected_columns.columns:
390
  name = user_input[2]
391
  return name
 
573
  cleaned_width.append(clean_dimensions(w))
574
  return cleaned_width
575
 
576
+
577
  def get_widths_bb_format(cleaned_width, kelma):
578
  pattern = r"\bW(?:idth)?\s*[×x]\s*H(?:eight)?\b"
579
  match = re.search(pattern, kelma)
 
685
 
686
  return dict(col_dict)
687
 
688
+ '''def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
689
  width_info_tobeprinted = []
690
  secondary_info_tobeprinted = []
691
 
 
727
  h = int(float(h))
728
  width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
729
  secondary_info_tobeprinted.append((acous, fire))
730
+ return width_info_tobeprinted, secondary_info_tobeprinted'''
731
+
732
def _whole_mm(value):
    """Convert a dimension cell such as ``"1,200"`` or ``900.0`` to an int."""
    # str() first so numeric cells are accepted as well as text cells.
    return int(float(str(value).replace(",", "")))


def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
    """Collect the dimension text and secondary values for every row.

    Each row of ``new_data`` is laid out as
    ``(coords, label, *dimensions, *secondary, color)``, where the number of
    dimension fields is ``len(main_info) - 2`` and the number of secondary
    fields is ``len(secondary_info)``.

    * 2 main entries: no dimension text is produced.
    * 3 main entries: the single dimension field is passed through as is.
    * 4 main entries: width and height are formatted as
      ``"<w> mm wide x <h> mm high"`` after removing thousands separators.

    One secondary field is appended bare; two are appended as a 2-tuple in
    row order.

    Args:
        new_data: Iterable of row tuples as described above.
        main_info: Non-empty main patterns (2, 3 or 4 entries are handled).
        secondary_info: Non-empty secondary patterns (1 or 2 are handled).

    Returns:
        ``(width_info_tobeprinted, secondary_info_tobeprinted)``. Both lists
        are empty when the main/secondary counts are not handled.

    Raises:
        ValueError: If a row does not have the expected number of fields, or
            a width/height cell is not numeric.
    """
    width_info_tobeprinted = []
    secondary_info_tobeprinted = []

    n_dims = len(main_info) - 2
    n_secondary = len(secondary_info)
    if n_dims not in (0, 1, 2) or n_secondary not in (1, 2):
        return width_info_tobeprinted, secondary_info_tobeprinted

    for coords, label, *fields, color in new_data:
        if len(fields) != n_dims + n_secondary:
            raise ValueError(
                f"expected {n_dims + n_secondary + 3} values per row, "
                f"got {len(fields) + 3}"
            )
        dims, secondary = fields[:n_dims], fields[n_dims:]

        if n_dims == 1:
            width_info_tobeprinted.append(dims[0])
        elif n_dims == 2:
            w, h = _whole_mm(dims[0]), _whole_mm(dims[1])
            width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")

        secondary_info_tobeprinted.append(
            secondary[0] if n_secondary == 1 else tuple(secondary)
        )

    return width_info_tobeprinted, secondary_info_tobeprinted
775
 
776
  def get_flattened_tuples_list_SECONDARY(col_dict):
 
873
  locations.append((location, lbl, w, h, clr,acoustic))
874
  return locations, not_found
875
 
876
+ ### newest, accept combined table
877
+ from collections import defaultdict
878
+ import random
879
+
880
def get_similar_colors_all(selected_columns_new):
    """Group the combined table by ``door_type`` and give each group a color.

    Args:
        selected_columns_new: DataFrame with at least the ``door_type`` and
            ``door_id`` columns.

    Returns:
        A dict keyed by door type. Each entry holds ``'values'`` (the door
        ids of the group), ``'color'`` (a random RGB triple of ints in
        0..255, one per door type) and, for every column of the table, the
        list of that column's values within the group.
    """
    field_names = selected_columns_new.columns.to_list()

    # Draw one random RGB triple per distinct door type.
    palette = {}
    for door_type in selected_columns_new['door_type'].unique():
        palette[door_type] = (
            random.randint(0, 255),
            random.randint(0, 255),
            random.randint(0, 255),
        )

    grouped = {}
    for _, record in selected_columns_new.iterrows():
        door_type = record['door_type']
        if door_type not in grouped:
            # Key order matters to consumers: 'values', 'color', then columns.
            fresh = {'values': [], 'color': None}
            for name in field_names:
                fresh[name] = []
            grouped[door_type] = fresh

        bucket = grouped[door_type]
        bucket['values'].append(record['door_id'])
        for name in field_names:
            bucket[name].append(record.get(name, None))
        bucket['color'] = palette[door_type]

    return grouped
911
+
912
+ ### newest, accept combined table
913
def get_flattened_tuples_list_all(col_dict):
    """Flatten the per-door-type dictionary into one tuple per row.

    For every entry of ``col_dict``, the list-valued fields other than
    ``'door_type'`` and ``'values'`` are read row by row (in dictionary
    order) and the entry's ``'color'`` is appended as the last element.

    Args:
        col_dict: Mapping of door type to a dict of field lists plus
            ``'color'``.

    Returns:
        List of tuples ``(field values..., color)``.
    """
    skipped = ('door_type', 'values')
    rows = []

    for entry in col_dict.values():
        columns = [
            name for name, value in entry.items()
            if isinstance(value, list) and name not in skipped
        ]
        if not columns:
            continue

        # The first list field decides how many rows the entry contributes.
        row_count = len(entry[columns[0]])
        rows.extend(
            tuple(entry[name][idx] for name in columns) + (entry['color'],)
            for idx in range(row_count)
        )

    return rows
928
+
929
+
930
  #SECONDARY
931
  def get_cleaned_data_secondary(locations, main_info, secondary_info):
932
  processed = defaultdict(int)
 
954
 
955
 
956
  if len(main_info) == 3 and len(secondary_info) == 1:
957
+ for coords, label, width, color, acous in locations:
958
  if len(coords)>1:
959
  index = processed[label] % len(coords) # Round-robin indexing
960
  new_coord = [coords[index]] # Pick the correct coordinate
 
1014
 
1015
  return new_data
1016
 
1017
+ '''def get_secondary_tobeprinted_clean(selected_secondary_info, secondary_tobeprinted, secondary_info):
1018
  secondary_printed_clean = []
1019
  if len(secondary_info) == 1:
1020
  if any('Acoustic' in col for col in selected_secondary_info.columns):
 
1030
  new_text = f"fire rating: {fire}; acoustic rating: {acous}"
1031
  secondary_printed_clean.append(new_text)
1032
  print(new_text)
1033
+ return secondary_printed_clean'''
1034
+
1035
+
1036
def get_secondary_tobeprinted_clean(selected_secondary_info, secondary_tobeprinted, secondary_info):
    """Turn raw secondary values into the text printed with each door.

    Args:
        selected_secondary_info: Object exposing ``.columns``; the column
            names decide which label a single secondary value gets.
        secondary_tobeprinted: Bare values when one secondary field is in
            use, ``(fire, acoustic)`` pairs when two are.
        secondary_info: The non-empty secondary patterns; only its length
            (1 or 2) is used.

    Returns:
        List of label strings; empty when ``secondary_info`` has neither
        one nor two entries.
    """
    lines = []
    field_count = len(secondary_info)

    if field_count == 1:
        # Same order as before: acoustic labels first, then fire labels.
        for keyword in ('acoustic', 'fire'):
            if any(keyword in col for col in selected_secondary_info.columns):
                for value in secondary_tobeprinted:
                    lines.append(f"{keyword} rating: {value};")

    if field_count == 2:
        for fire, acous in secondary_tobeprinted:
            text = f"fire rating: {fire}; acoustic rating: {acous};"
            lines.append(text)
            print(text)

    return lines
1053
 
1054
+
1055
  def mix_width_secondary(widths, secondary_printed_clean):
1056
  all_print = []
1057
  for i in range(len(widths)):
 
1066
  page = pdf_document[0] # First page
1067
  if len(main_info) == 2 and len(secondary_info) == 1:
1068
  for loc in locations:
1069
+ coor, lbl, acous, clr = loc
1070
  clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
1071
  for cor in coor:
1072
  #Create a Circle annotation (Count Markup)
 
1087
 
1088
  if len(main_info) == 2 and len(secondary_info) == 2:
1089
  for loc in locations:
1090
+ coor, lbl, acous, fire, clr = loc
1091
  clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
1092
  for cor in coor:
1093
  #Create a Circle annotation (Count Markup)
 
1108
 
1109
  if len(main_info) == 3 and len(secondary_info) == 1:
1110
  for loc in locations:
1111
+ if len(loc) != 5:
1112
+ continue
1113
+ coor, lbl, w, acous, clr = loc
1114
  clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
1115
  for cor in coor:
1116
  #Create a Circle annotation (Count Markup)
 
1131
 
1132
  if len(main_info) == 3 and len(secondary_info) == 2:
1133
  for loc in locations:
1134
+ coor, lbl, w, acous, fire, clr = loc
1135
  clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
1136
  for cor in coor:
1137
  #Create a Circle annotation (Count Markup)
 
1152
 
1153
  if len(main_info) == 4 and len(secondary_info) == 1:
1154
  for loc in locations:
1155
+ coor, lbl, w, h, acous, clr = loc
1156
  clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
1157
  for cor in coor:
1158
  #Create a Circle annotation (Count Markup)
 
1173
 
1174
  if len(main_info) == 4 and len(secondary_info) == 2:
1175
  for loc in locations:
1176
+ coor, lbl, w, h, acous, fire, clr = loc
1177
  clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
1178
  for cor in coor:
1179
  #Create a Circle annotation (Count Markup)
 
1200
  pdf_document.close()
1201
 
1202
  return output_stream.getvalue() # Return the modified PDF as bytes
1203
+
1204
  def get_user_input(user_words):
1205
  user_input = []
1206
  for item in user_words:
 
1255
  import io
1256
  from PyPDF2.generic import TextStringObject # ✅ Required for setting string values
1257
 
1258
+ '''def add_bluebeam_count_annotations(pdf_bytes, locations):
1259
  pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
1260
  pdf_document = fitz.open("pdf", pdf_stream.read()) # Open PDF in memory
1261
 
 
1327
  pdf_document.close()
1328
 
1329
  return output_stream.getvalue() # Return the modified PDF as bytes
1330
+ '''
1331
def add_bluebeam_count_annotations(pdf_bytes, locations):
    """Draw a count circle on the first page for every located door.

    Each entry of ``locations`` is a tuple starting with
    ``(coords, label, color)``, optionally followed by a width and a height
    (3, 4 or 5 elements in total); entries of any other length are skipped.
    ``coords`` is a sequence of ``(x, y)`` points and ``color`` an RGB
    triple in 0..255. A 20x20 circle annotation centred on every point is
    added with the entry's color as stroke and a white fill.

    The annotation info is set with keyword arguments because PyMuPDF's
    ``Annot.set_info`` takes ``content``/``title``/``subject`` fields, not
    ``(key, value)`` pairs. The earlier positional calls only ever wrote the
    content field; content is still set to the label so readers of
    ``annot.info['content']`` see the same value.

    Args:
        pdf_bytes: The PDF document as bytes.
        locations: Sequence of location tuples as described above; may be
            empty.

    Returns:
        The annotated PDF as bytes.
    """
    pdf_stream = io.BytesIO(pdf_bytes)  # Load PDF from bytes
    pdf_document = fitz.open("pdf", pdf_stream.read())  # Open PDF in memory

    try:
        page = pdf_document[0]  # Only the first page is annotated

        # Guarded: indexing locations[0] used to crash when nothing was found.
        if locations:
            print(f"length of locations 0 from not sec presence: {len(locations[0])}")

        for loc in locations:
            if len(loc) not in (3, 4, 5):
                continue

            # Any width/height after the first three fields is not drawn.
            coor, lbl, clr = loc[0], loc[1], loc[2]
            clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)

            for cor in coor:
                # Create a Circle annotation (Count Markup)
                annot = page.add_circle_annot(
                    fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10)
                )

                annot.set_colors(stroke=clr, fill=(1, 1, 1))  # Colored ring, white fill
                annot.set_border(width=2)
                annot.set_opacity(1)

                # Bluebeam uses the "Count" subject for Count markups.
                annot.set_info(content=lbl, title=lbl, subject="Count")
                annot.update()  # Apply changes

        # Save modified PDF to memory instead of a file
        output_stream = io.BytesIO()
        pdf_document.save(output_stream)
    finally:
        # Release the document even if annotating or saving fails.
        pdf_document.close()

    return output_stream.getvalue()  # Return the modified PDF as bytes
1405
+
1406
+
1407
  def get_user_input(user_words):
1408
  user_input = []
1409
  for item in user_words:
 
1474
  # #Save the final modified PDF to disk
1475
  # with open(output_pdf_path, "wb") as file:
1476
  # file.write(final_pdf_bytes)
1477
+
1478
def process_pdf_secondary(input_pdf_path, output_pdf_path, locations, new_authors, main_info, secondary_info):
    """Annotate a plan PDF using secondary info and rewrite the author fields.

    Args:
        input_pdf_path: Either the PDF content as ``bytes`` or a path that
            is opened and read in binary mode.
        output_pdf_path: Accepted but not used by this function.
        locations: Passed to ``add_bluebeam_count_annotations_secondary``.
        new_authors: Passed to ``modify_author_in_pypdf2``.
        main_info: Passed to ``add_bluebeam_count_annotations_secondary``.
        secondary_info: Passed to ``add_bluebeam_count_annotations_secondary``.

    Returns:
        Whatever ``modify_author_in_pypdf2`` returns for the annotated PDF.
    """
    pdf_content = input_pdf_path
    if not isinstance(pdf_content, bytes):
        # A path was given: read the file into memory first.
        with open(pdf_content, "rb") as handle:
            pdf_content = handle.read()

    # Count annotations first, then the author rewrite on the result.
    with_counts = add_bluebeam_count_annotations_secondary(
        pdf_content, locations, main_info, secondary_info
    )
    return modify_author_in_pypdf2(with_counts, new_authors)
1493
+
1494
+
1495
def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
    """Annotate a plan PDF with count markups and rewrite the author fields.

    Args:
        input_pdf_path: Either the PDF content as ``bytes`` or a path that
            is opened and read in binary mode.
        output_pdf_path: Accepted but not used by this function.
        locations: Passed to ``add_bluebeam_count_annotations``.
        new_authors: Passed to ``modify_author_in_pypdf2``.

    Returns:
        Whatever ``modify_author_in_pypdf2`` returns for the annotated PDF.
    """
    pdf_content = input_pdf_path
    if not isinstance(pdf_content, bytes):
        # A path was given: read the file into memory first.
        with open(pdf_content, "rb") as handle:
            pdf_content = handle.read()

    # Count annotations first, then the author rewrite on the result.
    with_counts = add_bluebeam_count_annotations(pdf_content, locations)
    return modify_author_in_pypdf2(with_counts, new_authors)
1509
def _annotation_rgb(colors):
    """Return ``[r, g, b]`` (0-255 ints) for an annotation colour dict.

    The stroke colour is preferred; the fill colour is used only when no
    stroke colour is set. Returns ``None`` when neither is available.
    """
    if not colors:
        return None
    channel = colors.get('stroke') or colors.get('fill')
    if not channel:
        return None
    return [int(channel[0] * 255), int(channel[1] * 255), int(channel[2] * 255)]


def _collect_annotation_table(doc):
    """Build a DataFrame with one row per coloured annotation in ``doc``."""
    table = pd.DataFrame(columns=['content', 'id', 'subject', 'color'])
    for page in doc:
        for annot in page.annots():
            rgb = _annotation_rgb(annot.colors)
            if rgb is None:
                # No stroke and no fill colour: there is nothing to record.
                continue
            table.loc[len(table)] = [
                annot.info['content'],
                annot.info['id'],
                annot.info['subject'],
                rgb,
            ]
    return table


def mainRun(schedule, plan, searcharray):
    """Annotate every plan with the schedule data matched by each search entry.

    For each plan, the search entries are applied one after another: the
    output PDF of one entry is the input of the next, so all annotations end
    up on the same document. The per-plan results are then merged.

    Args:
        schedule: Passed to ``extract_tables`` to obtain the schedule tables.
        plan: Indexable collection of plans; each item is passed to
            ``read_text`` and to ``process_pdf`` / ``process_pdf_secondary``.
        searcharray: Sequence of search entries. Positions 4 and 5 of each
            entry decide whether the "secondary" code path is used (the debug
            output labels them fire rate and acoustic rate).

    Returns:
        A tuple ``(annotatedimg, doc2, list1, repeated_labels, not_found)``:
        the first page of the merged PDF rendered as a BGR image array, the
        merged document opened with ``fitz``, a DataFrame of annotation
        content/id/subject/colour, the repeated labels computed for the last
        processed (plan, search entry) pair, and an empty list.

    Raises:
        ValueError: If ``searcharray`` is empty (there would be no annotated
            PDF to return).
    """
    eltype = type(plan)
    print(f"el type beta3 variable plan:: {eltype}")
    len_plan = len(plan)
    print(f"length of the plan's array is: {len_plan}")
    p1_type = type(plan[0])
    print(f"el mawgood fe p[0]: {p1_type}")

    print(f"search array: {searcharray}")

    # Fail fast: without a search entry no output PDF is ever produced.
    if len(searcharray) == 0:
        raise ValueError("searcharray must contain at least one search entry")

    dfs = extract_tables(schedule)

    repeated_labels = []
    pdfs = []
    for p in plan:
        # The plan text depends only on the plan, so read it once per plan.
        plan_texts = read_text(p)
        # Running document: starts as the plan and is replaced by each
        # search entry's annotated output.
        current_pdf = p

        for user_input in searcharray:
            secondary_presence = bool(user_input[4] or user_input[5])
            if secondary_presence:
                main_info_, secondary_info_ = separate_main_secondary(user_input)
                main_info = [item for item in main_info_ if item]
                secondary_info = [item for item in secondary_info_ if item]
                print("feh secondary information")
                if user_input[4]:
                    print("Fire rate mawgooda")
                if user_input[5]:
                    print("Acoustic Rate mawgooda")
            else:
                print("mafeesh secondary information")

            selected_columns_combined = get_selected_columns_all(dfs, user_input)
            kelma = get_st_op_pattern(selected_columns_combined, user_input)
            col_dict = get_similar_colors_all(selected_columns_combined)
            flattened_list = get_flattened_tuples_list_all(col_dict)

            # Schedules without dimensions (width and height) get no widths.
            has_no_dimensions = selected_columns_combined.shape[1] == 2

            if secondary_presence:
                locations, _ = get_word_locations_plan_secondary(flattened_list,plan_texts, main_info, secondary_info)
                new_data3 = get_cleaned_data_secondary(locations,main_info,secondary_info)
                repeated_labels = get_repeated_labels(locations)

                if kelma is None:
                    widths, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
                else:
                    width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
                    cleaned_width = get_cleaned_width(width_info_tobeprinted)
                    widths = get_widths_bb_format(cleaned_width, kelma)
                if has_no_dimensions:
                    widths = []

                secondary_printed_clean = get_secondary_tobeprinted_clean(selected_columns_combined, secondary_tobeprinted, secondary_info)
                all_print = mix_width_secondary(widths, secondary_printed_clean)

                current_pdf = process_pdf_secondary(current_pdf, "final_output_multiple_input_new2.pdf", new_data3, all_print, main_info, secondary_info)
            else:
                locations, _ = get_word_locations_plan(flattened_list,plan_texts)
                new_data = get_cleaned_data(locations)
                repeated_labels = get_repeated_labels(locations)

                if kelma is None:
                    widths = get_width_info_tobeprinted(new_data)
                else:
                    width_info_tobeprinted = get_width_info_tobeprinted(new_data)
                    cleaned_width = get_cleaned_width(width_info_tobeprinted)
                    widths = get_widths_bb_format(cleaned_width, kelma)
                if has_no_dimensions:
                    widths = []

                current_pdf = process_pdf(current_pdf, "final_output_width_trial.pdf", new_data, widths)

        pdfs.append(current_pdf)

    merged_pdf = merge_pdf_bytes_list(pdfs)
    print(f"number of pges of merged_pdf is {len(merged_pdf)} and its type is {type(merged_pdf)}")

    # NOTE(review): the labels reported as not found by the location helpers
    # are discarded and an empty list is returned, exactly as before --
    # confirm whether callers expect the real list here.
    not_found = []
    doc2 =fitz.open('pdf',merged_pdf)
    len_doc2 = len(doc2)
    print(f"number of pges of doc2 is {len_doc2} and its type is {type(doc2)}")

    # Render the first page of the merged document to a BGR image array.
    page = doc2[0]
    pix = page.get_pixmap()
    pl = Image.frombytes('RGB', [pix.width,pix.height],pix.samples)
    img = np.array(pl)
    annotatedimg = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    list1 = _collect_annotation_table(doc2)
    return annotatedimg, doc2 , list1, repeated_labels , not_found