Marthee committed on
Commit
52d7f75
·
verified ·
1 Parent(s): 1a0d20a

Update Doors_Schedule.py

Browse files
Files changed (1) hide show
  1. Doors_Schedule.py +470 -123
Doors_Schedule.py CHANGED
@@ -92,9 +92,6 @@ def flexible_search(df, search_terms):
92
  return results
93
 
94
 
95
-
96
-
97
-
98
  def generate_current_table_without_cropping(clm_idx, clmn_name, df):
99
  selected_df = df.iloc[:, clm_idx]
100
  print("hello I generated the selected columns table without cropping")
@@ -264,6 +261,127 @@ def get_selected_columns(dfs, user_patterns):
264
 
265
 
266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  # 3ayz akhaleehaa te search fel selected_columns column names nafsaha
268
  # 7ab2a 3ayz a3raf bardo maktooba ezay fel df el 7a2e2ya (akeed za ma el user medakhalha bezabt)
269
  def get_st_op_pattern(selected_columns, user_input):
@@ -420,29 +538,22 @@ def get_cleaned_data(locations):
420
  return new_data
421
 
422
 
423
- # law 0.5 maslan tetkatab we law mesh keda yesheel el decimal point
424
  def get_width_info_tobeprinted(new_data):
425
  width_info_tobeprinted = []
426
  if len(new_data[0]) == 4:
427
  for _,_,_, w in new_data:
428
- #w = re.sub(r",", "", w)
429
- #w = int(float(w))
430
  width_info_tobeprinted.append(w)
431
  if len(new_data[0]) == 5:
432
  for _,_,_, w,h in new_data:
433
  w = re.sub(r",", "", w)
434
  h = re.sub(r",", "", h)
435
- if float(w).is_integer():
436
- w = int(float(w))
437
- else:
438
- w = w
439
- if float(h).is_integer():
440
- h = int(float(h))
441
- else:
442
- h = h
443
  width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
444
  return width_info_tobeprinted
445
-
446
  def clean_dimensions(text):
447
  # Remove commas and "mm"
448
  text = re.sub(r'[,\s]*mm', '', text) # Remove "mm" with optional spaces or commas before it
@@ -466,9 +577,9 @@ def get_widths_bb_format(cleaned_width, kelma):
466
  width_name = int(float(width_name))
467
  height_name = int(float(height_name))
468
  if match:
469
- full_text = f"{width_name} mm wide x {height_name} mm high"
470
  else:
471
- full_text = f"{height_name} mm wide x {width_name} mm high"
472
  widths.append(full_text)
473
  return widths
474
 
@@ -710,6 +821,60 @@ def get_word_locations_plan_secondary(flattened_list, plan_texts, main_info, sec
710
  locations.append((location, lbl, w, h, clr,acoustic))
711
  return locations, not_found
712
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
713
  #SECONDARY
714
  def get_cleaned_data_secondary(locations, main_info, secondary_info):
715
  processed = defaultdict(int)
@@ -797,7 +962,7 @@ def get_cleaned_data_gpt(locations):
797
 
798
  return new_data
799
 
800
- def get_secondary_tobeprinted_clean(selected_secondary_info, secondary_tobeprinted, secondary_info):
801
  secondary_printed_clean = []
802
  if len(secondary_info) == 1:
803
  if any('Acoustic' in col for col in selected_secondary_info.columns):
@@ -813,6 +978,24 @@ def get_secondary_tobeprinted_clean(selected_secondary_info, secondary_tobeprint
813
  new_text = f"fire rating: {fire}; acoustic rating: {acous}"
814
  secondary_printed_clean.append(new_text)
815
  print(new_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
816
  return secondary_printed_clean
817
 
818
  def mix_width_secondary(widths, secondary_printed_clean):
@@ -913,7 +1096,7 @@ def add_bluebeam_count_annotations_secondary(pdf_bytes, locations, main_info, se
913
 
914
  if len(main_info) == 4 and len(secondary_info) == 1:
915
  for loc in locations:
916
- coor, lbl, w, h, clr, acous = loc
917
  clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
918
  for cor in coor:
919
  #Create a Circle annotation (Count Markup)
@@ -961,6 +1144,7 @@ def add_bluebeam_count_annotations_secondary(pdf_bytes, locations, main_info, se
961
  pdf_document.close()
962
 
963
  return output_stream.getvalue() # Return the modified PDF as bytes
 
964
  def get_user_input(user_words):
965
  user_input = []
966
  for item in user_words:
@@ -1087,6 +1271,8 @@ def add_bluebeam_count_annotations(pdf_bytes, locations):
1087
  pdf_document.close()
1088
 
1089
  return output_stream.getvalue() # Return the modified PDF as bytes
 
 
1090
  def get_user_input(user_words):
1091
  user_input = []
1092
  for item in user_words:
@@ -1157,115 +1343,276 @@ def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
1157
  # #Save the final modified PDF to disk
1158
  # with open(output_pdf_path, "wb") as file:
1159
  # file.write(final_pdf_bytes)
1160
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1161
  def mainRun(schedule, plan, searcharray):
1162
- #print(type(plan))
1163
- eltype = type(plan)
1164
- print(f"el type beta3 variable plan:: {eltype}")
1165
- len_plan = len(plan)
1166
- print(f"length of the plan's array is: {len_plan}")
1167
- p1_type = type(plan[0])
1168
- print(f"el mawgood fe p[0]: {p1_type}")
1169
-
1170
- user_input = get_user_input(searcharray)
1171
- secondary_info_presence = False
1172
- if len(user_input) > 4:
1173
- secondary_info_presence = True
1174
- secondary_info = user_input[4:]
1175
- if not user_input[3]:
1176
- main_info = user_input[:3]
1177
- elif len(user_input) > 4:
1178
- main_info = user_input[:4]
1179
-
1180
-
1181
- dfs = extract_tables(schedule)
1182
-
1183
- if secondary_info_presence:
1184
- selected_columns_new = get_selected_columns(dfs, main_info)
1185
- selected_secondary_info = get_secondary_info(dfs, secondary_info)
1186
- selected_secondary_info = selected_secondary_info.applymap(lambda x: 'N/A' if isinstance(x, str) and x.strip() == '' else x)
1187
- selected_columns_combined = pd.concat([selected_columns_new, selected_secondary_info], axis=1)
1188
- kelma = get_st_op_pattern(selected_columns_new, user_input)
1189
- col_dict = get_similar_colors_secondary(selected_columns_combined, user_input)
1190
- flattened_list2 = get_flattened_tuples_list_SECONDARY(col_dict)
1191
 
1192
- pdfs = []
1193
- for p in plan:
1194
- plan_texts = read_text(p)
1195
- locations, not_found = get_word_locations_plan_secondary(flattened_list2,plan_texts, main_info, secondary_info)
1196
- new_data3 = get_cleaned_data_secondary(locations,main_info,secondary_info)
1197
- repeated_labels = get_repeated_labels(locations)
1198
- if kelma == None:
1199
- #widths = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1200
- widths, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1201
  else:
1202
- width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1203
- cleaned_width = get_cleaned_width(width_info_tobeprinted)
1204
- widths = get_widths_bb_format(cleaned_width, kelma)
1205
- secondary_printed_clean = get_secondary_tobeprinted_clean(selected_secondary_info, secondary_tobeprinted, secondary_info)
1206
- all_print = mix_width_secondary(widths, secondary_printed_clean)
1207
- final_pdf_bytes = process_pdf_secondary(p, "final_output_multiple_input_new2.pdf", new_data3, all_print, main_info, secondary_info)
1208
- pdfs.append(final_pdf_bytes)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1209
 
1210
-
1211
- else:
1212
- selected_columns_new = get_selected_columns(dfs, user_input)
1213
- kelma = get_st_op_pattern(selected_columns_new, user_input)
1214
- col_dict = get_similar_colors(selected_columns_new)
1215
- flattened_list = get_flattened_tuples_list(col_dict)
1216
-
1217
- pdfs = []
1218
- for p in plan:
1219
- print(f" p in plan is {type(p)}")
1220
- print(p)
1221
- plan_texts = read_text(p)
1222
- locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
1223
- new_data = get_cleaned_data(locations)
1224
- repeated_labels = get_repeated_labels(locations)
1225
- if kelma == None:
1226
- widths = get_width_info_tobeprinted(new_data)
 
 
 
 
 
 
 
 
 
 
1227
  else:
1228
- width_info_tobeprinted = get_width_info_tobeprinted(new_data)
1229
- cleaned_width = get_cleaned_width(width_info_tobeprinted)
1230
- widths = get_widths_bb_format(cleaned_width, kelma)
1231
- final_pdf_bytes = process_pdf(p, "final_output_width.pdf", new_data, widths)
1232
- pdfs.append(final_pdf_bytes)
1233
-
1234
- if selected_columns_new.shape[1] == 2:
1235
- widths = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1236
 
1237
- merged_pdf = merge_pdf_bytes_list(pdfs)
1238
- print(f"number of pges of merged_pdf is {len(merged_pdf)} and its type is {type(merged_pdf)}")
1239
- not_found = []
1240
- doc2 =fitz.open('pdf',merged_pdf)
1241
- len_doc2 = len(doc2)
1242
- print(f"number of pges of doc2 is {len_doc2} and its type is {type(doc2)}")
1243
- page=doc2[0]
1244
- pix = page.get_pixmap() # render page to an image
1245
- pl=Image.frombytes('RGB', [pix.width,pix.height],pix.samples)
1246
- img=np.array(pl)
1247
- annotatedimg = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
1248
-
1249
-
1250
- list1=pd.DataFrame(columns=['content', 'id', 'subject','color'])
1251
-
1252
- # for page in doc:
1253
- for page in doc2:
1254
- # Iterate through annotations on the page
1255
- for annot in page.annots():
1256
- # Get the color of the annotation
1257
- annot_color = annot.colors
1258
- if annot_color is not None:
1259
- # annot_color is a dictionary with 'stroke' and 'fill' keys
1260
- stroke_color = annot_color.get('stroke') # Border color
1261
- fill_color = annot_color.get('fill') # Fill color
1262
- if fill_color:
1263
- v='fill'
1264
- # print('fill')
1265
- if stroke_color:
1266
- v='stroke'
1267
- x,y,z=int(annot_color.get(v)[0]*255),int(annot_color.get(v)[1]*255),int(annot_color.get(v)[2]*255)
1268
- list1.loc[len(list1)] =[annot.info['content'],annot.info['id'],annot.info['subject'],[x,y,z]]
1269
- return annotatedimg, doc2 , list1, repeated_labels , not_found
 
 
 
 
 
 
 
 
 
1270
 
1271
 
 
92
  return results
93
 
94
 
 
 
 
95
  def generate_current_table_without_cropping(clm_idx, clmn_name, df):
96
  selected_df = df.iloc[:, clm_idx]
97
  print("hello I generated the selected columns table without cropping")
 
261
 
262
 
263
 
264
def separate_main_secondary(input_user_clmn_names):
    """Split the user's column patterns into a main and a secondary group.

    The first four entries form the main group; everything after them forms
    the secondary group. Both parts are produced by slicing, so a short
    input simply yields shorter (possibly empty) groups.

    Returns:
        Tuple ``(main_info, secondary_info)``.
    """
    split_at = 4
    return input_user_clmn_names[:split_at], input_user_clmn_names[split_at:]
268
+
269
+
270
+ # take main info
271
def get_column_name(user_input_m):
    """Return the standard column names for the main patterns the user filled in.

    The positions of ``user_input_m`` map to ``door_id``, ``door_type``,
    ``width`` and ``height``. A position holding an empty string is dropped
    from the result. When the height is empty but the width is present, the
    width position is reported as ``structural_opening`` instead.

    Args:
        user_input_m: the main search patterns (up to four entries).

    Returns:
        List of standard column names, one per non-empty pattern.
    """
    fixed_list = ["door_id", "door_type", "width", "height"]
    empty_indices = {i for i, v in enumerate(user_input_m) if v == ''}

    # Only rename the width slot when a width pattern was actually supplied.
    # Renaming it unconditionally would bring back a third name when both
    # dimensions are missing, leaving more names than patterns.
    if 3 in empty_indices and 2 not in empty_indices:
        fixed_list[2] = "structural_opening"

    return [name for i, name in enumerate(fixed_list) if i not in empty_indices]
286
+
287
+ # take secondary info
288
def get_column_name_secondary(user_input_m):
    """Return the standard names of the secondary columns the user filled in.

    Position 0 corresponds to ``fire_rate`` and position 1 to
    ``acoustic_rate``; a position holding an empty string is left out.
    """
    standard_names = ["fire_rate", "acoustic_rate"]
    blanks = [pos for pos, value in enumerate(user_input_m) if value == '']

    # Blank out the names whose pattern was not provided, then keep the rest.
    for pos in blanks:
        standard_names[pos] = ""

    return [name for name in standard_names if name]
301
+
302
+
303
+ #handling both main and secondary info together in one table
304
def get_selected_columns_all(dfs, user_patterns):
    """Find the requested main and secondary columns in the schedule tables.

    Every table in ``dfs`` is searched with ``flexible_search`` for the
    non-empty user patterns. Depending on whether the patterns were matched
    as column headers or inside cells, the matching table is turned into the
    selected-columns table through the corresponding helper.

    NOTE(review): the nesting was reconstructed from a diff view without
    indentation; confirm it against the real file.

    Args:
        dfs: sequence of tables extracted from the schedule.
        user_patterns: the user's patterns; the first four are main columns,
            the rest secondary columns.

    Returns:
        The selected-columns table, or ``None`` when no table matched.
    """
    selected_columns_new = None

    # The standard names and the patterns to search do not depend on the
    # table being scanned, so prepare them once.
    main_info, secondary_info = separate_main_secondary(user_patterns)
    clmn_name = get_column_name(main_info) + get_column_name_secondary(secondary_info)
    non_empty_info = [item for item in main_info if item] + [item for item in secondary_info if item]

    for i, df in enumerate(dfs):
        print(f"clmn name: {clmn_name}")
        print(f"non-empty info: {non_empty_info}")

        cell_columns_appearance = flexible_search(df, non_empty_info)
        cell_matches, col_matches = analyse_cell_columns(cell_columns_appearance)

        print(f"length of cell_matches: {len(cell_matches)}")
        print(f"cell_matches: {cell_matches}")
        print(clmn_name)

        if len(cell_matches) == 0 and len(col_matches) == 0:
            print(f"this is df {i}, SEARCH IN ANOTHER DF")
            continue

        # Patterns matched as column headers.
        if len(col_matches) == len(non_empty_info):
            column_index_list = get_column_index(col_matches)
            print(f"this is df {i} mawgooda fel columns, check el df length 3ashan law el details fe table tany")
            print(column_index_list)
            if len(df) < 10:
                # Short table: the details are expected in another table.
                selected_columns_new = details_in_another_table(clmn_name, column_index_list, df, dfs)
            else:
                # Details live in this table. The helper takes
                # (clm_idx, clmn_name, df); the previous call omitted the
                # names. Tables with exactly 10 rows used to fall through
                # both tests and are now handled here as well.
                selected_columns_new = generate_current_table_without_cropping(column_index_list, clmn_name, df)

        # Patterns matched inside cells.
        if len(cell_matches) == len(non_empty_info):
            row_index_list, column_index_list = get_row_column_indices(cell_matches)
            print(f"this is df {i} mawgooda fel cells, check el df length 3ashan law el details fe table tany")

            # NOTE(review): this result is replaced on the next line; the
            # call is kept because the helper is not visible here and may
            # have effects of its own.
            selected_columns_new = details_in_another_table(clmn_name, column_index_list, df, dfs)
            selected_columns_new = crop_rename_table(row_index_list, clmn_name, column_index_list, df)
            break

    return selected_columns_new
383
+
384
+
385
  # I want this to search within the selected_columns column names themselves
386
  # I will also need to know how it is written in the real df (surely exactly as the user entered it)
387
  def get_st_op_pattern(selected_columns, user_input):
 
538
  return new_data
539
 
540
 
 
541
  def get_width_info_tobeprinted(new_data):
542
  width_info_tobeprinted = []
543
  if len(new_data[0]) == 4:
544
  for _,_,_, w in new_data:
545
+ w = re.sub(r",", "", w)
546
+ w = int(float(w))
547
  width_info_tobeprinted.append(w)
548
  if len(new_data[0]) == 5:
549
  for _,_,_, w,h in new_data:
550
  w = re.sub(r",", "", w)
551
  h = re.sub(r",", "", h)
552
+ w = int(float(w))
553
+ h = int(float(h))
 
 
 
 
 
 
554
  width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
555
  return width_info_tobeprinted
556
+
557
  def clean_dimensions(text):
558
  # Remove commas and "mm"
559
  text = re.sub(r'[,\s]*mm', '', text) # Remove "mm" with optional spaces or commas before it
 
577
  width_name = int(float(width_name))
578
  height_name = int(float(height_name))
579
  if match:
580
+ full_text = f"{width_name}mm wide x {height_name}mm high"
581
  else:
582
+ full_text = f"{height_name}mm wide x {width_name}mm high"
583
  widths.append(full_text)
584
  return widths
585
 
 
821
  locations.append((location, lbl, w, h, clr,acoustic))
822
  return locations, not_found
823
 
824
+ ### newest, accept combined table
825
+ from collections import defaultdict
826
+ import random
827
+
828
def get_similar_colors_all(selected_columns_new):
    """Group the selected-columns table by door type and colour each group.

    Every distinct ``door_type`` gets one random RGB tuple. For each type the
    result holds the door ids under ``'values'``, the colour under
    ``'color'`` and, for every column of the table, the list of that
    column's values for the rows of that type.

    Returns:
        Plain ``dict`` keyed by door type, in order of first appearance.
    """
    # Draw the colours up front, one per distinct door type.
    palette = {}
    for door_type in selected_columns_new['door_type'].unique():
        palette[door_type] = (
            random.randint(0, 255),
            random.randint(0, 255),
            random.randint(0, 255),
        )

    field_names = selected_columns_new.columns.to_list()
    grouped = {}

    for _, record in selected_columns_new.iterrows():
        door_type = record['door_type']
        if door_type not in grouped:
            # Fresh bucket: bookkeeping keys first, then one list per column.
            bucket = {'values': [], 'color': None}
            for name in field_names:
                bucket[name] = []
            grouped[door_type] = bucket

        bucket = grouped[door_type]
        bucket['values'].append(record['door_id'])
        for name in field_names:
            bucket[name].append(record.get(name, None))
        bucket['color'] = palette[door_type]

    return grouped
859
+
860
+ ### newest, accept combined table
861
def get_flattened_tuples_list_all(col_dict):
    """Flatten the per-door-type dictionary into one tuple per row.

    For every group, all list-valued fields except ``'door_type'`` and
    ``'values'`` are read position by position; each row tuple is those
    values (in the group's key order) followed by the group's colour. The
    row count is taken from the first such field.
    """
    skipped = ('door_type', 'values')
    rows = []

    for group in col_dict.values():
        columns = [
            name
            for name, content in group.items()
            if name not in skipped and isinstance(content, list)
        ]
        if not columns:
            continue

        row_count = len(group[columns[0]])
        for idx in range(row_count):
            cells = tuple(group[name][idx] for name in columns)
            rows.append(cells + (group['color'],))

    return rows
876
+
877
+
878
  #SECONDARY
879
  def get_cleaned_data_secondary(locations, main_info, secondary_info):
880
  processed = defaultdict(int)
 
962
 
963
  return new_data
964
 
965
+ '''def get_secondary_tobeprinted_clean(selected_secondary_info, secondary_tobeprinted, secondary_info):
966
  secondary_printed_clean = []
967
  if len(secondary_info) == 1:
968
  if any('Acoustic' in col for col in selected_secondary_info.columns):
 
978
  new_text = f"fire rating: {fire}; acoustic rating: {acous}"
979
  secondary_printed_clean.append(new_text)
980
  print(new_text)
981
+ return secondary_printed_clean'''
982
+
983
def get_secondary_tobeprinted_clean(selected_secondary_info, secondary_tobeprinted, secondary_info):
    """Format the secondary (fire / acoustic) values as printable labels.

    With a single secondary field, the label kind is chosen from the column
    names of ``selected_secondary_info`` (those containing ``'acoustic'``
    and/or ``'fire'``). With two secondary fields, each item of
    ``secondary_tobeprinted`` is a ``(fire, acoustic)`` pair and the
    combined label is also printed.

    Returns:
        List of label strings.
    """
    labels = []
    field_count = len(secondary_info)

    if field_count == 1:
        column_names = selected_secondary_info.columns
        if any('acoustic' in col for col in column_names):
            labels += [f"acoustic rating: {acous};" for acous in secondary_tobeprinted]
        if any('fire' in col for col in column_names):
            labels += [f"fire rating: {fire};" for fire in secondary_tobeprinted]
    elif field_count == 2:
        for fire, acous in secondary_tobeprinted:
            combined = f"fire rating: {fire}; acoustic rating: {acous};"
            labels.append(combined)
            print(combined)

    return labels
1000
 
1001
  def mix_width_secondary(widths, secondary_printed_clean):
 
1096
 
1097
  if len(main_info) == 4 and len(secondary_info) == 1:
1098
  for loc in locations:
1099
+ coor, lbl, w, h, acous, clr = loc
1100
  clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
1101
  for cor in coor:
1102
  #Create a Circle annotation (Count Markup)
 
1144
  pdf_document.close()
1145
 
1146
  return output_stream.getvalue() # Return the modified PDF as bytes
1147
+
1148
  def get_user_input(user_words):
1149
  user_input = []
1150
  for item in user_words:
 
1271
  pdf_document.close()
1272
 
1273
  return output_stream.getvalue() # Return the modified PDF as bytes
1274
+
1275
+
1276
  def get_user_input(user_words):
1277
  user_input = []
1278
  for item in user_words:
 
1343
  # #Save the final modified PDF to disk
1344
  # with open(output_pdf_path, "wb") as file:
1345
  # file.write(final_pdf_bytes)
1346
+
1347
def process_pdf_secondary(input_pdf_path, output_pdf_path, locations, new_authors, main_info, secondary_info):
    """Annotate a PDF with count markups (secondary-info variant) and set the authors.

    ``input_pdf_path`` may already be the PDF content as ``bytes``; anything
    else is opened as a file path and read in binary mode. The bytes are
    passed to ``add_bluebeam_count_annotations_secondary`` and the result to
    ``modify_author_in_pypdf2``. ``output_pdf_path`` is not used here.

    Returns:
        The value returned by ``modify_author_in_pypdf2``.
    """
    if not isinstance(input_pdf_path, bytes):
        with open(input_pdf_path, "rb") as pdf_file:
            source_bytes = pdf_file.read()
    else:
        source_bytes = input_pdf_path

    # Bluebeam-compatible count annotations first, author rewrite second.
    with_annotations = add_bluebeam_count_annotations_secondary(
        source_bytes, locations, main_info, secondary_info
    )
    return modify_author_in_pypdf2(with_annotations, new_authors)
1362
+
1363
+
1364
def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
    """Annotate a PDF with count markups and set the annotation authors.

    ``input_pdf_path`` may already be the PDF content as ``bytes``; anything
    else is opened as a file path and read in binary mode. The bytes are
    passed to ``add_bluebeam_count_annotations`` and the result to
    ``modify_author_in_pypdf2``. ``output_pdf_path`` is not used here.

    Returns:
        The value returned by ``modify_author_in_pypdf2``.
    """
    if not isinstance(input_pdf_path, bytes):
        with open(input_pdf_path, "rb") as pdf_file:
            source_bytes = pdf_file.read()
    else:
        source_bytes = input_pdf_path

    # Bluebeam-compatible count annotations first, author rewrite second.
    with_annotations = add_bluebeam_count_annotations(source_bytes, locations)
    return modify_author_in_pypdf2(with_annotations, new_authors)
1378
  def mainRun(schedule, plan, searcharray):
1379
+
1380
+ #print(type(plan))
1381
+ eltype = type(plan)
1382
+ print(f"el type beta3 variable plan:: {eltype}")
1383
+ len_plan = len(plan)
1384
+ print(f"length of the plan's array is: {len_plan}")
1385
+ p1_type = type(plan[0])
1386
+ print(f"el mawgood fe p[0]: {p1_type}")
1387
+
1388
+ print(f"search array: {searcharray}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1389
 
1390
+ dfs = extract_tables(schedule)
1391
+
1392
+ all_new_data = []
1393
+ all_widths = []
1394
+ pdf_outputs = []
1395
+
1396
+ if len(searcharray)>1:
1397
+ for j in range(len(searcharray)):
1398
+ pdfs = []
1399
+ user_input = searcharray[j]
1400
+ secondary_presence = False
1401
+
1402
+ if user_input[4] or user_input[5]:
1403
+ secondary_presence = True
1404
+
1405
+ main_info_, secondary_info_ = separate_main_secondary(user_input)
1406
+
1407
+ main_info = [item for item in main_info_ if item]
1408
+ secondary_info = [item for item in secondary_info_ if item]
1409
+
1410
+ print("feh secondary information")
1411
+ if user_input[4]:
1412
+ print("Fire rate mawgooda")
1413
+ if user_input[5]:
1414
+ print("Acoustic Rate mawgooda")
1415
+
1416
  else:
1417
+ print("mafeesh secondary information")
1418
+
1419
+ selected_columns_combined = get_selected_columns_all(dfs, user_input)
1420
+ kelma = get_st_op_pattern(selected_columns_combined, user_input)
1421
+ col_dict = get_similar_colors_all(selected_columns_combined)
1422
+ flattened_list = get_flattened_tuples_list_all(col_dict)
1423
+
1424
+ if secondary_presence:
1425
+ for p in plan:
1426
+ plan_texts = read_text(p)
1427
+ locations, not_found = get_word_locations_plan_secondary(flattened_list,plan_texts, main_info, secondary_info)
1428
+ new_data3 = get_cleaned_data_secondary(locations,main_info,secondary_info)
1429
+
1430
+ #Single page annotation
1431
+ all_new_data.append(new_data3)
1432
+ repeated_labels = get_repeated_labels(locations)
1433
+ if kelma == None:
1434
+ widths, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1435
+ else:
1436
+ width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1437
+ cleaned_width = get_cleaned_width(width_info_tobeprinted)
1438
+ widths = get_widths_bb_format(cleaned_width, kelma)
1439
+ secondary_printed_clean = get_secondary_tobeprinted_clean(selected_columns_combined, secondary_tobeprinted, secondary_info)
1440
+ all_print = mix_width_secondary(widths, secondary_printed_clean)
1441
+
1442
+ #Single page annotation
1443
+ all_widths.append(all_print)
1444
+
1445
+ flat_list_new_data = [item for sublist in all_new_data for item in sublist]
1446
+ flat_list_widths = [item for sublist in all_widths for item in sublist]
1447
+
1448
+ if pdf_outputs:
1449
+ final_pdf_bytes = process_pdf_secondary(pdf_outputs[j-1], "final_output_multiple_input_new2.pdf", flat_list_new_data, flat_list_widths, main_info, secondary_info)
1450
+ pdf_outputs.append(final_pdf_bytes)
1451
+ else:
1452
+ final_pdf_bytes = process_pdf_secondary(p, "final_output_multiple_input_new2.pdf", flat_list_new_data, flat_list_widths, main_info, secondary_info)
1453
+ pdf_outputs.append(final_pdf_bytes)
1454
+
1455
+ pdfs.append(final_pdf_bytes)
1456
+
1457
+
1458
+ else:
1459
+ for p in plan:
1460
+ print(f" p in plan is {type(p)}")
1461
+ print(p)
1462
+ plan_texts = read_text(p)
1463
+ locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
1464
+ new_data = get_cleaned_data(locations)
1465
+ #Single page annotation
1466
+ all_new_data.append(new_data)
1467
+ repeated_labels = get_repeated_labels(locations)
1468
+ if kelma == None:
1469
+ widths = get_width_info_tobeprinted(new_data)
1470
+ else:
1471
+ width_info_tobeprinted = get_width_info_tobeprinted(new_data)
1472
+ cleaned_width = get_cleaned_width(width_info_tobeprinted)
1473
+ widths = get_widths_bb_format(cleaned_width, kelma)
1474
+ #Single page annotation
1475
+ all_widths.append(widths)
1476
+
1477
+ flat_list_new_data = [item for sublist in all_new_data for item in sublist]
1478
+ flat_list_widths = [item for sublist in all_widths for item in sublist]
1479
+
1480
+ if pdf_outputs:
1481
+ final_pdf_bytes = process_pdf(pdf_outputs[j-1], "final_output_width_trial.pdf", flat_list_new_data, flat_list_widths)
1482
+ #final_pdf_bytes = process_pdf(p, "final_output_width.pdf", new_data, widths)
1483
+ pdfs.append(final_pdf_bytes)
1484
+ else:
1485
+ final_pdf_bytes = process_pdf(p, "final_output_width_trial.pdf", flat_list_new_data, flat_list_widths)
1486
+ #final_pdf_bytes = process_pdf(p, "final_output_width.pdf", new_data, widths)
1487
+ pdfs.append(final_pdf_bytes)
1488
 
1489
+ #Handling schedules without dimensions (width and height)
1490
+ if selected_columns_combined.shape[1] == 2:
1491
+ widths = []
1492
+
1493
+ merged_pdf = merge_pdf_bytes_list(pdfs)
1494
+ print(f"number of pges of merged_pdf is {len(merged_pdf)} and its type is {type(merged_pdf)}")
1495
+
1496
+ else:
1497
+ for j in range(len(searcharray)):
1498
+ pdfs = []
1499
+ user_input = searcharray[j]
1500
+ secondary_presence = False
1501
+
1502
+ if user_input[4] or user_input[5]:
1503
+ secondary_presence = True
1504
+
1505
+ main_info_, secondary_info_ = separate_main_secondary(user_input)
1506
+
1507
+ main_info = [item for item in main_info_ if item]
1508
+ secondary_info = [item for item in secondary_info_ if item]
1509
+
1510
+ print("feh secondary information")
1511
+ if user_input[4]:
1512
+ print("Fire rate mawgooda")
1513
+ if user_input[5]:
1514
+ print("Acoustic Rate mawgooda")
1515
+
1516
  else:
1517
+ print("mafeesh secondary information")
1518
+
1519
+ selected_columns_combined = get_selected_columns_all(dfs, user_input)
1520
+ kelma = get_st_op_pattern(selected_columns_combined, user_input)
1521
+ col_dict = get_similar_colors_all(selected_columns_combined)
1522
+ flattened_list = get_flattened_tuples_list_all(col_dict)
1523
+
1524
+ if secondary_presence:
1525
+ for p in plan:
1526
+ plan_texts = read_text(p)
1527
+ locations, not_found = get_word_locations_plan_secondary(flattened_list,plan_texts, main_info, secondary_info)
1528
+ new_data3 = get_cleaned_data_secondary(locations,main_info,secondary_info)
1529
+
1530
+ #Single page annotation
1531
+ all_new_data.append(new_data3)
1532
+ repeated_labels = get_repeated_labels(locations)
1533
+ if kelma == None:
1534
+ widths, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1535
+ else:
1536
+ width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1537
+ cleaned_width = get_cleaned_width(width_info_tobeprinted)
1538
+ widths = get_widths_bb_format(cleaned_width, kelma)
1539
+ secondary_printed_clean = get_secondary_tobeprinted_clean(selected_columns_combined, secondary_tobeprinted, secondary_info)
1540
+ all_print = mix_width_secondary(widths, secondary_printed_clean)
1541
+
1542
+ #Single page annotation
1543
+ all_widths.append(all_print)
1544
+
1545
+ flat_list_new_data = [item for sublist in all_new_data for item in sublist]
1546
+ flat_list_widths = [item for sublist in all_widths for item in sublist]
1547
+
1548
+
1549
+ final_pdf_bytes = process_pdf_secondary(p, "final_output_multiple_input_new2.pdf", flat_list_new_data, flat_list_widths, main_info, secondary_info)
1550
+ pdfs.append(final_pdf_bytes)
1551
+
1552
+
1553
+ else:
1554
+ for p in plan:
1555
+ print(f" p in plan is {type(p)}")
1556
+ print(p)
1557
+ plan_texts = read_text(p)
1558
+ locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
1559
+ new_data = get_cleaned_data(locations)
1560
+ #Single page annotation
1561
+ all_new_data.append(new_data)
1562
+ repeated_labels = get_repeated_labels(locations)
1563
+ if kelma == None:
1564
+ widths = get_width_info_tobeprinted(new_data)
1565
+ else:
1566
+ width_info_tobeprinted = get_width_info_tobeprinted(new_data)
1567
+ cleaned_width = get_cleaned_width(width_info_tobeprinted)
1568
+ widths = get_widths_bb_format(cleaned_width, kelma)
1569
+ #Single page annotation
1570
+ all_widths.append(widths)
1571
+
1572
+ flat_list_new_data = [item for sublist in all_new_data for item in sublist]
1573
+ flat_list_widths = [item for sublist in all_widths for item in sublist]
1574
 
1575
+ final_pdf_bytes = process_pdf(p, "final_output_width_trial.pdf", flat_list_new_data, flat_list_widths)
1576
+ #final_pdf_bytes = process_pdf(p, "final_output_width.pdf", new_data, widths)
1577
+ pdfs.append(final_pdf_bytes)
1578
+
1579
+ #Handling schedules without dimensions (width and height)
1580
+ if selected_columns_combined.shape[1] == 2:
1581
+ widths = []
1582
+
1583
+ merged_pdf = merge_pdf_bytes_list(pdfs)
1584
+ print(f"number of pges of merged_pdf is {len(merged_pdf)} and its type is {type(merged_pdf)}")
1585
+
1586
+ not_found = []
1587
+ doc2 =fitz.open('pdf',merged_pdf)
1588
+ len_doc2 = len(doc2)
1589
+ print(f"number of pges of doc2 is {len_doc2} and its type is {type(doc2)}")
1590
+ page=doc2[0]
1591
+ pix = page.get_pixmap() # render page to an image
1592
+ pl=Image.frombytes('RGB', [pix.width,pix.height],pix.samples)
1593
+ img=np.array(pl)
1594
+ annotatedimg = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
1595
+
1596
+
1597
+ list1=pd.DataFrame(columns=['content', 'id', 'subject','color'])
1598
+
1599
+ # for page in doc:
1600
+ for page in doc2:
1601
+ # Iterate through annotations on the page
1602
+ for annot in page.annots():
1603
+ # Get the color of the annotation
1604
+ annot_color = annot.colors
1605
+ if annot_color is not None:
1606
+ # annot_color is a dictionary with 'stroke' and 'fill' keys
1607
+ stroke_color = annot_color.get('stroke') # Border color
1608
+ fill_color = annot_color.get('fill') # Fill color
1609
+ if fill_color:
1610
+ v='fill'
1611
+ # print('fill')
1612
+ if stroke_color:
1613
+ v='stroke'
1614
+ x,y,z=int(annot_color.get(v)[0]*255),int(annot_color.get(v)[1]*255),int(annot_color.get(v)[2]*255)
1615
+ list1.loc[len(list1)] =[annot.info['content'],annot.info['id'],annot.info['subject'],[x,y,z]]
1616
+ return annotatedimg, doc2 , list1, repeated_labels , not_found
1617
 
1618