Marthee committed on
Commit
205cfc0
·
verified ·
1 Parent(s): 30ed4a5

Update Doors_Schedule.py

Browse files
Files changed (1) hide show
  1. Doors_Schedule.py +70 -82
Doors_Schedule.py CHANGED
@@ -221,14 +221,13 @@ def get_selected_columns(dfs, user_patterns):
221
  cell_matches, col_matches = analyse_cell_columns(cell_columns_appearance)
222
 
223
 
224
- #clmn_name = map_user_input_to_standard_labels(user_patterns)
225
- #if len(clmn_name) < len(user_patterns):
226
  if len(user_patterns) == 2:
227
  clmn_name = ["door_id", "door_type"]
228
  if len(user_patterns) == 4:
229
  clmn_name = ["door_id", "door_type", "width", "height"]
230
  if len(user_patterns) == 3:
231
- clmn_name = ["door_id", "door_type", "structural_opening"]
232
  if len(cell_matches) == 0 and len(col_matches) == 0:
233
  print(f"this is df {i}, SEARCH IN ANOTHER DF")
234
  else:
@@ -236,13 +235,11 @@ def get_selected_columns(dfs, user_patterns):
236
  if len(col_matches) == len(user_patterns):
237
  column_index_list = get_column_index(col_matches)
238
  print(f"this is df {i} mawgooda fel columns, check el df length 3ashan law el details fe table tany")
239
- #print(len(clm_idx))
240
- #details in another table
241
  print(column_index_list)
242
  if len(dfs[i]) <10:
243
  selected_columns_new = details_in_another_table(clmn_name, column_index_list, dfs[i], dfs)
244
- #break
245
- #other_matches = details_in_another_table_mod(clmn_name, clmn_idx, dfs[i], dfs)
246
  #details in the same table
247
  if len(dfs[i]) >10:
248
  selected_columns_new = generate_current_table_without_cropping(column_index_list,dfs[i])
@@ -258,7 +255,6 @@ def get_selected_columns(dfs, user_patterns):
258
  #selected_columns_new = details_in_another_table(clmn_name, clmn_idx, dfs[i], dfs)
259
  selected_columns_new = details_in_another_table(clmn_name, column_index_list, dfs[i], dfs)
260
  break
261
- #other_matches = details_in_another_table_mod(clmn_name, clmn_idx, dfs[i], dfs)
262
  #details in the same table
263
  if len(dfs[i]) >10:
264
  print(f"this is df {i} call crop_rename_table(indices, clmn_name, clmn_idx,df)")
@@ -266,62 +262,19 @@ def get_selected_columns(dfs, user_patterns):
266
  break
267
  return selected_columns_new
268
 
269
- '''def get_st_op_pattern(clm_idx, clmn_name, starting_row_index,df):
270
- target = 'structural opening'
271
- clm_dict = dict(clm_idx) # Convert list of tuples to dictionary
272
- structural_opening_value = clm_dict.get(target) # Returns None if not found
273
-
274
- if target in clmn_name:
275
- position = clmn_name.index(target)
276
- kelma = df.iloc[starting_row_index[position], structural_opening_value]
277
- else:
278
- kelma = None
279
- return kelma'''
280
-
281
- '''def get_st_op_pattern(clmn_name):
282
- target = 'structural opening'
283
- for name in clmn_name:
284
- if target in name.lower():
285
- return name
286
- return None'''
287
 
288
 
289
  # I want this to search the column names of selected_columns itself
290
  # I will also need to know how it is written in the real df (surely exactly as the user entered it)
291
  def get_st_op_pattern(selected_columns, user_input):
292
- target = 'structural_opening'
293
  if target in selected_columns.columns:
294
  name = user_input[2]
295
  return name
296
  return None
297
 
298
 
299
- '''def get_similar_colors(selected_columns_new):
300
- def generate_rgb():
301
- return (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) # RGB tuple
302
 
303
- unique_keys = selected_columns_new['door_type'].unique()
304
- key_colors = {key: generate_rgb() for key in unique_keys} # Assign a unique RGB color to each key
305
-
306
- # Create dictionary storing values, colors, and widths
307
- if 'structural opening' in selected_columns_new.columns:
308
- col_dict = defaultdict(lambda: {'values': [], 'color': None, 'widths': []})
309
- else:
310
- col_dict = defaultdict(lambda: {'values': [], 'color': None, 'widths': [], 'heights': []})
311
-
312
- for _, row in selected_columns_new.iterrows():
313
- key = row['door_type']
314
- col_dict[key]['values'].append(row['door_id'])
315
- if 'structural opening' in selected_columns_new.columns:
316
- col_dict[key]['widths'].append(row['structural opening']) # Add structural opening
317
- else:
318
- col_dict[key]['widths'].append(row['width']) # Assuming 'widht' is a typo for 'width'
319
- col_dict[key]['heights'].append(row['height'])
320
- col_dict[key]['color'] = key_colors[key] # Assign the unique RGB color
321
-
322
- # Convert defaultdict to a normal dictionary
323
- col_dict = dict(col_dict)
324
- return col_dict'''
325
 
326
  def get_similar_colors(selected_columns_new):
327
  def generate_rgb():
@@ -466,15 +419,6 @@ def get_cleaned_data(locations):
466
 
467
  return new_data
468
 
469
- '''def get_width_info_tobeprinted(new_data):
470
- width_info_tobeprinted = []
471
- if len(new_data[0]) == 4:
472
- for _,_,_, w in new_data:
473
- width_info_tobeprinted.append(w)
474
- if len(new_data[0]) == 5:
475
- for _,_,_, w,h in new_data:
476
- width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
477
- return width_info_tobeprinted'''
478
 
479
  def get_width_info_tobeprinted(new_data):
480
  width_info_tobeprinted = []
@@ -637,6 +581,23 @@ def modify_author_in_pypdf2(pdf_bytes, new_authors):
637
 
638
  # return output_stream.getvalue() # Return modified PDF as bytes
639
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
640
  def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
641
  #Load original PDF
642
  # with open(input_pdf_path, "rb") as file:
@@ -653,37 +614,64 @@ def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
653
  # file.write(final_pdf_bytes)
654
 
655
  def mainRun(schedule, plan, searcharray):
 
 
 
 
 
 
 
 
656
  user_input = get_user_input(searcharray)
657
  dfs = extract_tables(schedule)
658
- #selected_columns = get_selected_columns(dfs)
659
  selected_columns_new = get_selected_columns(dfs, user_input)
660
- #selected_columns_new = selected_columns[0][0]
661
- #df = selected_columns[0][1]
662
- #clm_idx = selected_columns[0][2]
663
- #clmn_name = selected_columns[0][3]
664
- #starting_row_index = selected_columns[0][4]
665
- #kelma = get_st_op_pattern(user_input)
666
  kelma = get_st_op_pattern(selected_columns_new, user_input)
667
  col_dict = get_similar_colors(selected_columns_new)
668
  flattened_list = get_flattened_tuples_list(col_dict)
669
- plan_texts = read_text(plan)
670
- locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
671
- new_data = get_cleaned_data(locations)
672
- repeated_labels = get_repeated_labels(locations)
673
- if kelma == None:
674
- widths = get_width_info_tobeprinted(new_data)
675
- else:
676
- width_info_tobeprinted = get_width_info_tobeprinted(new_data)
677
- cleaned_width = get_cleaned_width(width_info_tobeprinted)
678
- widths = get_widths_bb_format(cleaned_width, kelma)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
679
  if selected_columns_new.shape[1] == 2:
680
  widths = []
681
  #for j in range(len(locations)):
682
  # widths.append("Dimensions not found in schedule")
683
- final_pdf_bytes= process_pdf(plan, "final_output_width.pdf", new_data, widths)
684
-
685
-
686
- doc2 =fitz.open('pdf',final_pdf_bytes)
 
 
 
687
  page=doc2[0]
688
  pix = page.get_pixmap() # render page to an image
689
  pl=Image.frombytes('RGB', [pix.width,pix.height],pix.samples)
 
221
  cell_matches, col_matches = analyse_cell_columns(cell_columns_appearance)
222
 
223
 
224
+
 
225
  if len(user_patterns) == 2:
226
  clmn_name = ["door_id", "door_type"]
227
  if len(user_patterns) == 4:
228
  clmn_name = ["door_id", "door_type", "width", "height"]
229
  if len(user_patterns) == 3:
230
+ clmn_name = ["door_id", "door_type", "structural opening"]
231
  if len(cell_matches) == 0 and len(col_matches) == 0:
232
  print(f"this is df {i}, SEARCH IN ANOTHER DF")
233
  else:
 
235
  if len(col_matches) == len(user_patterns):
236
  column_index_list = get_column_index(col_matches)
237
  print(f"this is df {i} mawgooda fel columns, check el df length 3ashan law el details fe table tany")
238
+
 
239
  print(column_index_list)
240
  if len(dfs[i]) <10:
241
  selected_columns_new = details_in_another_table(clmn_name, column_index_list, dfs[i], dfs)
242
+
 
243
  #details in the same table
244
  if len(dfs[i]) >10:
245
  selected_columns_new = generate_current_table_without_cropping(column_index_list,dfs[i])
 
255
  #selected_columns_new = details_in_another_table(clmn_name, clmn_idx, dfs[i], dfs)
256
  selected_columns_new = details_in_another_table(clmn_name, column_index_list, dfs[i], dfs)
257
  break
 
258
  #details in the same table
259
  if len(dfs[i]) >10:
260
  print(f"this is df {i} call crop_rename_table(indices, clmn_name, clmn_idx,df)")
 
262
  break
263
  return selected_columns_new
264
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
 
266
 
267
  # 3ayz akhaleehaa te search fel selected_columns column names nafsaha
268
  # 7ab2a 3ayz a3raf bardo maktooba ezay fel df el 7a2e2ya (akeed za ma el user medakhalha bezabt)
269
  def get_st_op_pattern(selected_columns, user_input):
270
+ target = 'structural opening'
271
  if target in selected_columns.columns:
272
  name = user_input[2]
273
  return name
274
  return None
275
 
276
 
 
 
 
277
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
 
279
  def get_similar_colors(selected_columns_new):
280
  def generate_rgb():
 
419
 
420
  return new_data
421
 
 
 
 
 
 
 
 
 
 
422
 
423
  def get_width_info_tobeprinted(new_data):
424
  width_info_tobeprinted = []
 
581
 
582
  # return output_stream.getvalue() # Return modified PDF as bytes
583
 
584
+ from PyPDF2 import PdfReader, PdfWriter
585
+
586
def merge_pdf_bytes_list(pdfs):
    """Concatenate several in-memory PDFs into a single PDF.

    Args:
        pdfs: iterable of ``bytes`` objects, each one a PDF document.

    Returns:
        ``bytes`` of one PDF holding every page of every input document,
        in the order the inputs were given.
    """
    combined = PdfWriter()

    for raw_pdf in pdfs:
        # Wrap the raw bytes in a file-like object so PdfReader can parse them.
        source = PdfReader(io.BytesIO(raw_pdf))
        for pg in source.pages:
            combined.add_page(pg)

    buffer = io.BytesIO()
    combined.write(buffer)
    # getvalue() returns the full buffer contents regardless of the cursor.
    return buffer.getvalue()
600
+
601
  def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
602
  #Load original PDF
603
  # with open(input_pdf_path, "rb") as file:
 
614
  # file.write(final_pdf_bytes)
615
 
616
  def mainRun(schedule, plan, searcharray):
617
+ #print(type(plan))
618
+ eltype = type(plan)
619
+ print(f"el type beta3 variable plan:: {eltype}")
620
+ len_plan = len(plan)
621
+ print(f"length of the plan's array is: {len_plan}")
622
+ p1_type = type(plan[0])
623
+ print(f"el mawgood fe p[0]: {p1_type}")
624
+
625
  user_input = get_user_input(searcharray)
626
  dfs = extract_tables(schedule)
627
+
628
  selected_columns_new = get_selected_columns(dfs, user_input)
629
+
 
 
 
 
 
630
  kelma = get_st_op_pattern(selected_columns_new, user_input)
631
  col_dict = get_similar_colors(selected_columns_new)
632
  flattened_list = get_flattened_tuples_list(col_dict)
633
+
634
+ pdfs = []
635
+ for p in plan:
636
+ print(f" p in plan is {type(p)}")
637
+ print(p)
638
+ plan_texts = read_text(p)
639
+ locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
640
+ new_data = get_cleaned_data(locations)
641
+ repeated_labels = get_repeated_labels(locations)
642
+ if kelma == None:
643
+ widths = get_width_info_tobeprinted(new_data)
644
+ else:
645
+ width_info_tobeprinted = get_width_info_tobeprinted(new_data)
646
+ cleaned_width = get_cleaned_width(width_info_tobeprinted)
647
+ widths = get_widths_bb_format(cleaned_width, kelma)
648
+ final_pdf_bytes = process_pdf(p, "final_output_width.pdf", new_data, widths)
649
+ pdfs.append(final_pdf_bytes)
650
+
651
+
652
+
653
+
654
+ #plan_texts = read_text(plan)
655
+ #locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
656
+ #new_data = get_cleaned_data(locations)
657
+ #repeated_labels = get_repeated_labels(locations)
658
+ #if kelma == None:
659
+ # widths = get_width_info_tobeprinted(new_data)
660
+ #else:
661
+ # width_info_tobeprinted = get_width_info_tobeprinted(new_data)
662
+ # cleaned_width = get_cleaned_width(width_info_tobeprinted)
663
+ # widths = get_widths_bb_format(cleaned_width, kelma)
664
  if selected_columns_new.shape[1] == 2:
665
  widths = []
666
  #for j in range(len(locations)):
667
  # widths.append("Dimensions not found in schedule")
668
+ #final_pdf_bytes= process_pdf(plan, "final_output_width.pdf", new_data, widths)
669
+ merged_pdf = merge_pdf_bytes_list(pdfs)
670
+ print(f"number of pges of merged_pdf is {len(merged_pdf)} and its type is {type(merged_pdf)}")
671
+ not_found = []
672
+ doc2 =fitz.open('pdf',merged_pdf)
673
+ len_doc2 = len(doc2)
674
+ print(f"number of pges of doc2 is {len_doc2} and its type is {type(doc2)}")
675
  page=doc2[0]
676
  pix = page.get_pixmap() # render page to an image
677
  pl=Image.frombytes('RGB', [pix.width,pix.height],pix.samples)