Spaces:
Sleeping
Sleeping
Update Doors_Schedule.py
Browse files- Doors_Schedule.py +70 -82
Doors_Schedule.py
CHANGED
|
@@ -221,14 +221,13 @@ def get_selected_columns(dfs, user_patterns):
|
|
| 221 |
cell_matches, col_matches = analyse_cell_columns(cell_columns_appearance)
|
| 222 |
|
| 223 |
|
| 224 |
-
|
| 225 |
-
#if len(clmn_name) < len(user_patterns):
|
| 226 |
if len(user_patterns) == 2:
|
| 227 |
clmn_name = ["door_id", "door_type"]
|
| 228 |
if len(user_patterns) == 4:
|
| 229 |
clmn_name = ["door_id", "door_type", "width", "height"]
|
| 230 |
if len(user_patterns) == 3:
|
| 231 |
-
clmn_name = ["door_id", "door_type", "
|
| 232 |
if len(cell_matches) == 0 and len(col_matches) == 0:
|
| 233 |
print(f"this is df {i}, SEARCH IN ANOTHER DF")
|
| 234 |
else:
|
|
@@ -236,13 +235,11 @@ def get_selected_columns(dfs, user_patterns):
|
|
| 236 |
if len(col_matches) == len(user_patterns):
|
| 237 |
column_index_list = get_column_index(col_matches)
|
| 238 |
print(f"this is df {i} mawgooda fel columns, check el df length 3ashan law el details fe table tany")
|
| 239 |
-
|
| 240 |
-
#details in another table
|
| 241 |
print(column_index_list)
|
| 242 |
if len(dfs[i]) <10:
|
| 243 |
selected_columns_new = details_in_another_table(clmn_name, column_index_list, dfs[i], dfs)
|
| 244 |
-
|
| 245 |
-
#other_matches = details_in_another_table_mod(clmn_name, clmn_idx, dfs[i], dfs)
|
| 246 |
#details in the same table
|
| 247 |
if len(dfs[i]) >10:
|
| 248 |
selected_columns_new = generate_current_table_without_cropping(column_index_list,dfs[i])
|
|
@@ -258,7 +255,6 @@ def get_selected_columns(dfs, user_patterns):
|
|
| 258 |
#selected_columns_new = details_in_another_table(clmn_name, clmn_idx, dfs[i], dfs)
|
| 259 |
selected_columns_new = details_in_another_table(clmn_name, column_index_list, dfs[i], dfs)
|
| 260 |
break
|
| 261 |
-
#other_matches = details_in_another_table_mod(clmn_name, clmn_idx, dfs[i], dfs)
|
| 262 |
#details in the same table
|
| 263 |
if len(dfs[i]) >10:
|
| 264 |
print(f"this is df {i} call crop_rename_table(indices, clmn_name, clmn_idx,df)")
|
|
@@ -266,62 +262,19 @@ def get_selected_columns(dfs, user_patterns):
|
|
| 266 |
break
|
| 267 |
return selected_columns_new
|
| 268 |
|
| 269 |
-
'''def get_st_op_pattern(clm_idx, clmn_name, starting_row_index,df):
|
| 270 |
-
target = 'structural opening'
|
| 271 |
-
clm_dict = dict(clm_idx) # Convert list of tuples to dictionary
|
| 272 |
-
structural_opening_value = clm_dict.get(target) # Returns None if not found
|
| 273 |
-
|
| 274 |
-
if target in clmn_name:
|
| 275 |
-
position = clmn_name.index(target)
|
| 276 |
-
kelma = df.iloc[starting_row_index[position], structural_opening_value]
|
| 277 |
-
else:
|
| 278 |
-
kelma = None
|
| 279 |
-
return kelma'''
|
| 280 |
-
|
| 281 |
-
'''def get_st_op_pattern(clmn_name):
|
| 282 |
-
target = 'structural opening'
|
| 283 |
-
for name in clmn_name:
|
| 284 |
-
if target in name.lower():
|
| 285 |
-
return name
|
| 286 |
-
return None'''
|
| 287 |
|
| 288 |
|
| 289 |
# Author's note (translated): I want this to search the column names of
# selected_columns itself.
# Author's note (translated): I will also need to know how the column is
# written in the real df (surely exactly as the user entered it).
def get_st_op_pattern(selected_columns, user_input):
    """Return the user's pattern for the structural-opening column, if any.

    Args:
        selected_columns: Table-like object whose ``columns`` attribute
            supports the ``in`` operator.
        user_input: Indexable sequence of the user's search patterns; the
            entry at index 2 is the one returned.

    Returns:
        ``user_input[2]`` when ``selected_columns`` has a column named
        'structural opening' and ``user_input`` has an entry at index 2;
        otherwise ``None``.
    """
    target = 'structural opening'
    if target not in selected_columns.columns:
        return None
    try:
        return user_input[2]
    except IndexError:
        # Fewer than three patterns were supplied: report "no pattern"
        # instead of crashing on the hard-coded index.
        return None
|
| 297 |
|
| 298 |
|
| 299 |
-
'''def get_similar_colors(selected_columns_new):
|
| 300 |
-
def generate_rgb():
|
| 301 |
-
return (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) # RGB tuple
|
| 302 |
|
| 303 |
-
unique_keys = selected_columns_new['door_type'].unique()
|
| 304 |
-
key_colors = {key: generate_rgb() for key in unique_keys} # Assign a unique RGB color to each key
|
| 305 |
-
|
| 306 |
-
# Create dictionary storing values, colors, and widths
|
| 307 |
-
if 'structural opening' in selected_columns_new.columns:
|
| 308 |
-
col_dict = defaultdict(lambda: {'values': [], 'color': None, 'widths': []})
|
| 309 |
-
else:
|
| 310 |
-
col_dict = defaultdict(lambda: {'values': [], 'color': None, 'widths': [], 'heights': []})
|
| 311 |
-
|
| 312 |
-
for _, row in selected_columns_new.iterrows():
|
| 313 |
-
key = row['door_type']
|
| 314 |
-
col_dict[key]['values'].append(row['door_id'])
|
| 315 |
-
if 'structural opening' in selected_columns_new.columns:
|
| 316 |
-
col_dict[key]['widths'].append(row['structural opening']) # Add structural opening
|
| 317 |
-
else:
|
| 318 |
-
col_dict[key]['widths'].append(row['width']) # Assuming 'widht' is a typo for 'width'
|
| 319 |
-
col_dict[key]['heights'].append(row['height'])
|
| 320 |
-
col_dict[key]['color'] = key_colors[key] # Assign the unique RGB color
|
| 321 |
-
|
| 322 |
-
# Convert defaultdict to a normal dictionary
|
| 323 |
-
col_dict = dict(col_dict)
|
| 324 |
-
return col_dict'''
|
| 325 |
|
| 326 |
def get_similar_colors(selected_columns_new):
|
| 327 |
def generate_rgb():
|
|
@@ -466,15 +419,6 @@ def get_cleaned_data(locations):
|
|
| 466 |
|
| 467 |
return new_data
|
| 468 |
|
| 469 |
-
'''def get_width_info_tobeprinted(new_data):
|
| 470 |
-
width_info_tobeprinted = []
|
| 471 |
-
if len(new_data[0]) == 4:
|
| 472 |
-
for _,_,_, w in new_data:
|
| 473 |
-
width_info_tobeprinted.append(w)
|
| 474 |
-
if len(new_data[0]) == 5:
|
| 475 |
-
for _,_,_, w,h in new_data:
|
| 476 |
-
width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
|
| 477 |
-
return width_info_tobeprinted'''
|
| 478 |
|
| 479 |
def get_width_info_tobeprinted(new_data):
|
| 480 |
width_info_tobeprinted = []
|
|
@@ -637,6 +581,23 @@ def modify_author_in_pypdf2(pdf_bytes, new_authors):
|
|
| 637 |
|
| 638 |
# return output_stream.getvalue() # Return modified PDF as bytes
|
| 639 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 640 |
def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
|
| 641 |
#Load original PDF
|
| 642 |
# with open(input_pdf_path, "rb") as file:
|
|
@@ -653,37 +614,64 @@ def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
|
|
| 653 |
# file.write(final_pdf_bytes)
|
| 654 |
|
| 655 |
def mainRun(schedule, plan, searcharray):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 656 |
user_input = get_user_input(searcharray)
|
| 657 |
dfs = extract_tables(schedule)
|
| 658 |
-
|
| 659 |
selected_columns_new = get_selected_columns(dfs, user_input)
|
| 660 |
-
|
| 661 |
-
#df = selected_columns[0][1]
|
| 662 |
-
#clm_idx = selected_columns[0][2]
|
| 663 |
-
#clmn_name = selected_columns[0][3]
|
| 664 |
-
#starting_row_index = selected_columns[0][4]
|
| 665 |
-
#kelma = get_st_op_pattern(user_input)
|
| 666 |
kelma = get_st_op_pattern(selected_columns_new, user_input)
|
| 667 |
col_dict = get_similar_colors(selected_columns_new)
|
| 668 |
flattened_list = get_flattened_tuples_list(col_dict)
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 679 |
if selected_columns_new.shape[1] == 2:
|
| 680 |
widths = []
|
| 681 |
#for j in range(len(locations)):
|
| 682 |
# widths.append("Dimensions not found in schedule")
|
| 683 |
-
final_pdf_bytes= process_pdf(plan, "final_output_width.pdf", new_data, widths)
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
|
|
|
|
|
|
|
|
|
| 687 |
page=doc2[0]
|
| 688 |
pix = page.get_pixmap() # render page to an image
|
| 689 |
pl=Image.frombytes('RGB', [pix.width,pix.height],pix.samples)
|
|
|
|
| 221 |
cell_matches, col_matches = analyse_cell_columns(cell_columns_appearance)
|
| 222 |
|
| 223 |
|
| 224 |
+
|
|
|
|
| 225 |
if len(user_patterns) == 2:
|
| 226 |
clmn_name = ["door_id", "door_type"]
|
| 227 |
if len(user_patterns) == 4:
|
| 228 |
clmn_name = ["door_id", "door_type", "width", "height"]
|
| 229 |
if len(user_patterns) == 3:
|
| 230 |
+
clmn_name = ["door_id", "door_type", "structural opening"]
|
| 231 |
if len(cell_matches) == 0 and len(col_matches) == 0:
|
| 232 |
print(f"this is df {i}, SEARCH IN ANOTHER DF")
|
| 233 |
else:
|
|
|
|
| 235 |
if len(col_matches) == len(user_patterns):
|
| 236 |
column_index_list = get_column_index(col_matches)
|
| 237 |
print(f"this is df {i} mawgooda fel columns, check el df length 3ashan law el details fe table tany")
|
| 238 |
+
|
|
|
|
| 239 |
print(column_index_list)
|
| 240 |
if len(dfs[i]) <10:
|
| 241 |
selected_columns_new = details_in_another_table(clmn_name, column_index_list, dfs[i], dfs)
|
| 242 |
+
|
|
|
|
| 243 |
#details in the same table
|
| 244 |
if len(dfs[i]) >10:
|
| 245 |
selected_columns_new = generate_current_table_without_cropping(column_index_list,dfs[i])
|
|
|
|
| 255 |
#selected_columns_new = details_in_another_table(clmn_name, clmn_idx, dfs[i], dfs)
|
| 256 |
selected_columns_new = details_in_another_table(clmn_name, column_index_list, dfs[i], dfs)
|
| 257 |
break
|
|
|
|
| 258 |
#details in the same table
|
| 259 |
if len(dfs[i]) >10:
|
| 260 |
print(f"this is df {i} call crop_rename_table(indices, clmn_name, clmn_idx,df)")
|
|
|
|
| 262 |
break
|
| 263 |
return selected_columns_new
|
| 264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
|
| 266 |
|
| 267 |
# Author's note (translated): I want this to search the column names of
# selected_columns itself.
# Author's note (translated): I will also need to know how the column is
# written in the real df (surely exactly as the user entered it).
def get_st_op_pattern(selected_columns, user_input):
    """Return the user's pattern for the structural-opening column, if any.

    Args:
        selected_columns: Table-like object whose ``columns`` attribute
            supports the ``in`` operator.
        user_input: Indexable sequence of the user's search patterns; the
            entry at index 2 is the one returned.

    Returns:
        ``user_input[2]`` when ``selected_columns`` has a column named
        'structural opening' and ``user_input`` has an entry at index 2;
        otherwise ``None``.
    """
    target = 'structural opening'
    if target not in selected_columns.columns:
        return None
    try:
        return user_input[2]
    except IndexError:
        # Fewer than three patterns were supplied: report "no pattern"
        # instead of crashing on the hard-coded index.
        return None
|
| 275 |
|
| 276 |
|
|
|
|
|
|
|
|
|
|
| 277 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
|
| 279 |
def get_similar_colors(selected_columns_new):
|
| 280 |
def generate_rgb():
|
|
|
|
| 419 |
|
| 420 |
return new_data
|
| 421 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
|
| 423 |
def get_width_info_tobeprinted(new_data):
|
| 424 |
width_info_tobeprinted = []
|
|
|
|
| 581 |
|
| 582 |
# return output_stream.getvalue() # Return modified PDF as bytes
|
| 583 |
|
| 584 |
+
from PyPDF2 import PdfReader, PdfWriter
|
| 585 |
+
|
| 586 |
+
def merge_pdf_bytes_list(pdfs):
    """Concatenate several in-memory PDFs into a single PDF.

    Args:
        pdfs: Iterable of PDF documents, each supplied as a bytes object.

    Returns:
        The bytes of one PDF holding every page of every input, in the
        order the inputs (and their pages) were given.
    """
    merged = PdfWriter()

    for raw in pdfs:
        # Each document is wrapped in an in-memory stream before parsing.
        for pg in PdfReader(io.BytesIO(raw)).pages:
            merged.add_page(pg)

    buffer = io.BytesIO()
    merged.write(buffer)

    # getvalue() returns the whole buffer regardless of the cursor position.
    return buffer.getvalue()
|
| 600 |
+
|
| 601 |
def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
|
| 602 |
#Load original PDF
|
| 603 |
# with open(input_pdf_path, "rb") as file:
|
|
|
|
| 614 |
# file.write(final_pdf_bytes)
|
| 615 |
|
| 616 |
def mainRun(schedule, plan, searcharray):
|
| 617 |
+
#print(type(plan))
|
| 618 |
+
eltype = type(plan)
|
| 619 |
+
print(f"el type beta3 variable plan:: {eltype}")
|
| 620 |
+
len_plan = len(plan)
|
| 621 |
+
print(f"length of the plan's array is: {len_plan}")
|
| 622 |
+
p1_type = type(plan[0])
|
| 623 |
+
print(f"el mawgood fe p[0]: {p1_type}")
|
| 624 |
+
|
| 625 |
user_input = get_user_input(searcharray)
|
| 626 |
dfs = extract_tables(schedule)
|
| 627 |
+
|
| 628 |
selected_columns_new = get_selected_columns(dfs, user_input)
|
| 629 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 630 |
kelma = get_st_op_pattern(selected_columns_new, user_input)
|
| 631 |
col_dict = get_similar_colors(selected_columns_new)
|
| 632 |
flattened_list = get_flattened_tuples_list(col_dict)
|
| 633 |
+
|
| 634 |
+
pdfs = []
|
| 635 |
+
for p in plan:
|
| 636 |
+
print(f" p in plan is {type(p)}")
|
| 637 |
+
print(p)
|
| 638 |
+
plan_texts = read_text(p)
|
| 639 |
+
locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
|
| 640 |
+
new_data = get_cleaned_data(locations)
|
| 641 |
+
repeated_labels = get_repeated_labels(locations)
|
| 642 |
+
if kelma == None:
|
| 643 |
+
widths = get_width_info_tobeprinted(new_data)
|
| 644 |
+
else:
|
| 645 |
+
width_info_tobeprinted = get_width_info_tobeprinted(new_data)
|
| 646 |
+
cleaned_width = get_cleaned_width(width_info_tobeprinted)
|
| 647 |
+
widths = get_widths_bb_format(cleaned_width, kelma)
|
| 648 |
+
final_pdf_bytes = process_pdf(p, "final_output_width.pdf", new_data, widths)
|
| 649 |
+
pdfs.append(final_pdf_bytes)
|
| 650 |
+
|
| 651 |
+
|
| 652 |
+
|
| 653 |
+
|
| 654 |
+
#plan_texts = read_text(plan)
|
| 655 |
+
#locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
|
| 656 |
+
#new_data = get_cleaned_data(locations)
|
| 657 |
+
#repeated_labels = get_repeated_labels(locations)
|
| 658 |
+
#if kelma == None:
|
| 659 |
+
# widths = get_width_info_tobeprinted(new_data)
|
| 660 |
+
#else:
|
| 661 |
+
# width_info_tobeprinted = get_width_info_tobeprinted(new_data)
|
| 662 |
+
# cleaned_width = get_cleaned_width(width_info_tobeprinted)
|
| 663 |
+
# widths = get_widths_bb_format(cleaned_width, kelma)
|
| 664 |
if selected_columns_new.shape[1] == 2:
|
| 665 |
widths = []
|
| 666 |
#for j in range(len(locations)):
|
| 667 |
# widths.append("Dimensions not found in schedule")
|
| 668 |
+
#final_pdf_bytes= process_pdf(plan, "final_output_width.pdf", new_data, widths)
|
| 669 |
+
merged_pdf = merge_pdf_bytes_list(pdfs)
|
| 670 |
+
print(f"number of pges of merged_pdf is {len(merged_pdf)} and its type is {type(merged_pdf)}")
|
| 671 |
+
not_found = []
|
| 672 |
+
doc2 =fitz.open('pdf',merged_pdf)
|
| 673 |
+
len_doc2 = len(doc2)
|
| 674 |
+
print(f"number of pges of doc2 is {len_doc2} and its type is {type(doc2)}")
|
| 675 |
page=doc2[0]
|
| 676 |
pix = page.get_pixmap() # render page to an image
|
| 677 |
pl=Image.frombytes('RGB', [pix.width,pix.height],pix.samples)
|