Update Doors_Schedule.py
Browse files- Doors_Schedule.py +246 -530
Doors_Schedule.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
from collections import defaultdict
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import random
|
| 4 |
import math
|
|
@@ -116,7 +117,6 @@ def generate_current_table_without_cropping(clm_idx, clmn_name, df):
|
|
| 116 |
return selected_df
|
| 117 |
|
| 118 |
|
| 119 |
-
|
| 120 |
def crop_rename_table(indices, clmn_name, clmn_idx,df):
|
| 121 |
#crop_at = (max(set(indices), key=indices.count)) + 1
|
| 122 |
crop_at = max(indices) + 1
|
|
@@ -292,8 +292,10 @@ def get_column_name(user_input_m):
|
|
| 292 |
# fixed column names
|
| 293 |
fixed_list = ["door_id", "door_type", "width", "height"]
|
| 294 |
for i in range(len(empty_indices)):
|
| 295 |
-
if empty_indices[i] == 3:
|
| 296 |
-
fixed_list[2] = "
|
|
|
|
|
|
|
| 297 |
fixed_list[empty_indices[i]] = ""
|
| 298 |
|
| 299 |
#finalize the column name structure
|
|
@@ -455,8 +457,18 @@ def find_text_in_plan(label, x):
|
|
| 455 |
return substring_coordinates, words, point_list
|
| 456 |
|
| 457 |
|
| 458 |
-
def get_selected_columns_by_index(df, column_index_list):
|
| 459 |
selected_df = df.iloc[:, column_index_list]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
return selected_df
|
| 461 |
|
| 462 |
## Get the column indices from extract_tables(schedule)
|
|
@@ -478,9 +490,10 @@ def get_column_indices_from_dfs_normal(dfs, user_patterns):
|
|
| 478 |
cell_columns_appearance = flexible_search(dfs[i], non_empty_info)
|
| 479 |
cell_matches, col_matches = analyse_cell_columns(cell_columns_appearance)
|
| 480 |
|
| 481 |
-
if len(cell_matches) == 0 and len(col_matches) == 0:
|
| 482 |
continue
|
| 483 |
-
|
|
|
|
| 484 |
else:
|
| 485 |
#IN COLUMNS
|
| 486 |
if len(col_matches) == len(non_empty_info):
|
|
@@ -498,6 +511,73 @@ def get_column_indices_from_dfs_normal(dfs, user_patterns):
|
|
| 498 |
|
| 499 |
return column_index_list
|
| 500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
# get the index of dataframe that has the maximum column matches in the dfs from model table detection
|
| 502 |
def get_df_index(dfs, user_patterns):
|
| 503 |
df_matches = []
|
|
@@ -615,6 +695,9 @@ def get_cleaned_data(locations):
|
|
| 615 |
# law 0.5 maslan tetkatab we law mesh keda yesheel el decimal point
|
| 616 |
def get_width_info_tobeprinted(new_data):
|
| 617 |
width_info_tobeprinted = []
|
|
|
|
|
|
|
|
|
|
| 618 |
if len(new_data[0]) == 4:
|
| 619 |
for _,_,_, w in new_data:
|
| 620 |
#w = re.sub(r",", "", w)
|
|
@@ -626,7 +709,8 @@ def get_width_info_tobeprinted(new_data):
|
|
| 626 |
h = re.sub(r",", "", h)
|
| 627 |
|
| 628 |
#if w == "N/A":
|
| 629 |
-
if w.isalpha():
|
|
|
|
| 630 |
w = w
|
| 631 |
else:
|
| 632 |
if float(w).is_integer():
|
|
@@ -634,7 +718,8 @@ def get_width_info_tobeprinted(new_data):
|
|
| 634 |
else:
|
| 635 |
w = w
|
| 636 |
#if h == "N/A":
|
| 637 |
-
if h.isalpha():
|
|
|
|
| 638 |
h = h
|
| 639 |
else:
|
| 640 |
if float(h).is_integer():
|
|
@@ -675,70 +760,14 @@ def get_widths_bb_format(cleaned_width, kelma):
|
|
| 675 |
return widths
|
| 676 |
|
| 677 |
|
| 678 |
-
'''def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
|
| 679 |
-
width_info_tobeprinted = []
|
| 680 |
-
secondary_info_tobeprinted = []
|
| 681 |
-
|
| 682 |
-
if len(main_info) == 2 and len(secondary_info) == 1:
|
| 683 |
-
for coords, label, acous, color in new_data:
|
| 684 |
-
secondary_info_tobeprinted.append(acous)
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
if len(main_info) == 2 and len(secondary_info) == 2:
|
| 688 |
-
for coords, label, acous, fire, color in new_data:
|
| 689 |
-
secondary_info_tobeprinted.append((acous, fire))
|
| 690 |
-
|
| 691 |
-
if len(main_info) == 3 and len(secondary_info) == 1:
|
| 692 |
-
for coords, label, width, acous, color in new_data:
|
| 693 |
-
width_info_tobeprinted.append(width)
|
| 694 |
-
secondary_info_tobeprinted.append(acous)
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
if len(main_info) == 3 and len(secondary_info) == 2:
|
| 698 |
-
for coords, label, width, acous, fire, color in new_data:
|
| 699 |
-
width_info_tobeprinted.append(width)
|
| 700 |
-
secondary_info_tobeprinted.append((acous, fire))
|
| 701 |
-
|
| 702 |
-
if len(main_info) == 4 and len(secondary_info) == 1:
|
| 703 |
-
for coords, label, width, height, acous, color in new_data:
|
| 704 |
-
w = re.sub(r",", "", width)
|
| 705 |
-
h = re.sub(r",", "", height)
|
| 706 |
-
if float(w).is_integer():
|
| 707 |
-
w = int(float(w))
|
| 708 |
-
else:
|
| 709 |
-
w = w
|
| 710 |
-
if float(h).is_integer():
|
| 711 |
-
h = int(float(h))
|
| 712 |
-
else:
|
| 713 |
-
h = h
|
| 714 |
-
width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
|
| 715 |
-
secondary_info_tobeprinted.append(acous)
|
| 716 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 717 |
|
| 718 |
-
if len(main_info) == 4 and len(secondary_info) == 2:
|
| 719 |
-
for coords, label, width, height, acous, fire, color in new_data:
|
| 720 |
-
print(type(width))
|
| 721 |
-
print(type(height))
|
| 722 |
-
w = re.sub(r",", "", width)
|
| 723 |
-
h = re.sub(r",", "", height)
|
| 724 |
-
if w == "N/A":
|
| 725 |
-
w = w
|
| 726 |
-
else:
|
| 727 |
-
if float(w).is_integer():
|
| 728 |
-
w = int(float(w))
|
| 729 |
-
else:
|
| 730 |
-
w = w
|
| 731 |
-
if h == "N/A":
|
| 732 |
-
h = h
|
| 733 |
-
else:
|
| 734 |
-
if float(h).is_integer():
|
| 735 |
-
h = int(float(h))
|
| 736 |
-
else:
|
| 737 |
-
h = h
|
| 738 |
-
width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
|
| 739 |
-
secondary_info_tobeprinted.append((acous, fire))
|
| 740 |
-
return width_info_tobeprinted, secondary_info_tobeprinted
|
| 741 |
-
'''
|
| 742 |
|
| 743 |
def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
|
| 744 |
width_info_tobeprinted = []
|
|
@@ -747,11 +776,13 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
|
|
| 747 |
if len(main_info) == 2 and len(secondary_info) == 1:
|
| 748 |
for coords, label, acous, color in new_data:
|
| 749 |
secondary_info_tobeprinted.append(acous)
|
|
|
|
| 750 |
|
| 751 |
|
| 752 |
if len(main_info) == 2 and len(secondary_info) == 2:
|
| 753 |
for coords, label, acous, fire, color in new_data:
|
| 754 |
secondary_info_tobeprinted.append((acous, fire))
|
|
|
|
| 755 |
|
| 756 |
if len(main_info) == 3 and len(secondary_info) == 1:
|
| 757 |
for coords, label, width, acous, color in new_data:
|
|
@@ -768,7 +799,8 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
|
|
| 768 |
for coords, label, width, height, acous, color in new_data:
|
| 769 |
w = re.sub(r",", "", width)
|
| 770 |
h = re.sub(r",", "", height)
|
| 771 |
-
if w.isalpha():
|
|
|
|
| 772 |
w = w
|
| 773 |
else:
|
| 774 |
if float(w).is_integer():
|
|
@@ -776,7 +808,8 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
|
|
| 776 |
else:
|
| 777 |
w = w
|
| 778 |
#if h == "N/A":
|
| 779 |
-
if h.isalpha():
|
|
|
|
| 780 |
h = h
|
| 781 |
else:
|
| 782 |
if float(h).is_integer():
|
|
@@ -784,7 +817,7 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
|
|
| 784 |
else:
|
| 785 |
h = h
|
| 786 |
width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
|
| 787 |
-
secondary_info_tobeprinted.append((acous
|
| 788 |
|
| 789 |
if len(main_info) == 4 and len(secondary_info) == 2:
|
| 790 |
for coords, label, width, height, acous, fire, color in new_data:
|
|
@@ -793,7 +826,8 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
|
|
| 793 |
w = re.sub(r",", "", width)
|
| 794 |
h = re.sub(r",", "", height)
|
| 795 |
#if w == "N/A":
|
| 796 |
-
if w.isalpha():
|
|
|
|
| 797 |
w = w
|
| 798 |
else:
|
| 799 |
if float(w).is_integer():
|
|
@@ -801,7 +835,8 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
|
|
| 801 |
else:
|
| 802 |
w = w
|
| 803 |
#if h == "N/A":
|
| 804 |
-
if h.isalpha():
|
|
|
|
| 805 |
h = h
|
| 806 |
else:
|
| 807 |
if float(h).is_integer():
|
|
@@ -916,6 +951,13 @@ def get_flattened_tuples_list_all(col_dict):
|
|
| 916 |
|
| 917 |
return flattened_list
|
| 918 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 919 |
|
| 920 |
#SECONDARY
|
| 921 |
def get_cleaned_data_secondary(locations, main_info, secondary_info):
|
|
@@ -985,31 +1027,6 @@ def get_cleaned_data_secondary(locations, main_info, secondary_info):
|
|
| 985 |
|
| 986 |
return new_data
|
| 987 |
|
| 988 |
-
def get_secondary_tobeprinted_clean(selected_secondary_info, secondary_tobeprinted, secondary_info):
|
| 989 |
-
secondary_printed_clean = []
|
| 990 |
-
if len(secondary_info) == 1:
|
| 991 |
-
if any('acoustic' in col for col in selected_secondary_info.columns):
|
| 992 |
-
for acous in secondary_tobeprinted:
|
| 993 |
-
new_text = f"acoustic rating: {acous};"
|
| 994 |
-
secondary_printed_clean.append(new_text)
|
| 995 |
-
if any('fire' in col for col in selected_secondary_info.columns):
|
| 996 |
-
for fire in secondary_tobeprinted:
|
| 997 |
-
new_text = f"fire rating: {fire};"
|
| 998 |
-
secondary_printed_clean.append(new_text)
|
| 999 |
-
if len(secondary_info) == 2:
|
| 1000 |
-
for fire, acous in secondary_tobeprinted:
|
| 1001 |
-
new_text = f"fire rating: {fire}; acoustic rating: {acous};"
|
| 1002 |
-
secondary_printed_clean.append(new_text)
|
| 1003 |
-
print(new_text)
|
| 1004 |
-
return secondary_printed_clean
|
| 1005 |
-
|
| 1006 |
-
|
| 1007 |
-
def mix_width_secondary(widths, secondary_printed_clean):
|
| 1008 |
-
all_print = []
|
| 1009 |
-
for i in range(len(widths)):
|
| 1010 |
-
newest_text = f"{widths[i]}; {secondary_printed_clean[i]}"
|
| 1011 |
-
all_print.append(newest_text)
|
| 1012 |
-
return all_print
|
| 1013 |
|
| 1014 |
def merge_pdf_bytes_list(pdfs):
|
| 1015 |
writer = PdfWriter()
|
|
@@ -1026,331 +1043,6 @@ def merge_pdf_bytes_list(pdfs):
|
|
| 1026 |
|
| 1027 |
return output_stream.read()
|
| 1028 |
|
| 1029 |
-
'''def add_bluebeam_count_annotations_secondary(pdf_bytes, locations, main_info, secondary_info):
|
| 1030 |
-
pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
|
| 1031 |
-
pdf_document = fitz.open("pdf", pdf_stream.read()) # Open PDF in memory
|
| 1032 |
-
|
| 1033 |
-
page = pdf_document[0] # First page
|
| 1034 |
-
if len(main_info) == 2 and len(secondary_info) == 1:
|
| 1035 |
-
for loc in locations:
|
| 1036 |
-
coor, lbl, acous, clr = loc
|
| 1037 |
-
clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
|
| 1038 |
-
for cor in coor:
|
| 1039 |
-
#Create a Circle annotation (Count Markup)
|
| 1040 |
-
annot = page.add_circle_annot(
|
| 1041 |
-
fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
|
| 1042 |
-
)
|
| 1043 |
-
|
| 1044 |
-
#Assign required Bluebeam metadata
|
| 1045 |
-
annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
|
| 1046 |
-
annot.set_border(width=2) # Border thickness
|
| 1047 |
-
annot.set_opacity(1) # Fully visible
|
| 1048 |
-
|
| 1049 |
-
#Set annotation properties for Bluebeam Count detection
|
| 1050 |
-
annot.set_info("name", lbl) # Unique name for each count
|
| 1051 |
-
annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
|
| 1052 |
-
annot.set_info("title", lbl) # Optional
|
| 1053 |
-
annot.update() # Apply changes
|
| 1054 |
-
|
| 1055 |
-
if len(main_info) == 2 and len(secondary_info) == 2:
|
| 1056 |
-
for loc in locations:
|
| 1057 |
-
coor, lbl, acous, fire, clr = loc
|
| 1058 |
-
clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
|
| 1059 |
-
for cor in coor:
|
| 1060 |
-
#Create a Circle annotation (Count Markup)
|
| 1061 |
-
annot = page.add_circle_annot(
|
| 1062 |
-
fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
|
| 1063 |
-
)
|
| 1064 |
-
|
| 1065 |
-
#Assign required Bluebeam metadata
|
| 1066 |
-
annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
|
| 1067 |
-
annot.set_border(width=2) # Border thickness
|
| 1068 |
-
annot.set_opacity(1) # Fully visible
|
| 1069 |
-
|
| 1070 |
-
#Set annotation properties for Bluebeam Count detection
|
| 1071 |
-
annot.set_info("name", lbl) # Unique name for each count
|
| 1072 |
-
annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
|
| 1073 |
-
annot.set_info("title", lbl) # Optional
|
| 1074 |
-
annot.update() # Apply changes
|
| 1075 |
-
|
| 1076 |
-
if len(main_info) == 3 and len(secondary_info) == 1:
|
| 1077 |
-
for loc in locations:
|
| 1078 |
-
if len(loc) != 5:
|
| 1079 |
-
continue
|
| 1080 |
-
coor, lbl, w, acous, clr = loc
|
| 1081 |
-
clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
|
| 1082 |
-
for cor in coor:
|
| 1083 |
-
#Create a Circle annotation (Count Markup)
|
| 1084 |
-
annot = page.add_circle_annot(
|
| 1085 |
-
fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
|
| 1086 |
-
)
|
| 1087 |
-
|
| 1088 |
-
#Assign required Bluebeam metadata
|
| 1089 |
-
annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
|
| 1090 |
-
annot.set_border(width=2) # Border thickness
|
| 1091 |
-
annot.set_opacity(1) # Fully visible
|
| 1092 |
-
|
| 1093 |
-
#Set annotation properties for Bluebeam Count detection
|
| 1094 |
-
annot.set_info("name", lbl) # Unique name for each count
|
| 1095 |
-
annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
|
| 1096 |
-
annot.set_info("title", lbl) # Optional
|
| 1097 |
-
annot.update() # Apply changes
|
| 1098 |
-
|
| 1099 |
-
if len(main_info) == 3 and len(secondary_info) == 2:
|
| 1100 |
-
for loc in locations:
|
| 1101 |
-
coor, lbl, w, acous, fire, clr = loc
|
| 1102 |
-
clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
|
| 1103 |
-
for cor in coor:
|
| 1104 |
-
#Create a Circle annotation (Count Markup)
|
| 1105 |
-
annot = page.add_circle_annot(
|
| 1106 |
-
fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
|
| 1107 |
-
)
|
| 1108 |
-
|
| 1109 |
-
#Assign required Bluebeam metadata
|
| 1110 |
-
annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
|
| 1111 |
-
annot.set_border(width=2) # Border thickness
|
| 1112 |
-
annot.set_opacity(1) # Fully visible
|
| 1113 |
-
|
| 1114 |
-
#Set annotation properties for Bluebeam Count detection
|
| 1115 |
-
annot.set_info("name", lbl) # Unique name for each count
|
| 1116 |
-
annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
|
| 1117 |
-
annot.set_info("title", lbl) # Optional
|
| 1118 |
-
annot.update() # Apply changes
|
| 1119 |
-
|
| 1120 |
-
if len(main_info) == 4 and len(secondary_info) == 1:
|
| 1121 |
-
for loc in locations:
|
| 1122 |
-
coor, lbl, w, h, acous, clr = loc
|
| 1123 |
-
clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
|
| 1124 |
-
for cor in coor:
|
| 1125 |
-
#Create a Circle annotation (Count Markup)
|
| 1126 |
-
annot = page.add_circle_annot(
|
| 1127 |
-
fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
|
| 1128 |
-
)
|
| 1129 |
-
|
| 1130 |
-
#Assign required Bluebeam metadata
|
| 1131 |
-
annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
|
| 1132 |
-
annot.set_border(width=2) # Border thickness
|
| 1133 |
-
annot.set_opacity(1) # Fully visible
|
| 1134 |
-
|
| 1135 |
-
#Set annotation properties for Bluebeam Count detection
|
| 1136 |
-
annot.set_info("name", lbl) # Unique name for each count
|
| 1137 |
-
annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
|
| 1138 |
-
annot.set_info("title", lbl) # Optional
|
| 1139 |
-
annot.update() # Apply changes
|
| 1140 |
-
|
| 1141 |
-
if len(main_info) == 4 and len(secondary_info) == 2:
|
| 1142 |
-
for loc in locations:
|
| 1143 |
-
coor, lbl, w, h, acous, fire, clr = loc
|
| 1144 |
-
clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
|
| 1145 |
-
for cor in coor:
|
| 1146 |
-
#Create a Circle annotation (Count Markup)
|
| 1147 |
-
annot = page.add_circle_annot(
|
| 1148 |
-
fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
|
| 1149 |
-
)
|
| 1150 |
-
|
| 1151 |
-
#Assign required Bluebeam metadata
|
| 1152 |
-
annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
|
| 1153 |
-
annot.set_border(width=2) # Border thickness
|
| 1154 |
-
annot.set_opacity(1) # Fully visible
|
| 1155 |
-
|
| 1156 |
-
#Set annotation properties for Bluebeam Count detection
|
| 1157 |
-
annot.set_info("name", lbl) # Unique name for each count
|
| 1158 |
-
annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
|
| 1159 |
-
annot.set_info("title", lbl) # Optional
|
| 1160 |
-
annot.update() # Apply changes
|
| 1161 |
-
|
| 1162 |
-
|
| 1163 |
-
|
| 1164 |
-
#Save modified PDF to a variable instead of a file
|
| 1165 |
-
output_stream = io.BytesIO()
|
| 1166 |
-
pdf_document.save(output_stream)
|
| 1167 |
-
pdf_document.close()
|
| 1168 |
-
|
| 1169 |
-
return output_stream.getvalue() # Return the modified PDF as bytes
|
| 1170 |
-
|
| 1171 |
-
def modify_author_in_pypdf2(pdf_bytes, new_authors):
|
| 1172 |
-
pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
|
| 1173 |
-
reader = PyPDF2.PdfReader(pdf_stream)
|
| 1174 |
-
writer = PyPDF2.PdfWriter()
|
| 1175 |
-
|
| 1176 |
-
author_index = 0 # Track author assignment
|
| 1177 |
-
|
| 1178 |
-
for page in reader.pages:
|
| 1179 |
-
if "/Annots" in page: #Check if annotations exist
|
| 1180 |
-
for annot in page["/Annots"]:
|
| 1181 |
-
annot_obj = annot.get_object()
|
| 1182 |
-
# Assign each annotation a unique author
|
| 1183 |
-
if len(new_authors) == 0:
|
| 1184 |
-
break
|
| 1185 |
-
if author_index < len(new_authors):
|
| 1186 |
-
annot_obj.update({"/T": TextStringObject(new_authors[author_index])})#Convert to PdfString
|
| 1187 |
-
author_index += 1 # Move to next author
|
| 1188 |
-
|
| 1189 |
-
# If authors list is exhausted, keep the last one
|
| 1190 |
-
else:
|
| 1191 |
-
annot_obj.update({"/T": TextStringObject(new_authors[-1])})
|
| 1192 |
-
|
| 1193 |
-
writer.add_page(page)
|
| 1194 |
-
|
| 1195 |
-
#Save the modified PDF to a variable
|
| 1196 |
-
output_stream = io.BytesIO()
|
| 1197 |
-
writer.write(output_stream)
|
| 1198 |
-
output_stream.seek(0)
|
| 1199 |
-
|
| 1200 |
-
return output_stream.read()
|
| 1201 |
-
|
| 1202 |
-
def add_bluebeam_count_annotations(pdf_bytes, locations):
|
| 1203 |
-
pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
|
| 1204 |
-
pdf_document = fitz.open("pdf", pdf_stream.read()) # Open PDF in memory
|
| 1205 |
-
|
| 1206 |
-
page = pdf_document[0] # First page
|
| 1207 |
-
print(f"length of locations 0 from not sec presence: {len(locations[0])}")
|
| 1208 |
-
|
| 1209 |
-
for loc in locations:
|
| 1210 |
-
|
| 1211 |
-
if len(loc) == 3:
|
| 1212 |
-
coor, lbl, clr = loc
|
| 1213 |
-
clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
|
| 1214 |
-
for cor in coor:
|
| 1215 |
-
#Create a Circle annotation (Count Markup)
|
| 1216 |
-
annot = page.add_circle_annot(
|
| 1217 |
-
fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
|
| 1218 |
-
)
|
| 1219 |
-
|
| 1220 |
-
#Assign required Bluebeam metadata
|
| 1221 |
-
annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
|
| 1222 |
-
annot.set_border(width=2) # Border thickness
|
| 1223 |
-
annot.set_opacity(1) # Fully visible
|
| 1224 |
-
|
| 1225 |
-
#Set annotation properties for Bluebeam Count detection
|
| 1226 |
-
annot.set_info("name", lbl) # Unique name for each count
|
| 1227 |
-
annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
|
| 1228 |
-
annot.set_info("title", lbl) # Optional
|
| 1229 |
-
annot.update() # Apply changes
|
| 1230 |
-
if len(loc) == 4:
|
| 1231 |
-
coor, lbl, clr,w = loc
|
| 1232 |
-
clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
|
| 1233 |
-
for cor in coor:
|
| 1234 |
-
#Create a Circle annotation (Count Markup)
|
| 1235 |
-
annot = page.add_circle_annot(
|
| 1236 |
-
fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
|
| 1237 |
-
)
|
| 1238 |
-
|
| 1239 |
-
#Assign required Bluebeam metadata
|
| 1240 |
-
annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
|
| 1241 |
-
annot.set_border(width=2) # Border thickness
|
| 1242 |
-
annot.set_opacity(1) # Fully visible
|
| 1243 |
-
|
| 1244 |
-
#Set annotation properties for Bluebeam Count detection
|
| 1245 |
-
annot.set_info("name", lbl) # Unique name for each count
|
| 1246 |
-
annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
|
| 1247 |
-
annot.set_info("title", lbl) # Optional
|
| 1248 |
-
annot.update() # Apply changes
|
| 1249 |
-
|
| 1250 |
-
if len(loc) == 5:
|
| 1251 |
-
coor, lbl, clr,w,h = loc
|
| 1252 |
-
clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
|
| 1253 |
-
for cor in coor:
|
| 1254 |
-
#Create a Circle annotation (Count Markup)
|
| 1255 |
-
annot = page.add_circle_annot(
|
| 1256 |
-
fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10) # Small circle
|
| 1257 |
-
)
|
| 1258 |
-
|
| 1259 |
-
#Assign required Bluebeam metadata
|
| 1260 |
-
annot.set_colors(stroke=clr, fill=(1, 1, 1)) # Set stroke color and fill white
|
| 1261 |
-
annot.set_border(width=2) # Border thickness
|
| 1262 |
-
annot.set_opacity(1) # Fully visible
|
| 1263 |
-
|
| 1264 |
-
#Set annotation properties for Bluebeam Count detection
|
| 1265 |
-
annot.set_info("name", lbl) # Unique name for each count
|
| 1266 |
-
annot.set_info("subject", "Count") #Bluebeam uses "Count" for Count markups
|
| 1267 |
-
annot.set_info("title", lbl) # Optional
|
| 1268 |
-
annot.update() # Apply changes
|
| 1269 |
-
|
| 1270 |
-
#Save modified PDF to a variable instead of a file
|
| 1271 |
-
output_stream = io.BytesIO()
|
| 1272 |
-
pdf_document.save(output_stream)
|
| 1273 |
-
pdf_document.close()
|
| 1274 |
-
|
| 1275 |
-
return output_stream.getvalue() # Return the modified PDF as bytes
|
| 1276 |
-
|
| 1277 |
-
def modify_author_in_pypdf2(pdf_bytes, new_authors):
|
| 1278 |
-
pdf_stream = io.BytesIO(pdf_bytes) # Load PDF from bytes
|
| 1279 |
-
reader = PyPDF2.PdfReader(pdf_stream)
|
| 1280 |
-
writer = PyPDF2.PdfWriter()
|
| 1281 |
-
|
| 1282 |
-
author_index = 0 # Track author assignment
|
| 1283 |
-
|
| 1284 |
-
for page in reader.pages:
|
| 1285 |
-
if "/Annots" in page: #Check if annotations exist
|
| 1286 |
-
for annot in page["/Annots"]:
|
| 1287 |
-
annot_obj = annot.get_object()
|
| 1288 |
-
# Assign each annotation a unique author
|
| 1289 |
-
if len(new_authors) == 0:
|
| 1290 |
-
break
|
| 1291 |
-
if author_index < len(new_authors):
|
| 1292 |
-
annot_obj.update({"/T": TextStringObject(new_authors[author_index])})#Convert to PdfString
|
| 1293 |
-
author_index += 1 # Move to next author
|
| 1294 |
-
|
| 1295 |
-
# If authors list is exhausted, keep the last one
|
| 1296 |
-
else:
|
| 1297 |
-
annot_obj.update({"/T": TextStringObject(new_authors[-1])})
|
| 1298 |
-
|
| 1299 |
-
writer.add_page(page)
|
| 1300 |
-
|
| 1301 |
-
#Save the modified PDF to a variable
|
| 1302 |
-
output_stream = io.BytesIO()
|
| 1303 |
-
writer.write(output_stream)
|
| 1304 |
-
output_stream.seek(0)
|
| 1305 |
-
|
| 1306 |
-
return output_stream.read()
|
| 1307 |
-
|
| 1308 |
-
def merge_pdf_bytes_list(pdfs):
|
| 1309 |
-
writer = PdfWriter()
|
| 1310 |
-
|
| 1311 |
-
for pdf_bytes in pdfs:
|
| 1312 |
-
pdf_stream = io.BytesIO(pdf_bytes)
|
| 1313 |
-
reader = PdfReader(pdf_stream)
|
| 1314 |
-
for page in reader.pages:
|
| 1315 |
-
writer.add_page(page)
|
| 1316 |
-
|
| 1317 |
-
output_stream = io.BytesIO()
|
| 1318 |
-
writer.write(output_stream)
|
| 1319 |
-
output_stream.seek(0)
|
| 1320 |
-
|
| 1321 |
-
return output_stream.read()
|
| 1322 |
-
|
| 1323 |
-
def process_pdf_secondary(input_pdf_path, output_pdf_path, locations, new_authors, main_info, secondary_info):
|
| 1324 |
-
|
| 1325 |
-
if isinstance(input_pdf_path, bytes):
|
| 1326 |
-
original_pdf_bytes = input_pdf_path
|
| 1327 |
-
else:
|
| 1328 |
-
with open(input_pdf_path, "rb") as file:
|
| 1329 |
-
original_pdf_bytes = file.read()
|
| 1330 |
-
|
| 1331 |
-
#Add Bluebeam-compatible count annotations
|
| 1332 |
-
annotated_pdf_bytes = add_bluebeam_count_annotations_secondary(original_pdf_bytes, locations, main_info, secondary_info)
|
| 1333 |
-
|
| 1334 |
-
#Modify author field using PyPDF2
|
| 1335 |
-
final_pdf_bytes = modify_author_in_pypdf2(annotated_pdf_bytes, new_authors)
|
| 1336 |
-
|
| 1337 |
-
return final_pdf_bytes
|
| 1338 |
-
|
| 1339 |
-
def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
|
| 1340 |
-
#Load original PDF
|
| 1341 |
-
if isinstance(input_pdf_path, bytes):
|
| 1342 |
-
original_pdf_bytes = input_pdf_path
|
| 1343 |
-
else:
|
| 1344 |
-
with open(input_pdf_path, "rb") as file:
|
| 1345 |
-
original_pdf_bytes = file.read()
|
| 1346 |
-
|
| 1347 |
-
#Add Bluebeam-compatible count annotations
|
| 1348 |
-
annotated_pdf_bytes = add_bluebeam_count_annotations(original_pdf_bytes, locations)
|
| 1349 |
-
|
| 1350 |
-
#Modify author field using PyPDF2
|
| 1351 |
-
final_pdf_bytes = modify_author_in_pypdf2(annotated_pdf_bytes, new_authors)
|
| 1352 |
-
return final_pdf_bytes
|
| 1353 |
-
'''
|
| 1354 |
|
| 1355 |
def calculate_bounding_rect_count(vertices,padding):
|
| 1356 |
x, y = vertices[0]
|
|
@@ -1365,13 +1057,6 @@ def rgb_string_to_hex(rgb_string):
|
|
| 1365 |
return '#{:02X}{:02X}{:02X}'.format(int(r * 255), int(g * 255), int(b * 255))
|
| 1366 |
|
| 1367 |
|
| 1368 |
-
|
| 1369 |
-
|
| 1370 |
-
|
| 1371 |
-
|
| 1372 |
-
|
| 1373 |
-
|
| 1374 |
-
|
| 1375 |
def generate_annotation_xml_block_count(vertices, area_text, author, custom_data: dict, column_order: list, index: int,
|
| 1376 |
label: str = '',height:str='',width:str='',
|
| 1377 |
color:str='',countstyle:str='',countsize:str=''):
|
|
@@ -1528,6 +1213,17 @@ def mirrored_points(x, y, height_plan):
|
|
| 1528 |
mirrored = []
|
| 1529 |
mirrored.append([x, height_plan - y])
|
| 1530 |
return mirrored
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1531 |
|
| 1532 |
# Modified to adjust mirrored points
|
| 1533 |
def create_bb_bax_secondary(new_data, widthat, heightat, secondary_tobeprinted, CountStyles, input_user_clmn_names, page_number, height_plan):
|
|
@@ -1537,7 +1233,8 @@ def create_bb_bax_secondary(new_data, widthat, heightat, secondary_tobeprinted,
|
|
| 1537 |
R = str(float(r/255))
|
| 1538 |
G = str(float(g/255))
|
| 1539 |
B = str(float(b/255))
|
| 1540 |
-
vertix = mirrored_points(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
|
|
|
|
| 1541 |
if input_user_clmn_names[4] and input_user_clmn_names[5]:
|
| 1542 |
bax_annotations.append({
|
| 1543 |
'vertices': vertix,
|
|
@@ -1559,7 +1256,7 @@ def create_bb_bax_secondary(new_data, widthat, heightat, secondary_tobeprinted,
|
|
| 1559 |
'vertices': vertix,
|
| 1560 |
'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
|
| 1561 |
'author': 'ADR',
|
| 1562 |
-
'custom_data': {'FireRating': secondary_tobeprinted[i]
|
| 1563 |
'label': new_data[i][1], #change label to whatever u want
|
| 1564 |
'Height': heightat[i], #for tameem to change - i added any values'
|
| 1565 |
'Width':widthat[i],
|
|
@@ -1573,7 +1270,7 @@ def create_bb_bax_secondary(new_data, widthat, heightat, secondary_tobeprinted,
|
|
| 1573 |
'vertices': vertix,
|
| 1574 |
'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
|
| 1575 |
'author': 'ADR',
|
| 1576 |
-
'custom_data': {'FireRating':
|
| 1577 |
'label': new_data[i][1], #change label to whatever u want
|
| 1578 |
'Height': heightat[i], #for tameem to change - i added any values'
|
| 1579 |
'Width':widthat[i],
|
|
@@ -1596,7 +1293,8 @@ def create_bb_bax(new_data, widthat, heightat, CountStyles, page_number, height_
|
|
| 1596 |
G = str(float(g/255))
|
| 1597 |
B = str(float(b/255))
|
| 1598 |
|
| 1599 |
-
vertix = mirrored_points(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
|
|
|
|
| 1600 |
bax_annotations.append({
|
| 1601 |
'vertices': vertix,
|
| 1602 |
'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
|
|
@@ -1613,6 +1311,7 @@ def create_bb_bax(new_data, widthat, heightat, CountStyles, page_number, height_
|
|
| 1613 |
|
| 1614 |
return bax_annotations
|
| 1615 |
|
|
|
|
| 1616 |
#Handle missing widths or heights in some rows
|
| 1617 |
def generate_separate_dimensions(widths):
|
| 1618 |
widthat = []
|
|
@@ -1648,14 +1347,6 @@ def generate_bluebeam_columns_raw(column_names):
|
|
| 1648 |
return tostring(root, encoding="unicode", method="xml")
|
| 1649 |
|
| 1650 |
|
| 1651 |
-
# Example usage
|
| 1652 |
-
# column_names = ["mycustomcolumn"]
|
| 1653 |
-
#column_xml = generate_bluebeam_columns_raw(column_order)
|
| 1654 |
-
|
| 1655 |
-
#with open("count_type_Windows.xml", "w", encoding="utf-8") as f:
|
| 1656 |
-
# f.write(column_xml)
|
| 1657 |
-
|
| 1658 |
-
#print(column_xml)
|
| 1659 |
|
| 1660 |
def mainRun(schedule, plan, searcharray):
|
| 1661 |
print("mainRun is RUNNING")
|
|
@@ -1668,11 +1359,12 @@ def mainRun(schedule, plan, searcharray):
|
|
| 1668 |
p1_type = type(plan[0])
|
| 1669 |
print(f"el mawgood fe p[0]: {p1_type}")
|
| 1670 |
|
| 1671 |
-
|
| 1672 |
-
|
| 1673 |
#dfs = extract_tables(schedule)
|
| 1674 |
print(f"type of schedule: {type(schedule)}")
|
| 1675 |
-
|
|
|
|
| 1676 |
pdf_widths = []
|
| 1677 |
pdf_heights = []
|
| 1678 |
pdfs_count_type = []
|
|
@@ -1681,6 +1373,9 @@ def mainRun(schedule, plan, searcharray):
|
|
| 1681 |
page_number = 0
|
| 1682 |
bax_annotations_all_inputs = [] #for the same plan
|
| 1683 |
#pdfs = []
|
|
|
|
|
|
|
|
|
|
| 1684 |
for p in plan:
|
| 1685 |
annotation_counter +=1
|
| 1686 |
page_number +=1
|
|
@@ -1691,82 +1386,98 @@ def mainRun(schedule, plan, searcharray):
|
|
| 1691 |
|
| 1692 |
width_plan = page.cropbox.width # or: width = rect.x1 - rect.x0
|
| 1693 |
height_plan = page.cropbox.height # or: height = rect.y1 - rect.y0
|
| 1694 |
-
width_plan = math.ceil(width_plan)
|
| 1695 |
-
height_plan = math.ceil(height_plan)
|
| 1696 |
-
|
| 1697 |
-
|
| 1698 |
-
|
| 1699 |
-
|
| 1700 |
-
|
| 1701 |
-
|
| 1702 |
-
secondary_presence =
|
| 1703 |
-
|
| 1704 |
-
|
| 1705 |
-
|
| 1706 |
-
|
| 1707 |
-
|
| 1708 |
-
print("
|
| 1709 |
-
|
| 1710 |
-
|
| 1711 |
-
|
| 1712 |
-
|
| 1713 |
-
|
| 1714 |
-
|
| 1715 |
-
|
| 1716 |
-
|
| 1717 |
-
|
| 1718 |
-
|
| 1719 |
-
|
| 1720 |
-
|
| 1721 |
-
|
| 1722 |
-
|
| 1723 |
-
|
| 1724 |
-
|
| 1725 |
-
|
| 1726 |
-
|
| 1727 |
-
|
| 1728 |
-
|
| 1729 |
-
|
| 1730 |
-
|
| 1731 |
-
|
| 1732 |
-
|
| 1733 |
-
|
| 1734 |
-
|
| 1735 |
-
|
| 1736 |
-
|
| 1737 |
-
|
| 1738 |
-
|
| 1739 |
-
|
| 1740 |
-
|
| 1741 |
-
|
| 1742 |
-
|
| 1743 |
-
|
| 1744 |
-
|
| 1745 |
-
|
| 1746 |
-
|
| 1747 |
-
|
| 1748 |
-
|
| 1749 |
-
|
| 1750 |
-
|
| 1751 |
-
|
| 1752 |
-
|
| 1753 |
-
|
| 1754 |
-
|
| 1755 |
-
|
| 1756 |
-
|
| 1757 |
-
|
| 1758 |
-
|
| 1759 |
-
|
| 1760 |
-
|
| 1761 |
-
|
| 1762 |
-
|
| 1763 |
-
|
| 1764 |
-
|
| 1765 |
-
|
| 1766 |
-
|
| 1767 |
-
|
| 1768 |
-
|
| 1769 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1770 |
# if it is not byte type
|
| 1771 |
#pdfs_count_type.append(convert_to_bytes(p))
|
| 1772 |
pdfs_count_type.append(p)
|
|
@@ -1781,14 +1492,21 @@ def mainRun(schedule, plan, searcharray):
|
|
| 1781 |
|
| 1782 |
column_order = ['FireRating', 'AcousticRating', 'Height_', 'Width_']
|
| 1783 |
|
| 1784 |
-
|
| 1785 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1786 |
pretty_xml = save_multiple_annotations_count_bax(bax_annotation, 'count_type_Windows.bax', column_order,pdf_widths,pdf_heights,page_number)
|
| 1787 |
column_xml = generate_bluebeam_columns_raw(column_order)
|
| 1788 |
|
| 1789 |
-
|
| 1790 |
##### SHOULD return pretty_xml, column_xml, merged_pdf
|
| 1791 |
-
not_found =
|
| 1792 |
annotatedimgs=[]
|
| 1793 |
doc2 =fitz.open('pdf',merged_pdf)
|
| 1794 |
len_doc2 = len(doc2)
|
|
@@ -1816,6 +1534,4 @@ def mainRun(schedule, plan, searcharray):
|
|
| 1816 |
v='stroke'
|
| 1817 |
x,y,z=int(annot_color.get(v)[0]*255),int(annot_color.get(v)[1]*255),int(annot_color.get(v)[2]*255)
|
| 1818 |
list1.loc[len(list1)] =[annot.info['content'],annot.info['id'],annot.info['subject'],[x,y,z]]
|
| 1819 |
-
return annotatedimgs, doc2 , list1, repeated_labels , not_found, pretty_xml, column_xml
|
| 1820 |
-
|
| 1821 |
-
# return annotatedimg, doc2 , list1, repeated_labels , not_found
|
|
|
|
| 1 |
from collections import defaultdict
|
| 2 |
+
from collections import Counter
|
| 3 |
import pandas as pd
|
| 4 |
import random
|
| 5 |
import math
|
|
|
|
| 117 |
return selected_df
|
| 118 |
|
| 119 |
|
|
|
|
| 120 |
def crop_rename_table(indices, clmn_name, clmn_idx,df):
|
| 121 |
#crop_at = (max(set(indices), key=indices.count)) + 1
|
| 122 |
crop_at = max(indices) + 1
|
|
|
|
| 292 |
# fixed column names
|
| 293 |
fixed_list = ["door_id", "door_type", "width", "height"]
|
| 294 |
for i in range(len(empty_indices)):
|
| 295 |
+
if empty_indices[i] == 3 and empty_indices[i - 1] == 2:
|
| 296 |
+
fixed_list[2] = ""
|
| 297 |
+
if empty_indices[i] == 3 and not empty_indices[i - 1] == 2:
|
| 298 |
+
fixed_list[2] = "structural_opening"
|
| 299 |
fixed_list[empty_indices[i]] = ""
|
| 300 |
|
| 301 |
#finalize the column name structure
|
|
|
|
| 457 |
return substring_coordinates, words, point_list
|
| 458 |
|
| 459 |
|
| 460 |
+
def get_selected_columns_by_index(df, column_index_list, user_patterns):
    """Select columns of ``df`` by position and give them the standard names.

    Args:
        df: table to slice; indexed positionally with ``iloc``.
        column_index_list: positional column indices to keep, in output order.
        user_patterns: the user's field input; it is split into main and
            secondary parts, and each part is mapped to column names.

    Returns:
        The sliced table with its columns renamed to the main names followed
        by the secondary names.
    """
    picked = df.iloc[:, column_index_list]

    # Build the new header: main column names first, secondary names after,
    # so the result matches the structure of the clr_dictionary.
    primary_fields, extra_fields = separate_main_secondary(user_patterns)
    clmn_name = get_column_name(primary_fields) + get_column_name_secondary(extra_fields)

    print(f"clmn_name from the function el 3amla moshkela: {clmn_name}")
    picked.columns = clmn_name

    return picked
|
| 473 |
|
| 474 |
## Get the column indices from extract_tables(schedule)
|
|
|
|
| 490 |
cell_columns_appearance = flexible_search(dfs[i], non_empty_info)
|
| 491 |
cell_matches, col_matches = analyse_cell_columns(cell_columns_appearance)
|
| 492 |
|
| 493 |
+
if len(cell_matches) == 0 and len(col_matches) == 0 and i < len(dfs) - 1:
|
| 494 |
continue
|
| 495 |
+
elif len(cell_matches) == 0 and len(col_matches) == 0:
|
| 496 |
+
column_index_list = None
|
| 497 |
else:
|
| 498 |
#IN COLUMNS
|
| 499 |
if len(col_matches) == len(non_empty_info):
|
|
|
|
| 511 |
|
| 512 |
return column_index_list
|
| 513 |
|
| 514 |
+
|
| 515 |
+
|
| 516 |
+
def find_missing_columns(complete_list, non_complete_list):
    """Return the entries of ``complete_list`` that have no match in ``non_complete_list``.

    Entries are compared after normalization: all whitespace and any
    ``()[]{}`` brackets are removed and the text is lower-cased. Anything that
    is not a string normalizes to the empty string.

    Args:
        complete_list: the expected entries (e.g. the fields the user typed).
        non_complete_list: the entries that were actually found.

    Returns:
        The original (un-normalized) entries of ``complete_list`` that were
        not found, in their original order, with falsy entries removed.
    """
    def normalize_text(text):
        """Strip whitespace and brackets from ``text`` and lower-case it."""
        if not isinstance(text, str):
            return ""
        # Remove all whitespace characters (spaces, tabs, newlines)
        text = re.sub(r'\s+', '', text)
        # Remove brackets of any type
        text = re.sub(r'[\(\)\[\]\{\}]', '', text)
        return text.lower()

    # Normalize the found entries once; a set makes each lookup O(1).
    found_normalized = {normalize_text(item) for item in non_complete_list}

    missing = [
        item for item in complete_list
        if normalize_text(item) not in found_normalized
    ]

    # Drop empty fields: the input uses a fixed number of slots and the
    # unused ones are left blank, so they must not be reported as missing.
    return [item for item in missing if item]
|
| 551 |
+
|
| 552 |
+
# Returns the columns the code failed to locate on the schedule
|
| 553 |
+
def check_missing(dfs, user_patterns):
    """Report which user-entered fields could not be located in the tables.

    Every table in ``dfs`` is searched for the non-empty user fields. The
    table that produced the most cell matches is taken as the best candidate,
    and the user fields with no counterpart among its matched cells are
    returned.

    Args:
        dfs: the extracted schedule tables; each is indexed with ``iloc``.
        user_patterns: the user's field input, including blank slots.

    Returns:
        The entries of ``user_patterns`` that were not found. When ``dfs`` is
        empty, every non-blank entry is reported as missing.
    """
    # The search terms depend only on the user input, so build them once
    # rather than once per table.
    main_info, secondary_info = separate_main_secondary(user_patterns)
    non_empty_info = [item for item in main_info if item] + [item for item in secondary_info if item]

    all_words = []
    for df in dfs:
        cell_columns_appearance = flexible_search(df, non_empty_info)
        cell_matches, _col_matches = analyse_cell_columns(cell_columns_appearance)
        # Read back the actual cell text at every matched (row, col) position.
        all_words.append([df.iloc[row, col] for row, col in cell_matches])

    # default=[] keeps an empty ``dfs`` from raising ValueError in max().
    found_words = max(all_words, key=len, default=[])
    print(found_words)

    return find_missing_columns(user_patterns, found_words)
|
| 580 |
+
|
| 581 |
# get the index of dataframe that has the maximum column matches in the dfs from model table detection
|
| 582 |
def get_df_index(dfs, user_patterns):
|
| 583 |
df_matches = []
|
|
|
|
| 695 |
# law 0.5 maslan tetkatab we law mesh keda yesheel el decimal point
|
| 696 |
def get_width_info_tobeprinted(new_data):
|
| 697 |
width_info_tobeprinted = []
|
| 698 |
+
if len(new_data[0]) < 4:
|
| 699 |
+
for _,_,_, in new_data:
|
| 700 |
+
width_info_tobeprinted.append("N/A mm wide x N/A mm high")
|
| 701 |
if len(new_data[0]) == 4:
|
| 702 |
for _,_,_, w in new_data:
|
| 703 |
#w = re.sub(r",", "", w)
|
|
|
|
| 709 |
h = re.sub(r",", "", h)
|
| 710 |
|
| 711 |
#if w == "N/A":
|
| 712 |
+
#if w.isalpha():
|
| 713 |
+
if is_not_number(w):
|
| 714 |
w = w
|
| 715 |
else:
|
| 716 |
if float(w).is_integer():
|
|
|
|
| 718 |
else:
|
| 719 |
w = w
|
| 720 |
#if h == "N/A":
|
| 721 |
+
#if h.isalpha():
|
| 722 |
+
if is_not_number(h):
|
| 723 |
h = h
|
| 724 |
else:
|
| 725 |
if float(h).is_integer():
|
|
|
|
| 760 |
return widths
|
| 761 |
|
| 762 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 763 |
|
| 764 |
+
def is_not_number(s: str) -> bool:
    """Return True when ``s`` cannot be converted to a float.

    Anything ``float()`` accepts counts as a number: ints, decimals,
    scientific notation, and surrounding whitespace. Values ``float()``
    cannot handle at all, such as ``None``, are reported as "not a number"
    instead of raising.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: a string that is not numeric (e.g. "N/A", "1,200").
        # TypeError: a value float() does not accept at all (e.g. None).
        return True
    return False
|
| 770 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 771 |
|
| 772 |
def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
|
| 773 |
width_info_tobeprinted = []
|
|
|
|
| 776 |
if len(main_info) == 2 and len(secondary_info) == 1:
|
| 777 |
for coords, label, acous, color in new_data:
|
| 778 |
secondary_info_tobeprinted.append(acous)
|
| 779 |
+
width_info_tobeprinted.append("N/A mm wide x N/A mm high")
|
| 780 |
|
| 781 |
|
| 782 |
if len(main_info) == 2 and len(secondary_info) == 2:
|
| 783 |
for coords, label, acous, fire, color in new_data:
|
| 784 |
secondary_info_tobeprinted.append((acous, fire))
|
| 785 |
+
width_info_tobeprinted.append("N/A mm wide x N/A mm high")
|
| 786 |
|
| 787 |
if len(main_info) == 3 and len(secondary_info) == 1:
|
| 788 |
for coords, label, width, acous, color in new_data:
|
|
|
|
| 799 |
for coords, label, width, height, acous, color in new_data:
|
| 800 |
w = re.sub(r",", "", width)
|
| 801 |
h = re.sub(r",", "", height)
|
| 802 |
+
#if w.isalpha():
|
| 803 |
+
if is_not_number(w):
|
| 804 |
w = w
|
| 805 |
else:
|
| 806 |
if float(w).is_integer():
|
|
|
|
| 808 |
else:
|
| 809 |
w = w
|
| 810 |
#if h == "N/A":
|
| 811 |
+
#if h.isalpha():
|
| 812 |
+
if is_not_number(h):
|
| 813 |
h = h
|
| 814 |
else:
|
| 815 |
if float(h).is_integer():
|
|
|
|
| 817 |
else:
|
| 818 |
h = h
|
| 819 |
width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
|
| 820 |
+
secondary_info_tobeprinted.append((acous))
|
| 821 |
|
| 822 |
if len(main_info) == 4 and len(secondary_info) == 2:
|
| 823 |
for coords, label, width, height, acous, fire, color in new_data:
|
|
|
|
| 826 |
w = re.sub(r",", "", width)
|
| 827 |
h = re.sub(r",", "", height)
|
| 828 |
#if w == "N/A":
|
| 829 |
+
#if w.isalpha():
|
| 830 |
+
if is_not_number(w):
|
| 831 |
w = w
|
| 832 |
else:
|
| 833 |
if float(w).is_integer():
|
|
|
|
| 835 |
else:
|
| 836 |
w = w
|
| 837 |
#if h == "N/A":
|
| 838 |
+
#if h.isalpha():
|
| 839 |
+
if is_not_number(h):
|
| 840 |
h = h
|
| 841 |
else:
|
| 842 |
if float(h).is_integer():
|
|
|
|
| 951 |
|
| 952 |
return flattened_list
|
| 953 |
|
| 954 |
+
def get_flattened_tuples_list_no_doortype(selected_columns):
    """Turn each table row into a tuple with one fixed colour appended.

    Args:
        selected_columns: table whose rows are read with ``itertuples``
            (index excluded).

    Returns:
        A list with one tuple per row: the row's values followed by the
        colour triple ``(0, 0, 255)``.
    """
    # The same colour is appended to every row.
    default_colour = (0, 0, 255)
    return [
        row + (default_colour,)
        for row in selected_columns.itertuples(name=None, index=False)
    ]
|
| 961 |
|
| 962 |
#SECONDARY
|
| 963 |
def get_cleaned_data_secondary(locations, main_info, secondary_info):
|
|
|
|
| 1027 |
|
| 1028 |
return new_data
|
| 1029 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1030 |
|
| 1031 |
def merge_pdf_bytes_list(pdfs):
|
| 1032 |
writer = PdfWriter()
|
|
|
|
| 1043 |
|
| 1044 |
return output_stream.read()
|
| 1045 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1046 |
|
| 1047 |
def calculate_bounding_rect_count(vertices,padding):
|
| 1048 |
x, y = vertices[0]
|
|
|
|
| 1057 |
return '#{:02X}{:02X}{:02X}'.format(int(r * 255), int(g * 255), int(b * 255))
|
| 1058 |
|
| 1059 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1060 |
def generate_annotation_xml_block_count(vertices, area_text, author, custom_data: dict, column_order: list, index: int,
|
| 1061 |
label: str = '',height:str='',width:str='',
|
| 1062 |
color:str='',countstyle:str='',countsize:str=''):
|
|
|
|
| 1213 |
mirrored = []
|
| 1214 |
mirrored.append([x, height_plan - y])
|
| 1215 |
return mirrored
|
| 1216 |
+
def point_mupdf_to_pdf(x, y, page):
    """Flip a point vertically within the page and offset it by the mediabox origin.

    Args:
        x: horizontal coordinate of the point.
        y: vertical coordinate of the point.
        page: object exposing ``rect.height``, ``mediabox.x0`` and
            ``mediabox.y0`` (presumably a PyMuPDF page; confirm with callers).

    Returns:
        A one-element list ``[[pdf_x, pdf_y]]`` holding the converted point.
    """
    page_height = float(page.rect.height)
    box = page.mediabox

    # x only shifts by the mediabox origin; y is mirrored about the page
    # height before the same shift is applied.
    converted = [box.x0 + x, box.y0 + (page_height - y)]
    return [converted]
|
| 1227 |
|
| 1228 |
# Modified to adjust mirrored points
|
| 1229 |
def create_bb_bax_secondary(new_data, widthat, heightat, secondary_tobeprinted, CountStyles, input_user_clmn_names, page_number, height_plan):
|
|
|
|
| 1233 |
R = str(float(r/255))
|
| 1234 |
G = str(float(g/255))
|
| 1235 |
B = str(float(b/255))
|
| 1236 |
+
#vertix = mirrored_points(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
|
| 1237 |
+
vertix = point_mupdf_to_pdf(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
|
| 1238 |
if input_user_clmn_names[4] and input_user_clmn_names[5]:
|
| 1239 |
bax_annotations.append({
|
| 1240 |
'vertices': vertix,
|
|
|
|
| 1256 |
'vertices': vertix,
|
| 1257 |
'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
|
| 1258 |
'author': 'ADR',
|
| 1259 |
+
'custom_data': {'FireRating': secondary_tobeprinted[i], 'AcousticRating': 'N/A', 'Height_': heightat[i],'Width_': widthat[i]} , #identify custom colums here as( Column name: Text to add )
|
| 1260 |
'label': new_data[i][1], #change label to whatever u want
|
| 1261 |
'Height': heightat[i], #for tameem to change - i added any values'
|
| 1262 |
'Width':widthat[i],
|
|
|
|
| 1270 |
'vertices': vertix,
|
| 1271 |
'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
|
| 1272 |
'author': 'ADR',
|
| 1273 |
+
'custom_data': {'FireRating': 'N/A', 'AcousticRating': secondary_tobeprinted[i], 'Height_': heightat[i],'Width_': widthat[i]} , #identify custom colums here as( Column name: Text to add )
|
| 1274 |
'label': new_data[i][1], #change label to whatever u want
|
| 1275 |
'Height': heightat[i], #for tameem to change - i added any values'
|
| 1276 |
'Width':widthat[i],
|
|
|
|
| 1293 |
G = str(float(g/255))
|
| 1294 |
B = str(float(b/255))
|
| 1295 |
|
| 1296 |
+
#vertix = mirrored_points(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
|
| 1297 |
+
vertix = point_mupdf_to_pdf(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
|
| 1298 |
bax_annotations.append({
|
| 1299 |
'vertices': vertix,
|
| 1300 |
'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
|
|
|
|
| 1311 |
|
| 1312 |
return bax_annotations
|
| 1313 |
|
| 1314 |
+
|
| 1315 |
#Handle missing widths or heights in some rows
|
| 1316 |
def generate_separate_dimensions(widths):
|
| 1317 |
widthat = []
|
|
|
|
| 1347 |
return tostring(root, encoding="unicode", method="xml")
|
| 1348 |
|
| 1349 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1350 |
|
| 1351 |
def mainRun(schedule, plan, searcharray):
|
| 1352 |
print("mainRun is RUNNING")
|
|
|
|
| 1359 |
p1_type = type(plan[0])
|
| 1360 |
print(f"el mawgood fe p[0]: {p1_type}")
|
| 1361 |
|
| 1362 |
+
|
| 1363 |
+
print(f"length of search array: {len(searcharray)}")
|
| 1364 |
#dfs = extract_tables(schedule)
|
| 1365 |
print(f"type of schedule: {type(schedule)}")
|
| 1366 |
+
print(f"length of schedules: {len(schedule)}")
|
| 1367 |
+
|
| 1368 |
pdf_widths = []
|
| 1369 |
pdf_heights = []
|
| 1370 |
pdfs_count_type = []
|
|
|
|
| 1373 |
page_number = 0
|
| 1374 |
bax_annotations_all_inputs = [] #for the same plan
|
| 1375 |
#pdfs = []
|
| 1376 |
+
not_found_list = []
|
| 1377 |
+
repeated_labels_list = []
|
| 1378 |
+
missings = []
|
| 1379 |
for p in plan:
|
| 1380 |
annotation_counter +=1
|
| 1381 |
page_number +=1
|
|
|
|
| 1386 |
|
| 1387 |
width_plan = page.cropbox.width # or: width = rect.x1 - rect.x0
|
| 1388 |
height_plan = page.cropbox.height # or: height = rect.y1 - rect.y0
|
| 1389 |
+
#width_plan = math.ceil(width_plan)
|
| 1390 |
+
#height_plan = math.ceil(height_plan)
|
| 1391 |
+
for k in range(len(schedule)):
|
| 1392 |
+
dfs = extract_tables_model(schedule[k])
|
| 1393 |
+
user_input_this_schedule = searcharray[k]
|
| 1394 |
+
for j in range(len(user_input_this_schedule)):
|
| 1395 |
+
user_input = user_input_this_schedule[j]
|
| 1396 |
+
|
| 1397 |
+
secondary_presence = False
|
| 1398 |
+
if user_input[4] or user_input[5]:
|
| 1399 |
+
secondary_presence = True
|
| 1400 |
+
main_info_, secondary_info_ = separate_main_secondary(user_input)
|
| 1401 |
+
main_info = [item for item in main_info_ if item]
|
| 1402 |
+
secondary_info = [item for item in secondary_info_ if item]
|
| 1403 |
+
print("feh secondary information")
|
| 1404 |
+
if user_input[4]:
|
| 1405 |
+
print("Fire rate mawgooda")
|
| 1406 |
+
if user_input[5]:
|
| 1407 |
+
print("Acoustic Rate mawgooda")
|
| 1408 |
+
else:
|
| 1409 |
+
print("mafeesh secondary information")
|
| 1410 |
+
|
| 1411 |
+
selected_columns_combined = get_selected_columns_all(dfs, user_input)
|
| 1412 |
+
if selected_columns_combined is None:
|
| 1413 |
+
dfs_normal = extract_tables(schedule[k])
|
| 1414 |
+
column_indices = get_column_indices_from_dfs_normal(dfs_normal, user_input)
|
| 1415 |
+
if column_indices is None:
|
| 1416 |
+
missing_clmns = check_missing(dfs, user_input)
|
| 1417 |
+
missing_message = f"{missing_clmns} can't be extracted from table input {j+1} in schedule {k+1}"
|
| 1418 |
+
missings.append(missing_message)
|
| 1419 |
+
|
| 1420 |
+
continue # continue to the next user input
|
| 1421 |
+
if len(dfs) == 1:
|
| 1422 |
+
selected_columns_combined = get_selected_columns_by_index(dfs[0], column_indices, user_input)
|
| 1423 |
+
if len(dfs) > 1:
|
| 1424 |
+
index_df = get_df_index(dfs, user_input)
|
| 1425 |
+
selected_columns_combined = get_selected_columns_by_index(dfs[index_df], column_indices, user_input)
|
| 1426 |
+
selected_columns_combined = selected_columns_combined.applymap(lambda x: 'N/A' if isinstance(x, str) and x.strip() == '' else x)
|
| 1427 |
+
selected_columns_combined = selected_columns_combined.fillna('N/A')
|
| 1428 |
+
selected_columns_combined = selected_columns_combined.replace(r'(?i)\bn/a\b', 'N/A', regex=True)
|
| 1429 |
+
kelma = get_st_op_pattern(selected_columns_combined, user_input)
|
| 1430 |
+
if "door_type" in selected_columns_combined.columns:
|
| 1431 |
+
col_dict = get_similar_colors_all(selected_columns_combined)
|
| 1432 |
+
flattened_list = get_flattened_tuples_list_all(col_dict)
|
| 1433 |
+
else:
|
| 1434 |
+
if secondary_presence:
|
| 1435 |
+
main_info = main_info + [""]
|
| 1436 |
+
flattened_list = get_flattened_tuples_list_no_doortype(selected_columns_combined)
|
| 1437 |
+
plan_texts = read_text(p)
|
| 1438 |
+
|
| 1439 |
+
if secondary_presence:
|
| 1440 |
+
locations, not_found = get_word_locations_plan_secondary(flattened_list,plan_texts, main_info, secondary_info)
|
| 1441 |
+
not_found_list.append(not_found)
|
| 1442 |
+
new_data3 = get_cleaned_data_secondary(locations,main_info,secondary_info)
|
| 1443 |
+
|
| 1444 |
+
repeated_labels = get_repeated_labels(locations)
|
| 1445 |
+
repeated_labels = list(repeated_labels)
|
| 1446 |
+
repeated_labels_list.append(repeated_labels)
|
| 1447 |
+
if kelma == None:
|
| 1448 |
+
widths, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
|
| 1449 |
+
else:
|
| 1450 |
+
width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
|
| 1451 |
+
cleaned_width = get_cleaned_width(width_info_tobeprinted)
|
| 1452 |
+
widths = get_widths_bb_format(cleaned_width, kelma)
|
| 1453 |
+
|
| 1454 |
+
|
| 1455 |
+
#Count type annotation
|
| 1456 |
+
widht_count, height_count = generate_separate_dimensions(widths)
|
| 1457 |
+
bax = create_bb_bax_secondary(new_data3, widht_count, height_count, secondary_tobeprinted, CountStyles, user_input, page_number, page)
|
| 1458 |
+
bax_annotations_all_inputs.append(bax)
|
| 1459 |
+
|
| 1460 |
+
|
| 1461 |
+
else:
|
| 1462 |
+
locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
|
| 1463 |
+
not_found_list.append(not_found)
|
| 1464 |
+
new_data = get_cleaned_data(locations)
|
| 1465 |
+
if len(new_data) == 0:
|
| 1466 |
+
continue
|
| 1467 |
+
repeated_labels = get_repeated_labels(locations)
|
| 1468 |
+
repeated_labels = list(repeated_labels)
|
| 1469 |
+
repeated_labels_list.append(repeated_labels)
|
| 1470 |
+
if kelma == None:
|
| 1471 |
+
widths = get_width_info_tobeprinted(new_data)
|
| 1472 |
+
else:
|
| 1473 |
+
width_info_tobeprinted = get_width_info_tobeprinted(new_data)
|
| 1474 |
+
cleaned_width = get_cleaned_width(width_info_tobeprinted)
|
| 1475 |
+
widths = get_widths_bb_format(cleaned_width, kelma)
|
| 1476 |
+
#count type annotation
|
| 1477 |
+
widht_count, height_count = generate_separate_dimensions(widths)
|
| 1478 |
+
bax = create_bb_bax(new_data, widht_count, height_count, CountStyles, page_number, page)
|
| 1479 |
+
bax_annotations_all_inputs.append(bax)
|
| 1480 |
+
|
| 1481 |
# if it is not byte type
|
| 1482 |
#pdfs_count_type.append(convert_to_bytes(p))
|
| 1483 |
pdfs_count_type.append(p)
|
|
|
|
| 1492 |
|
| 1493 |
column_order = ['FireRating', 'AcousticRating', 'Height_', 'Width_']
|
| 1494 |
|
| 1495 |
+
## Getting the not found in all plans
|
| 1496 |
+
flattened_not_found_list = [item for sublist in not_found_list for item in sublist]
|
| 1497 |
+
counts_not_found = Counter(flattened_not_found_list)
|
| 1498 |
+
not_found_any_plan = []
|
| 1499 |
+
for key, value in counts_not_found.items():
|
| 1500 |
+
if value == len(pdfs_count_type):
|
| 1501 |
+
not_found_any_plan.append(key)
|
| 1502 |
+
|
| 1503 |
+
flattened_repeated_labels_list = [item for sublist in repeated_labels_list for item in sublist]
|
| 1504 |
pretty_xml = save_multiple_annotations_count_bax(bax_annotation, 'count_type_Windows.bax', column_order,pdf_widths,pdf_heights,page_number)
|
| 1505 |
column_xml = generate_bluebeam_columns_raw(column_order)
|
| 1506 |
|
| 1507 |
+
repeated_labels = flattened_repeated_labels_list
|
| 1508 |
##### SHOULD return pretty_xml, column_xml, merged_pdf
|
| 1509 |
+
not_found = not_found_any_plan
|
| 1510 |
annotatedimgs=[]
|
| 1511 |
doc2 =fitz.open('pdf',merged_pdf)
|
| 1512 |
len_doc2 = len(doc2)
|
|
|
|
| 1534 |
v='stroke'
|
| 1535 |
x,y,z=int(annot_color.get(v)[0]*255),int(annot_color.get(v)[1]*255),int(annot_color.get(v)[2]*255)
|
| 1536 |
list1.loc[len(list1)] =[annot.info['content'],annot.info['id'],annot.info['subject'],[x,y,z]]
|
| 1537 |
+
return annotatedimgs, doc2 , list1, repeated_labels , not_found, pretty_xml, column_xml
|
|
|
|
|
|