MeasurementOrg

Sleeping

App Files Community

Marthee committed on Apr 5, 2025

Commit

9c9a7e1

verified ·

1 Parent(s): 3dbac4d

Update Doors_Schedule.py

Browse files

Files changed (1) hide show

Doors_Schedule.py +34 -15

Doors_Schedule.py CHANGED Viewed

@@ -52,8 +52,10 @@ def search_columns(df):
   door_id_pattern = r'\b(?:door\s*)?(?:id|no|number)(?!-)\b'
   door_type_pattern = r'^\s*(?:\S*\s+)?door\s*[\n\s]*type\s*$|^type\s*$'
-  width_pattern = r'^\s*(?:WIDTH|Width|width)\s*$'
-  height_pattern = r'^\s*(?:HEIGHT|Height|height)\s*$'
   structural_opening_pattern = r'\b(?:Structural\s+opening|structural\s+opening)\b'
   # Function to search in column names and return column indices
@@ -152,20 +154,37 @@ def crop_rename_table(indices, clmn_name, clmn_idx,df):
   return slctd_clms
 def details_in_another_table(clmn_name, clmn_idx, current_dfs, dfs):
-  for dff in dfs:
-    if dff.shape[1] == current_dfs.shape[1]:
-      df = dff
-  # Create a new DataFrame with selected columns
-  new_df = df.iloc[:, clmn_idx].copy()  # Use .copy() to avoid modifying original df
-  column_names_row = pd.DataFrame([new_df.columns], columns=new_df.columns)
-  # Append the original data below the column names row
-  new_df = pd.concat([column_names_row, new_df], ignore_index=True)
-  # Rename columns
-  new_df.columns = clmn_name
-  return new_df
 def extract_tables(schedule):
   doc = fitz.open("pdf",schedule)
@@ -191,7 +210,7 @@ def get_selected_columns(dfs):
         print(f"this is df {i} MIX, search in another df but make sure of the length")
       #IN COLUMNS
-      if len(starting_row_index) == 0:
         print(f"this is df {i} mawgooda fel columns, check el df length 3ashan law el details fe table tany")
         #details in another table
         if len(dfs[i]) <10:
@@ -199,7 +218,7 @@ def get_selected_columns(dfs):
           selected_columns.append((selected_columns_new, dfs[i],clm_idx, clmn_name, starting_row_index))
         #details in the same table
         if len(dfs[i]) >10:
-          selected_columns_new = generate_current_table_without_cropping(clmn_idx, clmn_name, dfs[i])
           selected_columns.append((selected_columns_new, dfs[i],clm_idx, clmn_name, starting_row_index))
       #IN CELLS

   door_id_pattern = r'\b(?:door\s*)?(?:id|no|number)(?!-)\b'
   door_type_pattern = r'^\s*(?:\S*\s+)?door\s*[\n\s]*type\s*$|^type\s*$'
+  #width_pattern = r'^\s*(?:WIDTH|Width|width)\s*$'
+  #height_pattern = r'^\s*(?:HEIGHT|Height|height)\s*$'
+  width_pattern = r'^\s*width\s*(?:\n|\s)+.*$'
+  height_pattern = r'^\s*height\s*(?:\n|\s)+.*$'
   structural_opening_pattern = r'\b(?:Structural\s+opening|structural\s+opening)\b'
   # Function to search in column names and return column indices
   return slctd_clms
+def clean_column_row(row):
     # Convert every cell of `row` to a string and strip a leading
     # "<digits>-" prefix (plus any whitespace right after the dash),
     # e.g. "3- Width" becomes "Width". Cells without such a prefix are
     # returned unchanged (as strings). A new list is returned; `row`
     # itself is not modified.
+    return [re.sub(r'^\d+-\s*', '', str(cell)) for cell in row]
 def details_in_another_table(clmn_name, clmn_idx, current_dfs, dfs):
     # Build one DataFrame from every *other* table in `dfs` that has the
     # same number of columns as `current_dfs`.
     #
     # Parameters:
     #   clmn_name   -- column labels assigned to the result.
     #   clmn_idx    -- positional column selector passed to `.iloc`.
     #   current_dfs -- the table being processed; excluded from the search.
     #   dfs         -- iterable of candidate tables (objects exposing
     #                  `.shape` / `.iloc`; presumably pandas DataFrames).
     #
     # Returns the stacked DataFrame, or None when no other table has a
     # matching column count.
     #
     # NOTE(review): assumes len(clmn_name) equals the number of columns
     # selected by clmn_idx; the `.columns` assignments below depend on
     # it -- TODO confirm against callers.
     #
     # The identity test (`is not`) skips current_dfs itself; apart from
     # that, only the column count (shape[1]) is compared.
+    matching_dfs = [
+        dff for dff in dfs
+        if dff is not current_dfs and current_dfs.shape[1] == dff.shape[1]
+    ]
     # No candidate table: report it with None instead of an empty frame.
     # NOTE(review): callers are not visible here -- verify they handle None.
+    if not matching_dfs:
+        return None
+    updated_dfs = []
+    for dff in matching_dfs:
         # Copy the selected columns; the renaming below is then applied
         # to the copy only.
+        selected_dff = dff.iloc[:, clmn_idx].copy()
+        # Clean the column names and make them a row
         # (presumably the header of such a table is really its first data
         # row, picked up as column names during extraction -- TODO confirm)
+        cleaned_header = clean_column_row(selected_dff.columns.tolist())
+        col_names_as_row = pd.DataFrame([cleaned_header])
+        # Rename columns
         # Both pieces get identical labels so the concat below aligns them
         # column by column.
+        selected_dff.columns = clmn_name
+        col_names_as_row.columns = clmn_name
+        # Combine the cleaned row with data
+        temp_df = pd.concat([col_names_as_row, selected_dff], ignore_index=True)
+        updated_dfs.append(temp_df)
     # Stack the per-table results; ignore_index=True renumbers the rows
     # from 0.
+    combined_df = pd.concat(updated_dfs, ignore_index=True)
+    return combined_df
 def extract_tables(schedule):
   doc = fitz.open("pdf",schedule)
         print(f"this is df {i} MIX, search in another df but make sure of the length")
       #IN COLUMNS
+      if len(starting_row_index) == 0 and len(clm_idx)>2:
         print(f"this is df {i} mawgooda fel columns, check el df length 3ashan law el details fe table tany")
         #details in another table
         if len(dfs[i]) <10:
           selected_columns.append((selected_columns_new, dfs[i],clm_idx, clmn_name, starting_row_index))
         #details in the same table
         if len(dfs[i]) >10:
+          selected_columns_new = generate_current_table_without_cropping(clmn_idx,clmn_name, dfs[i])
           selected_columns.append((selected_columns_new, dfs[i],clm_idx, clmn_name, starting_row_index))
       #IN CELLS