MeasurementOrg

Sleeping

App Files Files Community

Marthee committed on Mar 17, 2025

Commit

9e12634

verified ·

1 Parent(s): c562e63

Create Doors_Schedule

Browse files

Files changed (1) hide show

Doors_Schedule +229 -0

Doors_Schedule ADDED Viewed

	@@ -0,0 +1,229 @@

+from collections import defaultdict
+import pandas as pd
+import random
+import re
+import io
+import pypdfium2 as pdfium
+import fitz
+from PIL import Image, ImageDraw
+from PyPDF2 import PdfReader, PdfWriter
+from PyPDF2.generic import TextStringObject, NameObject, ArrayObject, FloatObject
+from PyPDF2.generic import NameObject, TextStringObject, DictionaryObject, FloatObject, ArrayObject
+from PyPDF2 import PdfReader
+from PyPDF2.generic import TextStringObject
def convert2img(path):
    """Render the first page of a PDF and return it as a BGR pixel array.

    Args:
        path: Location of the PDF; passed unchanged to ``pdfium.PdfDocument``.

    Returns:
        A contiguous NumPy array holding the rendered page with its first
        three channels in blue-green-red order.
    """
    # NumPy is imported here because the module's import block does not
    # provide it; the previous body referenced undefined ``np``/``cv2`` names.
    import numpy as np

    pdf = pdfium.PdfDocument(path)
    page = pdf.get_page(0)
    pil_image = page.render().to_pil()
    pixels = np.array(pil_image)
    # Taking channels 2, 1, 0 swaps red and blue (RGB -> BGR) and drops any
    # alpha channel, so no OpenCV call is needed for the conversion.
    # NOTE(review): assumes the render yields at least three channels.
    return np.ascontiguousarray(pixels[..., 2::-1])
def convert2pillow(path):
    """Render the first page of the PDF at *path* via pdfium.

    Returns:
        Whatever the rendered bitmap's ``to_pil()`` conversion produces.
    """
    document = pdfium.PdfDocument(path)
    first_page = document.get_page(0)
    bitmap = first_page.render()
    return bitmap.to_pil()
def calculate_midpoint(x1, y1, x2, y2):
    """Return the midpoint of the segment (x1, y1)-(x2, y2) as an int pair.

    Each coordinate is averaged with true division and then truncated
    toward zero by ``int``.
    """
    mid_x = (x1 + x2) / 2
    mid_y = (y1 + y2) / 2
    return int(mid_x), int(mid_y)
def read_text(input_pdf_path):
    """Return the ``"words"`` text extraction of the last page of a PDF.

    The earlier implementation looped over every page but overwrote its
    result each time, so only the final page's words were ever returned;
    that contract is kept. The per-page ``apply_redactions()`` call was
    dropped because no redactions are added and nothing is saved, so it
    could not affect the returned value.

    Args:
        input_pdf_path: Path handed to ``fitz.open``.

    Returns:
        The result of ``page.get_text("words")`` for the last page, or an
        empty list when the document has no pages.
    """
    text_instances = []
    # The context manager closes the document even if extraction fails.
    with fitz.open(input_pdf_path) as pdf_document:
        page_count = pdf_document.page_count
        if page_count:
            last_page = pdf_document[page_count - 1]
            text_instances = last_page.get_text("words")
    return text_instances
def search_columns(df):
    """Locate door-schedule fields in a table's header or first two rows.

    The lookup proceeds in stages:

    1. Match door id, door type, width and height against column labels.
    2. If neither door id nor door type matched a label, look for both in
       the cells of the first two rows.
    3. If neither width nor height has been found, look for both in the
       cells of the first two rows.
    4. If width or height is still missing, look for a "structural opening"
       column label.
    5. If "structural opening" has no entry, look for it in the cells of
       the first two rows. This runs even when width and height were found.

    Column labels are converted with ``str`` before matching so tables with
    non-string labels (for example pandas' default integer labels) do not
    raise ``TypeError``.

    Args:
        df: The pandas DataFrame to inspect.

    Returns:
        A dict keyed by ``"door_id"``, ``"door_type"``, ``"width"``,
        ``"height"`` and/or ``"structural opening"``. A label match maps to a
        list of column positions; a cell match maps to a list of
        ``(row, column)`` tuples. Keys with no match are absent.
    """
    door_id_pattern = r'\b(?:door\s*)?(?:id|no|number)(?!-)\b'
    door_type_pattern = r'^\s*(?:\S*\s+)?door\s*[\n\s]*type\s*$|^type\s*$'
    width_pattern = r'^\s*(?:WIDTH|Width|width)\s*$'
    height_pattern = r'^\s*(?:HEIGHT|Height|height)\s*$'
    structural_opening_pattern = r'\b(?:Structural\s+opening|structural\s+opening)\b'

    def find_column_indices(df, patterns):
        """Map each pattern key to the column positions whose label matches."""
        matches = {}
        for key, pattern in patterns.items():
            indices = [
                i for i, col in enumerate(df.columns)
                if re.search(pattern, str(col), re.IGNORECASE)
            ]
            if indices:
                matches[key] = indices
        return matches

    def find_matches_in_cells(df, patterns):
        """Map each pattern key to (row, column) hits in the first two rows."""
        row_limit = min(2, len(df))
        column_count = len(df.columns)
        matches = {}
        for key, pattern in patterns.items():
            found = []
            for row_idx in range(row_limit):
                for col_idx in range(column_count):
                    cell = df.iat[row_idx, col_idx]
                    # Only string cells can hold a header caption.
                    if isinstance(cell, str) and re.search(pattern, cell, re.IGNORECASE):
                        found.append((row_idx, col_idx))
            if found:
                matches[key] = found
        return matches

    column_matches = find_column_indices(df, {
        "door_id": door_id_pattern,
        "door_type": door_type_pattern,
        "width": width_pattern,
        "height": height_pattern,
    })

    # Fall back to cell contents only when BOTH fields of a pair are missing.
    if "door_id" not in column_matches and "door_type" not in column_matches:
        column_matches.update(find_matches_in_cells(
            df, {"door_id": door_id_pattern, "door_type": door_type_pattern}))

    if "width" not in column_matches and "height" not in column_matches:
        column_matches.update(find_matches_in_cells(
            df, {"width": width_pattern, "height": height_pattern}))

    # A combined "structural opening" column can stand in for width/height.
    if "width" not in column_matches or "height" not in column_matches:
        column_matches.update(find_column_indices(
            df, {"structural opening": structural_opening_pattern}))

    if "structural opening" not in column_matches:
        column_matches.update(find_matches_in_cells(
            df, {"structural opening": structural_opening_pattern}))

    return column_matches
def row_clmn_indices(column_matches):
    """Reduce each match list to its first hit and split rows from columns.

    Args:
        column_matches: Dict mapping a field key to a non-empty list whose
            items are either column positions or ``(row, column)`` tuples.

    Returns:
        A pair ``(clm_idx, starting_row_index)``. ``clm_idx`` is a list of
        ``(key, column)`` tuples, one per key. ``starting_row_index`` holds
        the row of the first hit for every key whose hits are tuples; keys
        with plain column positions contribute nothing to it.
    """
    clm_idx = []
    starting_row_index = []
    for key, hits in column_matches.items():
        first_hit = hits[0]
        if isinstance(first_hit, tuple):
            # Cell match: remember which row held the caption.
            clm_idx.append((key, first_hit[1]))
            starting_row_index.append(first_hit[0])
        else:
            clm_idx.append((key, first_hit))
    return clm_idx, starting_row_index
def generate_current_table_without_cropping(clm_idx, df):
    """Return the columns of *df* at positions *clm_idx*, keeping every row.

    A progress message is printed after the selection succeeds.
    """
    every_row = slice(None)
    subset = df.iloc[every_row, clm_idx]
    print("hello I generated the selected columns table without cropping")
    return subset
def column_name_index(clm_idx):
    """Split ``(name, index)`` pairs into two parallel lists.

    Returns:
        ``(names, indices)`` in the same order as the incoming pairs.
    """
    names, positions = [], []
    for name, position in clm_idx:
        names += [name]
        positions += [position]
    return names, positions
def crop_rename_table(indices, clmn_name, clmn_idx, df):
    """Drop the caption rows of a table and keep the renamed data columns.

    Args:
        indices: Row positions at which captions were found; everything up
            to and including the largest one is discarded.
        clmn_name: New labels for the kept columns, in ``clmn_idx`` order.
        clmn_idx: Positions of the columns to keep.
        df: Source DataFrame; it is not modified.

    Returns:
        A new DataFrame holding the rows below the last caption row, with a
        fresh 0-based index and columns labelled ``clmn_name``.

    Raises:
        ValueError: If ``indices`` is empty (raised by ``max``).
    """
    crop_at = max(indices) + 1
    # Chained calls replace the in-place reset on a derived slice.
    body = df.iloc[crop_at:].reset_index(drop=True)
    slctd_clms = body.iloc[:, clmn_idx].copy()
    slctd_clms.columns = clmn_name
    return slctd_clms
def details_in_another_table(clmn_name, clmn_idx, current_dfs, dfs):
    """Build the data table from a sibling table with the same column count.

    The last table in ``dfs`` that is a different object from
    ``current_dfs`` and has the same number of columns is used. When no such
    table exists, ``current_dfs`` itself is used. Previously the search could
    select ``current_dfs`` even when another candidate existed, and left the
    result unbound when nothing matched.

    The chosen table's own column labels are inserted as the first data row
    before the columns are renamed.

    Args:
        clmn_name: Labels to assign to the selected columns.
        clmn_idx: Positions of the columns to take from the chosen table.
        current_dfs: The table in which the captions were found.
        dfs: All candidate tables.

    Returns:
        A new DataFrame with columns ``clmn_name`` whose first row holds the
        chosen table's original labels, followed by its data rows.
    """
    wanted_width = current_dfs.shape[1]
    df = current_dfs  # fallback when no other table has the same width
    for dff in dfs:
        if dff is not current_dfs and dff.shape[1] == wanted_width:
            df = dff

    # .copy() keeps the source table untouched by the later relabelling.
    new_df = df.iloc[:, clmn_idx].copy()
    column_names_row = pd.DataFrame([list(new_df.columns)], columns=new_df.columns)
    new_df = pd.concat([column_names_row, new_df], ignore_index=True)
    new_df.columns = clmn_name
    return new_df
def extract_tables(schedule):
    """Extract every table PyMuPDF detects in a PDF as pandas DataFrames.

    Tables are gathered from all pages in page order. The earlier version
    overwrote its per-page result inside the loop, so only the last page's
    tables were returned and an empty document raised ``UnboundLocalError``.

    Args:
        schedule: Path (or other source accepted by ``fitz.open``) of the PDF.

    Returns:
        A list with one DataFrame per detected table; empty when no tables
        are found.
    """
    dfs = []
    # Tables are converted while the document is still open; the context
    # manager then releases the file handle.
    with fitz.open(schedule) as doc:
        for page in doc:
            for tab in page.find_tables():
                dfs.append(tab.to_pandas())
    return dfs
def get_selected_columns(dfs):
    """Pick the door-schedule columns out of every extracted table.

    For each table the captions are located with ``search_columns`` and then
    handled by where they were found:

    * nothing found - the table is skipped;
    * some captions in the labels and some in cells ("MIX") - a message is
      printed and the table is skipped;
    * all captions in the column labels, or all in cells - a table shorter
      than 10 rows is assumed to hold only the header, so its data is taken
      from a same-width table via ``details_in_another_table``; otherwise
      the data is taken from the table itself.

    Tables with exactly 10 rows used to match neither ``< 10`` nor ``> 10``
    and were silently dropped; they now follow the same-table path.

    Args:
        dfs: List of DataFrames, e.g. the output of ``extract_tables``.

    Returns:
        A list of tuples ``(selected_table, source_table, clm_idx,
        clmn_name, starting_row_index)``, one per table that produced a
        selection.
    """
    selected_columns = []
    for i, table in enumerate(dfs):
        column_matches = search_columns(table)
        clm_idx, starting_row_index = row_clmn_indices(column_matches)
        clmn_name, clmn_idx = column_name_index(clm_idx)

        if not clm_idx and not starting_row_index:
            print(f"this is df {i}, SEARCH IN ANOTHER DF")
            continue

        # MIX: only some of the captions were found inside cells.
        if starting_row_index and len(clm_idx) != len(starting_row_index):
            print(f"this is df {i} MIX, search in another df but make sure of the length")

        in_columns = not starting_row_index
        in_cells = len(starting_row_index) == len(clm_idx)
        if not (in_columns or in_cells):
            continue

        if in_columns:
            print(f"this is df {i} mawgooda fel columns, check el df length 3ashan law el details fe table tany")
        else:
            print(f"this is df {i} mawgooda fel cells, check el df length 3ashan law el details fe table tany")

        if len(table) < 10:
            # Header-only table: the data rows live in another table.
            selected_columns_new = details_in_another_table(clmn_name, clmn_idx, table, dfs)
        elif in_columns:
            selected_columns_new = generate_current_table_without_cropping(clmn_idx, table)
        else:
            print(f"this is df {i} call crop_rename_table(indices, clmn_name, clmn_idx,df)")
            selected_columns_new = crop_rename_table(starting_row_index, clmn_name, clmn_idx, table)

        selected_columns.append((selected_columns_new, table, clm_idx, clmn_name, starting_row_index))
    return selected_columns
def get_st_op_pattern(clm_idx, clmn_name, starting_row_index, df=None):
    """Fetch the caption cell of the "structural opening" column.

    Args:
        clm_idx: List of ``(name, column_position)`` pairs.
        clmn_name: Column names in the same order as ``clm_idx``.
        starting_row_index: Caption row per column, indexed like
            ``clmn_name``.
        df: Table to read from. The earlier body read an undefined name
            ``df``; this optional parameter supplies it. When omitted, a
            module-level ``df`` is used if one exists, so three-argument
            callers keep working.

    Returns:
        The cell at the caption row and column of "structural opening", or
        ``None`` when that column is not listed in ``clmn_name``.

    Raises:
        NameError: If ``df`` is omitted and no module-level ``df`` exists.
        IndexError: If ``starting_row_index`` has no entry at the column's
            position.
    """
    target = 'structural opening'
    if target not in clmn_name:
        # Previously this path failed on an unbound local.
        return None

    if df is None:
        df = globals().get("df")
        if df is None:
            raise NameError("name 'df' is not defined")

    structural_opening_value = dict(clm_idx).get(target)
    position = clmn_name.index(target)
    kelma = df.iloc[starting_row_index[position], structural_opening_value]
    return kelma