Spaces:

Copopopopo
/

Golden_ERS

Build error

App Files Files Community

Copopopopo committed on Mar 27, 2025

Commit

af1260e

verified ·

1 Parent(s): 3823ff7

Update HF_processor.py

Browse files

Files changed (1) hide show

HF_processor.py +43 -6

HF_processor.py CHANGED Viewed

@@ -131,17 +131,15 @@ class FMEADataPipeline:
                 lambda x: mapping_dict_code.get(x.replace("_secondary", "")) if x else None
             )
-            # Construct the description column with catalog code source
             # Construct the description column with catalog code source
             self.fmea[f"{name}_description"] = merged_m2.apply(
                 lambda x: (
-                    f"*{x.replace('_secondary', '').split(' ; ')[0]} ({catalog_code_dict.get(x.replace('_secondary', ''), 'Unknown')})"
                     if "_secondary" in x else
-                    f"*{mapping_dict_short_text.get(mapping_dict_code.get(x), x).split(' ; ')[0]}"
-                    if " ; " in mapping_dict_short_text.get(mapping_dict_code.get(x), x) else
                     mapping_dict_short_text.get(mapping_dict_code.get(x), x)
                 )
             )
     def column_arranger(self):
@@ -179,7 +177,7 @@ class FMEADataPipeline:
         bullet_pattern = r'^\s*[\da-zA-Z]+[)\.\-•]?\s*'  # To clean bullets for specific columns
         for _, row in self.fmea.iterrows():
-            cell_value = row[columns[0]]
             if isinstance(cell_value, str):
                 # Split on newline characters (\n)
@@ -206,4 +204,43 @@ class FMEADataPipeline:
         # Create a new DataFrame with processed rows
         self.new_fmea = pd.DataFrame(new_rows)
         self.new_fmea.to_excel('processed_fmea.xlsx', index=False)
-        return self.new_fmea

                 lambda x: mapping_dict_code.get(x.replace("_secondary", "")) if x else None
             )
             # Construct the description column with catalog code source
             self.fmea[f"{name}_description"] = merged_m2.apply(
                 lambda x: (
+                    f"{x.replace('_secondary', '')} ({catalog_code_dict.get(x.replace('_secondary', ''), 'Unknown')})"
                     if "_secondary" in x else
                     mapping_dict_short_text.get(mapping_dict_code.get(x), x)
                 )
             )
+        return self.fmea
     def column_arranger(self):
         bullet_pattern = r'^\s*[\da-zA-Z]+[)\.\-•]?\s*'  # To clean bullets for specific columns
         for _, row in self.fmea.iterrows():
+            cell_value = row.loc[columns[0]]
             if isinstance(cell_value, str):
                 # Split on newline characters (\n)
         # Create a new DataFrame with processed rows
         self.new_fmea = pd.DataFrame(new_rows)
         self.new_fmea.to_excel('processed_fmea.xlsx', index=False)
+        return self.new_fmea
+    def process_and_split_excel_2(self):
+        columns_to_split = ['Proposed Task', 'Task Type', 'Frequency', 'Action Party', 'TA (Y/N)']
+        clean_columns = ['Proposed Task', 'Task Type','Frequency', 'Action Party', 'TA (Y/N)']  # Columns to clean bullet points
+        bullet_pattern = r'^\s*(?:\d+[\)\.\-•]\s*|[a-zA-Z]\))'  # Regex to clean bullets
+        separated_rows = []
+        for _, row in self.fmea.iterrows():
+            split_values = []
+            for col in columns_to_split:
+                if isinstance(row[col], str) and row[col]:
+                    values = [item.strip() for item in row[col].split('\n') if item.strip()]
+                    if col in clean_columns:
+                        values = [re.sub(bullet_pattern, '', v).strip() for v in values]
+                else:
+                    values = [row[col]]  # Keep non-string values as is
+                split_values.append(values)
+            # Determine the maximum number of splits across all columns
+            max_length = max(len(values) for values in split_values)
+            # Ensure all columns have the same number of values by repeating the last value
+            for i, values in enumerate(split_values):
+                if len(values) < max_length:
+                    split_values[i] = values + [values[-1]] * (max_length - len(values))
+            # Create new rows for each split value
+            for i in range(max_length):
+                new_row = row.copy()
+                for col, values in zip(columns_to_split, split_values):
+                    new_row[col] = values[i]
+                separated_rows.append(new_row)
+        # Create a new DataFrame with processed rows
+        self.new_fmea = pd.DataFrame(separated_rows)
+        self.new_fmea.to_excel('processed_fmea.xlsx', index=False)
+        return self.new_fmea