Spaces:

Copopopopo
/

Golden_ERS

Build error

App Files Community

Copopopopo committed on Jan 7, 2025

Commit

00259f5

verified ·

1 Parent(s): d986a8e

Update HF_processor.py

Browse files

Files changed (1) hide show

HF_processor.py +16 -2

HF_processor.py CHANGED Viewed

@@ -90,9 +90,12 @@ class FMEADataPipeline:
     def process_and_split_excel(self):
-        print('Got into process_and_split_excel')
         new_rows = []
         columns = ['Proposed Task', 'Task Type', 'Frequency', 'Action Party', 'TA (Y/N)']
         for _, row in self.fmea.iterrows():
             cell_value = row[columns[0]]
@@ -105,14 +108,25 @@ class FMEADataPipeline:
                     for column in columns:
                         column_values = row[column].split('\n') if isinstance(row[column], str) else [row[column]]
                         if idx < len(column_values):
-                            new_row[column] = column_values[idx]
                         else:
                             new_row[column] = np.nan  # Fill with NaN if the split is not aligned
                     new_rows.append(new_row)
             else:
                 new_rows.append(row)
         self.new_fmea = pd.DataFrame(new_rows)
         self.new_fmea.to_excel('processed_excel.xlsx', index=False)
         return self.new_fmea

     def process_and_split_excel(self):
         new_rows = []
         columns = ['Proposed Task', 'Task Type', 'Frequency', 'Action Party', 'TA (Y/N)']
+        clean_columns = ['Frequency', 'Action Party', 'TA (Y/N)']  # Columns to clean bullet points
+        # Enhanced regex for bullet points
+        bullet_pattern = r'^\s*[\da-zA-Z]+[)\.\-•]?\s*'
         for _, row in self.fmea.iterrows():
             cell_value = row[columns[0]]
                     for column in columns:
                         column_values = row[column].split('\n') if isinstance(row[column], str) else [row[column]]
                         if idx < len(column_values):
+                            new_value = column_values[idx]
+                            # Remove bullet points if column is in clean_columns
+                            if column in clean_columns:
+                                new_value = re.sub(bullet_pattern, '', new_value).strip()
+                            new_row[column] = new_value
                         else:
                             new_row[column] = np.nan  # Fill with NaN if the split is not aligned
                     new_rows.append(new_row)
             else:
+                # Clean up bullet points for non-split rows in clean_columns
+                for column in clean_columns:
+                    if isinstance(row[column], str):
+                        row[column] = re.sub(bullet_pattern, '', row[column]).strip()
                 new_rows.append(row)
         self.new_fmea = pd.DataFrame(new_rows)
         self.new_fmea.to_excel('processed_excel.xlsx', index=False)
+        print('process_and_split_excel done')
         return self.new_fmea