Copopopopo committed on
Commit
00259f5
·
verified ·
1 Parent(s): d986a8e

Update HF_processor.py

Browse files
Files changed (1) hide show
  1. HF_processor.py +16 -2
HF_processor.py CHANGED
@@ -90,9 +90,12 @@ class FMEADataPipeline:
90
 
91
 
92
  def process_and_split_excel(self):
93
- print('Got into process_and_split_excel')
94
  new_rows = []
95
  columns = ['Proposed Task', 'Task Type', 'Frequency', 'Action Party', 'TA (Y/N)']
 
 
 
 
96
 
97
  for _, row in self.fmea.iterrows():
98
  cell_value = row[columns[0]]
@@ -105,14 +108,25 @@ class FMEADataPipeline:
105
  for column in columns:
106
  column_values = row[column].split('\n') if isinstance(row[column], str) else [row[column]]
107
  if idx < len(column_values):
108
- new_row[column] = column_values[idx]
 
 
 
 
 
 
109
  else:
110
  new_row[column] = np.nan # Fill with NaN if the split is not aligned
111
  new_rows.append(new_row)
112
  else:
 
 
 
 
113
  new_rows.append(row)
114
 
115
  self.new_fmea = pd.DataFrame(new_rows)
116
  self.new_fmea.to_excel('processed_excel.xlsx', index=False)
117
 
 
118
  return self.new_fmea
 
90
 
91
 
92
  def process_and_split_excel(self):
 
93
  new_rows = []
94
  columns = ['Proposed Task', 'Task Type', 'Frequency', 'Action Party', 'TA (Y/N)']
95
+ clean_columns = ['Frequency', 'Action Party', 'TA (Y/N)'] # Columns to clean bullet points
96
+
97
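+ # Regex for leading list markers such as "1.", "a)" or "2-". NOTE(review): the delimiter is optional, so a plain leading word (e.g. "Monthly") is stripped too, while a bare "•" is not matched - confirm intent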
98
+ bullet_pattern = r'^\s*[\da-zA-Z]+[)\.\-•]?\s*'
99
 
100
  for _, row in self.fmea.iterrows():
101
  cell_value = row[columns[0]]
 
108
  for column in columns:
109
  column_values = row[column].split('\n') if isinstance(row[column], str) else [row[column]]
110
  if idx < len(column_values):
111
+ new_value = column_values[idx]
112
+
113
+ # Remove bullet points if column is in clean_columns
114
+ if column in clean_columns:
115
+ new_value = re.sub(bullet_pattern, '', new_value).strip()
116
+
117
+ new_row[column] = new_value
118
  else:
119
  new_row[column] = np.nan # Fill with NaN if the split is not aligned
120
  new_rows.append(new_row)
121
  else:
122
+ # Clean up bullet points for non-split rows in clean_columns
123
+ for column in clean_columns:
124
+ if isinstance(row[column], str):
125
+ row[column] = re.sub(bullet_pattern, '', row[column]).strip()
126
  new_rows.append(row)
127
 
128
  self.new_fmea = pd.DataFrame(new_rows)
129
  self.new_fmea.to_excel('processed_excel.xlsx', index=False)
130
 
131
+ print('process_and_split_excel done')
132
  return self.new_fmea