Copopopopo committed on
Commit
47f321a
·
verified ·
1 Parent(s): 36c1a97

Update HF_processor.py

Browse files
Files changed (1) hide show
  1. HF_processor.py +6 -26
HF_processor.py CHANGED
@@ -1,6 +1,6 @@
1
  import pandas as pd
2
  import numpy as np
3
- import re
4
  from fuzzywuzzy import process
5
 
6
  class FMEADataPipeline:
@@ -22,11 +22,13 @@ class FMEADataPipeline:
22
  self.new_fmea = None
23
 
24
  def read_catalog_profile(self):
 
25
  self.cp = pd.read_json(self.catalog_profile['cp'],orient='split')
26
  self.object_part = pd.read_json(self.catalog_profile['object part'],orient='split')
27
  self.symptom = pd.read_json(self.catalog_profile['symptom'],orient='split')
28
  self.damage = pd.read_json(self.catalog_profile['damage'],orient='split')
29
- self.cause = pd.read_json(self.catalog_profile['cause'],orient='split')
 
30
 
31
  def build_connector(self):
32
  self.code_group = self.cp[self.cp['Catalog profile']==self.catalog_code][['Catalog','Code group']]
@@ -36,10 +38,6 @@ class FMEADataPipeline:
36
  self.code_group = pd.merge(self.code_group,self.fmea_code,how='left',on='Catalog')
37
  self.fmea['Catalog Profile (SAP)'] = self.catalog_code
38
 
39
- def column_dropper(self):
40
- cols = [6,7,8,9,10,11,17,18,19,20,21,22,23]
41
- self.fmea.drop(self.fmea.columns[cols],axis=1,inplace=True)
42
- self.fmea = self.fmea.iloc[1:]
43
 
44
  def column_matcher(self):
45
  for code,sap in zip(self.fmea_code['fmea code'],[self.object_part,self.symptom,self.damage,self.cause]):
@@ -66,8 +64,7 @@ class FMEADataPipeline:
66
 
67
  self.fmea[f"{name}_description"] = self.fmea[name].apply(
68
  lambda x: mapping_dict_short_text.get(x) if x in mapping_dict_short_text else None)
69
-
70
- print('Column Matcher Done')
71
 
72
  def column_arranger(self):
73
  catalog_profile = self.fmea.pop('Catalog Profile (SAP)')
@@ -91,17 +88,11 @@ class FMEADataPipeline:
91
  self.fmea.insert(12,cause_5.name,cause_5)
92
  self.fmea.insert(13,cause_5_desc.name,cause_5_desc)
93
 
94
- print('Column Arranger Done')
95
-
96
 
97
  def process_and_split_excel(self):
98
  print('Got into process_and_split_excel')
99
  new_rows = []
100
  columns = ['Proposed Task', 'Task Type', 'Frequency', 'Action Party', 'TA (Y/N)']
101
- clean_columns = ['Frequency', 'Action Party', 'TA (Y/N)'] # Columns to clean bullet points
102
-
103
- # Enhanced regex for bullet points
104
- bullet_pattern = r'^\s*[\da-zA-Z]+[)\.\-•]?\s*'
105
 
106
  for _, row in self.fmea.iterrows():
107
  cell_value = row[columns[0]]
@@ -114,25 +105,14 @@ class FMEADataPipeline:
114
  for column in columns:
115
  column_values = row[column].split('\n') if isinstance(row[column], str) else [row[column]]
116
  if idx < len(column_values):
117
- new_value = column_values[idx]
118
-
119
- # Remove bullet points if column is in clean_columns
120
- if column in clean_columns:
121
- new_value = re.sub(bullet_pattern, '', new_value).strip()
122
-
123
- new_row[column] = new_value
124
  else:
125
  new_row[column] = np.nan # Fill with NaN if the split is not aligned
126
  new_rows.append(new_row)
127
  else:
128
- # Clean up bullet points for non-split rows in clean_columns
129
- for column in clean_columns:
130
- if isinstance(row[column], str):
131
- row[column] = re.sub(bullet_pattern, '', row[column]).strip()
132
  new_rows.append(row)
133
 
134
  self.new_fmea = pd.DataFrame(new_rows)
135
  self.new_fmea.to_excel('processed_excel.xlsx', index=False)
136
 
137
- print('process_and_split_excel done')
138
  return self.new_fmea
 
1
  import pandas as pd
2
  import numpy as np
3
+ import json
4
  from fuzzywuzzy import process
5
 
6
  class FMEADataPipeline:
 
22
  self.new_fmea = None
23
 
24
  def read_catalog_profile(self):
25
+
26
  self.cp = pd.read_json(self.catalog_profile['cp'],orient='split')
27
  self.object_part = pd.read_json(self.catalog_profile['object part'],orient='split')
28
  self.symptom = pd.read_json(self.catalog_profile['symptom'],orient='split')
29
  self.damage = pd.read_json(self.catalog_profile['damage'],orient='split')
30
+ self.cause = pd.read_json(self.catalog_profile['cause'],orient='split')
31
+
32
 
33
  def build_connector(self):
34
  self.code_group = self.cp[self.cp['Catalog profile']==self.catalog_code][['Catalog','Code group']]
 
38
  self.code_group = pd.merge(self.code_group,self.fmea_code,how='left',on='Catalog')
39
  self.fmea['Catalog Profile (SAP)'] = self.catalog_code
40
 
 
 
 
 
41
 
42
  def column_matcher(self):
43
  for code,sap in zip(self.fmea_code['fmea code'],[self.object_part,self.symptom,self.damage,self.cause]):
 
64
 
65
  self.fmea[f"{name}_description"] = self.fmea[name].apply(
66
  lambda x: mapping_dict_short_text.get(x) if x in mapping_dict_short_text else None)
67
+
 
68
 
69
  def column_arranger(self):
70
  catalog_profile = self.fmea.pop('Catalog Profile (SAP)')
 
88
  self.fmea.insert(12,cause_5.name,cause_5)
89
  self.fmea.insert(13,cause_5_desc.name,cause_5_desc)
90
 
 
 
91
 
92
  def process_and_split_excel(self):
93
  print('Got into process_and_split_excel')
94
  new_rows = []
95
  columns = ['Proposed Task', 'Task Type', 'Frequency', 'Action Party', 'TA (Y/N)']
 
 
 
 
96
 
97
  for _, row in self.fmea.iterrows():
98
  cell_value = row[columns[0]]
 
105
  for column in columns:
106
  column_values = row[column].split('\n') if isinstance(row[column], str) else [row[column]]
107
  if idx < len(column_values):
108
+ new_row[column] = column_values[idx]
 
 
 
 
 
 
109
  else:
110
  new_row[column] = np.nan # Fill with NaN if the split is not aligned
111
  new_rows.append(new_row)
112
  else:
 
 
 
 
113
  new_rows.append(row)
114
 
115
  self.new_fmea = pd.DataFrame(new_rows)
116
  self.new_fmea.to_excel('processed_excel.xlsx', index=False)
117
 
 
118
  return self.new_fmea