Copopopopo committed on
Commit
7dd4209
·
verified ·
1 Parent(s): b86f23a

Update HF_processor.py according to DEV

Browse files
Files changed (1) hide show
  1. HF_processor.py +79 -23
HF_processor.py CHANGED
@@ -38,35 +38,91 @@ class FMEADataPipeline:
38
  self.code_group = pd.merge(self.code_group,self.fmea_code,how='left',on='Catalog')
39
  self.fmea['Catalog Profile (SAP)'] = self.catalog_code
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  def column_matcher(self):
42
- for code,sap in zip(self.fmea_code['fmea code'],[self.object_part,self.symptom,self.damage,self.cause]):
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- filtered_table_2 = sap[sap['Code group'] == self.code_group[self.code_group['fmea code']==code]['Code group'].values[0]]
45
- s = filtered_table_2['Short text'].tolist()
46
- m = self.fmea[code].apply(lambda x : process.extract(x, s,limit=1))
47
- m2 = m.apply(lambda x:', '.join([i[0] for i in x if i[1] >= self.threshold]))
48
 
49
- s_all = sap['Short text'].tolist()
50
- m_all = self.fmea[code].apply(lambda x : process.extract(x, s_all,limit=1))
51
- m2_all = m_all.apply(lambda x: ''.join([i[0] for i in x if i[1] >= self.threshold]))
 
52
 
53
- # Add "_secondary" flag to m2_all values
54
  m2_all_flagged = m2_all.apply(lambda x: f"{x}_secondary" if x else x)
55
-
56
  merged_m2 = m2.combine(m2_all_flagged, lambda x, y: x if x else y)
57
- mapping_dict_code = sap.set_index('Short text')['Code'].to_dict()
58
- mapping_dict_short_text = sap.set_index('Code')['Short text'].to_dict()
59
-
60
- #extract name for new column from code_group table
61
- name = self.code_group[self.code_group['fmea code'] == code]['Catalog'].values[0]
62
 
63
- # Apply the mapping
64
- self.fmea[name] = merged_m2.apply(
65
- lambda x: mapping_dict_code.get(x) if x in mapping_dict_code else None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
- self.fmea[f"{name}_description"] = self.fmea[name].apply(
68
- lambda x: mapping_dict_short_text.get(x) if x in mapping_dict_short_text else None)
69
-
70
 
71
  def column_arranger(self):
72
  catalog_profile = self.fmea.pop('Catalog Profile (SAP)')
@@ -89,7 +145,7 @@ class FMEADataPipeline:
89
  self.fmea.insert(10,c_damage_desc.name,c_damage_desc)
90
  self.fmea.insert(12,cause_5.name,cause_5)
91
  self.fmea.insert(13,cause_5_desc.name,cause_5_desc)
92
-
93
 
94
  def process_and_split_excel(self):
95
  new_rows = []
@@ -100,7 +156,7 @@ class FMEADataPipeline:
100
  bullet_pattern = r'^\s*[\da-zA-Z]+[)\.\-•]?\s*' # To clean bullets for specific columns
101
 
102
  for _, row in self.fmea.iterrows():
103
- cell_value = row[columns[0]] # 'Proposed Task'
104
 
105
  if isinstance(cell_value, str):
106
  # Split on newline characters (\n)
 
38
  self.code_group = pd.merge(self.code_group,self.fmea_code,how='left',on='Catalog')
39
  self.fmea['Catalog Profile (SAP)'] = self.catalog_code
40
 
41
def split_columns_rows(self, output_path='processed_fmea.xlsx'):
    """Explode enumerated failure cells into one row per enumerated item.

    The columns 'Failure Mode', 'Failure Mechanism', 'Failure Cause' and
    'Failure Effect' of ``self.fmea`` may hold several items in one cell,
    introduced by enumerators such as ``1.``, ``2)``, ``3-`` or ``a)``.
    Each source row is repeated once per item; a column with fewer items
    than the longest one repeats its last item.  Non-string cells, and
    strings that contain no item text at all, are kept unchanged.

    Args:
        output_path: Excel file the result is written to.  Defaults to
            'processed_fmea.xlsx'; pass ``None`` to skip writing.

    Returns:
        The exploded DataFrame, which also replaces ``self.fmea``.  Its
        index is renumbered from 0 so that no two rows share a label.
    """
    columns_to_split = ['Failure Mode', 'Failure Mechanism',
                        'Failure Cause', 'Failure Effect']

    # An enumerator is a number or a SINGLE letter followed by ')', '.' or
    # '-', located at the start of the text or right after whitespace, and
    # not directly followed by a digit.  Anchoring it this way keeps
    # ordinary words ("wear-out.", "failure."), decimals ("0.5") and ranges
    # ("1-2") intact instead of treating them as item separators.
    marker = re.compile(r'(?:^|(?<=\s))(?:\d+|[a-zA-Z])[\)\.\-](?!\d)\s*')

    separated_rows = []
    for _, row in self.fmea.iterrows():
        split_values = []
        for col in columns_to_split:
            cell = row[col]
            items = []
            if isinstance(cell, str):
                items = [part.strip() for part in marker.split(cell) if part.strip()]
            # Fall back to the raw cell when nothing could be extracted, so
            # every column contributes at least one value and padding below
            # never indexes an empty list.
            split_values.append(items or [cell])

        max_length = max(len(values) for values in split_values)

        # Pad shorter columns by repeating their last item.
        padded = [values + [values[-1]] * (max_length - len(values))
                  for values in split_values]

        for i in range(max_length):
            new_row = row.copy()
            for col, values in zip(columns_to_split, padded):
                new_row[col] = values[i]
            separated_rows.append(new_row)

    if separated_rows:
        # Rows copied from the same source row carry the same label; renumber
        # so later label-based operations see a unique index.
        result = pd.DataFrame(separated_rows).reset_index(drop=True)
    else:
        # No input rows: keep the column layout instead of returning a
        # column-less frame.
        result = self.fmea.iloc[0:0].copy()

    self.fmea = result
    if output_path is not None:
        self.fmea.to_excel(output_path, index=False)
    return self.fmea
76
+
77
  def column_matcher(self):
78
+ for code, sap in zip(self.fmea_code['fmea code'], [self.object_part, self.symptom, self.damage, self.cause]):
79
+ # Find the matching code group for the current FMEA code
80
+ matching_code_group = self.code_group[self.code_group['fmea code'] == code]['Code group']
81
+ if matching_code_group.empty:
82
+ continue # Skip if no matching code group is found
83
+
84
+ # Get the first matching code group value
85
+ matching_code_group_value = matching_code_group.values[0]
86
+
87
+ # Filter the corresponding SAP table by the matching code group
88
+ filtered_table_2 = sap[sap['Code group'] == matching_code_group_value]
89
+ s = filtered_table_2['Short text'].tolist() if not filtered_table_2.empty else []
90
 
91
+ # Perform fuzzy matching for the current FMEA code
92
+ m = self.fmea[code].apply(lambda x: process.extract(x, s, limit=1) if pd.notna(x) else [])
93
+ m2 = m.apply(lambda x: ', '.join([i[0] for i in x if i[1] >= self.threshold]) if x else None)
 
94
 
95
+ # Perform fuzzy matching against all short texts in the SAP table
96
+ s_all = sap['Short text'].tolist() if not sap.empty else []
97
+ m_all = self.fmea[code].apply(lambda x: process.extract(x, s_all, limit=1) if pd.notna(x) else [])
98
+ m2_all = m_all.apply(lambda x: ''.join([i[0] for i in x if i[1] >= self.threshold]) if x else None)
99
 
100
+ # Flag secondary matches
101
  m2_all_flagged = m2_all.apply(lambda x: f"{x}_secondary" if x else x)
 
102
  merged_m2 = m2.combine(m2_all_flagged, lambda x, y: x if x else y)
 
 
 
 
 
103
 
104
+ # Create mapping dictionaries for codes and descriptions
105
+ if not sap.empty and 'Short text' in sap.columns and 'Code' in sap.columns:
106
+ mapping_dict_code = sap.set_index('Short text')['Code'].to_dict()
107
+ mapping_dict_short_text = sap.set_index('Code')['Short text'].to_dict()
108
+ else:
109
+ mapping_dict_code = {}
110
+ mapping_dict_short_text = {}
111
+
112
+ # Get the catalog name for the current FMEA code
113
+ name = self.code_group[self.code_group['fmea code'] == code]['Catalog'].values[0] if not matching_code_group.empty else None
114
+
115
+ # Add new columns to the FMEA table
116
+ if name and not merged_m2.empty:
117
+ self.fmea[name] = merged_m2.apply(
118
+ lambda x: mapping_dict_code.get(x) if x in mapping_dict_code else None)
119
+ self.fmea[f"{name}_description"] = self.fmea[name].apply(
120
+ lambda x: mapping_dict_short_text.get(x) if x in mapping_dict_short_text else None)
121
+ else:
122
+ self.fmea[name] = None
123
+ self.fmea[f"{name}_description"] = None
124
 
125
+ print('Completed column matcher function')
 
 
126
 
127
  def column_arranger(self):
128
  catalog_profile = self.fmea.pop('Catalog Profile (SAP)')
 
145
  self.fmea.insert(10,c_damage_desc.name,c_damage_desc)
146
  self.fmea.insert(12,cause_5.name,cause_5)
147
  self.fmea.insert(13,cause_5_desc.name,cause_5_desc)
148
+ print('Completed column arranger function')
149
 
150
  def process_and_split_excel(self):
151
  new_rows = []
 
156
  bullet_pattern = r'^\s*[\da-zA-Z]+[)\.\-•]?\s*' # To clean bullets for specific columns
157
 
158
  for _, row in self.fmea.iterrows():
159
+ cell_value = row[columns[0]]
160
 
161
  if isinstance(cell_value, str):
162
  # Split on newline characters (\n)