File size: 12,130 Bytes
e0219a5
 
47f321a
690a536
e0219a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47f321a
e0219a5
 
 
 
47f321a
 
e0219a5
 
 
 
 
 
 
 
7dd4209
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3acc8f6
 
 
 
 
 
 
7dd4209
2d7818d
7dd4209
 
 
 
 
2d7818d
7dd4209
 
e0219a5
2d7818d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f955e97
 
 
e0219a5
f955e97
37e280c
f955e97
2d7818d
 
 
e0219a5
2d7818d
f955e97
 
2d7818d
f955e97
2d7818d
 
f955e97
2d7818d
f955e97
 
 
 
2d7818d
 
 
af1260e
86bec4e
2d7818d
 
f955e97
af1260e
cfacb7e
e0219a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c4f999
7dd4209
9c4f999
 
e0219a5
 
 
 
00259f5
b86f23a
 
 
 
e0219a5
af1260e
b86f23a
dd32e59
b86f23a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e0219a5
dd32e59
e0219a5
b86f23a
 
e0219a5
7594dc2
af1260e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
import pandas as pd
import numpy as np
import json
import re
from fuzzywuzzy import process

class FMEADataPipeline:

    def __init__(self,catalog_profile,fmea,catalog_code,threshold):
        self.catalog_profile = catalog_profile
        self.fmea = fmea
        self.catalog_code = catalog_code
        self.threshold = threshold
        self.json_cp = None
        self.cp = None
        self.object_part = None
        self.symptom = None
        self.damage = None
        self.cause = None
        self.ERS_filename = None
        self.code_group = None
        self.fmea_code = {}
        self.new_fmea = None
    
    def read_catalog_profile(self):

        self.cp = pd.read_json(self.catalog_profile['cp'],orient='split')
        self.object_part = pd.read_json(self.catalog_profile['object part'],orient='split')
        self.symptom = pd.read_json(self.catalog_profile['symptom'],orient='split')
        self.damage = pd.read_json(self.catalog_profile['damage'],orient='split')
        self.cause = pd.read_json(self.catalog_profile['cause'],orient='split') 

    def build_connector(self):
        self.code_group = self.cp[self.cp['Catalog profile']==self.catalog_code][['Catalog','Code group']]
        self.fmea_code = {'fmea code': ['Component','Failure Mode','Failure Mechanism','Failure Cause'],
                          'Catalog':['B-Object Part','D-Symptom','C-Damage','5-Cause']}
        self.fmea_code = pd.DataFrame(self.fmea_code)
        self.code_group = pd.merge(self.code_group,self.fmea_code,how='left',on='Catalog')
        self.fmea['Catalog Profile (SAP)'] = self.catalog_code

    def split_columns_rows(self):
        columns_to_split = ['Failure Mode', 'Failure Mechanism', 'Failure Cause','Failure Effect']
        separated_rows = []

        # Iterate over each row in the DataFrame
        for _, row in self.fmea.iterrows():
            # Process each column to split by various numbering formats (e.g., '1.', '1)', '1-', etc.)
            split_values = []
            for col in columns_to_split:
                # Split by numbering patterns and strip whitespace
                if isinstance(row[col], str) and row[col]:
                    values = [item.strip() for item in re.split(r'\d+[\)\.-]\s*|[a-zA-Z]+[\)\.-]\s*', row[col]) if item.strip()]
                else:
                    values = [row[col]]  # Keep non-string values as is
                split_values.append(values)

            # Check the maximum length of split values across columns to split
            max_length = max(len(values) for values in split_values)

            # Ensure all split columns have equal length by repeating the last value if needed
            for i, values in enumerate(split_values):
                if len(values) < max_length:
                    split_values[i] = values + [values[-1]] * (max_length - len(values))

            # Create new rows for each split value
            for i in range(max_length):
                new_row = row.copy()
                for col, values in zip(columns_to_split, split_values):
                    new_row[col] = values[i]
                separated_rows.append(new_row)

        # rewrite previous fmea
        self.fmea = pd.DataFrame(separated_rows)

    def bracket_remover(self):
        columns = ['Component','Failure Mode', 'Failure Mechanism', 'Failure Cause','Failure Effect']

        for col in columns:
            if col in self.fmea.columns:
                self.fmea[col] = self.fmea[col].str.replace(r"\s*\(.*?\)", "", regex=True).str.strip()    
    
    def column_matcher_21Jan(self):
        for code, sap in zip(self.fmea_code['fmea code'], [self.object_part, self.symptom, self.damage, self.cause]):
            # Find the matching code group for the current FMEA code
            matching_code_group = self.code_group[self.code_group['fmea code'] == code]['Code group']
            if matching_code_group.empty:
                continue  # Skip if no matching code group is found

            # Get the first matching code group value
            matching_code_group_value = matching_code_group.values[0]

            # Extract the catalog profile and its group for prioritization
            catalog_profile = self.code_group[self.code_group['fmea code'] == code]['Catalog'].values[0]
            catalog_group = catalog_profile[1]  # Second character of the catalog code

            # Filter SAP table for each priority level
            # 1. Catalog Profile
            profile_sap = sap[sap['Code group'] == catalog_profile]
            s_profile = profile_sap['Short text'].tolist()
            m_profile = self.fmea[code].apply(lambda x: process.extract(x, s_profile, limit=1))
            m2_profile = m_profile.apply(lambda x: ', '.join([i[0] for i in x if i[1] >= self.threshold]))

            # 2. Catalog Group (excluding the catalog profile)
            group_sap = sap[(sap['Code group'].str[1] == catalog_group) & (sap['Code group'] != catalog_profile)]
            s_group = group_sap['Short text'].tolist()
            m_group = self.fmea[code].apply(lambda x: process.extract(x, s_group, limit=1))
            m2_group = m_group.apply(lambda x: ', '.join([i[0] for i in x if i[1] >= self.threshold]))

            # 3. Entire SAP catalog
            s_all = sap['Short text'].tolist()
            m_all = self.fmea[code].apply(lambda x: process.extract(x, s_all, limit=1))
            m2_all = m_all.apply(lambda x: ''.join([i[0] for i in x if i[1] >= self.threshold]))

            # Add "_secondary" flag to m2_all values
            m2_all_flagged = m2_all.apply(lambda x: f"{x}_secondary" if x else x)

            # Merge prioritized matches: Profile > Group > All
            merged_m2 = m2_profile.combine(m2_group, lambda x, y: x if x else y)
            merged_m2 = merged_m2.combine(m2_all_flagged, lambda x, y: x if x else y)

            # Create mapping dictionaries
            mapping_dict_code = sap.set_index('Short text')['Code'].to_dict()
            mapping_dict_short_text = sap.set_index('Code')['Short text'].to_dict()
            catalog_code_dict = sap.set_index('Short text')['Code group'].to_dict()

            # Create the new column name based on catalog
            name = catalog_profile

            # Apply the mapping for the catalog column
            self.fmea[name] = merged_m2.apply(
                lambda x: mapping_dict_code.get(x.replace("_secondary", "")) if x else None
            )

            # Construct the description column with catalog code source
            self.fmea[f"{name}_description"] = merged_m2.apply(
                lambda x: (
                    f"{x.replace('_secondary', '')} ({catalog_code_dict.get(x.replace('_secondary', ''), 'Unknown')})"
                    if "_secondary" in x else
                    mapping_dict_short_text.get(mapping_dict_code.get(x), x)
                )
            )
        return self.fmea

            
    def column_arranger(self):
        catalog_profile = self.fmea.pop('Catalog Profile (SAP)')
        b_object_part = self.fmea.pop('B-Object Part')
        d_symptom = self.fmea.pop('D-Symptom')
        c_damage = self.fmea.pop('C-Damage')
        cause_5 = self.fmea.pop('5-Cause')

        b_object_part_desc = self.fmea.pop('B-Object Part_description')
        d_symptom_desc = self.fmea.pop('D-Symptom_description')
        c_damage_desc = self.fmea.pop('C-Damage_description')
        cause_5_desc = self.fmea.pop('5-Cause_description')

        self.fmea.insert(1,catalog_profile.name,catalog_profile)
        self.fmea.insert(3,b_object_part.name,b_object_part)
        self.fmea.insert(4,b_object_part_desc.name,b_object_part_desc)
        self.fmea.insert(6,d_symptom.name,d_symptom)
        self.fmea.insert(7,d_symptom_desc.name,d_symptom_desc)
        self.fmea.insert(9,c_damage.name,c_damage)
        self.fmea.insert(10,c_damage_desc.name,c_damage_desc)
        self.fmea.insert(12,cause_5.name,cause_5)
        self.fmea.insert(13,cause_5_desc.name,cause_5_desc)
        
        print('Completed column arranger function')
        self.fmea.to_excel('processed_fmea.xlsx', index=False)
        return self.fmea

    def process_and_split_excel(self):
        new_rows = []
        columns = ['Proposed Task', 'Task Type', 'Frequency', 'Action Party', 'TA (Y/N)']
        clean_columns = ['Frequency', 'Action Party', 'TA (Y/N)']  # Columns to clean bullet points

        # Regex to remove bullet points or numbering in the clean columns
        bullet_pattern = r'^\s*[\da-zA-Z]+[)\.\-•]?\s*'  # To clean bullets for specific columns

        for _, row in self.fmea.iterrows():
            cell_value = row.loc[columns[0]]

            if isinstance(cell_value, str):
                # Split on newline characters (\n)
                points = [point.strip() for point in cell_value.split('\n') if point.strip()]
                for idx, point in enumerate(points):
                    new_row = row.copy()
                    for column in columns:
                        column_value = row[column]
                        if isinstance(column_value, str):
                            # Split column by newline and align them
                            column_points = [p.strip() for p in column_value.split('\n') if p.strip()]
                            new_value = column_points[idx] if idx < len(column_points) else np.nan
                            # Clean bullet points for specific columns
                            if column in clean_columns:
                                new_value = re.sub(bullet_pattern, '', new_value).strip() if isinstance(new_value, str) else new_value
                            new_row[column] = new_value
                        else:
                            new_row[column] = np.nan if idx > 0 else column_value
                    new_rows.append(new_row)
            else:
                # If the value is not a string, add the row without modification
                new_rows.append(row)

        # Create a new DataFrame with processed rows
        self.new_fmea = pd.DataFrame(new_rows)
        self.new_fmea.to_excel('processed_fmea.xlsx', index=False)
        return self.new_fmea
    
    def process_and_split_excel_2(self):
        columns_to_split = ['Proposed Task', 'Task Type', 'Frequency', 'Action Party', 'TA (Y/N)']
        clean_columns = ['Proposed Task', 'Task Type','Frequency', 'Action Party', 'TA (Y/N)']  # Columns to clean bullet points
        bullet_pattern = r'^\s*(?:\d+[\)\.\-•]\s*|[a-zA-Z]\))'  # Regex to clean bullets
        
        separated_rows = []
        
        for _, row in self.fmea.iterrows():
            split_values = []
            
            for col in columns_to_split:
                if isinstance(row[col], str) and row[col]:
                    values = [item.strip() for item in row[col].split('\n') if item.strip()]
                    if col in clean_columns:
                        values = [re.sub(bullet_pattern, '', v).strip() for v in values]
                else:
                    values = [row[col]]  # Keep non-string values as is
                split_values.append(values)
            
            # Determine the maximum number of splits across all columns
            max_length = max(len(values) for values in split_values)
            
            # Ensure all columns have the same number of values by repeating the last value
            for i, values in enumerate(split_values):
                if len(values) < max_length:
                    split_values[i] = values + [values[-1]] * (max_length - len(values))
            
            # Create new rows for each split value
            for i in range(max_length):
                new_row = row.copy()
                for col, values in zip(columns_to_split, split_values):
                    new_row[col] = values[i]
                separated_rows.append(new_row)
        
        # Create a new DataFrame with processed rows
        self.new_fmea = pd.DataFrame(separated_rows)
        self.new_fmea.to_excel('processed_fmea.xlsx', index=False)
        return self.new_fmea