Spaces:

Copopopopo
/

Golden_ERS

Build error

App Files Community

Copopopopo committed on Jan 27, 2025

Commit

2d7818d

verified ·

1 Parent(s): f8cd1ac

massive update to column_matcher() function on 27 Jan 2025

Browse files

Files changed (1) hide show

HF_processor.py +35 -15

HF_processor.py CHANGED Viewed

@@ -74,22 +74,34 @@ class FMEADataPipeline:
         self.fmea.to_excel('processed_fmea.xlsx', index=False)
         return self.fmea
-    def column_matcher(self):
         for code, sap in zip(self.fmea_code['fmea code'], [self.object_part, self.symptom, self.damage, self.cause]):
             # Find the matching code group for the current FMEA code
             matching_code_group = self.code_group[self.code_group['fmea code'] == code]['Code group']
             if matching_code_group.empty:
                 continue  # Skip if no matching code group is found
             # Get the first matching code group value
             matching_code_group_value = matching_code_group.values[0]
-            # Filter the corresponding SAP table by the matching code group
-            filtered_table_2 = sap[sap['Code group'] == matching_code_group_value]
-            s = filtered_table_2['Short text'].tolist()
-            m = self.fmea[code].apply(lambda x: process.extract(x, s, limit=1))
-            m2 = m.apply(lambda x: ', '.join([i[0] for i in x if i[1] >= self.threshold]))
             s_all = sap['Short text'].tolist()
             m_all = self.fmea[code].apply(lambda x: process.extract(x, s_all, limit=1))
             m2_all = m_all.apply(lambda x: ''.join([i[0] for i in x if i[1] >= self.threshold]))
@@ -97,22 +109,30 @@ class FMEADataPipeline:
             # Add "_secondary" flag to m2_all values
             m2_all_flagged = m2_all.apply(lambda x: f"{x}_secondary" if x else x)
-            # Merge m2 and flagged m2_all
-            merged_m2 = m2.combine(m2_all_flagged, lambda x, y: x if x else y)
             mapping_dict_code = sap.set_index('Short text')['Code'].to_dict()
             mapping_dict_short_text = sap.set_index('Code')['Short text'].to_dict()
-            # Extract name for the new column from code_group table
-            name = self.code_group[self.code_group['fmea code'] == code]['Catalog'].values[0]
-            # Apply the mapping
             self.fmea[name] = merged_m2.apply(
                 lambda x: mapping_dict_code.get(x.replace("_secondary", "")) if x else None
             )
-            self.fmea[f"{name}_description"] = self.fmea[name].apply(
-                lambda x: (mapping_dict_short_text.get(x) + " (secondary)") if x and "_secondary" in merged_m2.loc[self.fmea.index[self.fmea[name] == x][0]] else mapping_dict_short_text.get(x)
             )
     def column_arranger(self):

         self.fmea.to_excel('processed_fmea.xlsx', index=False)
         return self.fmea
+    def column_matcher_21Jan(self):
         for code, sap in zip(self.fmea_code['fmea code'], [self.object_part, self.symptom, self.damage, self.cause]):
             # Find the matching code group for the current FMEA code
             matching_code_group = self.code_group[self.code_group['fmea code'] == code]['Code group']
             if matching_code_group.empty:
                 continue  # Skip if no matching code group is found
             # Get the first matching code group value
             matching_code_group_value = matching_code_group.values[0]
+            # Extract the catalog profile and its group for prioritization
+            catalog_profile = self.code_group[self.code_group['fmea code'] == code]['Catalog'].values[0]
+            catalog_group = catalog_profile[1]  # Second character of the catalog code
+            # Filter SAP table for each priority level
+            # 1. Catalog Profile
+            profile_sap = sap[sap['Code group'] == catalog_profile]
+            s_profile = profile_sap['Short text'].tolist()
+            m_profile = self.fmea[code].apply(lambda x: process.extract(x, s_profile, limit=1))
+            m2_profile = m_profile.apply(lambda x: ', '.join([i[0] for i in x if i[1] >= self.threshold]))
+            # 2. Catalog Group (excluding the catalog profile)
+            group_sap = sap[(sap['Code group'].str[1] == catalog_group) & (sap['Code group'] != catalog_profile)]
+            s_group = group_sap['Short text'].tolist()
+            m_group = self.fmea[code].apply(lambda x: process.extract(x, s_group, limit=1))
+            m2_group = m_group.apply(lambda x: ', '.join([i[0] for i in x if i[1] >= self.threshold]))
+            # 3. Entire SAP catalog
             s_all = sap['Short text'].tolist()
             m_all = self.fmea[code].apply(lambda x: process.extract(x, s_all, limit=1))
             m2_all = m_all.apply(lambda x: ''.join([i[0] for i in x if i[1] >= self.threshold]))
             # Add "_secondary" flag to m2_all values
             m2_all_flagged = m2_all.apply(lambda x: f"{x}_secondary" if x else x)
+            # Merge prioritized matches: Profile > Group > All
+            merged_m2 = m2_profile.combine(m2_group, lambda x, y: x if x else y)
+            merged_m2 = merged_m2.combine(m2_all_flagged, lambda x, y: x if x else y)
+            # Create mapping dictionaries
             mapping_dict_code = sap.set_index('Short text')['Code'].to_dict()
             mapping_dict_short_text = sap.set_index('Code')['Short text'].to_dict()
+            catalog_code_dict = sap.set_index('Short text')['Code group'].to_dict()
+            # Create the new column name based on catalog
+            name = catalog_profile
+            # Apply the mapping for the catalog column
             self.fmea[name] = merged_m2.apply(
                 lambda x: mapping_dict_code.get(x.replace("_secondary", "")) if x else None
             )
+            # Construct the description column with catalog code source
+            self.fmea[f"{name}_description"] = merged_m2.apply(
+                lambda x: (
+                    f"{x.replace('_secondary', '')} ({catalog_code_dict.get(x.replace('_secondary', ''), 'Unknown')})"
+                    if "_secondary" in x else
+                    mapping_dict_short_text.get(mapping_dict_code.get(x), x)
+                )
             )
     def column_arranger(self):