Spaces:

Copopopopo
/

Golden_ERS

Build error

App Files Community

Copopopopo committed on Jan 20, 2025

Commit

7dd4209

verified ·

1 Parent(s): b86f23a

Update HF_processor.py according to DEV

Browse files

Files changed (1) hide show

HF_processor.py +79 -23

HF_processor.py CHANGED Viewed

@@ -38,35 +38,91 @@ class FMEADataPipeline:
         self.code_group = pd.merge(self.code_group,self.fmea_code,how='left',on='Catalog')
         self.fmea['Catalog Profile (SAP)'] = self.catalog_code
     def column_matcher(self):
-        for code,sap in zip(self.fmea_code['fmea code'],[self.object_part,self.symptom,self.damage,self.cause]):
-            filtered_table_2 = sap[sap['Code group'] == self.code_group[self.code_group['fmea code']==code]['Code group'].values[0]]
-            s = filtered_table_2['Short text'].tolist()
-            m = self.fmea[code].apply(lambda x : process.extract(x, s,limit=1))
-            m2 = m.apply(lambda x:', '.join([i[0] for i in x if i[1] >= self.threshold]))
-            s_all = sap['Short text'].tolist()
-            m_all = self.fmea[code].apply(lambda x : process.extract(x, s_all,limit=1))
-            m2_all = m_all.apply(lambda x: ''.join([i[0] for i in x if i[1] >= self.threshold]))
-            # Add "_secondary" flag to m2_all values
             m2_all_flagged = m2_all.apply(lambda x: f"{x}_secondary" if x else x)
             merged_m2 = m2.combine(m2_all_flagged, lambda x, y: x if x else y)
-            mapping_dict_code = sap.set_index('Short text')['Code'].to_dict()
-            mapping_dict_short_text = sap.set_index('Code')['Short text'].to_dict()
-            #extract name for new column from code_group table
-            name = self.code_group[self.code_group['fmea code'] == code]['Catalog'].values[0]
-            # Apply the mapping
-            self.fmea[name] = merged_m2.apply(
-            lambda x: mapping_dict_code.get(x) if x in mapping_dict_code else None)
-            self.fmea[f"{name}_description"] = self.fmea[name].apply(
-                lambda x: mapping_dict_short_text.get(x) if x in mapping_dict_short_text else None)
     def column_arranger(self):
         catalog_profile = self.fmea.pop('Catalog Profile (SAP)')
@@ -89,7 +145,7 @@ class FMEADataPipeline:
         self.fmea.insert(10,c_damage_desc.name,c_damage_desc)
         self.fmea.insert(12,cause_5.name,cause_5)
         self.fmea.insert(13,cause_5_desc.name,cause_5_desc)
     def process_and_split_excel(self):
         new_rows = []
@@ -100,7 +156,7 @@ class FMEADataPipeline:
         bullet_pattern = r'^\s*[\da-zA-Z]+[)\.\-•]?\s*'  # To clean bullets for specific columns
         for _, row in self.fmea.iterrows():
-            cell_value = row[columns[0]]  # 'Proposed Task'
             if isinstance(cell_value, str):
                 # Split on newline characters (\n)

         self.code_group = pd.merge(self.code_group,self.fmea_code,how='left',on='Catalog')
         self.fmea['Catalog Profile (SAP)'] = self.catalog_code
+    def split_columns_rows(self):
+        columns_to_split = ['Failure Mode', 'Failure Mechanism', 'Failure Cause','Failure Effect']
+        separated_rows = []
+        # Iterate over each row in the DataFrame
+        for _, row in self.fmea.iterrows():
+            # Process each column to split by various numbering formats (e.g., '1.', '1)', '1-', etc.)
+            split_values = []
+            for col in columns_to_split:
+                # Split by numbering patterns and strip whitespace
+                if isinstance(row[col], str) and row[col]:
+                    values = [item.strip() for item in re.split(r'\d+[\)\.-]\s*|[a-zA-Z]+[\)\.-]\s*', row[col]) if item.strip()]
+                else:
+                    values = [row[col]]  # Keep non-string values as is
+                split_values.append(values)
+            # Check the maximum length of split values across columns to split
+            max_length = max(len(values) for values in split_values)
+            # Ensure all split columns have equal length by repeating the last value if needed
+            for i, values in enumerate(split_values):
+                if len(values) < max_length:
+                    split_values[i] = values + [values[-1]] * (max_length - len(values))
+            # Create new rows for each split value
+            for i in range(max_length):
+                new_row = row.copy()
+                for col, values in zip(columns_to_split, split_values):
+                    new_row[col] = values[i]
+                separated_rows.append(new_row)
+        # rewrite previous fmea
+        self.fmea = pd.DataFrame(separated_rows)
+        self.fmea.to_excel('processed_fmea.xlsx', index=False)
+        return self.fmea
     def column_matcher(self):
+        for code, sap in zip(self.fmea_code['fmea code'], [self.object_part, self.symptom, self.damage, self.cause]):
+            # Find the matching code group for the current FMEA code
+            matching_code_group = self.code_group[self.code_group['fmea code'] == code]['Code group']
+            if matching_code_group.empty:
+                continue  # Skip if no matching code group is found
+            # Get the first matching code group value
+            matching_code_group_value = matching_code_group.values[0]
+            # Filter the corresponding SAP table by the matching code group
+            filtered_table_2 = sap[sap['Code group'] == matching_code_group_value]
+            s = filtered_table_2['Short text'].tolist() if not filtered_table_2.empty else []
+            # Perform fuzzy matching for the current FMEA code
+            m = self.fmea[code].apply(lambda x: process.extract(x, s, limit=1) if pd.notna(x) else [])
+            m2 = m.apply(lambda x: ', '.join([i[0] for i in x if i[1] >= self.threshold]) if x else None)
+            # Perform fuzzy matching against all short texts in the SAP table
+            s_all = sap['Short text'].tolist() if not sap.empty else []
+            m_all = self.fmea[code].apply(lambda x: process.extract(x, s_all, limit=1) if pd.notna(x) else [])
+            m2_all = m_all.apply(lambda x: ''.join([i[0] for i in x if i[1] >= self.threshold]) if x else None)
+            # Flag secondary matches
             m2_all_flagged = m2_all.apply(lambda x: f"{x}_secondary" if x else x)
             merged_m2 = m2.combine(m2_all_flagged, lambda x, y: x if x else y)
+            # Create mapping dictionaries for codes and descriptions
+            if not sap.empty and 'Short text' in sap.columns and 'Code' in sap.columns:
+                mapping_dict_code = sap.set_index('Short text')['Code'].to_dict()
+                mapping_dict_short_text = sap.set_index('Code')['Short text'].to_dict()
+            else:
+                mapping_dict_code = {}
+                mapping_dict_short_text = {}
+            # Get the catalog name for the current FMEA code
+            name = self.code_group[self.code_group['fmea code'] == code]['Catalog'].values[0] if not matching_code_group.empty else None
+            # Add new columns to the FMEA table
+            if name and not merged_m2.empty:
+                self.fmea[name] = merged_m2.apply(
+                    lambda x: mapping_dict_code.get(x) if x in mapping_dict_code else None)
+                self.fmea[f"{name}_description"] = self.fmea[name].apply(
+                    lambda x: mapping_dict_short_text.get(x) if x in mapping_dict_short_text else None)
+            else:
+                self.fmea[name] = None
+                self.fmea[f"{name}_description"] = None
+        print('Completed column matcher function')
     def column_arranger(self):
         catalog_profile = self.fmea.pop('Catalog Profile (SAP)')
         self.fmea.insert(10,c_damage_desc.name,c_damage_desc)
         self.fmea.insert(12,cause_5.name,cause_5)
         self.fmea.insert(13,cause_5_desc.name,cause_5_desc)
+        print('Completed column arranger function')
     def process_and_split_excel(self):
         new_rows = []
         bullet_pattern = r'^\s*[\da-zA-Z]+[)\.\-•]?\s*'  # To clean bullets for specific columns
         for _, row in self.fmea.iterrows():
+            cell_value = row[columns[0]]
             if isinstance(cell_value, str):
                 # Split on newline characters (\n)