Spaces:
Build error
Build error
Fix an issue in the process_and_split_excel function: cell values are now split on newline characters (\n) instead of on numbers/bullets.
Browse files- HF_processor.py +25 -35
HF_processor.py
CHANGED
|
@@ -30,7 +30,6 @@ class FMEADataPipeline:
|
|
| 30 |
self.damage = pd.read_json(self.catalog_profile['damage'],orient='split')
|
| 31 |
self.cause = pd.read_json(self.catalog_profile['cause'],orient='split')
|
| 32 |
|
| 33 |
-
|
| 34 |
def build_connector(self):
|
| 35 |
self.code_group = self.cp[self.cp['Catalog profile']==self.catalog_code][['Catalog','Code group']]
|
| 36 |
self.fmea_code = {'fmea code': ['Component','Failure Mode','Failure Mechanism','Failure Cause'],
|
|
@@ -39,7 +38,6 @@ class FMEADataPipeline:
|
|
| 39 |
self.code_group = pd.merge(self.code_group,self.fmea_code,how='left',on='Catalog')
|
| 40 |
self.fmea['Catalog Profile (SAP)'] = self.catalog_code
|
| 41 |
|
| 42 |
-
|
| 43 |
def column_matcher(self):
|
| 44 |
for code,sap in zip(self.fmea_code['fmea code'],[self.object_part,self.symptom,self.damage,self.cause]):
|
| 45 |
|
|
@@ -97,44 +95,36 @@ class FMEADataPipeline:
|
|
| 97 |
new_rows = []
|
| 98 |
columns = ['Proposed Task', 'Task Type', 'Frequency', 'Action Party', 'TA (Y/N)']
|
| 99 |
clean_columns = ['Frequency', 'Action Party', 'TA (Y/N)'] # Columns to clean bullet points
|
| 100 |
-
|
| 101 |
-
#
|
| 102 |
-
bullet_pattern = r'^\s*[\da-zA-Z]+[)\.\-•]?\s*'
|
| 103 |
-
|
| 104 |
-
|
| 105 |
for _, row in self.fmea.iterrows():
|
| 106 |
-
cell_value = row[columns[0]]
|
| 107 |
-
|
| 108 |
if isinstance(cell_value, str):
|
| 109 |
-
#
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
for
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
if
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
new_row[column] = np.nan # Fill with NaN if the split is not aligned
|
| 127 |
-
new_rows.append(new_row)
|
| 128 |
-
else:
|
| 129 |
-
# Clean up bullet points for non-split rows in clean_columns
|
| 130 |
-
for column in clean_columns:
|
| 131 |
-
if isinstance(row[column], str):
|
| 132 |
-
row[column] = re.sub(bullet_pattern, '', row[column]).strip()
|
| 133 |
-
new_rows.append(row)
|
| 134 |
else:
|
| 135 |
# If the value is not a string, add the row without modification
|
| 136 |
new_rows.append(row)
|
| 137 |
-
|
|
|
|
| 138 |
self.new_fmea = pd.DataFrame(new_rows)
|
| 139 |
self.new_fmea.to_excel('processed_fmea.xlsx', index=False)
|
| 140 |
return self.new_fmea
|
|
|
|
| 30 |
self.damage = pd.read_json(self.catalog_profile['damage'],orient='split')
|
| 31 |
self.cause = pd.read_json(self.catalog_profile['cause'],orient='split')
|
| 32 |
|
|
|
|
| 33 |
def build_connector(self):
|
| 34 |
self.code_group = self.cp[self.cp['Catalog profile']==self.catalog_code][['Catalog','Code group']]
|
| 35 |
self.fmea_code = {'fmea code': ['Component','Failure Mode','Failure Mechanism','Failure Cause'],
|
|
|
|
| 38 |
self.code_group = pd.merge(self.code_group,self.fmea_code,how='left',on='Catalog')
|
| 39 |
self.fmea['Catalog Profile (SAP)'] = self.catalog_code
|
| 40 |
|
|
|
|
| 41 |
def column_matcher(self):
|
| 42 |
for code,sap in zip(self.fmea_code['fmea code'],[self.object_part,self.symptom,self.damage,self.cause]):
|
| 43 |
|
|
|
|
| 95 |
new_rows = []
|
| 96 |
columns = ['Proposed Task', 'Task Type', 'Frequency', 'Action Party', 'TA (Y/N)']
|
| 97 |
clean_columns = ['Frequency', 'Action Party', 'TA (Y/N)'] # Columns to clean bullet points
|
| 98 |
+
|
| 99 |
+
# Regex to remove bullet points or numbering in the clean columns
|
| 100 |
+
bullet_pattern = r'^\s*[\da-zA-Z]+[)\.\-•]?\s*' # To clean bullets for specific columns
|
| 101 |
+
|
|
|
|
| 102 |
for _, row in self.fmea.iterrows():
|
| 103 |
+
cell_value = row[columns[0]] # 'Proposed Task'
|
| 104 |
+
|
| 105 |
if isinstance(cell_value, str):
|
| 106 |
+
# Split on newline characters (\n)
|
| 107 |
+
points = [point.strip() for point in cell_value.split('\n') if point.strip()]
|
| 108 |
+
for idx, point in enumerate(points):
|
| 109 |
+
new_row = row.copy()
|
| 110 |
+
for column in columns:
|
| 111 |
+
column_value = row[column]
|
| 112 |
+
if isinstance(column_value, str):
|
| 113 |
+
# Split column by newline and align them
|
| 114 |
+
column_points = [p.strip() for p in column_value.split('\n') if p.strip()]
|
| 115 |
+
new_value = column_points[idx] if idx < len(column_points) else np.nan
|
| 116 |
+
# Clean bullet points for specific columns
|
| 117 |
+
if column in clean_columns:
|
| 118 |
+
new_value = re.sub(bullet_pattern, '', new_value).strip() if isinstance(new_value, str) else new_value
|
| 119 |
+
new_row[column] = new_value
|
| 120 |
+
else:
|
| 121 |
+
new_row[column] = np.nan if idx > 0 else column_value
|
| 122 |
+
new_rows.append(new_row)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
else:
|
| 124 |
# If the value is not a string, add the row without modification
|
| 125 |
new_rows.append(row)
|
| 126 |
+
|
| 127 |
+
# Create a new DataFrame with processed rows
|
| 128 |
self.new_fmea = pd.DataFrame(new_rows)
|
| 129 |
self.new_fmea.to_excel('processed_fmea.xlsx', index=False)
|
| 130 |
return self.new_fmea
|