Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| import os | |
| import sys | |
| # Add the project root to sys.path to import path_utils | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| import path_utils | |
def _add_engineered_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with derived features added and unused columns dropped.

    The input must contain the raw columns referenced below
    ('Process temperature [K]', 'Air temperature [K]', 'Torque [Nm]',
    'Rotational speed [rpm]', 'Tool wear [min]', 'Type') as well as every
    column listed in ``cols_to_drop``; a missing column raises ``KeyError``.
    """
    out = df.copy()

    # 1. Temperature difference (process - air).
    out['temp_diff'] = out['Process temperature [K]'] - out['Air temperature [K]']

    # 2. Power = torque * angular speed, with angular speed = RPM * 2 * pi / 60 (rad/s).
    angular_speed = out['Rotational speed [rpm]'] * 2 * np.pi / 60
    out['power'] = out['Torque [Nm]'] * angular_speed

    # 3. Tool wear * torque (mechanical stress indicator).
    out['tool_wear_torque'] = out['Tool wear [min]'] * out['Torque [Nm]']

    # 4. Ordinal encoding for Type (L < M < H quality).
    type_map = {'L': 0, 'M': 1, 'H': 2}
    encoded_type = out['Type'].map(type_map)
    # Series.map yields NaN for any value missing from the mapping; report
    # such values instead of letting them slip silently into the output.
    unmapped_mask = encoded_type.isna() & out['Type'].notna()
    if unmapped_mask.any():
        unknown = out.loc[unmapped_mask, 'Type'].unique().tolist()
        print(f"Warning: unrecognized Type values encoded as NaN: {unknown}")
    out['Type'] = encoded_type

    # 5. Drop data-leakage and unnecessary columns.
    # Sub-labels (TWF, HDF, PWF, OSF, RNF) indicate the cause of failure,
    # which is leakage for binary classification.
    cols_to_drop = ['UDI', 'Product ID', 'TWF', 'HDF', 'PWF', 'OSF', 'RNF']
    return out.drop(columns=cols_to_drop)


def perform_feature_engineering() -> None:
    """Build the processed feature file from the raw 'ai4i2020.csv' dataset.

    Reads the raw CSV located via ``path_utils.get_raw_data_path``, derives the
    engineered features, and writes the result (without the index) to the
    location given by ``path_utils.get_processed_data_path('features.csv')``.

    If the raw file does not exist, an error message is printed and the
    function returns without writing anything.
    """
    # Load raw data.
    raw_path = path_utils.get_raw_data_path('ai4i2020.csv')
    if not os.path.exists(raw_path):
        print(f"Error: Raw dataset not found at {raw_path}")
        return

    df = pd.read_csv(raw_path)
    print("Raw data loaded.")

    df = _add_engineered_features(df)

    # Save processed features; make sure the target directory exists first so
    # a fresh checkout does not fail on the write.
    processed_path = path_utils.get_processed_data_path('features.csv')
    output_dir = os.path.dirname(processed_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(processed_path, index=False)

    print(f"Feature engineering complete. File saved to {processed_path}")
    print(f"Columns in processed data: {df.columns.tolist()}")
# Script entry point: run the feature-engineering step only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    perform_feature_engineering()