Spaces:
Sleeping
Sleeping
Upload featureengineer.py with huggingface_hub
Browse files- featureengineer.py +14 -8
featureengineer.py
CHANGED
|
@@ -13,29 +13,32 @@ class FeatureEngineer(BaseEstimator, TransformerMixin):
|
|
| 13 |
else:
|
| 14 |
# These are the expected column names after initial preprocessing
|
| 15 |
# They should be consistent with the features defined in the overall dataset.
|
| 16 |
-
expected_column_names = [
|
| 17 |
'Engine_rpm', 'Lub_oil_pressure', 'Fuel_pressure',
|
| 18 |
'Coolant_pressure', 'lub_oil_temp', 'Coolant_temp'
|
| 19 |
]
|
| 20 |
-
df = pd.DataFrame(X, columns=expected_column_names)
|
| 21 |
|
| 22 |
df.columns = (df.columns
|
| 23 |
.str.strip()
|
| 24 |
.str.replace(" ","_")
|
| 25 |
.str.replace(r"[^\w]","_",regex=True)
|
|
|
|
| 26 |
)
|
| 27 |
|
| 28 |
-
engine_rpm_col = 'Engine_rpm'
|
| 29 |
lub_oil_pressure_col = 'Lub_oil_pressure'
|
| 30 |
fuel_pressure_col = 'Fuel_pressure'
|
| 31 |
coolant_pressure_col = 'Coolant_pressure'
|
| 32 |
lub_oil_temp_col = 'lub_oil_temp'
|
| 33 |
-
coolant_temp_col = 'Coolant_temp'
|
| 34 |
|
| 35 |
-
core_sensor_cols =
|
|
|
|
|
|
|
| 36 |
engine_rpm_col, lub_oil_pressure_col, fuel_pressure_col,
|
| 37 |
coolant_pressure_col, lub_oil_temp_col, coolant_temp_col
|
| 38 |
-
]
|
| 39 |
|
| 40 |
# ===== diff features
|
| 41 |
for col_name in df.select_dtypes(include=np.number).columns:
|
|
@@ -57,8 +60,11 @@ class FeatureEngineer(BaseEstimator, TransformerMixin):
|
|
| 57 |
|
| 58 |
# ===== aggregates
|
| 59 |
# Corrected: Use actual string column names instead of integer indices
|
| 60 |
-
df["temp_gap"] = df[lub_oil_temp_col] - df[coolant_temp_col] # oil vs coolant
|
| 61 |
-
df["pressure_sum"] = df[[lub_oil_pressure_col, fuel_pressure_col, coolant_pressure_col]].sum(axis=1)
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
df = df.fillna(0)
|
| 64 |
|
|
|
|
| 13 |
else:
|
| 14 |
# These are the expected column names after initial preprocessing
|
| 15 |
# They should be consistent with the features defined in the overall dataset.
|
| 16 |
+
"""expected_column_names = [
|
| 17 |
'Engine_rpm', 'Lub_oil_pressure', 'Fuel_pressure',
|
| 18 |
'Coolant_pressure', 'lub_oil_temp', 'Coolant_temp'
|
| 19 |
]
|
| 20 |
+
df = pd.DataFrame(X, columns=expected_column_names)"""
|
| 21 |
|
| 22 |
df.columns = (df.columns
|
| 23 |
.str.strip()
|
| 24 |
.str.replace(" ","_")
|
| 25 |
.str.replace(r"[^\w]","_",regex=True)
|
| 26 |
+
.str.lower()
|
| 27 |
)
|
| 28 |
|
| 29 |
+
"""engine_rpm_col = 'Engine_rpm'
|
| 30 |
lub_oil_pressure_col = 'Lub_oil_pressure'
|
| 31 |
fuel_pressure_col = 'Fuel_pressure'
|
| 32 |
coolant_pressure_col = 'Coolant_pressure'
|
| 33 |
lub_oil_temp_col = 'lub_oil_temp'
|
| 34 |
+
coolant_temp_col = 'Coolant_temp'"""
|
| 35 |
|
| 36 |
+
core_sensor_cols = df.columns.tolist()
|
| 37 |
+
"""
|
| 38 |
+
[
|
| 39 |
engine_rpm_col, lub_oil_pressure_col, fuel_pressure_col,
|
| 40 |
coolant_pressure_col, lub_oil_temp_col, coolant_temp_col
|
| 41 |
+
]"""
|
| 42 |
|
| 43 |
# ===== diff features
|
| 44 |
for col_name in df.select_dtypes(include=np.number).columns:
|
|
|
|
| 60 |
|
| 61 |
# ===== aggregates
|
| 62 |
# Corrected: Use actual string column names instead of integer indices
|
| 63 |
+
#df["temp_gap"] = df[lub_oil_temp_col] - df[coolant_temp_col] # oil vs coolant
|
| 64 |
+
#df["pressure_sum"] = df[[lub_oil_pressure_col, fuel_pressure_col, coolant_pressure_col]].sum(axis=1)
|
| 65 |
+
|
| 66 |
+
df["temp_gap"] = df['lub_oil_temp'] - df['coolant_temp'] # oil vs coolant
|
| 67 |
+
df["pressure_sum"] = df[['lub_oil_pressure','fuel_pressure', 'coolant_pressure']].sum(axis=1)
|
| 68 |
|
| 69 |
df = df.fillna(0)
|
| 70 |
|