harikrishna1985 committed on
Commit
259a6e5
·
verified ·
1 Parent(s): 11a55e4

Upload src/preprocess.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. src/preprocess.py +52 -0
src/preprocess.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+
4
# Column names that an input frame must contain; validate_input checks
# for each of these. Kept as a list because its repr appears in the
# error message raised when columns are missing.
REQUIRED_COLUMNS = [
    "engine_rpm", "coolant_temperature", "oil_pressure",
    "fuel_pressure", "intake_temp", "battery_voltage",
]
12
+
13
+
14
def validate_input(df: pd.DataFrame) -> None:
    """Verify that every column in REQUIRED_COLUMNS exists in *df*.

    Args:
        df: Frame whose column labels are checked.

    Raises:
        ValueError: If one or more required columns are absent. The
            message lists them in REQUIRED_COLUMNS order.
    """
    available = df.columns
    missing_cols = []
    for name in REQUIRED_COLUMNS:
        if name not in available:
            missing_cols.append(name)

    # Nothing missing: the frame is acceptable.
    if not missing_cols:
        return

    raise ValueError(f"Missing required columns: {missing_cols}")
19
+
20
+
21
def handle_missing_values(df: pd.DataFrame) -> pd.DataFrame:
    """Fill missing values with per-column medians.

    Medians are computed over numeric columns only, so columns without
    a computed median are left unchanged.

    Args:
        df: Frame that may contain missing values.

    Returns:
        A new frame with the gaps filled; *df* itself is not modified.
    """
    column_medians = df.median(numeric_only=True)
    filled = df.fillna(column_medians)
    return filled
24
+
25
+
26
def feature_engineering(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with two derived feature columns appended.

    Added columns:
        temp_pressure_ratio: ``coolant_temperature / (oil_pressure + 1)``.
            The denominator is offset by 1, presumably so that an oil
            pressure of zero does not divide by zero.
        rpm_pressure_interaction: ``engine_rpm * oil_pressure``.

    The input frame is not modified. ``DataFrame.assign`` builds a new
    frame, so callers keep their original data and no
    SettingWithCopyWarning is triggered when *df* is a slice of another
    frame.

    Args:
        df: Frame containing ``coolant_temperature``, ``oil_pressure``
            and ``engine_rpm`` columns.

    Returns:
        A new frame with the derived columns added after the existing ones.

    Raises:
        KeyError: If any of the source columns is missing.
    """
    oil_pressure = df["oil_pressure"]
    return df.assign(
        temp_pressure_ratio=df["coolant_temperature"] / (oil_pressure + 1),
        rpm_pressure_interaction=df["engine_rpm"] * oil_pressure,
    )
34
+
35
+
36
def ensure_column_order(df: pd.DataFrame) -> pd.DataFrame:
    """Return *df* with its columns arranged in sorted label order.

    NOTE(review): this assumes the model was trained on columns in the
    same sorted order; confirm against the training pipeline.

    Args:
        df: Frame whose columns should be reordered.

    Returns:
        A frame holding the same columns, ordered by ``sorted()``.
    """
    ordered_labels = sorted(df.columns)
    return df.loc[:, ordered_labels]
39
+
40
+
41
def preprocess_input(df: pd.DataFrame) -> pd.DataFrame:
    """Run the complete preprocessing pipeline on *df*.

    Steps, in order: validate the required columns, fill missing
    values, add the derived features, and put the columns in their
    final order.

    Args:
        df: Raw input frame.

    Returns:
        The processed frame.

    Raises:
        ValueError: Propagated from validate_input when required
            columns are absent.
    """
    # Fail fast before any transformation is attempted.
    validate_input(df)

    return (
        df.pipe(handle_missing_values)
        .pipe(feature_engineering)
        .pipe(ensure_column_order)
    )