Spaces:
Sleeping
Sleeping
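"""Feature engineering ported from notebooks/EDA.ipynb (add_features cell).

Lag, rolling, and running-minimum features are computed within each
(Year, Race, Driver, Stint) group, so nothing leaks across pit stops, races,
or years. Rolling features apply shift(1) before rolling, so the current lap
is never part of its own rolling feature. Note that StintMin_sofar is a
cumulative minimum and therefore does include the current lap; the tyre-life
bucket and early/late-race flags are plain row-wise transforms.
"""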
| from __future__ import annotations | |
| import pandas as pd | |
# Columns used to group rows when building lag, rolling and cumulative features.
GROUP_KEY = ["Year", "Race", "Driver", "Stint"]

# Row order applied before grouping: group columns first, then lap number.
SORT_KEY = [*GROUP_KEY, "LapNumber"]

# Edges and labels handed to ``pd.cut`` for the ``TyreLife`` column. There is
# one more edge than there are labels; the first edge is slightly below zero so
# that a tyre life of 0 lands in the first bucket.
TYRELIFE_BUCKETS = dict(
    bins=[-0.1, 5, 10, 15, 20, 25, 30, 40, 60, 200],
    labels=[
        "0_5",
        "6_10",
        "11_15",
        "16_20",
        "21_25",
        "26_30",
        "31_40",
        "41_60",
        "60plus",
    ],
)
def apply_feature_engineering(df: pd.DataFrame) -> pd.DataFrame:
    """Return a sorted copy of ``df`` with the engineered feature columns added.

    The frame is ordered by ``SORT_KEY`` and re-indexed, then grouped by
    ``GROUP_KEY`` so that lag, rolling and running-minimum features are
    computed within each group only.

    Columns added, in this order:
        * ``LapTime_lag{k}``, ``LapTime_Delta_lag{k}``, ``Position_lag{k}``
          for k in 1..3 -- the value from k rows earlier in the same group.
        * ``LapTime_roll{w}_mean``, ``LapTime_roll{w}_std``,
          ``LapTimeDelta_roll{w}_mean`` for w in (3, 5) -- rolling statistics
          over the rows preceding the current one in the same group.
        * ``TyreLife_bucket`` -- ``TyreLife`` binned with ``TYRELIFE_BUCKETS``
          and cast to ``str``.
        * ``IsEarlyRace`` / ``IsLateRace`` -- 1 when ``RaceProgress`` is below
          0.25 / above 0.70, otherwise 0.
        * ``StintMin_sofar`` -- cumulative minimum of ``LapTime (s)`` within
          the group (the current row is included).
        * ``LapTime_vs_StintMin`` -- ``LapTime (s)`` minus ``StintMin_sofar``.

    Args:
        df: Frame containing the ``SORT_KEY`` columns plus ``LapTime (s)``,
            ``LapTime_Delta``, ``Position``, ``TyreLife`` and ``RaceProgress``.

    Returns:
        A new DataFrame; new columns are written to the sorted copy, not to
        the frame passed in.
    """
    lap_time_col = "LapTime (s)"
    delta_col = "LapTime_Delta"

    def prior_window(series, window, min_periods):
        # Shift by one first so the window ends on the previous row of the
        # group and never contains the row being described.
        return series.shift(1).rolling(window, min_periods=min_periods)

    df = df.sort_values(SORT_KEY).reset_index(drop=True)
    stints = df.groupby(GROUP_KEY, sort=False)

    # (output-name prefix, source column) pairs, in output column order.
    lag_sources = (
        ("LapTime", lap_time_col),
        ("LapTime_Delta", delta_col),
        ("Position", "Position"),
    )
    for offset in (1, 2, 3):
        for prefix, column in lag_sources:
            df[f"{prefix}_lag{offset}"] = stints[column].shift(offset)

    for window in (3, 5):
        df[f"LapTime_roll{window}_mean"] = stints[lap_time_col].transform(
            lambda s: prior_window(s, window, 1).mean()
        )
        # A standard deviation needs at least two observations.
        df[f"LapTime_roll{window}_std"] = stints[lap_time_col].transform(
            lambda s: prior_window(s, window, 2).std()
        )
        df[f"LapTimeDelta_roll{window}_mean"] = stints[delta_col].transform(
            lambda s: prior_window(s, window, 1).mean()
        )

    tyre_life_bins = pd.cut(
        df["TyreLife"],
        bins=TYRELIFE_BUCKETS["bins"],
        labels=TYRELIFE_BUCKETS["labels"],
    )
    df["TyreLife_bucket"] = tyre_life_bins.astype(str)

    race_progress = df["RaceProgress"]
    df["IsEarlyRace"] = race_progress.lt(0.25).astype(int)
    df["IsLateRace"] = race_progress.gt(0.70).astype(int)

    df["StintMin_sofar"] = stints[lap_time_col].cummin()
    df["LapTime_vs_StintMin"] = df[lap_time_col] - df["StintMin_sofar"]
    return df