# f1-pit-predictor/src/feature_engineering.py
# T0MYYY - commit bb21b5d (verified)
# Deploy full-stack FastAPI + dashboard with CSV batch inference
"""Feature engineering ported from notebooks/EDA.ipynb (add_features cell).
All features are scoped to (Year, Race, Driver, Stint) so nothing leaks across
pit stops, races, or years. Rolling features apply shift(1) before rolling so
the current lap is never part of its own feature.
"""
from __future__ import annotations
import pandas as pd
# Columns that together identify one stint; every grouped feature is computed
# inside a single (Year, Race, Driver, Stint) partition.
GROUP_KEY = ["Year", "Race", "Driver", "Stint"]

# Row ordering applied before grouping: stint identity first, then lap order.
SORT_KEY = [*GROUP_KEY, "LapNumber"]

# Arguments for pd.cut on "TyreLife": nine labels for the nine intervals
# delimited by the ten bin edges.
TYRELIFE_BUCKETS = dict(
    bins=[-0.1, 5, 10, 15, 20, 25, 30, 40, 60, 200],
    labels=[
        "0_5",
        "6_10",
        "11_15",
        "16_20",
        "21_25",
        "26_30",
        "31_40",
        "41_60",
        "60plus",
    ],
)
def apply_feature_engineering(df: pd.DataFrame) -> pd.DataFrame:
    """Return a sorted copy of ``df`` extended with per-stint model features.

    The input is sorted by ``SORT_KEY`` and re-indexed from zero, so the
    returned frame does not keep the caller's row order or index. The caller's
    frame itself is left untouched. All grouped computations use
    ``GROUP_KEY`` partitions of the sorted frame.

    Columns read: ``"LapTime (s)"``, ``"LapTime_Delta"``, ``"Position"``,
    ``"TyreLife"``, ``"RaceProgress"`` and every column named in ``SORT_KEY``.

    Columns written, in this order:

    * ``LapTime_lag{n}``, ``LapTime_Delta_lag{n}``, ``Position_lag{n}`` for
      ``n`` in 1, 2, 3: the value ``n`` rows earlier in the same group.
    * ``LapTime_roll{w}_mean``, ``LapTime_roll{w}_std``,
      ``LapTimeDelta_roll{w}_mean`` for ``w`` in 3, 5: rolling statistics over
      the previous rows of the group (the series is shifted by one before the
      window is applied, so the current row is excluded).
    * ``TyreLife_bucket``: label from ``TYRELIFE_BUCKETS`` as a string; a
      ``TyreLife`` that is missing or outside the bin edges becomes ``"nan"``.
    * ``IsEarlyRace`` / ``IsLateRace``: 0/1 flags for ``RaceProgress`` below
      0.25 / above 0.70.
    * ``StintMin_sofar``: running minimum of ``"LapTime (s)"`` in the group,
      current row included.
    * ``LapTime_vs_StintMin``: ``"LapTime (s)"`` minus ``StintMin_sofar``.

    Raises:
        KeyError: if a required column is absent from ``df``.
    """
    laps = df.sort_values(by=SORT_KEY).reset_index(drop=True)
    by_stint = laps.groupby(GROUP_KEY, sort=False)

    lap_time = by_stint["LapTime (s)"]
    lap_delta = by_stint["LapTime_Delta"]
    track_pos = by_stint["Position"]

    # --- Lagged values -----------------------------------------------------
    # The inner loop runs over sources so the columns are appended grouped by
    # lag (all lag1 columns, then lag2, then lag3).
    lag_sources = (
        ("LapTime", lap_time),
        ("LapTime_Delta", lap_delta),
        ("Position", track_pos),
    )
    for steps_back in (1, 2, 3):
        for prefix, grouped in lag_sources:
            laps[f"{prefix}_lag{steps_back}"] = grouped.shift(steps_back)

    # --- Rolling statistics over prior rows only ----------------------------
    def prior_mean(window):
        """Build a per-group mean over up to ``window`` preceding rows."""

        def stat(series):
            return series.shift(1).rolling(window, min_periods=1).mean()

        return stat

    def prior_std(window):
        """Build a per-group std over preceding rows (needs at least two)."""

        def stat(series):
            return series.shift(1).rolling(window, min_periods=2).std()

        return stat

    for window in (3, 5):
        mean_of_prior = prior_mean(window)
        laps[f"LapTime_roll{window}_mean"] = lap_time.transform(mean_of_prior)
        laps[f"LapTime_roll{window}_std"] = lap_time.transform(prior_std(window))
        laps[f"LapTimeDelta_roll{window}_mean"] = lap_delta.transform(mean_of_prior)

    # --- Tyre-age bucket ----------------------------------------------------
    # pd.cut intervals are right-closed by default, e.g. TyreLife == 5 -> "0_5".
    tyre_bucket = pd.cut(
        laps["TyreLife"],
        bins=TYRELIFE_BUCKETS["bins"],
        labels=TYRELIFE_BUCKETS["labels"],
    )
    laps["TyreLife_bucket"] = tyre_bucket.astype(str)

    # --- Race-phase flags ---------------------------------------------------
    progress = laps["RaceProgress"]
    laps["IsEarlyRace"] = progress.lt(0.25).astype(int)
    laps["IsLateRace"] = progress.gt(0.70).astype(int)

    # --- Gap to the best lap of the stint so far ----------------------------
    laps["StintMin_sofar"] = lap_time.cummin()
    laps["LapTime_vs_StintMin"] = laps["LapTime (s)"] - laps["StintMin_sofar"]

    return laps