2026_MLB_Model / features /batter_features.py
Syntrex's picture
Rename features/batter_features.txt to features/batter_features.py
6b0d9f1 verified
raw
history blame
1.8 kB
from __future__ import annotations
import pandas as pd
def _safe_quantile(series: pd.Series, q: float) -> float:
valid = pd.to_numeric(series, errors="coerce").dropna()
if valid.empty:
return 0.0
return float(valid.quantile(q))
def batter_summary(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate per-batter contact-quality features from row-level data.

    Rows are grouped by ``player_name``. The ``launch_speed``,
    ``launch_angle`` and ``xwoba`` columns are optional: each is coerced to
    numeric (unparseable values become NaN), and a missing column is treated
    as a column with no usable data.

    Args:
        df: Row-level data containing a ``player_name`` column.

    Returns:
        One row per player with the columns ``player_name``, ``events``,
        ``ev_avg``, ``ev90``, ``la_avg``, ``hard_hit_rate``,
        ``barrel_like_rate`` and ``xwoba_avg``, sorted by ``ev90`` then
        ``xwoba_avg`` in descending order with a fresh integer index.
        Metrics with no usable data are reported as ``0.0``.

        An empty, column-less DataFrame is returned when ``df`` is empty,
        has no ``player_name`` column, or produces no groups (for example
        when every ``player_name`` is null).
    """
    if df.empty or "player_name" not in df.columns:
        return pd.DataFrame()

    def numeric_column(group: pd.DataFrame, column: str) -> pd.Series:
        # A missing column becomes an all-NaN Series on the group's own index,
        # so boolean masks built from different columns combine row-for-row
        # instead of relying on index alignment with an empty Series.
        if column not in group.columns:
            return pd.Series(float("nan"), index=group.index, dtype=float)
        return pd.to_numeric(group[column], errors="coerce")

    def mean_or_zero(values: pd.Series) -> float:
        # Series.mean() skips NaN; fall back to 0.0 when nothing is usable.
        return float(values.mean()) if values.notna().any() else 0.0

    rows: list[dict] = []
    for player_name, group in df.groupby("player_name"):
        launch_speed = numeric_column(group, "launch_speed")
        launch_angle = numeric_column(group, "launch_angle")
        xwoba = numeric_column(group, "xwoba")

        if launch_speed.notna().any():
            # NOTE: with float data, comparisons against NaN are False, so rows
            # lacking a launch speed still count in the denominator of both
            # rates (unlike ev_avg, which skips them).
            hard_hit_rate = float((launch_speed >= 95).mean())
            barrel_like_rate = float(
                ((launch_speed >= 98) & launch_angle.between(26, 30)).mean()
            )
        else:
            hard_hit_rate = 0.0
            barrel_like_rate = 0.0

        rows.append(
            {
                "player_name": player_name,
                "events": int(len(group)),
                "ev_avg": mean_or_zero(launch_speed),
                "ev90": _safe_quantile(launch_speed, 0.90),
                "la_avg": mean_or_zero(launch_angle),
                "hard_hit_rate": hard_hit_rate,
                "barrel_like_rate": barrel_like_rate,
                "xwoba_avg": mean_or_zero(xwoba),
            }
        )

    # groupby drops null keys by default, so a non-empty frame can still yield
    # zero groups; sorting a column-less frame by name would raise KeyError.
    if not rows:
        return pd.DataFrame()

    out = pd.DataFrame(rows)
    return out.sort_values(["ev90", "xwoba_avg"], ascending=False).reset_index(drop=True)