Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| """ | |
| Batch 12B — Batter Trend Model | |
| Computes per-batter rolling quality metrics over 7d and 30d windows relative to a | |
| reference date (usually the scheduled game date). Produces trend-direction flags | |
| for use as output/debug fields in the simulator. | |
| Rules: | |
| - descriptive stats return None when the window has < 5 usable (non-null, numeric) values | |
| - boolean flags return False when descriptive stats are None | |
| - reference_date in the simulator path MUST come from game context, | |
| not datetime.now() | |
| """ | |
| from datetime import date, datetime | |
| from typing import Any | |
| import pandas as pd | |
| # --------------------------------------------------------------------------- | |
| # helpers (shared with batter_baseline to avoid redefining formulas) | |
| # --------------------------------------------------------------------------- | |
def _percentile(series: pd.Series, q: float) -> float | None:
    """Return the *q*-quantile of the numeric entries in *series*.

    Entries that cannot be coerced to a number are discarded first. When
    fewer than five usable values remain the result is ``None``.
    """
    coerced = pd.to_numeric(series, errors="coerce")
    usable = coerced.dropna()
    return float(usable.quantile(q)) if len(usable) >= 5 else None
def _safe_mean(series: pd.Series) -> float | None:
    """Average the numeric entries of *series*.

    Non-numeric and missing entries are ignored. ``None`` is returned when
    fewer than five usable values are left.
    """
    usable = pd.to_numeric(series, errors="coerce").dropna()
    if len(usable) >= 5:
        return float(usable.mean())
    return None
def _barrel_rate(launch_speed: pd.Series, launch_angle: pd.Series) -> float | None:
    """Return the share of batted balls that fall in the barrel zone.

    Same barrel approximation as batter_baseline._build_barrel_mask.

    Only rows where both launch speed and launch angle are numeric are
    counted. ``None`` is returned when fewer than five such rows exist.
    """
    speed = pd.to_numeric(launch_speed, errors="coerce")
    angle = pd.to_numeric(launch_angle, errors="coerce")
    batted = pd.DataFrame({"ls": speed, "la": angle}).dropna()
    if len(batted) < 5:
        return None

    # (minimum launch speed, lowest angle, highest angle); angle bounds are
    # inclusive. A ball is a barrel when it satisfies at least one tier.
    tiers = (
        (98, 26, 30),
        (99, 25, 31),
        (100, 23, 33),
        (102, 20, 35),
    )
    is_barrel = pd.Series(False, index=batted.index)
    for min_speed, angle_lo, angle_hi in tiers:
        is_barrel |= (batted["ls"] >= min_speed) & batted["la"].between(angle_lo, angle_hi)
    return float(is_barrel.mean())
def _parse_reference_date(reference_date: Any) -> date | None:
    """Parse reference_date from a game_row value (str, date, or datetime).

    Parameters
    ----------
    reference_date : str | date | datetime | None
        Value taken from the game context.

    Returns
    -------
    The calendar date, or None when the value is None, is of an unsupported
    type, or is a string that matches none of the accepted layouts.

    Accepted string layouts (surrounding whitespace is ignored):
    ``YYYY-MM-DD``, ``YYYY-MM-DDTHH:MM:SS`` and ``YYYY-MM-DD HH:MM:SS``.
    Anything after the seconds field (a trailing ``Z``, fractional seconds,
    a UTC offset) is ignored because only the first 19 characters are parsed.
    No timezone conversion is performed: the date is read as written.
    """
    if reference_date is None:
        return None
    # datetime must be tested before date: datetime is a subclass of date.
    if isinstance(reference_date, datetime):
        return reference_date.date()
    if isinstance(reference_date, date):
        return reference_date
    if isinstance(reference_date, str):
        # Trim first so padding does not eat into the 19-character window.
        text = reference_date.strip()[:19]
        formats = (
            "%Y-%m-%d",
            "%Y-%m-%dT%H:%M:%SZ",
            "%Y-%m-%dT%H:%M:%S",
            # Space-separated form, e.g. what str() of a timestamp produces.
            "%Y-%m-%d %H:%M:%S",
        )
        for fmt in formats:
            try:
                return datetime.strptime(text, fmt).date()
            except ValueError:
                continue
    return None
def _window_df(
    player_df: pd.DataFrame,
    ref: date,
    days: int,
) -> pd.DataFrame:
    """Return the rows of player_df whose game_date lies in [ref − days, ref).

    The lower bound is inclusive and the upper bound (the reference date
    itself) is exclusive. Rows whose game_date cannot be parsed are dropped.
    An empty frame with the same columns is returned when there is no
    "game_date" column.
    """
    if "game_date" in player_df.columns:
        window_end = pd.Timestamp(ref)
        window_start = window_end - pd.Timedelta(days=days)
        played_on = pd.to_datetime(player_df["game_date"], errors="coerce")
        # Unparseable dates become NaT, which compares False on both sides.
        in_window = (played_on >= window_start) & (played_on < window_end)
        return player_df[in_window]
    return player_df.iloc[0:0]  # empty
| # --------------------------------------------------------------------------- | |
| # public API | |
| # --------------------------------------------------------------------------- | |
# Default trend row: every stat unknown, zero samples, every flag off.
_EMPTY_TREND: dict[str, Any] = {
    # Descriptive stats and deltas have no value until they are computed.
    **dict.fromkeys(
        (
            "ev90_7d",
            "ev90_30d",
            "barrel_7d",
            "barrel_30d",
            "xwoba_7d",
            "xwoba_30d",
            "trend_delta_ev90",
            "trend_delta_barrel",
        ),
        None,
    ),
    # Sample sizes are pitch-event counts (~4 per PA), not AB counts.
    "trend_sample_size_7d": 0,
    "trend_sample_size_30d": 0,
    **dict.fromkeys(
        (
            "batter_hot_flag",
            "batter_cold_flag",
            "batter_trending_up_flag",
        ),
        False,
    ),
}
def _trend_column(frame: pd.DataFrame, column: str) -> pd.Series:
    """Return *column* from *frame*, or an empty float Series when it is absent."""
    return frame.get(column, pd.Series(dtype=float))


def build_batter_trend_row(
    statcast_df: pd.DataFrame,
    player_name: str,
    reference_date: Any = None,
) -> dict[str, Any]:
    """
    Compute trend fields for *player_name* relative to *reference_date*.

    Parameters
    ----------
    statcast_df : DataFrame
        Full batter Statcast dataset (may contain multiple players).
    player_name : str
        Exact player name as it appears in statcast_df["player_name"].
    reference_date : str | date | datetime | None
        Game date from game_row context. In the simulator path this MUST be
        supplied from game_row["game_datetime_utc"] or game_row["game_date"].
        If it is None or cannot be parsed, no stats are computed and the
        empty default row is returned.

    Returns
    -------
    dict with trend fields. A fresh copy of the empty default row (stats
    None, sample sizes 0, flags False) is returned when statcast_df is None
    or empty, has no "player_name" column, contains no rows for the player,
    or when reference_date is unusable. Otherwise descriptive stats are None
    when a window has fewer than 5 usable (non-null, numeric) values, and
    boolean flags are False when the underlying stat is None.
    """
    if statcast_df is None or statcast_df.empty:
        return dict(_EMPTY_TREND)
    # Explicit schema check instead of a blanket ``except Exception``.
    if "player_name" not in statcast_df.columns:
        return dict(_EMPTY_TREND)

    # Without a reference date there are no windows, so bail out before doing
    # any per-player work.
    ref = _parse_reference_date(reference_date)
    if ref is None:
        return dict(_EMPTY_TREND)

    # Boolean-mask indexing already yields a new frame and nothing below
    # mutates it, so no extra copy is taken.
    player_df = statcast_df[statcast_df["player_name"].astype(str) == str(player_name)]
    if player_df.empty:
        return dict(_EMPTY_TREND)

    # Season baseline (all available data for the player).
    # NOTE(review): the baseline is not restricted to rows before ref. If
    # statcast_df can hold games on or after the reference date (e.g. in a
    # backtest), the baseline sees them — confirm this is intended.
    ev90_season = _percentile(_trend_column(player_df, "launch_speed"), 0.90)
    barrel_season = _barrel_rate(
        _trend_column(player_df, "launch_speed"),
        _trend_column(player_df, "launch_angle"),
    )

    # Windowed subsets: [ref - 7d, ref) and [ref - 30d, ref).
    df7 = _window_df(player_df, ref, 7)
    df30 = _window_df(player_df, ref, 30)
    # Row counts of the windows. These count every row, not only the rows
    # that carry a launch speed, so they can exceed the number of values the
    # stats below are actually based on.
    n7 = len(df7)
    n30 = len(df30)

    ev90_7d = _percentile(_trend_column(df7, "launch_speed"), 0.90)
    ev90_30d = _percentile(_trend_column(df30, "launch_speed"), 0.90)
    barrel_7d = _barrel_rate(
        _trend_column(df7, "launch_speed"),
        _trend_column(df7, "launch_angle"),
    )
    barrel_30d = _barrel_rate(
        _trend_column(df30, "launch_speed"),
        _trend_column(df30, "launch_angle"),
    )
    xwoba_7d = _safe_mean(_trend_column(df7, "estimated_woba_using_speedangle"))
    xwoba_30d = _safe_mean(_trend_column(df30, "estimated_woba_using_speedangle"))

    # Deltas: 7-day value minus season baseline; None if either side is None.
    trend_delta_ev90 = (
        round(ev90_7d - ev90_season, 3)
        if (ev90_7d is not None and ev90_season is not None)
        else None
    )
    trend_delta_barrel = (
        round(barrel_7d - barrel_season, 4)
        if (barrel_7d is not None and barrel_season is not None)
        else None
    )

    # Flags (gated on minimum sample).
    batter_hot_flag = bool(
        trend_delta_ev90 is not None
        and trend_delta_ev90 > 1.5
        and n7 >= 15
    )
    batter_cold_flag = bool(
        trend_delta_ev90 is not None
        and trend_delta_ev90 < -2.0
        and n7 >= 15
    )
    # NOTE(review): this flag is driven by the 7-day barrel delta but gated on
    # the 30-day sample size — confirm the mixed windows are intentional.
    batter_trending_up_flag = bool(
        trend_delta_barrel is not None
        and trend_delta_barrel > 0.02
        and n30 >= 30
    )

    return {
        "ev90_7d": ev90_7d,
        "ev90_30d": ev90_30d,
        "barrel_7d": barrel_7d,
        "barrel_30d": barrel_30d,
        "xwoba_7d": xwoba_7d,
        "xwoba_30d": xwoba_30d,
        "trend_delta_ev90": trend_delta_ev90,
        "trend_delta_barrel": trend_delta_barrel,
        "trend_sample_size_7d": n7,  # pitch-event count, not AB count
        "trend_sample_size_30d": n30,  # pitch-event count, not AB count
        "batter_hot_flag": batter_hot_flag,
        "batter_cold_flag": batter_cold_flag,
        "batter_trending_up_flag": batter_trending_up_flag,
    }