"""
Batch 12B — Batter Trend Model

Computes per-batter rolling quality metrics over 7d and 30d windows relative
to a reference date (usually the scheduled game date). Produces
trend-direction flags for use as output/debug fields in the simulator.

Rules:
- descriptive stats return None when the window has < 5 rows
- boolean flags return False when descriptive stats are None
- reference_date in the simulator path MUST come from game context,
  not datetime.now()
"""
from __future__ import annotations

from datetime import date, datetime
from typing import Any

import pandas as pd

# ---------------------------------------------------------------------------
# tunables
# ---------------------------------------------------------------------------

# Minimum number of usable (non-null, numeric) rows before a descriptive
# stat is reported; below this the stat is None.
_MIN_STAT_ROWS = 5

# Flag thresholds: deltas are "7d window minus season baseline".
_HOT_EV90_DELTA = 1.5
_COLD_EV90_DELTA = -2.0
_TRENDING_UP_BARREL_DELTA = 0.02

# Minimum window row counts (pitch events) before a flag may fire.
_MIN_ROWS_7D_FLAGS = 15
_MIN_ROWS_30D_FLAGS = 30

# String layouts accepted for reference_date. Strings are stripped and cut to
# 19 characters before matching, which drops fractional seconds, a trailing
# "Z" or a UTC offset from full ISO timestamps.
_REFERENCE_DATE_FORMATS = (
    "%Y-%m-%d",
    "%Y-%m-%dT%H:%M:%SZ",
    "%Y-%m-%dT%H:%M:%S",
    "%Y-%m-%d %H:%M:%S",  # str(datetime) / str(pd.Timestamp) layout
)


# ---------------------------------------------------------------------------
# helpers (shared with batter_baseline to avoid redefining formulas)
# ---------------------------------------------------------------------------

def _percentile(series: pd.Series, q: float) -> float | None:
    """Return the *q* quantile of the numeric values in *series*.

    Non-numeric entries are coerced to NaN and dropped. Returns None when
    fewer than ``_MIN_STAT_ROWS`` usable values remain.
    """
    numeric = pd.to_numeric(series, errors="coerce").dropna()
    if len(numeric) < _MIN_STAT_ROWS:
        return None
    return float(numeric.quantile(q))


def _safe_mean(series: pd.Series) -> float | None:
    """Return the mean of the numeric values in *series*.

    Non-numeric entries are coerced to NaN and dropped. Returns None when
    fewer than ``_MIN_STAT_ROWS`` usable values remain.
    """
    numeric = pd.to_numeric(series, errors="coerce").dropna()
    if len(numeric) < _MIN_STAT_ROWS:
        return None
    return float(numeric.mean())


def _barrel_rate(launch_speed: pd.Series, launch_angle: pd.Series) -> float | None:
    """Same barrel approximation as batter_baseline._build_barrel_mask.

    Only rows where both launch speed and launch angle are numeric are
    counted. Returns the fraction of those rows classified as barrels, or
    None when fewer than ``_MIN_STAT_ROWS`` such rows exist.
    """
    valid = pd.DataFrame(
        {
            "ls": pd.to_numeric(launch_speed, errors="coerce"),
            "la": pd.to_numeric(launch_angle, errors="coerce"),
        }
    ).dropna()
    if len(valid) < _MIN_STAT_ROWS:
        return None
    # The accepted launch-angle band widens as exit velocity increases.
    # Thresholds are kept verbatim so this stays in sync with batter_baseline.
    mask = (
        ((valid["ls"] >= 98) & (valid["la"].between(26, 30)))
        | ((valid["ls"] >= 99) & (valid["la"].between(25, 31)))
        | ((valid["ls"] >= 100) & (valid["la"].between(23, 33)))
        | ((valid["ls"] >= 102) & (valid["la"].between(20, 35)))
    )
    return float(mask.mean())


def _parse_reference_date(reference_date: Any) -> date | None:
    """Parse reference_date from game_row value (str, date, or datetime).

    Accepts ``date``/``datetime`` objects and strings in any layout listed in
    ``_REFERENCE_DATE_FORMATS`` (surrounding whitespace is ignored). The
    calendar date is taken as written; no timezone conversion is applied.

    Returns None for None, NaT, unparseable strings and any other type.
    """
    # NaT is a datetime subclass but carries no usable date.
    if reference_date is None or reference_date is pd.NaT:
        return None
    # datetime must be tested before date: datetime is a subclass of date.
    if isinstance(reference_date, datetime):
        return reference_date.date()
    if isinstance(reference_date, date):
        return reference_date
    if isinstance(reference_date, str):
        text = reference_date.strip()[:19]
        for fmt in _REFERENCE_DATE_FORMATS:
            try:
                return datetime.strptime(text, fmt).date()
            except ValueError:
                continue
    return None


def _window_df(
    player_df: pd.DataFrame,
    ref: date,
    days: int,
) -> pd.DataFrame:
    """Filter player_df to rows within [ref − days, ref).

    The reference date itself is excluded, so rows dated on the game day
    never enter the window. Rows whose ``game_date`` cannot be parsed are
    dropped. Returns an empty frame when there is no ``game_date`` column.
    """
    if "game_date" not in player_df.columns:
        return player_df.iloc[0:0]  # empty, same columns
    game_dates = pd.to_datetime(player_df["game_date"], errors="coerce")
    cutoff = pd.Timestamp(ref)
    lo = cutoff - pd.Timedelta(days=days)
    # NaT compares False on both sides, so unparseable dates fall out here.
    mask = (game_dates >= lo) & (game_dates < cutoff)
    return player_df[mask]


def _column(df: pd.DataFrame, name: str) -> pd.Series:
    """Return ``df[name]``, or an empty float Series when the column is absent."""
    return df.get(name, pd.Series(dtype=float))


def _quality_metrics(
    df: pd.DataFrame,
) -> tuple[float | None, float | None, float | None]:
    """Return ``(ev90, barrel_rate, mean_xwoba)`` for the rows in *df*."""
    launch_speed = _column(df, "launch_speed")
    ev90 = _percentile(launch_speed, 0.90)
    barrel = _barrel_rate(launch_speed, _column(df, "launch_angle"))
    xwoba = _safe_mean(_column(df, "estimated_woba_using_speedangle"))
    return ev90, barrel, xwoba


# ---------------------------------------------------------------------------
# public API
# ---------------------------------------------------------------------------

_EMPTY_TREND: dict[str, Any] = {
    "ev90_7d": None,
    "ev90_30d": None,
    "barrel_7d": None,
    "barrel_30d": None,
    "xwoba_7d": None,
    "xwoba_30d": None,
    "trend_delta_ev90": None,
    "trend_delta_barrel": None,
    "trend_sample_size_7d": 0,  # pitch-event count (~4 per PA), not AB count
    "trend_sample_size_30d": 0,  # pitch-event count (~4 per PA), not AB count
    "batter_hot_flag": False,
    "batter_cold_flag": False,
    "batter_trending_up_flag": False,
}


def build_batter_trend_row(
    statcast_df: pd.DataFrame,
    player_name: str,
    reference_date: Any = None,
) -> dict[str, Any]:
    """
    Compute trend fields for *player_name* relative to *reference_date*.

    Parameters
    ----------
    statcast_df : DataFrame
        Full batter Statcast dataset (may contain multiple players).
    player_name : str
        Exact player name as it appears in statcast_df["player_name"].
    reference_date : str | date | datetime | None
        Game date from game_row context. In the simulator path this MUST be
        supplied from game_row["game_datetime_utc"] or game_row["game_date"].
        If it is None or cannot be parsed, no windows can be built and every
        field is returned at its empty default (None / 0 / False).

    Returns
    -------
    dict with trend fields; descriptive stats are None when the window has
    < 5 usable rows; boolean flags are False when the underlying stat is
    None. A fresh dict is returned on every call.
    """
    if statcast_df is None or statcast_df.empty:
        return dict(_EMPTY_TREND)

    # Without a reference date there are no windows; bail out before doing
    # any per-player work.
    ref = _parse_reference_date(reference_date)
    if ref is None:
        return dict(_EMPTY_TREND)

    if "player_name" not in statcast_df.columns:
        return dict(_EMPTY_TREND)
    try:
        is_player = statcast_df["player_name"].astype(str) == str(player_name)
        player_df = statcast_df[is_player].copy()
    except (KeyError, TypeError, ValueError):
        # Best effort: a malformed frame yields an empty trend, not a crash.
        return dict(_EMPTY_TREND)
    if player_df.empty:
        return dict(_EMPTY_TREND)

    # Season baseline (all available data for this player).
    # NOTE(review): this includes rows dated on or after the reference date
    # when they are present in statcast_df — confirm that is intended for
    # historical/backtest runs.
    launch_speed_all = _column(player_df, "launch_speed")
    ev90_season = _percentile(launch_speed_all, 0.90)
    barrel_season = _barrel_rate(
        launch_speed_all, _column(player_df, "launch_angle")
    )

    df7 = _window_df(player_df, ref, 7)
    df30 = _window_df(player_df, ref, 30)
    n7 = len(df7)
    n30 = len(df30)

    ev90_7d, barrel_7d, xwoba_7d = _quality_metrics(df7)
    ev90_30d, barrel_30d, xwoba_30d = _quality_metrics(df30)

    # Deltas vs season baseline
    trend_delta_ev90 = (
        round(ev90_7d - ev90_season, 3)
        if (ev90_7d is not None and ev90_season is not None)
        else None
    )
    trend_delta_barrel = (
        round(barrel_7d - barrel_season, 4)
        if (barrel_7d is not None and barrel_season is not None)
        else None
    )

    # Flags (gated on minimum sample)
    enough_7d = n7 >= _MIN_ROWS_7D_FLAGS
    enough_30d = n30 >= _MIN_ROWS_30D_FLAGS
    batter_hot_flag = bool(
        trend_delta_ev90 is not None
        and trend_delta_ev90 > _HOT_EV90_DELTA
        and enough_7d
    )
    batter_cold_flag = bool(
        trend_delta_ev90 is not None
        and trend_delta_ev90 < _COLD_EV90_DELTA
        and enough_7d
    )
    batter_trending_up_flag = bool(
        trend_delta_barrel is not None
        and trend_delta_barrel > _TRENDING_UP_BARREL_DELTA
        and enough_30d
    )

    return {
        "ev90_7d": ev90_7d,
        "ev90_30d": ev90_30d,
        "barrel_7d": barrel_7d,
        "barrel_30d": barrel_30d,
        "xwoba_7d": xwoba_7d,
        "xwoba_30d": xwoba_30d,
        "trend_delta_ev90": trend_delta_ev90,
        "trend_delta_barrel": trend_delta_barrel,
        "trend_sample_size_7d": n7,  # pitch-event count, not AB count
        "trend_sample_size_30d": n30,  # pitch-event count, not AB count
        "batter_hot_flag": batter_hot_flag,
        "batter_cold_flag": batter_cold_flag,
        "batter_trending_up_flag": batter_trending_up_flag,
    }