from __future__ import annotations import re import datetime import pandas as pd from utils.logger import logger PALETTE = { "bg": "#080B14", "panel": "#0F1629", "panel_alt": "#141C35", "accent_blue": "#38BDF8", "accent_green": "#22C55E", "accent_yellow": "#F59E0B", "accent_red": "#EF4444", "accent_indigo": "#818CF8", "text_primary": "#F1F5F9", "text_secondary": "#94A3B8", "text_dim": "#475569", } def _clamp(val, lo=0.0, hi=100.0): if val is None: return None return max(lo, min(hi, float(val))) def _safe_score(val, lo, hi, default=50.0) -> float: """Maps val in [lo, hi] to [0, 100]. Returns default if val is None. Clamps result.""" if val is None: return _clamp(default) span = hi - lo if span == 0: return _clamp(default) return _clamp((float(val) - lo) / span * 100) def _fmt_val(val, fmt: str = "float") -> str: """Consistent None formatting: None → '—', pct → XX.X%, float → X.X, int → X.""" if val is None: return "—" try: f = float(val) except Exception: return str(val) if fmt == "pct": return f"{f * 100:.1f}%" elif fmt == "pct_direct": return f"{f:.1f}%" elif fmt == "int": return str(int(round(f))) else: return f"{f:.1f}" def _sanitize_id(s: str) -> str: """Lowercase, spaces → underscores, strip non-alphanumeric.""" s = str(s).lower().replace(" ", "_") s = re.sub(r"[^a-z0-9_\-]", "", s) return s # --------------------------------------------------------------------------- # Player list helpers # --------------------------------------------------------------------------- def get_available_hitters(statcast_df: pd.DataFrame) -> list[str]: """Players with batted ball events (launch_speed not null OR events not null).""" if statcast_df.empty or "player_name" not in statcast_df.columns: return [] cols_exist = [c for c in ["launch_speed", "events"] if c in statcast_df.columns] if not cols_exist: return sorted(statcast_df["player_name"].dropna().unique().tolist()) mask = pd.Series(False, index=statcast_df.index) for c in cols_exist: mask = mask | statcast_df[c].notna() 
return sorted(statcast_df[mask]["player_name"].dropna().unique().tolist()) def get_available_pitchers(statcast_df: pd.DataFrame) -> list[str]: """Players with pitching events (release_speed not null).""" if statcast_df.empty or "player_name" not in statcast_df.columns: return [] if "release_speed" not in statcast_df.columns: return [] mask = statcast_df["release_speed"].notna() return sorted(statcast_df[mask]["player_name"].dropna().unique().tolist()) def get_available_dates_for_player(statcast_df: pd.DataFrame, player_name: str) -> list[str]: """Returns up to 20 most-recent game dates for the player as ISO strings.""" if statcast_df.empty or "player_name" not in statcast_df.columns: return [] rows = statcast_df[statcast_df["player_name"].astype(str) == str(player_name)] if "game_date" not in rows.columns: return [] dates = rows["game_date"].dropna().sort_values(ascending=False).unique() return [str(d)[:10] for d in dates[:20]] def _get_player_team(statcast_df: pd.DataFrame, player_name: str) -> str: """Try to extract team name from statcast columns for this player.""" if statcast_df.empty or "player_name" not in statcast_df.columns: return "—" rows = statcast_df[statcast_df["player_name"].astype(str) == str(player_name)] if rows.empty: return "—" for col in ["team", "batter_team", "home_team"]: if col in rows.columns: val = rows[col].dropna() if not val.empty: return str(val.iloc[-1]) return "—" # --------------------------------------------------------------------------- # Data quality # --------------------------------------------------------------------------- def _data_quality_hitter(features: dict) -> str: pa = features.get("plate_appearances", 0) or 0 if pa >= 80: return "full" elif pa >= 25: return "partial" return "limited" def _data_quality_pitcher(features: dict) -> str: ss = features.get("sample_size", 0) or 0 if ss >= 100: return "full" elif ss >= 30: return "partial" return "limited" # 
# ---------------------------------------------------------------------------
# Timeframe filtering
# ---------------------------------------------------------------------------

def _filter_statcast_by_window(
    statcast_df: pd.DataFrame,
    player_name: str,
    mode: str,
    year: int | None = None,
    date: str | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
) -> tuple[pd.DataFrame, bool]:
    """
    Returns (filtered_df, used_fallback).

    Rows are first restricted to ``player_name``; ``game_date`` is then
    coerced to datetime and filtered according to ``mode``:
    'single_date' (needs ``date``), 'date_range' (needs ``start_date`` and
    ``end_date``, both inclusive) or 'season' (needs ``year``). Any other
    combination leaves the player's rows unfiltered.

    If the window yields no rows, or the window cannot be applied because an
    argument fails to parse, the full player data is returned and
    used_fallback=True. A missing frame or missing ``player_name`` column
    yields an empty frame with used_fallback=False.
    """
    if statcast_df is None:
        return pd.DataFrame(), False
    if "player_name" not in statcast_df.columns:
        # Keep the column layout but no rows: nothing can match the player.
        return statcast_df.iloc[0:0].copy(), False

    player_rows = statcast_df[statcast_df["player_name"].astype(str) == str(player_name)].copy()
    if player_rows.empty or "game_date" not in player_rows.columns:
        return player_rows, False

    player_rows["game_date"] = pd.to_datetime(player_rows["game_date"], errors="coerce")
    game_dates = player_rows["game_date"]

    window = player_rows
    used_fallback = False
    try:
        if mode == "single_date" and date:
            window = player_rows[game_dates.dt.date == pd.to_datetime(date).date()]
        elif mode == "date_range" and start_date and end_date:
            lo = pd.to_datetime(start_date)
            hi = pd.to_datetime(end_date)
            window = player_rows[(game_dates >= lo) & (game_dates <= hi)]
        elif mode == "season" and year:
            window = player_rows[game_dates.dt.year == int(year)]
    except Exception:
        # Best-effort: an unparseable date/year must not break card
        # generation, but the caller has to know the requested window was
        # NOT applied, so this counts as a fallback.
        window = player_rows
        used_fallback = True

    if window.empty:
        window = player_rows  # fallback to full player data
        used_fallback = True
    return window, used_fallback


# ---------------------------------------------------------------------------
# Metric computation (v1 proxies)
# ---------------------------------------------------------------------------

def _compute_hitter_metrics(features: dict, baseline: dict) -> dict:
    """All metrics are v1 proxies. Returns values in [0, 100].

    Each metric is a weighted blend of range-normalized feature scores.
    ``baseline`` is accepted for interface compatibility and is not read.
    """
    ev90 = features.get("ev90")
    hard_hit = features.get("hard_hit_rate")
    barrel = features.get("barrel_rate")
    la_opt = features.get("la_optimal_hr_rate")
    fb_rate = features.get("fb_rate")
    pull_barrel = features.get("pulled_barrel_rate")
    pull_air = features.get("pull_air_rate")
    pa = features.get("plate_appearances", 0) or 0

    barrel_score = _safe_score(barrel, 0, 0.15, default=30)

    contact_plus = _clamp(
        0.40 * _safe_score(ev90, 85, 103, default=40)
        + 0.35 * _safe_score(hard_hit, 0, 0.50, default=40)
        + 0.25 * barrel_score
    )
    hr_shape_plus = _clamp(
        0.40 * _safe_score(la_opt, 0, 0.15, default=30)
        + 0.35 * _safe_score(fb_rate, 0, 0.50, default=30)
        + 0.25 * barrel_score
    )
    damage_zone_plus = _clamp(
        0.60 * _safe_score(pull_barrel, 0, 0.10, default=25)
        + 0.40 * _safe_score(pull_air, 0, 0.25, default=25)
    )
    ball_flight_confidence = _clamp(_safe_score(pa, 0, 200, default=20))

    return {
        "contact_plus": contact_plus,
        "hr_shape_plus": hr_shape_plus,
        "damage_zone_plus": damage_zone_plus,
        "ball_flight_confidence": ball_flight_confidence,
    }


def _compute_pitcher_metrics(features: dict) -> dict:
    """All metrics are v1 proxies. Returns values in [0, 100].

    ``bullpen_fatigue`` is a constant 50.0 placeholder; ``command_plus`` is
    50.0 when both ``ball_rate`` and ``csw_rate`` are missing.
    """
    velo = features.get("avg_release_speed")
    pfx_z = features.get("avg_pfx_z")
    ball_rate = features.get("ball_rate")
    csw_rate = features.get("csw_rate")
    barrel_all = features.get("barrel_rate_allowed")
    ev_all = features.get("ev_allowed")

    # Missing movement must stay missing so the default score applies;
    # coercing it to 0 would score it as the worst possible movement.
    movement = None if pfx_z is None or pd.isna(pfx_z) else abs(pfx_z)

    stuff_plus = _clamp(
        0.55 * _safe_score(velo, 85, 99, default=40)
        + 0.45 * _safe_score(movement, 0, 1.2, default=30)
    )

    if ball_rate is None and csw_rate is None:
        command_plus = 50.0
    else:
        # Ball rate is inverted: fewer balls means better command.
        command_plus = _clamp(
            0.50 * (100 - _safe_score(ball_rate, 0.15, 0.42, default=50))
            + 0.50 * _safe_score(csw_rate, 0.18, 0.38, default=40)
        )

    # Both inputs are inverted: less damage allowed means a higher score.
    damage_zone_plus = _clamp(
        0.50 * (100 - _safe_score(barrel_all, 0, 0.15, default=50))
        + 0.50 * (100 - _safe_score(ev_all, 85, 95, default=50))
    )
    bullpen_fatigue = 50.0

    return {
        "stuff_plus": stuff_plus,
        "command_plus": command_plus,
        "damage_zone_plus": damage_zone_plus,
        "bullpen_fatigue": bullpen_fatigue,
    }


# ---------------------------------------------------------------------------
# Readout builders
# ---------------------------------------------------------------------------

def build_hitter_readout(metrics: dict, features: dict, trend: dict) -> list[str]:
    """Build up to four human-readable summary lines for a hitter card.

    Lines are emitted in order: contact quality, HR shape, hot/cold trend,
    and a small-sample warning (fewer than 30 plate appearances).
    """
    trend = trend or {}
    contact = metrics.get("contact_plus", 50)
    hr_shape = metrics.get("hr_shape_plus", 50)
    pa = features.get("plate_appearances", 0) or 0
    lines = []

    if contact >= 80:
        lines.append("Elite contact profile — high exit velocity, strong barrel frequency.")
    elif contact >= 65:
        lines.append("Above-average contact quality with solid hard-hit rates.")
    elif contact < 40:
        lines.append("Below-average contact profile — limited hard-hit frequency.")
    else:
        lines.append("Moderate contact quality in selected window.")

    if hr_shape >= 75:
        lines.append("Strong HR trajectory shape — pull power with optimal launch angle.")
    elif hr_shape >= 55:
        lines.append("Moderate home run shape. Fly ball profile developing.")

    hot = trend.get("batter_hot_flag")
    cold = trend.get("batter_cold_flag")
    ev90_7 = trend.get("ev90_7d")
    # NaN is truthy, so it has to be excluded explicitly or the readout
    # would display "(nan mph)".
    has_ev90_7 = not pd.isna(ev90_7) and bool(ev90_7)
    if hot and has_ev90_7:
        lines.append(f"TRENDING HOT — EV90 up in last 7 days ({float(ev90_7):.1f} mph).")
    elif cold:
        lines.append("COOLING — exit velocity down over last 7 days.")

    if pa < 30:
        lines.append(f"Limited sample ({pa} PA). Metrics are early-window estimates.")

    return lines[:4]


def build_pitcher_readout(metrics: dict, features: dict) -> list[str]:
    """Build up to four human-readable summary lines for a pitcher card.

    Lines are emitted in order: stuff, command, contact suppression, and a
    small-sample warning (fewer than 30 events).
    """
    stuff = metrics.get("stuff_plus", 50)
    command = metrics.get("command_plus", 50)
    damage = metrics.get("damage_zone_plus", 50)
    ss = features.get("sample_size", 0) or 0
    lines = []

    if stuff >= 80:
        lines.append("Elite velocity and movement combo — difficult to square up.")
    elif stuff >= 65:
        lines.append("Above-average stuff. Good movement on primary pitches.")
    elif stuff < 40:
        lines.append("Below-average stuff. Hitter-friendly pitch profile.")
    else:
        lines.append("Average stuff in selected window.")

    if command >= 70:
        lines.append("High command profile — limits free passes and works ahead in count.")
    elif command < 40:
        lines.append("Command issues — elevated ball rate, struggles to work ahead.")

    if damage >= 70:
        lines.append("Suppresses hard contact well — low barrel and EV allowed.")
    elif damage < 40:
        lines.append("Vulnerable to barrel damage — high hard-hit contact allowed.")

    if ss < 30:
        lines.append(f"Limited sample ({ss} events). Metrics are early-window estimates.")

    return lines[:4]


# ---------------------------------------------------------------------------
# Timeframe label
# ---------------------------------------------------------------------------

def _build_timeframe_label(mode, year, date, start_date, end_date) -> str:
    """Return a short label for the requested window; 'recent' when no window applies."""
    if mode == "single_date" and date:
        return _sanitize_id(str(date)[:10])
    if mode == "date_range" and start_date and end_date:
        return _sanitize_id(f"{str(start_date)[:10]}_to_{str(end_date)[:10]}")
    if mode == "season" and year:
        return f"{year}_season"
    return "recent"


# ---------------------------------------------------------------------------
# Public data builders
# ---------------------------------------------------------------------------

def _utc_epoch_seconds() -> int:
    """Current Unix time in whole seconds, taken from a timezone-aware UTC clock."""
    # A naive utcnow() value would be interpreted as LOCAL time by
    # .timestamp(), shifting the epoch by the machine's UTC offset.
    return int(datetime.datetime.now(datetime.timezone.utc).timestamp())


def _resolve_reference_date(windowed_df, date, end_date):
    """Pick the reference date: ``end_date``, else ``date``, else the latest game date.

    Returns None when no explicit date is given and the frame has no usable
    ``game_date`` value (missing column, no rows, or only NaT).
    """
    explicit = end_date or date
    if explicit:
        return explicit
    if windowed_df is None or windowed_df.empty or "game_date" not in windowed_df.columns:
        return None
    latest = windowed_df["game_date"].max()
    return None if pd.isna(latest) else str(latest)[:10]


def build_hitter_card_data(
    player_name: str,
    statcast_df: pd.DataFrame,
    mode: str = "season",
    year: int | None = None,
    date: str | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
) -> dict:
    """Assemble the payload dict for a hitter card.

    Filters ``statcast_df`` to the player and requested window, derives
    features, baseline, trend and rolling form through the project models,
    and computes the v1 proxy metrics and readout lines. Model failures are
    logged and replaced with empty dicts so a card is always produced.
    When the window filter fell back to the full player data, the timeframe
    label gets a ``_fallback`` suffix and data quality is 'limited'.
    """
    from models.batter_baseline import build_batter_feature_row, compute_batter_baseline
    from models.batter_trend_model import build_batter_trend_row
    from models.rolling_form_model import build_batter_rolling_form_row

    windowed_df, used_fallback = _filter_statcast_by_window(
        statcast_df, player_name, mode, year, date, start_date, end_date
    )
    ref_date = _resolve_reference_date(windowed_df, date, end_date)

    features = {}
    baseline = {}
    trend = {}
    rolling = {}

    # Features from windowed data — respects selected timeframe
    try:
        features = build_batter_feature_row(windowed_df, player_name)
    except Exception as exc:
        logger.warning("[card_data] batter features windowed: %s", exc)
        try:
            features = build_batter_feature_row(statcast_df, player_name)
        except Exception as exc2:
            logger.warning("[card_data] batter features full fallback: %s", exc2)
    if features is None:
        features = {}

    # Baseline is derived from whichever feature row was obtained above
    try:
        baseline = compute_batter_baseline(features) if features else {}
    except Exception as exc:
        logger.warning("[card_data] batter baseline: %s", exc)
    if baseline is None:
        baseline = {}

    # Trend + rolling use windowed reference
    try:
        trend = build_batter_trend_row(windowed_df, player_name, reference_date=ref_date)
    except Exception as exc:
        logger.warning("[card_data] batter trend: %s", exc)
        trend = {}
    try:
        rolling = build_batter_rolling_form_row(windowed_df, player_name, reference_date=ref_date)
    except Exception as exc:
        logger.warning("[card_data] batter rolling form: %s", exc)
        rolling = {}

    metrics = _compute_hitter_metrics(features, baseline)
    readout = build_hitter_readout(metrics, features, trend)
    dq = "limited" if used_fallback else _data_quality_hitter(features)
    team = _get_player_team(statcast_df, player_name)
    tf_label = _build_timeframe_label(mode, year, date, start_date, end_date)
    if used_fallback:
        tf_label = f"{tf_label}_fallback"

    card_id = _sanitize_id(f"{player_name}_{tf_label}_hitter_{_utc_epoch_seconds()}")

    payload = {
        "card_type": "hitter",
        "player_name": player_name,
        "team": team,
        "timeframe": tf_label,
        "data_quality": dq,
        "card_id": card_id,
        "metrics": metrics,
        "summary": {
            "ev90": features.get("ev90"),
            "barrel_rate": features.get("barrel_rate"),
            "hard_hit_rate": features.get("hard_hit_rate"),
            "xwoba": features.get("xwoba"),
            "avg_launch_angle": features.get("avg_launch_angle"),
        },
        "baseline": baseline,
        "trend": trend,
        "rolling": rolling,
        "readout": readout,
        "plate_appearances": features.get("plate_appearances", 0),
        "windowed_df": windowed_df,
    }
    logger.info(
        "[card_generated] player=%s type=hitter range=%s quality=%s",
        player_name, tf_label, dq,
    )
    return payload


def build_pitcher_card_data(
    player_name: str,
    statcast_df: pd.DataFrame,
    mode: str = "season",
    year: int | None = None,
    date: str | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
) -> dict:
    """Assemble the payload dict for a pitcher card.

    Filters ``statcast_df`` to the player and requested window, derives
    features and rolling form through the project models, and computes the
    v1 proxy metrics and readout lines. Model failures are logged and
    replaced with empty dicts so a card is always produced. When the window
    filter fell back to the full player data, the timeframe label gets a
    ``_fallback`` suffix and data quality is 'limited'.
    """
    from models.pitcher_adjustment import build_pitcher_feature_row
    from models.rolling_form_model import build_pitcher_rolling_form_row

    windowed_df, used_fallback = _filter_statcast_by_window(
        statcast_df, player_name, mode, year, date, start_date, end_date
    )
    ref_date = _resolve_reference_date(windowed_df, date, end_date)

    features = {}
    rolling = {}

    try:
        features = build_pitcher_feature_row(windowed_df, player_name)
    except Exception as exc:
        logger.warning("[card_data] pitcher features windowed: %s", exc)
        try:
            features = build_pitcher_feature_row(statcast_df, player_name)
        except Exception as exc2:
            logger.warning("[card_data] pitcher features full fallback: %s", exc2)
    if features is None:
        features = {}

    try:
        rolling = build_pitcher_rolling_form_row(windowed_df, player_name, reference_date=ref_date)
    except Exception as exc:
        logger.warning("[card_data] pitcher rolling form: %s", exc)
        rolling = {}

    metrics = _compute_pitcher_metrics(features)
    readout = build_pitcher_readout(metrics, features)
    dq = "limited" if used_fallback else _data_quality_pitcher(features)
    team = _get_player_team(statcast_df, player_name)
    tf_label = _build_timeframe_label(mode, year, date, start_date, end_date)
    if used_fallback:
        tf_label = f"{tf_label}_fallback"

    card_id = _sanitize_id(f"{player_name}_{tf_label}_pitcher_{_utc_epoch_seconds()}")

    payload = {
        "card_type": "pitcher",
        "player_name": player_name,
        "team": team,
        "timeframe": tf_label,
        "data_quality": dq,
        "card_id": card_id,
        "metrics": metrics,
        "summary": {
            "avg_release_speed": features.get("avg_release_speed"),
            "avg_release_spin_rate": features.get("avg_release_spin_rate"),
            "ev_allowed": features.get("ev_allowed"),
            "barrel_rate_allowed": features.get("barrel_rate_allowed"),
            "swstr_rate": features.get("swstr_rate"),
        },
        "damage": {
            "gb_rate_allowed": features.get("gb_rate_allowed"),
            "fb_rate_allowed": features.get("fb_rate_allowed"),
            "ld_rate_allowed": features.get("ld_rate_allowed"),
            "popup_rate_allowed": features.get("popup_rate_allowed"),
        },
        "rolling": rolling,
        "readout": readout,
        "sample_size": features.get("sample_size", 0),
        "windowed_df": windowed_df,
    }
    logger.info(
        "[card_generated] player=%s type=pitcher range=%s quality=%s",
        player_name, tf_label, dq,
    )
    return payload


def _flag_total(frame: pd.DataFrame, column: str) -> int:
    """Sum a 0/1-style column as an int; a missing column or bad values count as 0."""
    if column not in frame.columns:
        return 0
    return int(pd.to_numeric(frame[column], errors="coerce").fillna(0).sum())


def _launch_speeds(frame: pd.DataFrame) -> pd.Series:
    """Numeric, non-null ``launch_speed`` values; empty when the column is absent."""
    if "launch_speed" not in frame.columns:
        return pd.Series(dtype="float64")
    return pd.to_numeric(frame["launch_speed"], errors="coerce").dropna()


def _ev90(speeds: pd.Series):
    """90th-percentile exit velocity, or None with fewer than three readings."""
    return float(speeds.quantile(0.90)) if len(speeds) >= 3 else None


def build_game_summary_card_data(
    game_pk: str | int,
    statcast_df: pd.DataFrame,
    game_row: dict,
    player_name: str | None = None,
    selected_hitters: list[dict] | None = None,
    selected_pitchers: list[dict] | None = None,
    batter_log_df: pd.DataFrame | None = None,
) -> dict:
    """Assemble the payload dict for a single-game summary card.

    Per-hitter rows (HR, hits, EV90, barrels) come from ``batter_log_df``
    when it is non-empty and has a ``batter_name`` column; otherwise they
    are derived from the game's rows in ``statcast_df`` grouped by
    ``player_name``. Rows are ranked by HR count, then EV90, and the top
    eight are kept. ``player_name`` filters the statcast rows only. Missing
    optional columns are treated as "no data" rather than raising.
    """
    if game_row is None:
        game_row = {}

    gdf = pd.DataFrame()
    if statcast_df is not None and not statcast_df.empty and "game_pk" in statcast_df.columns:
        gdf = statcast_df[statcast_df["game_pk"].astype(str) == str(game_pk)].copy()
        if player_name and not gdf.empty:
            if "player_name" in gdf.columns:
                gdf = gdf[gdf["player_name"].astype(str) == str(player_name)]
            else:
                # A player filter was requested but cannot be evaluated:
                # nothing matches, rather than returning every player's rows.
                gdf = gdf.iloc[0:0]

    hitter_rows = []
    has_batter_log = (
        batter_log_df is not None
        and not batter_log_df.empty
        and "batter_name" in batter_log_df.columns
    )
    if has_batter_log:
        # PA-level path: live_batter_game_log_2026 (one row per PA, proper batter identity)
        bgl = batter_log_df
        if "game_pk" in bgl.columns:
            bgl = bgl[bgl["game_pk"].astype(str) == str(game_pk)]
        for bname, bgrp in bgl.groupby("batter_name"):
            hitter_rows.append({
                "player_name": bname,
                "hr": _flag_total(bgrp, "hr_flag"),
                "hits": _flag_total(bgrp, "hit_flag"),
                "ev90": _ev90(_launch_speeds(bgrp)),
                "barrels": _flag_total(bgrp, "barrel"),
            })
    elif not gdf.empty and "events" in gdf.columns and "player_name" in gdf.columns:
        # Fallback: pitch-level grouping (used when batter_log_df unavailable)
        for pname, pgrp in gdf.groupby("player_name"):
            speeds = _launch_speeds(pgrp)
            hitter_rows.append({
                "player_name": pname,
                "hr": int((pgrp["events"] == "home_run").sum()),
                "hits": int(pgrp["events"].isin(["single", "double", "triple", "home_run"]).sum()),
                "ev90": _ev90(speeds),
                # No barrel column on this path: batted balls at >= 98 are
                # counted instead.
                "barrels": int((speeds >= 98).sum()),
            })

    # Rank by home runs first, then EV90 (missing EV90 sorts as 0).
    hitter_rows.sort(key=lambda row: (row["hr"], row["ev90"] or 0), reverse=True)

    game_date_str = str(game_row.get("game_date", "—"))[:10]
    card_id = _sanitize_id(f"game_{game_pk}_{_utc_epoch_seconds()}")
    away = game_row.get("away_team", "—")
    home = game_row.get("home_team", "—")

    payload = {
        "card_type": "game_summary",
        "player_name": f"{away} @ {home}",
        "team": "—",
        "game_pk": str(game_pk),
        "away_team": away,
        "home_team": home,
        "away_score": game_row.get("away_score"),
        "home_score": game_row.get("home_score"),
        "game_date": game_date_str,
        "data_quality": "full" if not gdf.empty else "limited",
        "card_id": card_id,
        "hitters": hitter_rows[:8],
        "player_filter": player_name,
        "windowed_df": gdf,
        "timeframe": game_date_str,
        "selected_hitters": selected_hitters or [],
        "selected_pitchers": selected_pitchers or [],
    }
    logger.info(
        "[game_summary_generated] game_pk=%s date=%s quality=%s",
        game_pk, game_date_str, payload["data_quality"],
    )
    return payload