Spaces:
Running
Running
| """ | |
| analytics/props_mapper.py | |
| Maps sportsbook HR prop rows to the shared HR probability engine and computes | |
| edge for the Props page. | |
| """ | |
| from __future__ import annotations | |
| from typing import Any, Callable | |
| import pandas as pd | |
| from analytics.no_vig_props import american_to_implied_prob, compute_bet_ev, compute_edge | |
| from analytics.model_voice import build_hr_model_voice, build_strikeout_model_voice | |
| from data.mlb_starters import lookup_pitchers_for_game, lookup_batter_current_team | |
| from data.odds_name_map import map_odds_name_to_model_name | |
| from models.hr_probability_engine import build_hr_probability_result | |
| from models.pitcher_adjustment import build_pitcher_feature_row | |
| from visualization.cards.player_identity import normalize_for_matching, to_canonical_name | |
def build_strikeout_probability_result_v2(*args, **kwargs):
    """Forward all arguments to the v2 strikeout probability engine.

    The engine is imported inside the function, so the import only happens
    when this wrapper is actually called.
    """
    from models.strikeout_probability_engine_v2 import (
        build_strikeout_probability_result_v2 as engine_entry_point,
    )

    engine_result = engine_entry_point(*args, **kwargs)
    return engine_result
| def _build_statcast_name_index(statcast_df: pd.DataFrame) -> dict[str, str]: | |
| if statcast_df.empty or "player_name" not in statcast_df.columns: | |
| return {} | |
| index: dict[str, str] = {} | |
| for name in statcast_df["player_name"].astype(str).unique(): | |
| normalized = map_odds_name_to_model_name(name) | |
| if normalized not in index: | |
| index[normalized] = name | |
| canonical = to_canonical_name(name) | |
| if canonical != name: | |
| canonical_norm = map_odds_name_to_model_name(canonical) | |
| if canonical_norm not in index: | |
| index[canonical_norm] = name | |
| suffix_stripped = normalize_for_matching(canonical) | |
| if suffix_stripped and suffix_stripped not in index: | |
| index[suffix_stripped] = name | |
| return index | |
| def _build_game_context_from_row(row: Any) -> dict[str, Any]: | |
| return { | |
| "away_team": str(row.get("away_team", "") or "").strip(), | |
| "home_team": str(row.get("home_team", "") or "").strip(), | |
| "venue": str( | |
| row.get("venue") | |
| or row.get("stadium") | |
| or row.get("venue_name") | |
| or row.get("park") | |
| or "" | |
| ).strip(), | |
| "game_datetime_utc": str( | |
| row.get("game_datetime_utc") | |
| or row.get("commence_time") | |
| or "" | |
| ).strip(), | |
| "game_date": str(row.get("game_date", "") or "").strip(), | |
| "lineup_slot": row.get("lineup_slot"), | |
| "lineup_slot_source": row.get("lineup_slot_source"), | |
| "team_total": row.get("team_total"), | |
| "team_total_source": row.get("team_total_source"), | |
| } | |
| def _normalize_team_name(value: Any) -> str: | |
| return " ".join(str(value or "").strip().lower().split()) | |
| def _to_display_name(value: Any) -> str: | |
| return str(value or "").strip() | |
def _normalize_person_name(value: Any) -> str:
    """Return the comparison key used for person names in this module.

    The stripped text of ``value`` (``""`` for falsy input) is passed through
    ``to_canonical_name`` and then ``normalize_for_matching``.
    """
    stripped = str(value or "").strip()
    canonical = to_canonical_name(stripped)
    return normalize_for_matching(canonical)
def _names_match(left: Any, right: Any) -> bool:
    """Tell whether two person names share the same non-empty normalized key."""
    first_key = _normalize_person_name(left)
    second_key = _normalize_person_name(right)
    # An empty key on either side never counts as a match.
    if not first_key or not second_key:
        return False
    return bool(first_key == second_key)
| def _compute_verdict( | |
| bet_ev: float | None, | |
| edge: float | None, | |
| confidence_score: float | None, | |
| is_modeled: bool, | |
| ) -> str: | |
| if not is_modeled: | |
| return "tracked" | |
| try: | |
| ev = float(bet_ev if bet_ev is not None else -9.0) | |
| ed = float(edge if edge is not None else -9.0) | |
| conf = float(confidence_score if confidence_score is not None else 0.0) | |
| except Exception: | |
| return "pass" | |
| if ev >= 0.05 and ed >= 0.01 and conf >= 62: | |
| return "bet" | |
| if ev >= -0.03 and ed >= -0.01 and conf >= 45: | |
| return "watch" | |
| return "pass" | |
| def _confidence_display_remap(raw_score: float | None) -> float | None: | |
| try: | |
| raw = float(raw_score) | |
| except Exception: | |
| return None | |
| if raw <= 40.0: | |
| return max(1.0, min(100.0, raw)) | |
| return max(1.0, min(100.0, 40.0 + ((raw - 40.0) * 1.45))) | |
| def _normalize_confidence_components(value: Any) -> list[dict[str, Any]]: | |
| if not isinstance(value, list): | |
| return [] | |
| normalized: list[dict[str, Any]] = [] | |
| for item in value: | |
| if not isinstance(item, dict): | |
| continue | |
| label = str(item.get("label") or "").strip() | |
| if not label: | |
| continue | |
| try: | |
| component_value = float(item.get("value") or 0.0) | |
| except Exception: | |
| component_value = 0.0 | |
| normalized.append( | |
| { | |
| "label": label, | |
| "value": round(component_value, 1), | |
| "direction": str(item.get("direction") or "").strip().lower() or None, | |
| } | |
| ) | |
| return normalized | |
| def _select_confidence_primary_driver( | |
| penalties: list[dict[str, Any]], | |
| bonuses: list[dict[str, Any]], | |
| ) -> dict[str, Any] | None: | |
| penalty_candidates = [item for item in penalties if float(item.get("value") or 0.0) > 0.0] | |
| bonus_candidates = [item for item in bonuses if float(item.get("value") or 0.0) > 0.0] | |
| if penalty_candidates: | |
| return max(penalty_candidates, key=lambda item: float(item.get("value") or 0.0)) | |
| if bonus_candidates: | |
| return max(bonus_candidates, key=lambda item: float(item.get("value") or 0.0)) | |
| return None | |
def _build_strikeout_confidence_payload(
    probability_result: dict[str, Any],
) -> dict[str, Any]:
    """Assemble the confidence fields shown for a strikeout prop.

    Reads the raw score (``confidence_score_raw``, falling back to
    ``confidence_score`` when that key is absent), remaps it to the display
    scale, derives a display bucket (``"high"`` at 75+, ``"medium"`` at 55+,
    otherwise ``"low"``) and picks the primary driver from the normalized
    penalty and bonus components. At most five reasons are kept.

    A raw score that is missing, NaN or not convertible to float is reported
    as ``None`` for the raw score, the display score and the display bucket.
    """
    source = str(probability_result.get("confidence_source") or "strikeout_v2_live")
    raw_score = probability_result.get(
        "confidence_score_raw",
        probability_result.get("confidence_score"),
    )
    raw_bucket = probability_result.get("confidence_bucket")
    reasons = list(probability_result.get("confidence_reasons") or [])
    bonuses = _normalize_confidence_components(
        probability_result.get("confidence_component_bonuses")
    )
    penalties = _normalize_confidence_components(
        probability_result.get("confidence_component_penalties")
    )

    # A malformed score must not abort the whole payload; treat it as missing.
    try:
        raw_score_float = float(raw_score) if raw_score is not None else None
    except (TypeError, ValueError, OverflowError):
        raw_score_float = None
    # NaN is the only float that differs from itself; it carries no score.
    if raw_score_float is not None and raw_score_float != raw_score_float:
        raw_score_float = None

    display_score = _confidence_display_remap(raw_score_float)
    display_bucket = None
    if display_score is not None:
        if display_score >= 75:
            display_bucket = "high"
        elif display_score >= 55:
            display_bucket = "medium"
        else:
            display_bucket = "low"

    primary_driver = _select_confidence_primary_driver(penalties, bonuses)
    summary_label = str((primary_driver or {}).get("label") or "").strip() or None

    return {
        "confidence_score_raw": round(raw_score_float, 1) if raw_score_float is not None else None,
        "confidence_score_display": round(display_score, 1) if display_score is not None else None,
        "confidence_source": source,
        "confidence_component_bonuses": bonuses,
        "confidence_component_penalties": penalties,
        "confidence_primary_driver": primary_driver,
        "confidence_summary_label": summary_label,
        "confidence_bucket_raw": raw_bucket,
        "confidence_bucket_display": display_bucket,
        "confidence_reasons": reasons[:5],
    }
| def _classify_strikeout_probability_status( | |
| *, | |
| fair_prob: float | None, | |
| implied: float | None, | |
| pitcher_name: str, | |
| probability_result: dict[str, Any], | |
| ) -> str: | |
| if fair_prob is not None: | |
| return "modeled_ok" if implied is not None else "missing_implied_prob" | |
| if not str(pitcher_name or "").strip(): | |
| return "missing_pitcher_context" | |
| if str(probability_result.get("pitcher_resolution_status") or "").strip().lower() == "unresolved": | |
| return "missing_pitcher_context" | |
| if str(probability_result.get("projected_starter_match_status") or "").strip().lower() == "resolved_pitcher_mismatch": | |
| return "projected_starter_mismatch" | |
| return "empty_probability_result" | |
| def _classify_hr_probability_status( | |
| *, | |
| threshold_int: int, | |
| is_modeled: bool, | |
| model_prob: float | None, | |
| implied: float | None, | |
| probability_result: dict[str, Any], | |
| statcast_df: pd.DataFrame | None, | |
| pitcher_name: str, | |
| ) -> str: | |
| if threshold_int != 1 or not is_modeled: | |
| return "unmodeled_ladder" | |
| if model_prob is not None: | |
| return "modeled_ok" if implied is not None else "missing_implied_prob" | |
| if statcast_df is None or statcast_df.empty: | |
| return "missing_baseline" | |
| baseline_prob = probability_result.get("baseline_hr_prob") | |
| pitcher_status = str(probability_result.get("pitcher_resolution_status") or "").strip().lower() | |
| skipped_layers = str(probability_result.get("skipped_layers") or "").strip().lower() | |
| batter_rows_missing = baseline_prob is None | |
| if batter_rows_missing: | |
| return "missing_baseline" | |
| if implied is None: | |
| return "missing_implied_prob" | |
| if not str(pitcher_name or "").strip(): | |
| return "missing_pitcher_context" | |
| if pitcher_status in {"pitcher_missing", "unresolved", "matchup_incomplete"}: | |
| return "missing_pitcher_context" | |
| if "pitcher_missing" in skipped_layers or "matchup_incomplete" in skipped_layers: | |
| return "missing_pitcher_context" | |
| if baseline_prob is not None: | |
| return "empty_probability_result" | |
| return "unknown" | |
| def _infer_batter_team( | |
| batter_name: str, | |
| batter_statcast_df: pd.DataFrame, | |
| ) -> str: | |
| if ( | |
| batter_statcast_df is None | |
| or batter_statcast_df.empty | |
| or not batter_name | |
| or "player_name" not in batter_statcast_df.columns | |
| ): | |
| return "" | |
| normalized_target = _normalize_person_name(batter_name) | |
| player_rows = batter_statcast_df[ | |
| batter_statcast_df["player_name"].astype(str).map(_normalize_person_name) == normalized_target | |
| ].copy() | |
| if player_rows.empty: | |
| return "" | |
| team_values: list[str] = [] | |
| if {"inning_topbot", "home_team", "away_team"}.issubset(player_rows.columns): | |
| inning_half = player_rows["inning_topbot"].fillna("").astype(str).str.lower() | |
| top_mask = inning_half.str.contains("top") | |
| bottom_mask = inning_half.str.contains("bot|bottom") | |
| if top_mask.any(): | |
| team_values.extend( | |
| player_rows.loc[top_mask, "away_team"].dropna().astype(str).tolist() | |
| ) | |
| if bottom_mask.any(): | |
| team_values.extend( | |
| player_rows.loc[bottom_mask, "home_team"].dropna().astype(str).tolist() | |
| ) | |
| for col in ["team", "batter_team", "team_name"]: | |
| if col in player_rows.columns: | |
| team_values.extend(player_rows[col].dropna().astype(str).tolist()) | |
| normalized = [_normalize_team_name(v) for v in team_values if str(v).strip()] | |
| if not normalized: | |
| return "" | |
| return pd.Series(normalized).mode().iloc[0] | |
def _resolve_batter_team(
    row: Any,
    batter_name: str,
    batter_statcast_df: pd.DataFrame,
    *,
    current_season: int = 2026,
) -> tuple[str, str]:
    """Work out which of the game's two teams the batter plays for.

    Sources are tried in order and the first hit wins:

    1. Team columns on the prop row itself.
    2. The team inferred from the batter's ``current_season`` Statcast rows
       (only when a ``source_season`` column exists).
    3. The team inferred from all of the batter's Statcast rows.
    4. ``lookup_batter_current_team`` for the two teams in the game.

    A Statcast-inferred team is only accepted when it equals the normalized
    away or home team of this row; otherwise resolution falls through to the
    next source rather than returning a team that is not in this game.

    Args:
        row: Prop row exposing ``.get``.
        batter_name: Batter name as it appears in the Statcast frame.
        batter_statcast_df: Batter Statcast rows, or ``None``.
        current_season: Value of ``source_season`` treated as the current
            season. Defaults to 2026.

    Returns:
        ``(team_display_name, source)``; ``("", "unresolved")`` when every
        source fails.
    """
    row_team, row_source = _resolve_batter_team_from_row_context(
        row=row,
        batter_name=batter_name,
    )
    if row_team:
        return (row_team, row_source)

    away_team = _to_display_name(row.get("away_team"))
    home_team = _to_display_name(row.get("home_team"))
    away_norm = _normalize_team_name(away_team)
    home_norm = _normalize_team_name(home_team)

    def _match_game_team(inferred_team: str) -> str:
        # Display name of the game team equal to ``inferred_team``, else "".
        if inferred_team:
            if inferred_team == away_norm and away_team:
                return away_team
            if inferred_team == home_norm and home_team:
                return home_team
        return ""

    statcast_ok = (
        batter_statcast_df is not None
        and not batter_statcast_df.empty
        and batter_name
        and "player_name" in batter_statcast_df.columns
    )
    if statcast_ok:
        target_key = _normalize_person_name(batter_name)
        name_keys = batter_statcast_df["player_name"].astype(str).map(_normalize_person_name)
        player_rows = batter_statcast_df[name_keys == target_key]
        if not player_rows.empty:
            if "source_season" in player_rows.columns:
                seasons = pd.to_numeric(player_rows["source_season"], errors="coerce")
                current_rows = player_rows[seasons == current_season]
                current_match = _match_game_team(
                    _infer_batter_team(batter_name=batter_name, batter_statcast_df=current_rows)
                )
                if current_match:
                    return (current_match, "current_season_statcast")

            historical_match = _match_game_team(
                _infer_batter_team(batter_name=batter_name, batter_statcast_df=player_rows)
            )
            if historical_match:
                return (historical_match, "historical_statcast")
            # A historical team that is in neither side of this game means the
            # player changed teams; fall through to the roster lookup instead
            # of returning a stale team name.

    # Current-season roster lookup (handles offseason moves and new players).
    if batter_name:
        roster_team = lookup_batter_current_team(batter_name, away_team or "", home_team or "")
        if roster_team:
            return (roster_team, "mlb_roster_lookup")
    return ("", "unresolved")
def _resolve_batter_team_from_row_context(
    row: Any,
    batter_name: str,
) -> tuple[str, str]:
    """Resolve the batter's team from team columns on the prop row.

    The ``batter_team``, ``player_team``, ``team`` and ``team_name`` fields
    are checked in that order; the first one whose normalized value equals the
    row's away or home team wins. Returns ``(team_display_name, "row_<key>")``
    or ``("", "unknown")``. ``batter_name`` is accepted but not used here.
    """
    away_team = _to_display_name(row.get("away_team"))
    home_team = _to_display_name(row.get("home_team"))
    # Away is listed first so it wins if both sides normalize identically.
    game_sides = (
        (_normalize_team_name(away_team), away_team),
        (_normalize_team_name(home_team), home_team),
    )

    for key in ("batter_team", "player_team", "team", "team_name"):
        candidate_norm = _normalize_team_name(_to_display_name(row.get(key)))
        for side_norm, side_name in game_sides:
            if side_name and candidate_norm == side_norm:
                return (side_name, f"row_{key}")
    return ("", "unknown")
| def _infer_lineup_slot( | |
| batter_name: str, | |
| batter_statcast_df: pd.DataFrame, | |
| ) -> tuple[int | None, str]: | |
| if ( | |
| batter_statcast_df is None | |
| or batter_statcast_df.empty | |
| or not batter_name | |
| or "player_name" not in batter_statcast_df.columns | |
| ): | |
| return (None, "unknown") | |
| player_rows = batter_statcast_df[ | |
| batter_statcast_df["player_name"].astype(str).str.casefold() == batter_name.casefold() | |
| ].copy() | |
| if player_rows.empty: | |
| return (None, "unknown") | |
| for col in ["lineup_slot", "lineup_position", "batting_order", "bat_order"]: | |
| if col not in player_rows.columns: | |
| continue | |
| numeric = pd.to_numeric(player_rows[col], errors="coerce").dropna() | |
| numeric = numeric[(numeric >= 1) & (numeric <= 9)] | |
| if not numeric.empty: | |
| mode = numeric.round().astype(int).mode() | |
| if not mode.empty: | |
| return (int(mode.iloc[0]), "projected") | |
| return (None, "unknown") | |
| def _resolve_pitcher_hand( | |
| pitcher_name: str, | |
| pitcher_statcast_df: pd.DataFrame | None, | |
| ) -> tuple[str, str]: | |
| if not pitcher_name or pitcher_statcast_df is None or pitcher_statcast_df.empty: | |
| return ("", "unavailable") | |
| if {"player_name", "p_throws"}.issubset(pitcher_statcast_df.columns): | |
| direct_rows = pitcher_statcast_df[ | |
| pitcher_statcast_df["player_name"].astype(str).map(_normalize_person_name) == _normalize_person_name(pitcher_name) | |
| ].copy() | |
| if not direct_rows.empty: | |
| direct_hand = str(direct_rows.iloc[0].get("p_throws") or "").strip().upper() | |
| if direct_hand: | |
| return (direct_hand, "statcast_direct") | |
| try: | |
| pitcher_row = build_pitcher_feature_row( | |
| statcast_df=pitcher_statcast_df, | |
| pitcher_name=pitcher_name, | |
| ) | |
| hand = str(pitcher_row.get("p_throws") or "").strip().upper() | |
| return (hand, "pitcher_feature_row" if hand else "unavailable") | |
| except Exception: | |
| return ("", "unavailable") | |
| def _resolve_team_total( | |
| row: Any, | |
| batter_team: str, | |
| ) -> tuple[float | None, str]: | |
| direct_keys = ["team_total", "implied_team_total", "batter_team_total"] | |
| for key in direct_keys: | |
| value = row.get(key) | |
| try: | |
| if value is not None and str(value).strip() not in {"", "nan", "None"}: | |
| return (float(value), "projected") | |
| except Exception: | |
| continue | |
| away_norm = _normalize_team_name(row.get("away_team")) | |
| home_norm = _normalize_team_name(row.get("home_team")) | |
| batter_team_norm = _normalize_team_name(batter_team) | |
| if batter_team_norm and batter_team_norm == away_norm: | |
| for key in ["away_team_total", "away_implied_total"]: | |
| value = row.get(key) | |
| try: | |
| if value is not None and str(value).strip() not in {"", "nan", "None"}: | |
| return (float(value), "projected") | |
| except Exception: | |
| continue | |
| if batter_team_norm and batter_team_norm == home_norm: | |
| for key in ["home_team_total", "home_implied_total"]: | |
| value = row.get(key) | |
| try: | |
| if value is not None and str(value).strip() not in {"", "nan", "None"}: | |
| return (float(value), "projected") | |
| except Exception: | |
| continue | |
| return (None, "unknown") | |
| def _resolve_pitcher_name( | |
| row: Any, | |
| batter_team: str, | |
| probable_starters: dict | None, | |
| ) -> tuple[str, str, str]: | |
| explicit_pitcher = str( | |
| row.get("pitcher_name") | |
| or row.get("pitcher") | |
| or row.get("opposing_pitcher") | |
| or "" | |
| ).strip() | |
| away_team = str(row.get("away_team") or "").strip() | |
| home_team = str(row.get("home_team") or "").strip() | |
| if explicit_pitcher and (not away_team or not home_team or not probable_starters): | |
| return (explicit_pitcher, "row_explicit", "resolved") | |
| if not probable_starters: | |
| return ("", "probable_starters_unavailable", "unresolved") | |
| if not away_team or not home_team: | |
| return ("", "matchup_incomplete", "unresolved") | |
| starters = lookup_pitchers_for_game( | |
| away_team=away_team, | |
| home_team=home_team, | |
| starters_map=probable_starters, | |
| ) | |
| if not starters: | |
| return ("", "matchup_not_found", "unresolved") | |
| away_norm = _normalize_team_name(away_team) | |
| home_norm = _normalize_team_name(home_team) | |
| batter_team_norm = _normalize_team_name(batter_team) | |
| home_pitcher = str(starters.get("home_pitcher") or "").strip() | |
| away_pitcher = str(starters.get("away_pitcher") or "").strip() | |
| if explicit_pitcher: | |
| if _names_match(home_pitcher, explicit_pitcher) or _names_match(away_pitcher, explicit_pitcher): | |
| return (explicit_pitcher, "row_explicit_validated", "resolved") | |
| if batter_team_norm and batter_team_norm == away_norm: | |
| return ( | |
| home_pitcher, | |
| "probable_starters_matchup", | |
| "resolved", | |
| ) | |
| if batter_team_norm and batter_team_norm == home_norm: | |
| return ( | |
| away_pitcher, | |
| "probable_starters_matchup", | |
| "resolved", | |
| ) | |
| if home_pitcher and not away_pitcher: | |
| return (home_pitcher, "probable_starters_single_side", "resolved") | |
| if away_pitcher and not home_pitcher: | |
| return (away_pitcher, "probable_starters_single_side", "resolved") | |
| if explicit_pitcher: | |
| return (explicit_pitcher, "row_explicit_unvalidated", "resolved") | |
| return ("", "batter_team_unresolved", "unresolved") | |
| def _lookup_projected_starter_context( | |
| row: Any, | |
| probable_starters: dict | None, | |
| ) -> dict[str, Any]: | |
| away_team = str(row.get("away_team") or "").strip() | |
| home_team = str(row.get("home_team") or "").strip() | |
| out = { | |
| "projected_home_pitcher": "", | |
| "projected_away_pitcher": "", | |
| "projected_starter_available": False, | |
| "projected_starter_source": "probable_starters_unavailable" if not probable_starters else "matchup_incomplete", | |
| "projected_home_pitcher_source": "", | |
| "projected_away_pitcher_source": "", | |
| "starter_cache_source": "probable_starters_unavailable" if not probable_starters else "matchup_incomplete", | |
| "fallback_used": False, | |
| } | |
| if not probable_starters or not away_team or not home_team: | |
| return out | |
| starters = lookup_pitchers_for_game( | |
| away_team=away_team, | |
| home_team=home_team, | |
| starters_map=probable_starters, | |
| ) | |
| if not starters: | |
| out["projected_starter_source"] = "matchup_not_found" | |
| return out | |
| projected_home = str(starters.get("home_pitcher") or "").strip() | |
| projected_away = str(starters.get("away_pitcher") or "").strip() | |
| out.update( | |
| { | |
| "projected_home_pitcher": projected_home, | |
| "projected_away_pitcher": projected_away, | |
| "projected_starter_available": bool(projected_home or projected_away), | |
| "projected_starter_source": str(starters.get("starter_cache_source") or "probable_starters_matchup"), | |
| "projected_home_pitcher_source": str(starters.get("home_pitcher_source") or ""), | |
| "projected_away_pitcher_source": str(starters.get("away_pitcher_source") or ""), | |
| "starter_cache_source": str(starters.get("starter_cache_source") or "probable_starters_matchup"), | |
| "fallback_used": bool(starters.get("fallback_used")), | |
| } | |
| ) | |
| return out | |
| def _projected_starter_match_status( | |
| resolved_pitcher_name: str, | |
| projected_home_pitcher: str, | |
| projected_away_pitcher: str, | |
| ) -> str: | |
| resolved = str(resolved_pitcher_name or "").strip() | |
| if not projected_home_pitcher and not projected_away_pitcher: | |
| return "projected_starter_unavailable" | |
| if not resolved: | |
| return "projected_starter_available_but_unresolved" | |
| if _names_match(projected_home_pitcher, resolved): | |
| return "matched_projected_home" | |
| if _names_match(projected_away_pitcher, resolved): | |
| return "matched_projected_away" | |
| return "resolved_pitcher_mismatch" | |
| def _resolve_pitcher_team_and_opponent( | |
| row: Any, | |
| pitcher_name: str, | |
| probable_starters: dict | None, | |
| ) -> tuple[str, str]: | |
| away_team = str(row.get("away_team") or "").strip() | |
| home_team = str(row.get("home_team") or "").strip() | |
| if not away_team or not home_team or not pitcher_name or not probable_starters: | |
| return ("", "") | |
| starters = lookup_pitchers_for_game( | |
| away_team=away_team, | |
| home_team=home_team, | |
| starters_map=probable_starters, | |
| ) | |
| if not starters: | |
| return ("", "") | |
| away_pitcher = str(starters.get("away_pitcher") or "").strip() | |
| home_pitcher = str(starters.get("home_pitcher") or "").strip() | |
| if _names_match(away_pitcher, pitcher_name): | |
| return (away_team, home_team) | |
| if _names_match(home_pitcher, pitcher_name): | |
| return (home_team, away_team) | |
| return ("", "") | |
def _resolve_strikeout_pitcher_name(
    row: Any,
    probable_starters: dict | None,
) -> tuple[str, str, str]:
    """Decide which pitcher a strikeout prop row refers to.

    The row's own player name (``player_name_raw``, else ``player_name``) is
    the starting point. Returns ``(pitcher_name, source, status)``:

    * row name matches a probable starter -> ``"row_explicit_validated"``
    * only one starter is listed for the game -> that starter,
      ``"probable_starters_single_side"``
    * both starters are listed and neither matches -> unresolved,
      ``"row_explicit_mismatch"``
    * no usable starter information -> the row name as ``"row_explicit"``
      (unresolved when the row name is blank)
    """
    listed_pitcher = _to_display_name(row.get("player_name_raw") or row.get("player_name"))
    away_team = str(row.get("away_team") or "").strip()
    home_team = str(row.get("home_team") or "").strip()

    if not listed_pitcher and not probable_starters:
        return ("", "missing_pitcher_name", "unresolved")

    # Used whenever the starters map cannot confirm or replace the row name.
    row_fallback = (
        listed_pitcher,
        "row_explicit",
        "resolved" if listed_pitcher else "unresolved",
    )
    if not (probable_starters and away_team and home_team):
        return row_fallback

    game_starters = lookup_pitchers_for_game(
        away_team=away_team,
        home_team=home_team,
        starters_map=probable_starters,
    )
    if not game_starters:
        return row_fallback

    projected_home = str(game_starters.get("home_pitcher") or "").strip()
    projected_away = str(game_starters.get("away_pitcher") or "").strip()

    if listed_pitcher and (
        _names_match(projected_home, listed_pitcher)
        or _names_match(projected_away, listed_pitcher)
    ):
        return (listed_pitcher, "row_explicit_validated", "resolved")

    if projected_home and projected_away:
        return ("", "row_explicit_mismatch", "unresolved")
    if projected_home:
        return (projected_home, "probable_starters_single_side", "resolved")
    if projected_away:
        return (projected_away, "probable_starters_single_side", "resolved")
    return row_fallback
| def _extract_team_batters_from_statcast( | |
| team_name: str, | |
| batter_statcast_df: pd.DataFrame | None, | |
| max_players: int = 9, | |
| ) -> list[str]: | |
| if ( | |
| not team_name | |
| or batter_statcast_df is None | |
| or batter_statcast_df.empty | |
| or "player_name" not in batter_statcast_df.columns | |
| ): | |
| return [] | |
| team_norm = _normalize_team_name(team_name) | |
| if not team_norm: | |
| return [] | |
| working = batter_statcast_df.copy() | |
| if "source_season" in working.columns: | |
| current_rows = working[pd.to_numeric(working["source_season"], errors="coerce") == 2026].copy() | |
| if not current_rows.empty: | |
| working = current_rows | |
| players = ( | |
| working.get("player_name", pd.Series(dtype="object")) | |
| .dropna() | |
| .astype(str) | |
| .tolist() | |
| ) | |
| if not players: | |
| return [] | |
| matched_names: list[str] = [] | |
| seen_norms: set[str] = set() | |
| for player_name in players: | |
| inferred_team = _infer_batter_team(player_name, working) | |
| if inferred_team != team_norm: | |
| continue | |
| player_norm = _normalize_person_name(player_name) | |
| if not player_norm or player_norm in seen_norms: | |
| continue | |
| seen_norms.add(player_norm) | |
| matched_names.append(player_name) | |
| if len(matched_names) >= max_players: | |
| break | |
| return matched_names | |
| def _lookup_baseline_metadata( | |
| statcast_df: pd.DataFrame | None, | |
| player_name: str, | |
| ) -> dict[str, Any]: | |
| default = { | |
| "baseline_mode": None, | |
| "prior_sample_size": None, | |
| "season_2026_sample_size": None, | |
| "prior_weight": None, | |
| "season_2026_weight": None, | |
| "baseline_driver": None, | |
| "rolling_overlay_active": None, | |
| } | |
| if ( | |
| statcast_df is None | |
| or statcast_df.empty | |
| or not player_name | |
| or "player_name" not in statcast_df.columns | |
| ): | |
| return default | |
| normalized_target = _normalize_person_name(player_name) | |
| if not normalized_target: | |
| return default | |
| normalized_series = statcast_df["player_name"].astype(str).map(_normalize_person_name) | |
| rows = statcast_df[normalized_series == normalized_target].copy() | |
| if rows.empty: | |
| return default | |
| first_row = rows.iloc[0] | |
| return { | |
| "baseline_mode": first_row.get("baseline_mode"), | |
| "prior_sample_size": first_row.get("prior_sample_size"), | |
| "season_2026_sample_size": first_row.get("season_2026_sample_size"), | |
| "prior_weight": first_row.get("prior_weight"), | |
| "season_2026_weight": first_row.get("season_2026_weight"), | |
| "baseline_driver": first_row.get("baseline_driver"), | |
| "rolling_overlay_active": first_row.get("rolling_overlay_active"), | |
| } | |
def get_player_hr_prob(
    player_name_normalized: str,
    statcast_df: pd.DataFrame,
    _name_index: dict[str, str] | None = None,
) -> tuple[float | None, str]:
    """
    Return ``(probability, source)`` for callers of the older two-value API.

    The normalized name is translated to its Statcast spelling through
    ``_name_index`` (built from ``statcast_df`` when not supplied; unknown
    names are used as given) and the shared engine is run in ``"pregame"``
    mode. The probability is the engine's ``calibrated_hr_prob``; when that
    is missing the result is ``(None, "unavailable")``.
    """
    if _name_index is None:
        lookup = _build_statcast_name_index(statcast_df)
    else:
        lookup = _name_index
    statcast_name = lookup.get(player_name_normalized, player_name_normalized)

    engine_result = build_hr_probability_result(
        batter_statcast_df=statcast_df,
        batter_name=statcast_name,
        mode="pregame",
    )
    calibrated = engine_result.get("calibrated_hr_prob")
    if calibrated is not None:
        return (float(calibrated), "shared_pregame_engine")
    return (None, "unavailable")
| def map_hr_props_to_model( | |
| props_df: pd.DataFrame, | |
| statcast_df: pd.DataFrame, | |
| prob_fn: Callable[..., Any] | None = None, | |
| pitcher_stats_df: pd.DataFrame | None = None, | |
| pitcher_statcast_df: pd.DataFrame | None = None, | |
| probable_starters: dict | None = None, | |
| ) -> pd.DataFrame: | |
| """ | |
| Join HR prop rows to shared-engine HR probabilities and compute edge. | |
| Adds columns: | |
| implied_prob, model_hr_prob, model_hr_prob_source, edge | |
| and shared-engine diagnostics: | |
| baseline_hr_prob, pregame_hr_prob, probability_mode, | |
| component adjustment columns, applied_layers, skipped_layers | |
| """ | |
| del prob_fn | |
| if props_df.empty: | |
| return pd.DataFrame() | |
| hr_df = props_df[props_df["market"] == "hr"].copy() | |
| if hr_df.empty: | |
| return pd.DataFrame() | |
| pitcher_df = ( | |
| pitcher_statcast_df | |
| if pitcher_statcast_df is not None | |
| else pitcher_stats_df | |
| if pitcher_stats_df is not None | |
| else statcast_df | |
| ) | |
| name_index = _build_statcast_name_index(statcast_df) | |
| runtime_cache: dict[str, Any] = {"name_index": name_index} | |
| projected_starter_cache: dict[tuple[str, str, str], dict[str, Any]] = {} | |
| batter_team_cache: dict[tuple[str, str, str, str], tuple[str, str]] = {} | |
| pitcher_resolution_cache: dict[tuple[str, str, str, str], tuple[str, str, str]] = {} | |
| pitcher_hand_cache: dict[str, tuple[Any, Any]] = {} | |
| baseline_meta_cache: dict[tuple[int, str], dict[str, Any]] = {} | |
| lineup_slot_cache: dict[tuple[str, str, str], tuple[Any, Any]] = {} | |
| team_total_cache: dict[tuple[str, str, str, str], tuple[Any, Any]] = {} | |
| mapped_rows: list[dict[str, Any]] = [] | |
| for _, row in hr_df.iterrows(): | |
| odds = row.get("odds_american") | |
| batter_name_normalized = str(row.get("player_name") or "").strip() | |
| batter_name = name_index.get(batter_name_normalized, batter_name_normalized) | |
| threshold = row.get("threshold") | |
| try: | |
| threshold_int = int(threshold) if threshold is not None and str(threshold).strip() not in {"", "nan", "None"} else 1 | |
| except Exception: | |
| threshold_int = 1 | |
| is_modeled = bool(row.get("is_modeled")) if pd.notna(row.get("is_modeled")) else threshold_int == 1 | |
| batter_team_key = ( | |
| str(row.get("away_team") or "").strip().lower(), | |
| str(row.get("home_team") or "").strip().lower(), | |
| str(batter_name or "").strip().lower(), | |
| str(row.get("event_id") or "").strip(), | |
| ) | |
| if batter_team_key not in batter_team_cache: | |
| batter_team_cache[batter_team_key] = _resolve_batter_team( | |
| row=row, | |
| batter_name=batter_name, | |
| batter_statcast_df=statcast_df, | |
| ) | |
| batter_team, batter_team_source = batter_team_cache[batter_team_key] | |
| starter_key = ( | |
| str(row.get("away_team") or "").strip().lower(), | |
| str(row.get("home_team") or "").strip().lower(), | |
| str(row.get("event_id") or "").strip(), | |
| ) | |
| if starter_key not in projected_starter_cache: | |
| projected_starter_cache[starter_key] = _lookup_projected_starter_context( | |
| row=row, | |
| probable_starters=probable_starters, | |
| ) | |
| projected_starter_context = projected_starter_cache[starter_key] | |
| pitcher_resolution_key = ( | |
| starter_key[0], | |
| starter_key[1], | |
| str(batter_team or "").strip().lower(), | |
| str(row.get("pitcher_name") or row.get("pitcher") or "").strip().lower(), | |
| ) | |
| if pitcher_resolution_key not in pitcher_resolution_cache: | |
| pitcher_resolution_cache[pitcher_resolution_key] = _resolve_pitcher_name( | |
| row=row, | |
| batter_team=batter_team, | |
| probable_starters=probable_starters, | |
| ) | |
| pitcher_name, resolved_pitcher_source, pitcher_resolution_status = pitcher_resolution_cache[pitcher_resolution_key] | |
| projected_starter_match_status = _projected_starter_match_status( | |
| resolved_pitcher_name=pitcher_name, | |
| projected_home_pitcher=str(projected_starter_context.get("projected_home_pitcher") or ""), | |
| projected_away_pitcher=str(projected_starter_context.get("projected_away_pitcher") or ""), | |
| ) | |
| pitcher_hand_key = str(pitcher_name or "").strip().lower() | |
| if pitcher_hand_key not in pitcher_hand_cache: | |
| pitcher_hand_cache[pitcher_hand_key] = _resolve_pitcher_hand( | |
| pitcher_name=pitcher_name, | |
| pitcher_statcast_df=pitcher_df, | |
| ) | |
| pitcher_hand, pitcher_hand_source = pitcher_hand_cache[pitcher_hand_key] | |
| batter_meta_key = (id(statcast_df), str(batter_name or "").strip().lower()) | |
| if batter_meta_key not in baseline_meta_cache: | |
| baseline_meta_cache[batter_meta_key] = _lookup_baseline_metadata(statcast_df, batter_name) | |
| batter_baseline_meta = baseline_meta_cache[batter_meta_key] | |
| pitcher_meta_key = (id(pitcher_df), str(pitcher_name or "").strip().lower()) | |
| if pitcher_meta_key not in baseline_meta_cache: | |
| baseline_meta_cache[pitcher_meta_key] = _lookup_baseline_metadata(pitcher_df, pitcher_name) | |
| pitcher_baseline_meta = baseline_meta_cache[pitcher_meta_key] | |
| lineup_slot_key = ( | |
| str(batter_team or "").strip().lower(), | |
| str(batter_name or "").strip().lower(), | |
| str(pitcher_hand or "").strip().upper(), | |
| ) | |
| if lineup_slot_key not in lineup_slot_cache: | |
| lineup_slot, lineup_slot_source = _infer_lineup_slot( | |
| batter_name=batter_name, | |
| batter_statcast_df=statcast_df, | |
| ) | |
| lineup_slot_cache[lineup_slot_key] = (lineup_slot, lineup_slot_source) | |
| lineup_slot, lineup_slot_source = lineup_slot_cache[lineup_slot_key] | |
| team_total_key = ( | |
| str(row.get("away_team") or "").strip().lower(), | |
| str(row.get("home_team") or "").strip().lower(), | |
| str(batter_team or "").strip().lower(), | |
| str(row.get("event_id") or "").strip(), | |
| str(row.get("sportsbook") or "").strip().lower(), | |
| str(row.get("team_total") or row.get("away_team_total") or row.get("home_team_total") or "").strip(), | |
| ) | |
| if team_total_key not in team_total_cache: | |
| team_total_cache[team_total_key] = _resolve_team_total(row=row, batter_team=batter_team) | |
| team_total, team_total_source = team_total_cache[team_total_key] | |
| try: | |
| implied = american_to_implied_prob(odds) if odds is not None else None | |
| except Exception: | |
| implied = None | |
| if is_modeled: | |
| probability_result = build_hr_probability_result( | |
| batter_statcast_df=statcast_df, | |
| batter_name=batter_name, | |
| pitcher_statcast_df=pitcher_df, | |
| pitcher_name=pitcher_name, | |
| game_row={ | |
| **_build_game_context_from_row(row), | |
| "lineup_slot": lineup_slot, | |
| "lineup_slot_source": lineup_slot_source, | |
| "team_total": team_total, | |
| "team_total_source": team_total_source, | |
| "projected_home_pitcher": projected_starter_context.get("projected_home_pitcher"), | |
| "projected_away_pitcher": projected_starter_context.get("projected_away_pitcher"), | |
| "projected_starter_available": projected_starter_context.get("projected_starter_available"), | |
| "projected_starter_match_status": projected_starter_match_status, | |
| }, | |
| weather_row=None, | |
| mode="pregame", | |
| runtime_cache=runtime_cache, | |
| ) | |
| model_prob = probability_result.get("calibrated_hr_prob") | |
| if model_prob is not None and implied is not None: | |
| edge = compute_edge(model_prob, implied) | |
| bet_ev = compute_bet_ev(model_prob, odds) if odds is not None else None | |
| source = "shared_pregame_engine" | |
| else: | |
| edge = None | |
| bet_ev = None | |
| source = "unavailable" | |
| else: | |
| probability_result = { | |
| "baseline_hr_prob": None, | |
| "raw_hr_prob": None, | |
| "calibrated_hr_prob": None, | |
| "pregame_hr_prob": None, | |
| "mode": "pregame", | |
| "applied_layers": "", | |
| "skipped_layers": "unmodeled_hr_ladder", | |
| "confidence_score": None, | |
| "confidence_bucket": None, | |
| "confidence_reasons": [], | |
| "opportunity_hr_adjustment": None, | |
| "expected_pa": None, | |
| "pa_multiplier": None, | |
| "lineup_slot_used": lineup_slot, | |
| "lineup_slot_source": lineup_slot_source, | |
| "team_total_used": team_total, | |
| "team_total_source": team_total_source, | |
| "opportunity_mode": None, | |
| "opportunity_reason": None, | |
| "pregame_pitcher_context_adj": None, | |
| "pregame_park_context_adj": None, | |
| "pregame_weather_context_adj": None, | |
| "pregame_context_applied": False, | |
| "pitcher_hr_adjustment": None, | |
| "trend_hr_adjustment": None, | |
| "zone_hr_adjustment": None, | |
| "family_zone_hr_adjustment": None, | |
| "arsenal_hr_adjustment": None, | |
| "pulled_contact_hr_adjustment": None, | |
| "env_hr_adjustment": None, | |
| "park_hr_adjustment": None, | |
| "weather_hr_adjustment": None, | |
| "platoon_hr_adjustment": None, | |
| "trajectory_hr_adjustment": None, | |
| "rolling_hr_adjustment": None, | |
| "pitcher_reliability": None, | |
| "trend_reliability": None, | |
| "zone_reliability": None, | |
| "family_zone_reliability": None, | |
| "arsenal_reliability": None, | |
| "pulled_contact_reliability": None, | |
| "environment_reliability": None, | |
| "trajectory_reliability": None, | |
| "rolling_reliability": None, | |
| "opportunity_reliability": None, | |
| "matchup_platoon_multiplier": None, | |
| "matchup_platoon_reason": "unmodeled_hr_ladder", | |
| "pitcher_resolution_status": "unmodeled_hr_ladder", | |
| "zone_status": "unmodeled_hr_ladder", | |
| "family_zone_status": "unmodeled_hr_ladder", | |
| "arsenal_status": "unmodeled_hr_ladder", | |
| "reason_candidate_count": 0, | |
| "zone_store_sample_size": None, | |
| "family_zone_batter_sample_size": None, | |
| "family_zone_pitcher_sample_size": None, | |
| "arsenal_batter_sample_size": None, | |
| "arsenal_pitcher_sample_size": None, | |
| "model_voice_reason_candidates": [], | |
| "model_voice_tags": [], | |
| } | |
| model_prob = None | |
| edge = None | |
| bet_ev = None | |
| source = "unmodeled_hr_ladder" | |
| probability_status = _classify_hr_probability_status( | |
| threshold_int=threshold_int, | |
| is_modeled=is_modeled, | |
| model_prob=model_prob, | |
| implied=implied, | |
| probability_result=probability_result, | |
| statcast_df=statcast_df, | |
| pitcher_name=pitcher_name, | |
| ) | |
| row_dict = row.to_dict() | |
| row_dict.update( | |
| { | |
| "implied_prob": implied, | |
| "model_hr_prob": model_prob, | |
| "fair_prob": model_prob, | |
| "model_hr_prob_source": source, | |
| "model_hr_prob_source_detail": probability_result.get("applied_layers", ""), | |
| "edge": edge, | |
| "bet_ev": bet_ev, | |
| "baseline_hr_prob": probability_result.get("baseline_hr_prob"), | |
| "raw_hr_prob": probability_result.get("raw_hr_prob"), | |
| "calibrated_hr_prob": probability_result.get("calibrated_hr_prob"), | |
| "pregame_hr_prob": probability_result.get("pregame_hr_prob"), | |
| "probability_mode": probability_result.get("mode"), | |
| "formula_version": probability_result.get("formula_version"), | |
| "is_modeled": is_modeled, | |
| "threshold": threshold_int, | |
| "confidence_score": probability_result.get("confidence_score"), | |
| "confidence_bucket": probability_result.get("confidence_bucket"), | |
| "confidence_reasons": probability_result.get("confidence_reasons"), | |
| "opportunity_hr_adjustment": probability_result.get("opportunity_hr_adjustment"), | |
| "expected_pa": probability_result.get("expected_pa"), | |
| "pa_multiplier": probability_result.get("pa_multiplier"), | |
| "lineup_slot_used": probability_result.get("lineup_slot_used", lineup_slot), | |
| "lineup_slot_source": probability_result.get("lineup_slot_source", lineup_slot_source), | |
| "team_total_used": probability_result.get("team_total_used", team_total), | |
| "team_total_source": probability_result.get("team_total_source", team_total_source), | |
| "opportunity_mode": probability_result.get("opportunity_mode"), | |
| "opportunity_reason": probability_result.get("opportunity_reason"), | |
| "pregame_pitcher_context_adj": probability_result.get("pregame_pitcher_context_adj"), | |
| "pregame_park_context_adj": probability_result.get("pregame_park_context_adj"), | |
| "pregame_weather_context_adj": probability_result.get("pregame_weather_context_adj"), | |
| "pregame_context_applied": probability_result.get("pregame_context_applied", False), | |
| "pitcher_hr_adjustment": probability_result.get("pitcher_hr_adjustment"), | |
| "trend_hr_adjustment": probability_result.get("trend_hr_adjustment"), | |
| "zone_hr_adjustment": probability_result.get("zone_hr_adjustment"), | |
| "family_zone_hr_adjustment": probability_result.get("family_zone_hr_adjustment"), | |
| "arsenal_hr_adjustment": probability_result.get("arsenal_hr_adjustment"), | |
| "pulled_contact_hr_adjustment": probability_result.get("pulled_contact_hr_adjustment"), | |
| "env_hr_adjustment": probability_result.get("env_hr_adjustment"), | |
| "park_hr_adjustment": probability_result.get("park_hr_adjustment"), | |
| "weather_hr_adjustment": probability_result.get("weather_hr_adjustment"), | |
| "platoon_hr_adjustment": probability_result.get("platoon_hr_adjustment"), | |
| "trajectory_hr_adjustment": probability_result.get("trajectory_hr_adjustment"), | |
| "rolling_hr_adjustment": probability_result.get("rolling_hr_adjustment"), | |
| "damage_zone_alignment_subscore": probability_result.get("damage_zone_alignment_subscore"), | |
| "pitch_mix_exposure_subscore": probability_result.get("pitch_mix_exposure_subscore"), | |
| "tunnel_damage_subscore": probability_result.get("tunnel_damage_subscore"), | |
| "count_pattern_damage_subscore": probability_result.get("count_pattern_damage_subscore"), | |
| "handedness_damage_subscore": probability_result.get("handedness_damage_subscore"), | |
| "arsenal_fit_subscore": probability_result.get("arsenal_fit_subscore"), | |
| "environment_amplification_subscore": probability_result.get("environment_amplification_subscore"), | |
| "hr_opportunity_projection": probability_result.get("hr_opportunity_projection"), | |
| "matchup_coverage_confidence": probability_result.get("matchup_coverage_confidence"), | |
| "component_source_map": probability_result.get("component_source_map"), | |
| "expected_pitch_mix_by_count": probability_result.get("expected_pitch_mix_by_count"), | |
| "expected_zone_mix_by_count": probability_result.get("expected_zone_mix_by_count"), | |
| "expected_pitch_zone_mix_by_count": probability_result.get("expected_pitch_zone_mix_by_count"), | |
| "tunnel_pair_scores": probability_result.get("tunnel_pair_scores"), | |
| "predicted_attack_regions": probability_result.get("predicted_attack_regions"), | |
| "predicted_damage_regions": probability_result.get("predicted_damage_regions"), | |
| "predicted_whiff_regions": probability_result.get("predicted_whiff_regions"), | |
| "pitcher_reliability": probability_result.get("pitcher_reliability"), | |
| "trend_reliability": probability_result.get("trend_reliability"), | |
| "zone_reliability": probability_result.get("zone_reliability"), | |
| "family_zone_reliability": probability_result.get("family_zone_reliability"), | |
| "arsenal_reliability": probability_result.get("arsenal_reliability"), | |
| "pulled_contact_reliability": probability_result.get("pulled_contact_reliability"), | |
| "environment_reliability": probability_result.get("environment_reliability"), | |
| "trajectory_reliability": probability_result.get("trajectory_reliability"), | |
| "rolling_reliability": probability_result.get("rolling_reliability"), | |
| "opportunity_reliability": probability_result.get("opportunity_reliability"), | |
| "applied_layers": probability_result.get("applied_layers"), | |
| "skipped_layers": probability_result.get("skipped_layers"), | |
| "matchup_platoon_multiplier": probability_result.get("matchup_platoon_multiplier"), | |
| "matchup_platoon_reason": probability_result.get("matchup_platoon_reason"), | |
| "resolved_pitcher_name": pitcher_name, | |
| "projected_home_pitcher": projected_starter_context.get("projected_home_pitcher"), | |
| "projected_away_pitcher": projected_starter_context.get("projected_away_pitcher"), | |
| "projected_starter_available": projected_starter_context.get("projected_starter_available"), | |
| "projected_starter_source": projected_starter_context.get("projected_starter_source"), | |
| "projected_home_pitcher_source": projected_starter_context.get("projected_home_pitcher_source"), | |
| "projected_away_pitcher_source": projected_starter_context.get("projected_away_pitcher_source"), | |
| "starter_cache_source": projected_starter_context.get("starter_cache_source"), | |
| "fallback_used": projected_starter_context.get("fallback_used"), | |
| "projected_starter_match_status": projected_starter_match_status, | |
| "batter_team": batter_team, | |
| "batter_team_source": batter_team_source, | |
| "resolved_pitcher_source": resolved_pitcher_source, | |
| "pitcher_resolution_status": probability_result.get("pitcher_resolution_status", pitcher_resolution_status), | |
| "pitcher_hand": pitcher_hand, | |
| "pitcher_hand_source": pitcher_hand_source, | |
| "zone_status": probability_result.get("zone_status"), | |
| "family_zone_status": probability_result.get("family_zone_status"), | |
| "arsenal_status": probability_result.get("arsenal_status"), | |
| "reason_candidate_count": probability_result.get("reason_candidate_count"), | |
| "zone_store_sample_size": probability_result.get("zone_store_sample_size"), | |
| "family_zone_batter_sample_size": probability_result.get("family_zone_batter_sample_size"), | |
| "family_zone_pitcher_sample_size": probability_result.get("family_zone_pitcher_sample_size"), | |
| "arsenal_batter_sample_size": probability_result.get("arsenal_batter_sample_size"), | |
| "arsenal_pitcher_sample_size": probability_result.get("arsenal_pitcher_sample_size"), | |
| "model_voice_reason_candidates": probability_result.get("model_voice_reason_candidates", []), | |
| "model_voice_tags": probability_result.get("model_voice_tags", []), | |
| "selection_scope": row.get("selection_scope") or "player", | |
| "expected_modeled_hr_row": bool(threshold_int == 1 and str(row.get("market_family") or row.get("market") or "").strip().lower() == "hr"), | |
| "has_model_probability": model_prob is not None, | |
| "has_modeled_edge": edge is not None, | |
| "model_probability_status": probability_status, | |
| "modeled_row_available": model_prob is not None, | |
| "modeled_row_missing_reason": None if model_prob is not None else probability_status, | |
| "baseline_mode": batter_baseline_meta.get("baseline_mode"), | |
| "prior_sample_size": batter_baseline_meta.get("prior_sample_size"), | |
| "season_2026_sample_size": batter_baseline_meta.get("season_2026_sample_size"), | |
| "prior_weight": batter_baseline_meta.get("prior_weight"), | |
| "season_2026_weight": batter_baseline_meta.get("season_2026_weight"), | |
| "baseline_driver": batter_baseline_meta.get("baseline_driver"), | |
| "rolling_overlay_active": batter_baseline_meta.get("rolling_overlay_active"), | |
| "pitcher_baseline_mode": pitcher_baseline_meta.get("baseline_mode"), | |
| "pitcher_prior_sample_size": pitcher_baseline_meta.get("prior_sample_size"), | |
| "pitcher_season_2026_sample_size": pitcher_baseline_meta.get("season_2026_sample_size"), | |
| "pitcher_prior_weight": pitcher_baseline_meta.get("prior_weight"), | |
| "pitcher_season_2026_weight": pitcher_baseline_meta.get("season_2026_weight"), | |
| "pitcher_baseline_driver": pitcher_baseline_meta.get("baseline_driver"), | |
| "pitcher_rolling_overlay_active": pitcher_baseline_meta.get("rolling_overlay_active"), | |
| } | |
| ) | |
| row_dict["verdict"] = _compute_verdict( | |
| bet_ev=bet_ev, | |
| edge=edge, | |
| confidence_score=row_dict.get("confidence_score"), | |
| is_modeled=is_modeled, | |
| ) | |
| row_dict.update(build_hr_model_voice(row_dict)) | |
| mapped_rows.append(row_dict) | |
| result = pd.DataFrame(mapped_rows) | |
| if result.empty: | |
| return result | |
| has_edge = result["edge"].notna() | |
| with_edge = result[has_edge].sort_values("edge", ascending=False) | |
| without_edge = result[~has_edge] | |
| ordered = pd.concat([with_edge, without_edge], ignore_index=True) | |
| try: | |
| from analytics.execution_layer import enrich_with_execution_layer | |
| return enrich_with_execution_layer(ordered) | |
| except Exception: | |
| return ordered | |
# Engine result fields copied onto every mapped row under the same column name.
# Order matters: it determines the column order of the returned DataFrame.
_STRIKEOUT_ENGINE_PASSTHROUGH_FIELDS = (
    "expected_strikeouts",
    "expected_strikeouts_v2",
    "projected_pitch_count",
    "projected_batters_faced",
    "projected_innings",
    "pitches_per_bf",
    "opportunity_confidence",
    "opportunity_reasons",
    "projected_k_rate",
    "fair_prob_v2",
    "raw_k_prob_v2",
    "calibrated_k_prob_v2",
    "confidence_score_v2",
    "confidence_score_raw_v2",
    "confidence_score_display_v2",
    "confidence_source_v2",
    "confidence_bucket_v2",
    "confidence_reasons_v2",
    "confidence_component_bonuses_v2",
    "confidence_component_penalties_v2",
    "confidence_primary_driver_v2",
    "confidence_summary_label_v2",
    "k_rate_pitch_signal",
    "k_rate_anchor",
    "bb_rate_anchor",
    "command_efficiency_signal",
    "swing_miss_subscore",
    "called_strike_subscore",
    "command_efficiency_subscore",
    "lineup_whiff_subscore",
    "zone_matchup_subscore",
    "family_zone_matchup_subscore",
    "arsenal_fit_subscore",
    "tunneling_subscore",
    "release_consistency_subscore",
    "sequencing_subscore",
    "count_leverage_subscore",
    "leash_risk_subscore",
    "role_certainty_score",
    "times_through_order_penalty",
    "telemetry_path_status",
    "model_tier",
    "variance_band_low",
    "variance_band_high",
    "matchup_coverage_confidence",
    "component_source_map",
    "predicted_whiff_regions",
    "predicted_attack_regions",
    "predicted_damage_regions",
    "tunnel_pair_scores",
    "formula_version",
    "pitcher_swstr_rate",
    "pitcher_csw_rate",
    "pitcher_ball_rate",
)

# (output column, engine result field) pairs: columns that mirror an engine
# field under a different name.
_STRIKEOUT_ENGINE_ALIAS_FIELDS = (
    ("arsenal_whiff_risk", "arsenal_fit_subscore"),
    ("family_zone_whiff_risk", "family_zone_matchup_subscore"),
    ("zone_whiff_risk", "zone_matchup_subscore"),
    ("trajectory_tunnel_score", "tunneling_subscore"),
    ("trajectory_release_consistency_score", "release_consistency_subscore"),
    ("sequencing_score", "sequencing_subscore"),
)

# Projected-starter context fields forwarded to both the engine's game row and
# the mapped output row.
_STRIKEOUT_STARTER_CONTEXT_FIELDS = (
    "projected_home_pitcher",
    "projected_away_pitcher",
    "projected_starter_available",
    "projected_starter_source",
    "projected_home_pitcher_source",
    "projected_away_pitcher_source",
    "starter_cache_source",
    "fallback_used",
)

# Pitcher baseline metadata fields copied onto the mapped output row.
_STRIKEOUT_BASELINE_FIELDS = (
    "baseline_mode",
    "prior_sample_size",
    "season_2026_sample_size",
    "prior_weight",
    "season_2026_weight",
    "baseline_driver",
    "rolling_overlay_active",
)


def map_strikeout_props_to_model(
    props_df: pd.DataFrame,
    batter_statcast_df: pd.DataFrame,
    pitcher_statcast_df: pd.DataFrame | None = None,
    probable_starters: dict | None = None,
) -> pd.DataFrame:
    """Map strikeout ("k" market) prop rows to the v2 strikeout engine.

    For every row of ``props_df`` whose ``market`` equals ``"k"``
    (case-insensitive) this resolves the projected starters, the pitcher, the
    pitcher/opponent teams and the opponent batters, asks
    ``build_strikeout_probability_result_v2`` for a fair probability, and
    derives implied probability, edge, bet EV, confidence fields, a verdict and
    model-voice fields. Helper lookups and engine results are memoised per call
    so rows that share a game, pitcher or line do not repeat the work.

    Args:
        props_df: Sportsbook prop rows; must contain a ``market`` column.
        batter_statcast_df: Batter data, passed to the engine and used to
            extract the opponent batters.
        pitcher_statcast_df: Pitcher data; falls back to
            ``batter_statcast_df`` when ``None``.
        probable_starters: Optional probable-starter mapping forwarded to the
            resolution helpers.

    Returns:
        One output row per "k" prop row (original columns plus the model
        columns), or an empty DataFrame when there is nothing to map.

    A row whose ``line`` cannot be parsed as a number, or whose
    ``odds_american`` is missing (``None``/NaN) or cannot be converted, is kept
    in the output as an unmodeled row instead of aborting the whole mapping.
    """
    if props_df.empty:
        return pd.DataFrame()
    k_df = props_df[props_df["market"].astype(str).str.lower() == "k"].copy()
    if k_df.empty:
        return pd.DataFrame()

    def _key(value) -> str:
        # Normalised cache-key fragment; falsy values collapse to "".
        return str(value or "").strip().lower()

    def _is_missing(value) -> bool:
        # True for None and for scalar NaN/NA; pandas stores missing numeric
        # cells as NaN, which a plain ``is None`` test does not catch.
        if value is None:
            return True
        try:
            return bool(pd.isna(value))
        except (TypeError, ValueError):
            return False

    def _parse_line(raw_line):
        # Returns the prop line as a float, or None when it is absent or not
        # numeric (previously a non-numeric line raised out of the mapper).
        if _is_missing(raw_line) or str(raw_line).strip() in {"", "nan", "None"}:
            return None
        try:
            parsed = float(raw_line)
        except (TypeError, ValueError):
            return None
        return None if parsed != parsed else parsed  # NaN never equals itself

    pitcher_df = pitcher_statcast_df if pitcher_statcast_df is not None else batter_statcast_df
    runtime_cache: dict[str, Any] = {}
    projected_starter_cache: dict[tuple[str, str, str], dict[str, Any]] = {}
    pitcher_resolution_cache: dict[tuple[str, str, str], tuple[str, str, str]] = {}
    team_context_cache: dict[tuple[str, str, str], tuple[str, str]] = {}
    pitcher_hand_cache: dict[str, tuple[Any, Any]] = {}
    baseline_meta_cache: dict[tuple[int, str], dict[str, Any]] = {}
    lineup_cache: dict[str, list[str]] = {}
    strikeout_probability_cache: dict[tuple[Any, ...], dict[str, Any]] = {}
    mapped_rows: list[dict[str, Any]] = []

    for _, row in k_df.iterrows():
        selection_side = _key(row.get("selection_side"))
        line_value = _parse_line(row.get("line"))

        # Treat NaN odds like absent odds so they cannot yield a NaN edge.
        odds = row.get("odds_american")
        if _is_missing(odds):
            odds = None
        try:
            implied = american_to_implied_prob(odds) if odds is not None else None
        except Exception:
            # Best effort: an unconvertible price leaves the row unmodeled.
            implied = None
        if _is_missing(implied):
            implied = None

        away_key = _key(row.get("away_team"))
        home_key = _key(row.get("home_team"))

        starter_key = (away_key, home_key, str(row.get("event_id") or "").strip())
        if starter_key not in projected_starter_cache:
            projected_starter_cache[starter_key] = _lookup_projected_starter_context(
                row=row,
                probable_starters=probable_starters,
            )
        projected_starter_context = projected_starter_cache[starter_key]

        pitcher_resolution_key = (
            away_key,
            home_key,
            _key(row.get("pitcher_name") or row.get("pitcher") or row.get("player_name")),
        )
        if pitcher_resolution_key not in pitcher_resolution_cache:
            pitcher_resolution_cache[pitcher_resolution_key] = _resolve_strikeout_pitcher_name(
                row=row,
                probable_starters=probable_starters,
            )
        pitcher_name, resolved_pitcher_source, pitcher_resolution_status = pitcher_resolution_cache[
            pitcher_resolution_key
        ]

        # Team context shares the pitcher-resolution key.
        if pitcher_resolution_key not in team_context_cache:
            team_context_cache[pitcher_resolution_key] = _resolve_pitcher_team_and_opponent(
                row=row,
                pitcher_name=pitcher_name,
                probable_starters=probable_starters,
            )
        pitcher_team, opponent_team = team_context_cache[pitcher_resolution_key]

        projected_starter_match_status = _projected_starter_match_status(
            resolved_pitcher_name=pitcher_name,
            projected_home_pitcher=str(projected_starter_context.get("projected_home_pitcher") or ""),
            projected_away_pitcher=str(projected_starter_context.get("projected_away_pitcher") or ""),
        )

        # NOTE(review): the resolved pitcher hand is not consumed anywhere in
        # this mapper. The lookup is kept so helper behaviour is unchanged;
        # confirm whether it can be dropped.
        pitcher_hand_key = _key(pitcher_name)
        if pitcher_hand_key not in pitcher_hand_cache:
            pitcher_hand_cache[pitcher_hand_key] = _resolve_pitcher_hand(
                pitcher_name=pitcher_name,
                pitcher_statcast_df=pitcher_df,
            )

        pitcher_meta_key = (id(pitcher_df), _key(pitcher_name))
        if pitcher_meta_key not in baseline_meta_cache:
            baseline_meta_cache[pitcher_meta_key] = _lookup_baseline_metadata(pitcher_df, pitcher_name)
        pitcher_baseline_meta = baseline_meta_cache[pitcher_meta_key]

        lineup_key = _key(opponent_team)
        if lineup_key not in lineup_cache:
            lineup_cache[lineup_key] = _extract_team_batters_from_statcast(
                team_name=opponent_team,
                batter_statcast_df=batter_statcast_df,
            )
        opponent_batters = lineup_cache[lineup_key]

        starter_context_fields = {
            field: projected_starter_context.get(field)
            for field in _STRIKEOUT_STARTER_CONTEXT_FIELDS
        }

        canonical_game_row = _build_game_context_from_row(row)
        canonical_game_row.update(
            {
                **starter_context_fields,
                "projected_starter_match_status": projected_starter_match_status,
                "resolved_pitcher_name": pitcher_name,
                "resolved_pitcher_source": resolved_pitcher_source,
                "pitcher_resolution_status": pitcher_resolution_status,
                "pitcher_team": pitcher_team,
                "opponent_team": opponent_team,
            }
        )

        probability_cache_key = (
            _key(pitcher_name),
            tuple(_key(name) for name in opponent_batters),
            _key(opponent_team),
            line_value,
            selection_side,
            _key(canonical_game_row.get("away_team")),
            _key(canonical_game_row.get("home_team")),
            _key(canonical_game_row.get("projected_starter_match_status")),
        )
        if probability_cache_key not in strikeout_probability_cache:
            strikeout_probability_cache[probability_cache_key] = build_strikeout_probability_result_v2(
                pitcher_statcast_df=pitcher_df,
                pitcher_name=pitcher_name,
                batter_statcast_df=batter_statcast_df,
                opponent_batters=opponent_batters,
                opponent_team=opponent_team,
                line=line_value,
                selection_side=selection_side,
                game_row=canonical_game_row,
                runtime_cache=runtime_cache,
            )
        probability_result_v2 = strikeout_probability_cache[probability_cache_key]

        confidence_payload = _build_strikeout_confidence_payload(
            probability_result=probability_result_v2,
        )
        fair_prob = probability_result_v2.get("fair_prob")
        probability_status = _classify_strikeout_probability_status(
            fair_prob=fair_prob,
            implied=implied,
            pitcher_name=pitcher_name,
            probability_result={
                **probability_result_v2,
                "pitcher_resolution_status": pitcher_resolution_status,
                "projected_starter_match_status": projected_starter_match_status,
            },
        )

        if fair_prob is not None and implied is not None:
            # ``implied`` is only set when ``odds`` is present, so ``odds`` is
            # safe to price here.
            edge = compute_edge(fair_prob, implied)
            bet_ev = compute_bet_ev(fair_prob, odds)
            source = "shared_strikeout_engine_v2"
            is_modeled = True
        else:
            edge = None
            bet_ev = None
            source = "unavailable"
            is_modeled = False

        row_dict = row.to_dict()
        # Key order below defines the order of the newly added output columns.
        row_dict.update(
            {
                "selection_scope": row.get("selection_scope") or "pitcher",
                "is_modeled": is_modeled,
                "implied_prob": implied,
                "fair_prob": fair_prob,
                "model_k_prob": fair_prob,
                "bet_ev": bet_ev,
                "edge": edge,
                "confidence_score": confidence_payload.get("confidence_score_display"),
                "confidence_bucket": confidence_payload.get("confidence_bucket_display"),
                "confidence_reasons": confidence_payload.get("confidence_reasons"),
                "confidence_score_raw": confidence_payload.get("confidence_score_raw"),
                "confidence_score_display": confidence_payload.get("confidence_score_display"),
                "confidence_source": confidence_payload.get("confidence_source"),
                "confidence_component_bonuses": confidence_payload.get("confidence_component_bonuses"),
                "confidence_component_penalties": confidence_payload.get("confidence_component_penalties"),
                "confidence_primary_driver": confidence_payload.get("confidence_primary_driver"),
                "confidence_summary_label": confidence_payload.get("confidence_summary_label"),
                "confidence_bucket_raw": confidence_payload.get("confidence_bucket_raw"),
                "confidence_bucket_display": confidence_payload.get("confidence_bucket_display"),
                **{
                    field: probability_result_v2.get(field)
                    for field in _STRIKEOUT_ENGINE_PASSTHROUGH_FIELDS
                },
                **{
                    column: probability_result_v2.get(field)
                    for column, field in _STRIKEOUT_ENGINE_ALIAS_FIELDS
                },
                "applied_layers": probability_result_v2.get("applied_layers"),
                "skipped_layers": probability_result_v2.get("skipped_layers"),
                "model_k_prob_source": source,
                "model_k_prob_source_detail": probability_result_v2.get("applied_layers", ""),
                "resolved_pitcher_name": pitcher_name,
                "resolved_pitcher_source": resolved_pitcher_source,
                **starter_context_fields,
                "projected_starter_match_status": projected_starter_match_status,
                "pitcher_resolution_status": pitcher_resolution_status,
                "pitcher_team": pitcher_team,
                "opponent_team": opponent_team,
                "has_model_probability": fair_prob is not None,
                "has_modeled_edge": edge is not None,
                "model_probability_status": probability_status,
                "modeled_row_available": fair_prob is not None,
                "modeled_row_missing_reason": None if fair_prob is not None else probability_status,
                **{
                    field: pitcher_baseline_meta.get(field)
                    for field in _STRIKEOUT_BASELINE_FIELDS
                },
            }
        )
        row_dict["verdict"] = _compute_verdict(
            bet_ev=bet_ev,
            edge=edge,
            confidence_score=row_dict.get("confidence_score"),
            is_modeled=is_modeled,
        )
        row_dict.update(build_strikeout_model_voice(row_dict))
        mapped_rows.append(row_dict)

    return pd.DataFrame(mapped_rows)
def map_no_home_run_props(
    props_df: pd.DataFrame,
) -> pd.DataFrame:
    """Annotate No-HR prop rows as tracked-only markets without a model price.

    Rows whose ``market_family`` equals ``"no_hr"`` (case-insensitive) are
    copied and given the market-implied probability derived from
    ``odds_american``. Every model-derived field is left empty because this
    function applies no fair-probability model.

    Args:
        props_df: Sportsbook prop rows. Only the ``market_family`` and
            ``odds_american`` columns are read; the frame is not mutated.

    Returns:
        A copy of the matching rows (original index preserved) with the
        annotation columns added, or an empty ``DataFrame`` when the input is
        empty, has no ``market_family`` column, or contains no No-HR rows.
    """
    if props_df.empty or "market_family" not in props_df.columns:
        return pd.DataFrame()

    is_no_hr = props_df["market_family"].astype(str).str.lower() == "no_hr"
    no_hr_df = props_df[is_no_hr].copy()
    if no_hr_df.empty:
        return pd.DataFrame()

    # pandas stores missing prices as NaN/NA far more often than as None, so
    # use pd.notna to keep missing odds away from the odds converter.
    if "odds_american" in no_hr_df.columns:
        implied_probs = [
            american_to_implied_prob(odds) if pd.notna(odds) else None
            for odds in no_hr_df["odds_american"]
        ]
    else:
        implied_probs = [None] * len(no_hr_df)

    # Whole-column assignment: cell-wise `.at` writes are ambiguous on
    # duplicate index labels and unpack list values instead of storing them.
    no_hr_df["selection_scope"] = "game"
    no_hr_df["implied_prob"] = implied_probs
    for unmodeled_column in (
        "fair_prob",
        "edge",
        "bet_ev",
        "confidence_score",
        "confidence_bucket",
    ):
        no_hr_df[unmodeled_column] = None
    # One fresh list per row so later in-place edits cannot leak across rows.
    no_hr_df["confidence_reasons"] = [
        ["No-HR fair probability model not active yet"]
        for _ in range(len(no_hr_df))
    ]
    no_hr_df["verdict"] = "tracked"
    no_hr_df["model_voice_for"] = "Market is tracked for future release"
    no_hr_df["model_voice_against"] = "No-HR fair probability model is not active yet"
    return no_hr_df
def map_props_to_models(
    props_df: pd.DataFrame,
    statcast_df: pd.DataFrame,
    pitcher_statcast_df: pd.DataFrame | None = None,
    probable_starters: dict | None = None,
) -> pd.DataFrame:
    """Run every prop mapper over ``props_df`` and stack the non-empty results.

    The HR, strikeout and No-HR mappers are invoked in that order on the same
    props frame. Their outputs are concatenated row-wise with a fresh index.

    Args:
        props_df: Sportsbook prop rows handed to all three mappers.
        statcast_df: Forwarded positionally to the HR mapper and as
            ``batter_statcast_df`` to the strikeout mapper.
        pitcher_statcast_df: Optional frame forwarded unchanged to the HR and
            strikeout mappers.
        probable_starters: Optional mapping forwarded unchanged to the HR and
            strikeout mappers.

    Returns:
        The concatenation of every non-empty mapper output, or an empty
        ``DataFrame`` when all mappers return empty frames.
    """
    pitcher_kwargs = {
        "pitcher_statcast_df": pitcher_statcast_df,
        "probable_starters": probable_starters,
    }
    # Tuple elements are evaluated left to right, so the mappers still run in
    # HR -> strikeout -> No-HR order and the output rows keep that ordering.
    mapper_outputs = (
        map_hr_props_to_model(props_df, statcast_df, **pitcher_kwargs),
        map_strikeout_props_to_model(
            props_df,
            batter_statcast_df=statcast_df,
            **pitcher_kwargs,
        ),
        map_no_home_run_props(props_df),
    )
    populated = [output for output in mapper_outputs if not output.empty]
    if populated:
        return pd.concat(populated, ignore_index=True, sort=False)
    return pd.DataFrame()