""" analytics/props_mapper.py Maps sportsbook HR prop rows to the shared HR probability engine and computes edge for the Props page. """ from __future__ import annotations from typing import Any, Callable import pandas as pd from analytics.no_vig_props import american_to_implied_prob, compute_bet_ev, compute_edge from analytics.model_voice import build_hr_model_voice, build_strikeout_model_voice from data.mlb_starters import lookup_pitchers_for_game, lookup_batter_current_team from data.odds_name_map import map_odds_name_to_model_name from models.hr_probability_engine import build_hr_probability_result from models.pitcher_adjustment import build_pitcher_feature_row from visualization.cards.player_identity import normalize_for_matching, to_canonical_name def build_strikeout_probability_result_v2(*args, **kwargs): from models.strikeout_probability_engine_v2 import ( build_strikeout_probability_result_v2 as _build_strikeout_probability_result_v2, ) return _build_strikeout_probability_result_v2(*args, **kwargs) def _build_statcast_name_index(statcast_df: pd.DataFrame) -> dict[str, str]: if statcast_df.empty or "player_name" not in statcast_df.columns: return {} index: dict[str, str] = {} for name in statcast_df["player_name"].astype(str).unique(): normalized = map_odds_name_to_model_name(name) if normalized not in index: index[normalized] = name canonical = to_canonical_name(name) if canonical != name: canonical_norm = map_odds_name_to_model_name(canonical) if canonical_norm not in index: index[canonical_norm] = name suffix_stripped = normalize_for_matching(canonical) if suffix_stripped and suffix_stripped not in index: index[suffix_stripped] = name return index def _build_game_context_from_row(row: Any) -> dict[str, Any]: return { "away_team": str(row.get("away_team", "") or "").strip(), "home_team": str(row.get("home_team", "") or "").strip(), "venue": str( row.get("venue") or row.get("stadium") or row.get("venue_name") or row.get("park") or "" ).strip(), 
"game_datetime_utc": str( row.get("game_datetime_utc") or row.get("commence_time") or "" ).strip(), "game_date": str(row.get("game_date", "") or "").strip(), "lineup_slot": row.get("lineup_slot"), "lineup_slot_source": row.get("lineup_slot_source"), "team_total": row.get("team_total"), "team_total_source": row.get("team_total_source"), } def _normalize_team_name(value: Any) -> str: return " ".join(str(value or "").strip().lower().split()) def _to_display_name(value: Any) -> str: return str(value or "").strip() def _normalize_person_name(value: Any) -> str: return normalize_for_matching(to_canonical_name(str(value or "").strip())) def _names_match(left: Any, right: Any) -> bool: left_norm = _normalize_person_name(left) right_norm = _normalize_person_name(right) return bool(left_norm and right_norm and left_norm == right_norm) def _compute_verdict( bet_ev: float | None, edge: float | None, confidence_score: float | None, is_modeled: bool, ) -> str: if not is_modeled: return "tracked" try: ev = float(bet_ev if bet_ev is not None else -9.0) ed = float(edge if edge is not None else -9.0) conf = float(confidence_score if confidence_score is not None else 0.0) except Exception: return "pass" if ev >= 0.05 and ed >= 0.01 and conf >= 62: return "bet" if ev >= -0.03 and ed >= -0.01 and conf >= 45: return "watch" return "pass" def _confidence_display_remap(raw_score: float | None) -> float | None: try: raw = float(raw_score) except Exception: return None if raw <= 40.0: return max(1.0, min(100.0, raw)) return max(1.0, min(100.0, 40.0 + ((raw - 40.0) * 1.45))) def _normalize_confidence_components(value: Any) -> list[dict[str, Any]]: if not isinstance(value, list): return [] normalized: list[dict[str, Any]] = [] for item in value: if not isinstance(item, dict): continue label = str(item.get("label") or "").strip() if not label: continue try: component_value = float(item.get("value") or 0.0) except Exception: component_value = 0.0 normalized.append( { "label": label, "value": 
round(component_value, 1), "direction": str(item.get("direction") or "").strip().lower() or None, } ) return normalized def _select_confidence_primary_driver( penalties: list[dict[str, Any]], bonuses: list[dict[str, Any]], ) -> dict[str, Any] | None: penalty_candidates = [item for item in penalties if float(item.get("value") or 0.0) > 0.0] bonus_candidates = [item for item in bonuses if float(item.get("value") or 0.0) > 0.0] if penalty_candidates: return max(penalty_candidates, key=lambda item: float(item.get("value") or 0.0)) if bonus_candidates: return max(bonus_candidates, key=lambda item: float(item.get("value") or 0.0)) return None def _build_strikeout_confidence_payload( probability_result: dict[str, Any], ) -> dict[str, Any]: source = str(probability_result.get("confidence_source") or "strikeout_v2_live") raw_score = probability_result.get("confidence_score_raw", probability_result.get("confidence_score")) raw_bucket = probability_result.get("confidence_bucket") reasons = list(probability_result.get("confidence_reasons") or []) bonuses = _normalize_confidence_components(probability_result.get("confidence_component_bonuses")) penalties = _normalize_confidence_components(probability_result.get("confidence_component_penalties")) raw_score_float = float(raw_score) if raw_score is not None else None display_score = _confidence_display_remap(raw_score_float) display_bucket = None if display_score is not None: if display_score >= 75: display_bucket = "high" elif display_score >= 55: display_bucket = "medium" else: display_bucket = "low" primary_driver = _select_confidence_primary_driver(penalties, bonuses) summary_label = str((primary_driver or {}).get("label") or "").strip() or None return { "confidence_score_raw": round(raw_score_float, 1) if raw_score_float is not None else None, "confidence_score_display": round(display_score, 1) if display_score is not None else None, "confidence_source": source, "confidence_component_bonuses": bonuses, 
"confidence_component_penalties": penalties, "confidence_primary_driver": primary_driver, "confidence_summary_label": summary_label, "confidence_bucket_raw": raw_bucket, "confidence_bucket_display": display_bucket, "confidence_reasons": reasons[:5], } def _classify_strikeout_probability_status( *, fair_prob: float | None, implied: float | None, pitcher_name: str, probability_result: dict[str, Any], ) -> str: if fair_prob is not None: return "modeled_ok" if implied is not None else "missing_implied_prob" if not str(pitcher_name or "").strip(): return "missing_pitcher_context" if str(probability_result.get("pitcher_resolution_status") or "").strip().lower() == "unresolved": return "missing_pitcher_context" if str(probability_result.get("projected_starter_match_status") or "").strip().lower() == "resolved_pitcher_mismatch": return "projected_starter_mismatch" return "empty_probability_result" def _classify_hr_probability_status( *, threshold_int: int, is_modeled: bool, model_prob: float | None, implied: float | None, probability_result: dict[str, Any], statcast_df: pd.DataFrame | None, pitcher_name: str, ) -> str: if threshold_int != 1 or not is_modeled: return "unmodeled_ladder" if model_prob is not None: return "modeled_ok" if implied is not None else "missing_implied_prob" if statcast_df is None or statcast_df.empty: return "missing_baseline" baseline_prob = probability_result.get("baseline_hr_prob") pitcher_status = str(probability_result.get("pitcher_resolution_status") or "").strip().lower() skipped_layers = str(probability_result.get("skipped_layers") or "").strip().lower() batter_rows_missing = baseline_prob is None if batter_rows_missing: return "missing_baseline" if implied is None: return "missing_implied_prob" if not str(pitcher_name or "").strip(): return "missing_pitcher_context" if pitcher_status in {"pitcher_missing", "unresolved", "matchup_incomplete"}: return "missing_pitcher_context" if "pitcher_missing" in skipped_layers or "matchup_incomplete" in 
skipped_layers: return "missing_pitcher_context" if baseline_prob is not None: return "empty_probability_result" return "unknown" def _infer_batter_team( batter_name: str, batter_statcast_df: pd.DataFrame, ) -> str: if ( batter_statcast_df is None or batter_statcast_df.empty or not batter_name or "player_name" not in batter_statcast_df.columns ): return "" normalized_target = _normalize_person_name(batter_name) player_rows = batter_statcast_df[ batter_statcast_df["player_name"].astype(str).map(_normalize_person_name) == normalized_target ].copy() if player_rows.empty: return "" team_values: list[str] = [] if {"inning_topbot", "home_team", "away_team"}.issubset(player_rows.columns): inning_half = player_rows["inning_topbot"].fillna("").astype(str).str.lower() top_mask = inning_half.str.contains("top") bottom_mask = inning_half.str.contains("bot|bottom") if top_mask.any(): team_values.extend( player_rows.loc[top_mask, "away_team"].dropna().astype(str).tolist() ) if bottom_mask.any(): team_values.extend( player_rows.loc[bottom_mask, "home_team"].dropna().astype(str).tolist() ) for col in ["team", "batter_team", "team_name"]: if col in player_rows.columns: team_values.extend(player_rows[col].dropna().astype(str).tolist()) normalized = [_normalize_team_name(v) for v in team_values if str(v).strip()] if not normalized: return "" return pd.Series(normalized).mode().iloc[0] def _resolve_batter_team( row: Any, batter_name: str, batter_statcast_df: pd.DataFrame, ) -> tuple[str, str]: row_team, row_source = _resolve_batter_team_from_row_context( row=row, batter_name=batter_name, ) if row_team: return (row_team, row_source) away_team = _to_display_name(row.get("away_team")) home_team = _to_display_name(row.get("home_team")) away_norm = _normalize_team_name(away_team) home_norm = _normalize_team_name(home_team) statcast_ok = ( batter_statcast_df is not None and not batter_statcast_df.empty and batter_name and "player_name" in batter_statcast_df.columns ) if statcast_ok: 
normalized_target = _normalize_person_name(batter_name) player_rows = batter_statcast_df[ batter_statcast_df["player_name"].astype(str).map(_normalize_person_name) == normalized_target ].copy() if not player_rows.empty: if "source_season" in player_rows.columns: current_rows = player_rows[pd.to_numeric(player_rows["source_season"], errors="coerce") == 2026].copy() current_team = _infer_batter_team(batter_name=batter_name, batter_statcast_df=current_rows) if current_team: if current_team == away_norm and away_team: return (away_team, "current_season_statcast") if current_team == home_norm and home_team: return (home_team, "current_season_statcast") historical_team = _infer_batter_team(batter_name=batter_name, batter_statcast_df=player_rows) if historical_team: if historical_team == away_norm and away_team: return (away_team, "historical_statcast") if historical_team == home_norm and home_team: return (home_team, "historical_statcast") # historical_team doesn't match either current game team (player changed teams); # fall through to roster lookup instead of returning a stale team name # Level 4: current-season MLB roster lookup (handles offseason moves and new players) if batter_name: roster_team = lookup_batter_current_team(batter_name, away_team or "", home_team or "") if roster_team: return (roster_team, "mlb_roster_lookup") return ("", "unresolved") def _resolve_batter_team_from_row_context( row: Any, batter_name: str, ) -> tuple[str, str]: away_team = _to_display_name(row.get("away_team")) home_team = _to_display_name(row.get("home_team")) away_norm = _normalize_team_name(away_team) home_norm = _normalize_team_name(home_team) for key in ("batter_team", "player_team", "team", "team_name"): value = _to_display_name(row.get(key)) value_norm = _normalize_team_name(value) if value_norm == away_norm and away_team: return (away_team, f"row_{key}") if value_norm == home_norm and home_team: return (home_team, f"row_{key}") return ("", "unknown") def _infer_lineup_slot( 
batter_name: str, batter_statcast_df: pd.DataFrame, ) -> tuple[int | None, str]: if ( batter_statcast_df is None or batter_statcast_df.empty or not batter_name or "player_name" not in batter_statcast_df.columns ): return (None, "unknown") player_rows = batter_statcast_df[ batter_statcast_df["player_name"].astype(str).str.casefold() == batter_name.casefold() ].copy() if player_rows.empty: return (None, "unknown") for col in ["lineup_slot", "lineup_position", "batting_order", "bat_order"]: if col not in player_rows.columns: continue numeric = pd.to_numeric(player_rows[col], errors="coerce").dropna() numeric = numeric[(numeric >= 1) & (numeric <= 9)] if not numeric.empty: mode = numeric.round().astype(int).mode() if not mode.empty: return (int(mode.iloc[0]), "projected") return (None, "unknown") def _resolve_pitcher_hand( pitcher_name: str, pitcher_statcast_df: pd.DataFrame | None, ) -> tuple[str, str]: if not pitcher_name or pitcher_statcast_df is None or pitcher_statcast_df.empty: return ("", "unavailable") if {"player_name", "p_throws"}.issubset(pitcher_statcast_df.columns): direct_rows = pitcher_statcast_df[ pitcher_statcast_df["player_name"].astype(str).map(_normalize_person_name) == _normalize_person_name(pitcher_name) ].copy() if not direct_rows.empty: direct_hand = str(direct_rows.iloc[0].get("p_throws") or "").strip().upper() if direct_hand: return (direct_hand, "statcast_direct") try: pitcher_row = build_pitcher_feature_row( statcast_df=pitcher_statcast_df, pitcher_name=pitcher_name, ) hand = str(pitcher_row.get("p_throws") or "").strip().upper() return (hand, "pitcher_feature_row" if hand else "unavailable") except Exception: return ("", "unavailable") def _resolve_team_total( row: Any, batter_team: str, ) -> tuple[float | None, str]: direct_keys = ["team_total", "implied_team_total", "batter_team_total"] for key in direct_keys: value = row.get(key) try: if value is not None and str(value).strip() not in {"", "nan", "None"}: return (float(value), 
"projected") except Exception: continue away_norm = _normalize_team_name(row.get("away_team")) home_norm = _normalize_team_name(row.get("home_team")) batter_team_norm = _normalize_team_name(batter_team) if batter_team_norm and batter_team_norm == away_norm: for key in ["away_team_total", "away_implied_total"]: value = row.get(key) try: if value is not None and str(value).strip() not in {"", "nan", "None"}: return (float(value), "projected") except Exception: continue if batter_team_norm and batter_team_norm == home_norm: for key in ["home_team_total", "home_implied_total"]: value = row.get(key) try: if value is not None and str(value).strip() not in {"", "nan", "None"}: return (float(value), "projected") except Exception: continue return (None, "unknown") def _resolve_pitcher_name( row: Any, batter_team: str, probable_starters: dict | None, ) -> tuple[str, str, str]: explicit_pitcher = str( row.get("pitcher_name") or row.get("pitcher") or row.get("opposing_pitcher") or "" ).strip() away_team = str(row.get("away_team") or "").strip() home_team = str(row.get("home_team") or "").strip() if explicit_pitcher and (not away_team or not home_team or not probable_starters): return (explicit_pitcher, "row_explicit", "resolved") if not probable_starters: return ("", "probable_starters_unavailable", "unresolved") if not away_team or not home_team: return ("", "matchup_incomplete", "unresolved") starters = lookup_pitchers_for_game( away_team=away_team, home_team=home_team, starters_map=probable_starters, ) if not starters: return ("", "matchup_not_found", "unresolved") away_norm = _normalize_team_name(away_team) home_norm = _normalize_team_name(home_team) batter_team_norm = _normalize_team_name(batter_team) home_pitcher = str(starters.get("home_pitcher") or "").strip() away_pitcher = str(starters.get("away_pitcher") or "").strip() if explicit_pitcher: if _names_match(home_pitcher, explicit_pitcher) or _names_match(away_pitcher, explicit_pitcher): return (explicit_pitcher, 
"row_explicit_validated", "resolved") if batter_team_norm and batter_team_norm == away_norm: return ( home_pitcher, "probable_starters_matchup", "resolved", ) if batter_team_norm and batter_team_norm == home_norm: return ( away_pitcher, "probable_starters_matchup", "resolved", ) if home_pitcher and not away_pitcher: return (home_pitcher, "probable_starters_single_side", "resolved") if away_pitcher and not home_pitcher: return (away_pitcher, "probable_starters_single_side", "resolved") if explicit_pitcher: return (explicit_pitcher, "row_explicit_unvalidated", "resolved") return ("", "batter_team_unresolved", "unresolved") def _lookup_projected_starter_context( row: Any, probable_starters: dict | None, ) -> dict[str, Any]: away_team = str(row.get("away_team") or "").strip() home_team = str(row.get("home_team") or "").strip() out = { "projected_home_pitcher": "", "projected_away_pitcher": "", "projected_starter_available": False, "projected_starter_source": "probable_starters_unavailable" if not probable_starters else "matchup_incomplete", "projected_home_pitcher_source": "", "projected_away_pitcher_source": "", "starter_cache_source": "probable_starters_unavailable" if not probable_starters else "matchup_incomplete", "fallback_used": False, } if not probable_starters or not away_team or not home_team: return out starters = lookup_pitchers_for_game( away_team=away_team, home_team=home_team, starters_map=probable_starters, ) if not starters: out["projected_starter_source"] = "matchup_not_found" return out projected_home = str(starters.get("home_pitcher") or "").strip() projected_away = str(starters.get("away_pitcher") or "").strip() out.update( { "projected_home_pitcher": projected_home, "projected_away_pitcher": projected_away, "projected_starter_available": bool(projected_home or projected_away), "projected_starter_source": str(starters.get("starter_cache_source") or "probable_starters_matchup"), "projected_home_pitcher_source": 
str(starters.get("home_pitcher_source") or ""), "projected_away_pitcher_source": str(starters.get("away_pitcher_source") or ""), "starter_cache_source": str(starters.get("starter_cache_source") or "probable_starters_matchup"), "fallback_used": bool(starters.get("fallback_used")), } ) return out def _projected_starter_match_status( resolved_pitcher_name: str, projected_home_pitcher: str, projected_away_pitcher: str, ) -> str: resolved = str(resolved_pitcher_name or "").strip() if not projected_home_pitcher and not projected_away_pitcher: return "projected_starter_unavailable" if not resolved: return "projected_starter_available_but_unresolved" if _names_match(projected_home_pitcher, resolved): return "matched_projected_home" if _names_match(projected_away_pitcher, resolved): return "matched_projected_away" return "resolved_pitcher_mismatch" def _resolve_pitcher_team_and_opponent( row: Any, pitcher_name: str, probable_starters: dict | None, ) -> tuple[str, str]: away_team = str(row.get("away_team") or "").strip() home_team = str(row.get("home_team") or "").strip() if not away_team or not home_team or not pitcher_name or not probable_starters: return ("", "") starters = lookup_pitchers_for_game( away_team=away_team, home_team=home_team, starters_map=probable_starters, ) if not starters: return ("", "") away_pitcher = str(starters.get("away_pitcher") or "").strip() home_pitcher = str(starters.get("home_pitcher") or "").strip() if _names_match(away_pitcher, pitcher_name): return (away_team, home_team) if _names_match(home_pitcher, pitcher_name): return (home_team, away_team) return ("", "") def _resolve_strikeout_pitcher_name( row: Any, probable_starters: dict | None, ) -> tuple[str, str, str]: explicit_pitcher = _to_display_name(row.get("player_name_raw") or row.get("player_name")) away_team = str(row.get("away_team") or "").strip() home_team = str(row.get("home_team") or "").strip() if not explicit_pitcher and not probable_starters: return ("", "missing_pitcher_name", 
"unresolved") if not probable_starters or not away_team or not home_team: return (explicit_pitcher, "row_explicit", "resolved" if explicit_pitcher else "unresolved") starters = lookup_pitchers_for_game( away_team=away_team, home_team=home_team, starters_map=probable_starters, ) if not starters: return (explicit_pitcher, "row_explicit", "resolved" if explicit_pitcher else "unresolved") projected_home = str(starters.get("home_pitcher") or "").strip() projected_away = str(starters.get("away_pitcher") or "").strip() if explicit_pitcher and ( _names_match(projected_home, explicit_pitcher) or _names_match(projected_away, explicit_pitcher) ): return (explicit_pitcher, "row_explicit_validated", "resolved") if projected_home and not projected_away: return (projected_home, "probable_starters_single_side", "resolved") if projected_away and not projected_home: return (projected_away, "probable_starters_single_side", "resolved") if projected_home and projected_away: return ("", "row_explicit_mismatch", "unresolved") return (explicit_pitcher, "row_explicit", "resolved" if explicit_pitcher else "unresolved") def _extract_team_batters_from_statcast( team_name: str, batter_statcast_df: pd.DataFrame | None, max_players: int = 9, ) -> list[str]: if ( not team_name or batter_statcast_df is None or batter_statcast_df.empty or "player_name" not in batter_statcast_df.columns ): return [] team_norm = _normalize_team_name(team_name) if not team_norm: return [] working = batter_statcast_df.copy() if "source_season" in working.columns: current_rows = working[pd.to_numeric(working["source_season"], errors="coerce") == 2026].copy() if not current_rows.empty: working = current_rows players = ( working.get("player_name", pd.Series(dtype="object")) .dropna() .astype(str) .tolist() ) if not players: return [] matched_names: list[str] = [] seen_norms: set[str] = set() for player_name in players: inferred_team = _infer_batter_team(player_name, working) if inferred_team != team_norm: continue 
player_norm = _normalize_person_name(player_name) if not player_norm or player_norm in seen_norms: continue seen_norms.add(player_norm) matched_names.append(player_name) if len(matched_names) >= max_players: break return matched_names def _lookup_baseline_metadata( statcast_df: pd.DataFrame | None, player_name: str, ) -> dict[str, Any]: default = { "baseline_mode": None, "prior_sample_size": None, "season_2026_sample_size": None, "prior_weight": None, "season_2026_weight": None, "baseline_driver": None, "rolling_overlay_active": None, } if ( statcast_df is None or statcast_df.empty or not player_name or "player_name" not in statcast_df.columns ): return default normalized_target = _normalize_person_name(player_name) if not normalized_target: return default normalized_series = statcast_df["player_name"].astype(str).map(_normalize_person_name) rows = statcast_df[normalized_series == normalized_target].copy() if rows.empty: return default first_row = rows.iloc[0] return { "baseline_mode": first_row.get("baseline_mode"), "prior_sample_size": first_row.get("prior_sample_size"), "season_2026_sample_size": first_row.get("season_2026_sample_size"), "prior_weight": first_row.get("prior_weight"), "season_2026_weight": first_row.get("season_2026_weight"), "baseline_driver": first_row.get("baseline_driver"), "rolling_overlay_active": first_row.get("rolling_overlay_active"), } def get_player_hr_prob( player_name_normalized: str, statcast_df: pd.DataFrame, _name_index: dict[str, str] | None = None, ) -> tuple[float | None, str]: """ Backward-compatible wrapper for callers expecting (prob, source). 
""" name_index = _name_index if _name_index is not None else _build_statcast_name_index(statcast_df) statcast_name = name_index.get(player_name_normalized, player_name_normalized) result = build_hr_probability_result( batter_statcast_df=statcast_df, batter_name=statcast_name, mode="pregame", ) prob = result.get("calibrated_hr_prob") if prob is None: return (None, "unavailable") return (float(prob), "shared_pregame_engine") def map_hr_props_to_model( props_df: pd.DataFrame, statcast_df: pd.DataFrame, prob_fn: Callable[..., Any] | None = None, pitcher_stats_df: pd.DataFrame | None = None, pitcher_statcast_df: pd.DataFrame | None = None, probable_starters: dict | None = None, ) -> pd.DataFrame: """ Join HR prop rows to shared-engine HR probabilities and compute edge. Adds columns: implied_prob, model_hr_prob, model_hr_prob_source, edge and shared-engine diagnostics: baseline_hr_prob, pregame_hr_prob, probability_mode, component adjustment columns, applied_layers, skipped_layers """ del prob_fn if props_df.empty: return pd.DataFrame() hr_df = props_df[props_df["market"] == "hr"].copy() if hr_df.empty: return pd.DataFrame() pitcher_df = ( pitcher_statcast_df if pitcher_statcast_df is not None else pitcher_stats_df if pitcher_stats_df is not None else statcast_df ) name_index = _build_statcast_name_index(statcast_df) runtime_cache: dict[str, Any] = {"name_index": name_index} projected_starter_cache: dict[tuple[str, str, str], dict[str, Any]] = {} batter_team_cache: dict[tuple[str, str, str, str], tuple[str, str]] = {} pitcher_resolution_cache: dict[tuple[str, str, str, str], tuple[str, str, str]] = {} pitcher_hand_cache: dict[str, tuple[Any, Any]] = {} baseline_meta_cache: dict[tuple[int, str], dict[str, Any]] = {} lineup_slot_cache: dict[tuple[str, str, str], tuple[Any, Any]] = {} team_total_cache: dict[tuple[str, str, str, str], tuple[Any, Any]] = {} mapped_rows: list[dict[str, Any]] = [] for _, row in hr_df.iterrows(): odds = row.get("odds_american") 
batter_name_normalized = str(row.get("player_name") or "").strip() batter_name = name_index.get(batter_name_normalized, batter_name_normalized) threshold = row.get("threshold") try: threshold_int = int(threshold) if threshold is not None and str(threshold).strip() not in {"", "nan", "None"} else 1 except Exception: threshold_int = 1 is_modeled = bool(row.get("is_modeled")) if pd.notna(row.get("is_modeled")) else threshold_int == 1 batter_team_key = ( str(row.get("away_team") or "").strip().lower(), str(row.get("home_team") or "").strip().lower(), str(batter_name or "").strip().lower(), str(row.get("event_id") or "").strip(), ) if batter_team_key not in batter_team_cache: batter_team_cache[batter_team_key] = _resolve_batter_team( row=row, batter_name=batter_name, batter_statcast_df=statcast_df, ) batter_team, batter_team_source = batter_team_cache[batter_team_key] starter_key = ( str(row.get("away_team") or "").strip().lower(), str(row.get("home_team") or "").strip().lower(), str(row.get("event_id") or "").strip(), ) if starter_key not in projected_starter_cache: projected_starter_cache[starter_key] = _lookup_projected_starter_context( row=row, probable_starters=probable_starters, ) projected_starter_context = projected_starter_cache[starter_key] pitcher_resolution_key = ( starter_key[0], starter_key[1], str(batter_team or "").strip().lower(), str(row.get("pitcher_name") or row.get("pitcher") or "").strip().lower(), ) if pitcher_resolution_key not in pitcher_resolution_cache: pitcher_resolution_cache[pitcher_resolution_key] = _resolve_pitcher_name( row=row, batter_team=batter_team, probable_starters=probable_starters, ) pitcher_name, resolved_pitcher_source, pitcher_resolution_status = pitcher_resolution_cache[pitcher_resolution_key] projected_starter_match_status = _projected_starter_match_status( resolved_pitcher_name=pitcher_name, projected_home_pitcher=str(projected_starter_context.get("projected_home_pitcher") or ""), 
projected_away_pitcher=str(projected_starter_context.get("projected_away_pitcher") or ""), ) pitcher_hand_key = str(pitcher_name or "").strip().lower() if pitcher_hand_key not in pitcher_hand_cache: pitcher_hand_cache[pitcher_hand_key] = _resolve_pitcher_hand( pitcher_name=pitcher_name, pitcher_statcast_df=pitcher_df, ) pitcher_hand, pitcher_hand_source = pitcher_hand_cache[pitcher_hand_key] batter_meta_key = (id(statcast_df), str(batter_name or "").strip().lower()) if batter_meta_key not in baseline_meta_cache: baseline_meta_cache[batter_meta_key] = _lookup_baseline_metadata(statcast_df, batter_name) batter_baseline_meta = baseline_meta_cache[batter_meta_key] pitcher_meta_key = (id(pitcher_df), str(pitcher_name or "").strip().lower()) if pitcher_meta_key not in baseline_meta_cache: baseline_meta_cache[pitcher_meta_key] = _lookup_baseline_metadata(pitcher_df, pitcher_name) pitcher_baseline_meta = baseline_meta_cache[pitcher_meta_key] lineup_slot_key = ( str(batter_team or "").strip().lower(), str(batter_name or "").strip().lower(), str(pitcher_hand or "").strip().upper(), ) if lineup_slot_key not in lineup_slot_cache: lineup_slot, lineup_slot_source = _infer_lineup_slot( batter_name=batter_name, batter_statcast_df=statcast_df, ) lineup_slot_cache[lineup_slot_key] = (lineup_slot, lineup_slot_source) lineup_slot, lineup_slot_source = lineup_slot_cache[lineup_slot_key] team_total_key = ( str(row.get("away_team") or "").strip().lower(), str(row.get("home_team") or "").strip().lower(), str(batter_team or "").strip().lower(), str(row.get("event_id") or "").strip(), str(row.get("sportsbook") or "").strip().lower(), str(row.get("team_total") or row.get("away_team_total") or row.get("home_team_total") or "").strip(), ) if team_total_key not in team_total_cache: team_total_cache[team_total_key] = _resolve_team_total(row=row, batter_team=batter_team) team_total, team_total_source = team_total_cache[team_total_key] try: implied = american_to_implied_prob(odds) if odds is not 
None else None except Exception: implied = None if is_modeled: probability_result = build_hr_probability_result( batter_statcast_df=statcast_df, batter_name=batter_name, pitcher_statcast_df=pitcher_df, pitcher_name=pitcher_name, game_row={ **_build_game_context_from_row(row), "lineup_slot": lineup_slot, "lineup_slot_source": lineup_slot_source, "team_total": team_total, "team_total_source": team_total_source, "projected_home_pitcher": projected_starter_context.get("projected_home_pitcher"), "projected_away_pitcher": projected_starter_context.get("projected_away_pitcher"), "projected_starter_available": projected_starter_context.get("projected_starter_available"), "projected_starter_match_status": projected_starter_match_status, }, weather_row=None, mode="pregame", runtime_cache=runtime_cache, ) model_prob = probability_result.get("calibrated_hr_prob") if model_prob is not None and implied is not None: edge = compute_edge(model_prob, implied) bet_ev = compute_bet_ev(model_prob, odds) if odds is not None else None source = "shared_pregame_engine" else: edge = None bet_ev = None source = "unavailable" else: probability_result = { "baseline_hr_prob": None, "raw_hr_prob": None, "calibrated_hr_prob": None, "pregame_hr_prob": None, "mode": "pregame", "applied_layers": "", "skipped_layers": "unmodeled_hr_ladder", "confidence_score": None, "confidence_bucket": None, "confidence_reasons": [], "opportunity_hr_adjustment": None, "expected_pa": None, "pa_multiplier": None, "lineup_slot_used": lineup_slot, "lineup_slot_source": lineup_slot_source, "team_total_used": team_total, "team_total_source": team_total_source, "opportunity_mode": None, "opportunity_reason": None, "pregame_pitcher_context_adj": None, "pregame_park_context_adj": None, "pregame_weather_context_adj": None, "pregame_context_applied": False, "pitcher_hr_adjustment": None, "trend_hr_adjustment": None, "zone_hr_adjustment": None, "family_zone_hr_adjustment": None, "arsenal_hr_adjustment": None, 
"pulled_contact_hr_adjustment": None, "env_hr_adjustment": None, "park_hr_adjustment": None, "weather_hr_adjustment": None, "platoon_hr_adjustment": None, "trajectory_hr_adjustment": None, "rolling_hr_adjustment": None, "pitcher_reliability": None, "trend_reliability": None, "zone_reliability": None, "family_zone_reliability": None, "arsenal_reliability": None, "pulled_contact_reliability": None, "environment_reliability": None, "trajectory_reliability": None, "rolling_reliability": None, "opportunity_reliability": None, "matchup_platoon_multiplier": None, "matchup_platoon_reason": "unmodeled_hr_ladder", "pitcher_resolution_status": "unmodeled_hr_ladder", "zone_status": "unmodeled_hr_ladder", "family_zone_status": "unmodeled_hr_ladder", "arsenal_status": "unmodeled_hr_ladder", "reason_candidate_count": 0, "zone_store_sample_size": None, "family_zone_batter_sample_size": None, "family_zone_pitcher_sample_size": None, "arsenal_batter_sample_size": None, "arsenal_pitcher_sample_size": None, "model_voice_reason_candidates": [], "model_voice_tags": [], } model_prob = None edge = None bet_ev = None source = "unmodeled_hr_ladder" probability_status = _classify_hr_probability_status( threshold_int=threshold_int, is_modeled=is_modeled, model_prob=model_prob, implied=implied, probability_result=probability_result, statcast_df=statcast_df, pitcher_name=pitcher_name, ) row_dict = row.to_dict() row_dict.update( { "implied_prob": implied, "model_hr_prob": model_prob, "fair_prob": model_prob, "model_hr_prob_source": source, "model_hr_prob_source_detail": probability_result.get("applied_layers", ""), "edge": edge, "bet_ev": bet_ev, "baseline_hr_prob": probability_result.get("baseline_hr_prob"), "raw_hr_prob": probability_result.get("raw_hr_prob"), "calibrated_hr_prob": probability_result.get("calibrated_hr_prob"), "pregame_hr_prob": probability_result.get("pregame_hr_prob"), "probability_mode": probability_result.get("mode"), "formula_version": 
probability_result.get("formula_version"), "is_modeled": is_modeled, "threshold": threshold_int, "confidence_score": probability_result.get("confidence_score"), "confidence_bucket": probability_result.get("confidence_bucket"), "confidence_reasons": probability_result.get("confidence_reasons"), "opportunity_hr_adjustment": probability_result.get("opportunity_hr_adjustment"), "expected_pa": probability_result.get("expected_pa"), "pa_multiplier": probability_result.get("pa_multiplier"), "lineup_slot_used": probability_result.get("lineup_slot_used", lineup_slot), "lineup_slot_source": probability_result.get("lineup_slot_source", lineup_slot_source), "team_total_used": probability_result.get("team_total_used", team_total), "team_total_source": probability_result.get("team_total_source", team_total_source), "opportunity_mode": probability_result.get("opportunity_mode"), "opportunity_reason": probability_result.get("opportunity_reason"), "pregame_pitcher_context_adj": probability_result.get("pregame_pitcher_context_adj"), "pregame_park_context_adj": probability_result.get("pregame_park_context_adj"), "pregame_weather_context_adj": probability_result.get("pregame_weather_context_adj"), "pregame_context_applied": probability_result.get("pregame_context_applied", False), "pitcher_hr_adjustment": probability_result.get("pitcher_hr_adjustment"), "trend_hr_adjustment": probability_result.get("trend_hr_adjustment"), "zone_hr_adjustment": probability_result.get("zone_hr_adjustment"), "family_zone_hr_adjustment": probability_result.get("family_zone_hr_adjustment"), "arsenal_hr_adjustment": probability_result.get("arsenal_hr_adjustment"), "pulled_contact_hr_adjustment": probability_result.get("pulled_contact_hr_adjustment"), "env_hr_adjustment": probability_result.get("env_hr_adjustment"), "park_hr_adjustment": probability_result.get("park_hr_adjustment"), "weather_hr_adjustment": probability_result.get("weather_hr_adjustment"), "platoon_hr_adjustment": 
probability_result.get("platoon_hr_adjustment"), "trajectory_hr_adjustment": probability_result.get("trajectory_hr_adjustment"), "rolling_hr_adjustment": probability_result.get("rolling_hr_adjustment"), "damage_zone_alignment_subscore": probability_result.get("damage_zone_alignment_subscore"), "pitch_mix_exposure_subscore": probability_result.get("pitch_mix_exposure_subscore"), "tunnel_damage_subscore": probability_result.get("tunnel_damage_subscore"), "count_pattern_damage_subscore": probability_result.get("count_pattern_damage_subscore"), "handedness_damage_subscore": probability_result.get("handedness_damage_subscore"), "arsenal_fit_subscore": probability_result.get("arsenal_fit_subscore"), "environment_amplification_subscore": probability_result.get("environment_amplification_subscore"), "hr_opportunity_projection": probability_result.get("hr_opportunity_projection"), "matchup_coverage_confidence": probability_result.get("matchup_coverage_confidence"), "component_source_map": probability_result.get("component_source_map"), "expected_pitch_mix_by_count": probability_result.get("expected_pitch_mix_by_count"), "expected_zone_mix_by_count": probability_result.get("expected_zone_mix_by_count"), "expected_pitch_zone_mix_by_count": probability_result.get("expected_pitch_zone_mix_by_count"), "tunnel_pair_scores": probability_result.get("tunnel_pair_scores"), "predicted_attack_regions": probability_result.get("predicted_attack_regions"), "predicted_damage_regions": probability_result.get("predicted_damage_regions"), "predicted_whiff_regions": probability_result.get("predicted_whiff_regions"), "pitcher_reliability": probability_result.get("pitcher_reliability"), "trend_reliability": probability_result.get("trend_reliability"), "zone_reliability": probability_result.get("zone_reliability"), "family_zone_reliability": probability_result.get("family_zone_reliability"), "arsenal_reliability": probability_result.get("arsenal_reliability"), "pulled_contact_reliability": 
probability_result.get("pulled_contact_reliability"), "environment_reliability": probability_result.get("environment_reliability"), "trajectory_reliability": probability_result.get("trajectory_reliability"), "rolling_reliability": probability_result.get("rolling_reliability"), "opportunity_reliability": probability_result.get("opportunity_reliability"), "applied_layers": probability_result.get("applied_layers"), "skipped_layers": probability_result.get("skipped_layers"), "matchup_platoon_multiplier": probability_result.get("matchup_platoon_multiplier"), "matchup_platoon_reason": probability_result.get("matchup_platoon_reason"), "resolved_pitcher_name": pitcher_name, "projected_home_pitcher": projected_starter_context.get("projected_home_pitcher"), "projected_away_pitcher": projected_starter_context.get("projected_away_pitcher"), "projected_starter_available": projected_starter_context.get("projected_starter_available"), "projected_starter_source": projected_starter_context.get("projected_starter_source"), "projected_home_pitcher_source": projected_starter_context.get("projected_home_pitcher_source"), "projected_away_pitcher_source": projected_starter_context.get("projected_away_pitcher_source"), "starter_cache_source": projected_starter_context.get("starter_cache_source"), "fallback_used": projected_starter_context.get("fallback_used"), "projected_starter_match_status": projected_starter_match_status, "batter_team": batter_team, "batter_team_source": batter_team_source, "resolved_pitcher_source": resolved_pitcher_source, "pitcher_resolution_status": probability_result.get("pitcher_resolution_status", pitcher_resolution_status), "pitcher_hand": pitcher_hand, "pitcher_hand_source": pitcher_hand_source, "zone_status": probability_result.get("zone_status"), "family_zone_status": probability_result.get("family_zone_status"), "arsenal_status": probability_result.get("arsenal_status"), "reason_candidate_count": probability_result.get("reason_candidate_count"), 
"zone_store_sample_size": probability_result.get("zone_store_sample_size"), "family_zone_batter_sample_size": probability_result.get("family_zone_batter_sample_size"), "family_zone_pitcher_sample_size": probability_result.get("family_zone_pitcher_sample_size"), "arsenal_batter_sample_size": probability_result.get("arsenal_batter_sample_size"), "arsenal_pitcher_sample_size": probability_result.get("arsenal_pitcher_sample_size"), "model_voice_reason_candidates": probability_result.get("model_voice_reason_candidates", []), "model_voice_tags": probability_result.get("model_voice_tags", []), "selection_scope": row.get("selection_scope") or "player", "expected_modeled_hr_row": bool(threshold_int == 1 and str(row.get("market_family") or row.get("market") or "").strip().lower() == "hr"), "has_model_probability": model_prob is not None, "has_modeled_edge": edge is not None, "model_probability_status": probability_status, "modeled_row_available": model_prob is not None, "modeled_row_missing_reason": None if model_prob is not None else probability_status, "baseline_mode": batter_baseline_meta.get("baseline_mode"), "prior_sample_size": batter_baseline_meta.get("prior_sample_size"), "season_2026_sample_size": batter_baseline_meta.get("season_2026_sample_size"), "prior_weight": batter_baseline_meta.get("prior_weight"), "season_2026_weight": batter_baseline_meta.get("season_2026_weight"), "baseline_driver": batter_baseline_meta.get("baseline_driver"), "rolling_overlay_active": batter_baseline_meta.get("rolling_overlay_active"), "pitcher_baseline_mode": pitcher_baseline_meta.get("baseline_mode"), "pitcher_prior_sample_size": pitcher_baseline_meta.get("prior_sample_size"), "pitcher_season_2026_sample_size": pitcher_baseline_meta.get("season_2026_sample_size"), "pitcher_prior_weight": pitcher_baseline_meta.get("prior_weight"), "pitcher_season_2026_weight": pitcher_baseline_meta.get("season_2026_weight"), "pitcher_baseline_driver": pitcher_baseline_meta.get("baseline_driver"), 
def map_strikeout_props_to_model(
    props_df: pd.DataFrame,
    batter_statcast_df: pd.DataFrame,
    pitcher_statcast_df: pd.DataFrame | None = None,
    probable_starters: dict | None = None,
) -> pd.DataFrame:
    """Map strikeout ("k" market) prop rows to the v2 strikeout engine output.

    For every row whose ``market`` equals "k" (case-insensitive) this resolves
    the pitcher, his team/opponent and the projected-starter context, asks
    ``build_strikeout_probability_result_v2`` for a fair probability, and then
    merges edge, EV, confidence, diagnostics and a verdict into the row.

    Args:
        props_df: Sportsbook prop rows; must contain a ``market`` column.
        batter_statcast_df: Batter-side Statcast frame handed to the lineup
            extraction helper and to the probability engine.
        pitcher_statcast_df: Pitcher-side Statcast frame. When ``None`` the
            batter frame is used in its place.
        probable_starters: Optional probable-starter mapping forwarded
            unchanged to the starter/pitcher/team resolution helpers.

    Returns:
        A DataFrame with one output row per "k" prop row, in iteration order.
        An empty DataFrame is returned when ``props_df`` is empty or holds no
        "k" rows.
    """
    if props_df.empty:
        return pd.DataFrame()
    k_df = props_df[props_df["market"].astype(str).str.lower() == "k"].copy()
    if k_df.empty:
        return pd.DataFrame()
    # Fall back to the batter frame when no dedicated pitcher frame is given.
    pitcher_df = pitcher_statcast_df if pitcher_statcast_df is not None else batter_statcast_df
    # Per-call memo tables: each helper below is invoked at most once per
    # distinct key, no matter how many prop rows share that key.
    runtime_cache: dict[str, Any] = {}
    projected_starter_cache: dict[tuple[str, str, str], dict[str, Any]] = {}
    pitcher_resolution_cache: dict[tuple[str, str, str], tuple[str, str, str]] = {}
    team_context_cache: dict[tuple[str, str, str], tuple[str, str]] = {}
    pitcher_hand_cache: dict[str, tuple[Any, Any]] = {}
    baseline_meta_cache: dict[tuple[int, str], dict[str, Any]] = {}
    lineup_cache: dict[str, list[str]] = {}
    strikeout_probability_cache: dict[tuple[Any, ...], dict[str, Any]] = {}
    mapped_rows: list[dict[str, Any]] = []
    for _, row in k_df.iterrows():
        line = row.get("line")
        selection_side = str(row.get("selection_side") or "").strip().lower()
        # Any failure converting the price degrades to "no implied probability"
        # instead of aborting the whole mapping run.
        try:
            implied = american_to_implied_prob(row.get("odds_american")) if row.get("odds_american") is not None else None
        except Exception:
            implied = None
        # Projected-starter context is cached per (away, home, event_id).
        starter_key = (
            str(row.get("away_team") or "").strip().lower(),
            str(row.get("home_team") or "").strip().lower(),
            str(row.get("event_id") or "").strip(),
        )
        if starter_key not in projected_starter_cache:
            projected_starter_cache[starter_key] = _lookup_projected_starter_context(
                row=row,
                probable_starters=probable_starters,
            )
        projected_starter_context = projected_starter_cache[starter_key]
        # Pitcher resolution is cached per (away, home, raw pitcher label).
        # NOTE(review): event_id is not part of this key, unlike starter_key.
        pitcher_resolution_key = (
            starter_key[0],
            starter_key[1],
            str(row.get("pitcher_name") or row.get("pitcher") or row.get("player_name") or "").strip().lower(),
        )
        if pitcher_resolution_key not in pitcher_resolution_cache:
            pitcher_resolution_cache[pitcher_resolution_key] = _resolve_strikeout_pitcher_name(
                row=row,
                probable_starters=probable_starters,
            )
        pitcher_name, resolved_pitcher_source, pitcher_resolution_status = pitcher_resolution_cache[pitcher_resolution_key]
        # Team/opponent lookup reuses the pitcher resolution key.
        if pitcher_resolution_key not in team_context_cache:
            team_context_cache[pitcher_resolution_key] = _resolve_pitcher_team_and_opponent(
                row=row,
                pitcher_name=pitcher_name,
                probable_starters=probable_starters,
            )
        pitcher_team, opponent_team = team_context_cache[pitcher_resolution_key]
        projected_starter_match_status = _projected_starter_match_status(
            resolved_pitcher_name=pitcher_name,
            projected_home_pitcher=str(projected_starter_context.get("projected_home_pitcher") or ""),
            projected_away_pitcher=str(projected_starter_context.get("projected_away_pitcher") or ""),
        )
        pitcher_hand_key = str(pitcher_name or "").strip().lower()
        if pitcher_hand_key not in pitcher_hand_cache:
            pitcher_hand_cache[pitcher_hand_key] = _resolve_pitcher_hand(pitcher_name=pitcher_name, pitcher_statcast_df=pitcher_df)
        # NOTE(review): pitcher_hand is resolved here but not referenced again
        # anywhere in this function.
        pitcher_hand, _ = pitcher_hand_cache[pitcher_hand_key]
        # Baseline metadata is keyed by the identity of the frame plus the
        # normalised pitcher name.
        pitcher_meta_key = (id(pitcher_df), str(pitcher_name or "").strip().lower())
        if pitcher_meta_key not in baseline_meta_cache:
            baseline_meta_cache[pitcher_meta_key] = _lookup_baseline_metadata(pitcher_df, pitcher_name)
        pitcher_baseline_meta = baseline_meta_cache[pitcher_meta_key]
        # Opposing batters are extracted once per opponent team.
        lineup_key = str(opponent_team or "").strip().lower()
        if lineup_key not in lineup_cache:
            lineup_cache[lineup_key] = _extract_team_batters_from_statcast(
                team_name=opponent_team,
                batter_statcast_df=batter_statcast_df,
            )
        opponent_batters = lineup_cache[lineup_key]
        # Game context passed to the engine: the generic row context extended
        # with the starter / pitcher resolution details gathered above.
        canonical_game_row = _build_game_context_from_row(row)
        canonical_game_row.update(
            {
                "projected_home_pitcher": projected_starter_context.get("projected_home_pitcher"),
                "projected_away_pitcher": projected_starter_context.get("projected_away_pitcher"),
                "projected_starter_available": projected_starter_context.get("projected_starter_available"),
                "projected_starter_source": projected_starter_context.get("projected_starter_source"),
                "projected_home_pitcher_source": projected_starter_context.get("projected_home_pitcher_source"),
                "projected_away_pitcher_source": projected_starter_context.get("projected_away_pitcher_source"),
                "starter_cache_source": projected_starter_context.get("starter_cache_source"),
                "fallback_used": projected_starter_context.get("fallback_used"),
                "projected_starter_match_status": projected_starter_match_status,
                "resolved_pitcher_name": pitcher_name,
                "resolved_pitcher_source": resolved_pitcher_source,
                "pitcher_resolution_status": pitcher_resolution_status,
                "pitcher_team": pitcher_team,
                "opponent_team": opponent_team,
            }
        )
        # Blank / "nan" / "None" lines become None.
        # NOTE(review): float(line) is not inside a try/except, so any other
        # non-numeric value raises here.
        line_value = float(line) if line is not None and str(line).strip() not in {"", "nan", "None"} else None
        # Engine results are reused across rows that agree on every field of
        # this key; the odds are deliberately not part of it.
        probability_cache_key = (
            str(pitcher_name or "").strip().lower(),
            tuple(str(name or "").strip().lower() for name in opponent_batters),
            str(opponent_team or "").strip().lower(),
            line_value,
            str(selection_side or "").strip().lower(),
            str(canonical_game_row.get("away_team") or "").strip().lower(),
            str(canonical_game_row.get("home_team") or "").strip().lower(),
            str(canonical_game_row.get("projected_starter_match_status") or "").strip().lower(),
        )
        if probability_cache_key not in strikeout_probability_cache:
            strikeout_probability_cache[probability_cache_key] = build_strikeout_probability_result_v2(
                pitcher_statcast_df=pitcher_df,
                pitcher_name=pitcher_name,
                batter_statcast_df=batter_statcast_df,
                opponent_batters=opponent_batters,
                opponent_team=opponent_team,
                line=line_value,
                selection_side=selection_side,
                game_row=canonical_game_row,
                runtime_cache=runtime_cache,
            )
        probability_result_v2 = strikeout_probability_cache[probability_cache_key]
        confidence_payload = _build_strikeout_confidence_payload(
            probability_result=probability_result_v2,
        )
        fair_prob = probability_result_v2.get("fair_prob")
        probability_status = _classify_strikeout_probability_status(
            fair_prob=fair_prob,
            implied=implied,
            pitcher_name=pitcher_name,
            probability_result={
                **probability_result_v2,
                "pitcher_resolution_status": pitcher_resolution_status,
                "projected_starter_match_status": projected_starter_match_status,
            },
        )
        # A row counts as modeled only when both the fair probability and the
        # market-implied probability are available.
        if fair_prob is not None and implied is not None:
            edge = compute_edge(fair_prob, implied)
            bet_ev = compute_bet_ev(fair_prob, row.get("odds_american")) if row.get("odds_american") is not None else None
            source = "shared_strikeout_engine_v2"
            is_modeled = True
        else:
            edge = None
            bet_ev = None
            source = "unavailable"
            is_modeled = False
        # Start from the raw prop row and overlay the model output on top.
        row_dict = row.to_dict()
        row_dict.update(
            {
                "selection_scope": row.get("selection_scope") or "pitcher",
                "is_modeled": is_modeled,
                "implied_prob": implied,
                "fair_prob": fair_prob,
                "model_k_prob": fair_prob,
                "bet_ev": bet_ev,
                "edge": edge,
                # Confidence fields come from the confidence payload; the
                # un-suffixed score/bucket carry the *display* values.
                "confidence_score": confidence_payload.get("confidence_score_display"),
                "confidence_bucket": confidence_payload.get("confidence_bucket_display"),
                "confidence_reasons": confidence_payload.get("confidence_reasons"),
                "confidence_score_raw": confidence_payload.get("confidence_score_raw"),
                "confidence_score_display": confidence_payload.get("confidence_score_display"),
                "confidence_source": confidence_payload.get("confidence_source"),
                "confidence_component_bonuses": confidence_payload.get("confidence_component_bonuses"),
                "confidence_component_penalties": confidence_payload.get("confidence_component_penalties"),
                "confidence_primary_driver": confidence_payload.get("confidence_primary_driver"),
                "confidence_summary_label": confidence_payload.get("confidence_summary_label"),
                "confidence_bucket_raw": confidence_payload.get("confidence_bucket_raw"),
                "confidence_bucket_display": confidence_payload.get("confidence_bucket_display"),
                # Fields copied straight from the v2 engine result.
                "expected_strikeouts": probability_result_v2.get("expected_strikeouts"),
                "expected_strikeouts_v2": probability_result_v2.get("expected_strikeouts_v2"),
                "projected_pitch_count": probability_result_v2.get("projected_pitch_count"),
                "projected_batters_faced": probability_result_v2.get("projected_batters_faced"),
                "projected_innings": probability_result_v2.get("projected_innings"),
                "pitches_per_bf": probability_result_v2.get("pitches_per_bf"),
                "opportunity_confidence": probability_result_v2.get("opportunity_confidence"),
                "opportunity_reasons": probability_result_v2.get("opportunity_reasons"),
                "projected_k_rate": probability_result_v2.get("projected_k_rate"),
                "fair_prob_v2": probability_result_v2.get("fair_prob_v2"),
                "raw_k_prob_v2": probability_result_v2.get("raw_k_prob_v2"),
                "calibrated_k_prob_v2": probability_result_v2.get("calibrated_k_prob_v2"),
                "confidence_score_v2": probability_result_v2.get("confidence_score_v2"),
                "confidence_score_raw_v2": probability_result_v2.get("confidence_score_raw_v2"),
                "confidence_score_display_v2": probability_result_v2.get("confidence_score_display_v2"),
                "confidence_source_v2": probability_result_v2.get("confidence_source_v2"),
                "confidence_bucket_v2": probability_result_v2.get("confidence_bucket_v2"),
                "confidence_reasons_v2": probability_result_v2.get("confidence_reasons_v2"),
                "confidence_component_bonuses_v2": probability_result_v2.get("confidence_component_bonuses_v2"),
                "confidence_component_penalties_v2": probability_result_v2.get("confidence_component_penalties_v2"),
                "confidence_primary_driver_v2": probability_result_v2.get("confidence_primary_driver_v2"),
                "confidence_summary_label_v2": probability_result_v2.get("confidence_summary_label_v2"),
                "k_rate_pitch_signal": probability_result_v2.get("k_rate_pitch_signal"),
                "k_rate_anchor": probability_result_v2.get("k_rate_anchor"),
                "bb_rate_anchor": probability_result_v2.get("bb_rate_anchor"),
                "command_efficiency_signal": probability_result_v2.get("command_efficiency_signal"),
                "swing_miss_subscore": probability_result_v2.get("swing_miss_subscore"),
                "called_strike_subscore": probability_result_v2.get("called_strike_subscore"),
                "command_efficiency_subscore": probability_result_v2.get("command_efficiency_subscore"),
                "lineup_whiff_subscore": probability_result_v2.get("lineup_whiff_subscore"),
                "zone_matchup_subscore": probability_result_v2.get("zone_matchup_subscore"),
                "family_zone_matchup_subscore": probability_result_v2.get("family_zone_matchup_subscore"),
                "arsenal_fit_subscore": probability_result_v2.get("arsenal_fit_subscore"),
                "tunneling_subscore": probability_result_v2.get("tunneling_subscore"),
                "release_consistency_subscore": probability_result_v2.get("release_consistency_subscore"),
                "sequencing_subscore": probability_result_v2.get("sequencing_subscore"),
                "count_leverage_subscore": probability_result_v2.get("count_leverage_subscore"),
                "leash_risk_subscore": probability_result_v2.get("leash_risk_subscore"),
                "role_certainty_score": probability_result_v2.get("role_certainty_score"),
                "times_through_order_penalty": probability_result_v2.get("times_through_order_penalty"),
                "telemetry_path_status": probability_result_v2.get("telemetry_path_status"),
                "model_tier": probability_result_v2.get("model_tier"),
                "variance_band_low": probability_result_v2.get("variance_band_low"),
                "variance_band_high": probability_result_v2.get("variance_band_high"),
                "matchup_coverage_confidence": probability_result_v2.get("matchup_coverage_confidence"),
                "component_source_map": probability_result_v2.get("component_source_map"),
                "predicted_whiff_regions": probability_result_v2.get("predicted_whiff_regions"),
                "predicted_attack_regions": probability_result_v2.get("predicted_attack_regions"),
                "predicted_damage_regions": probability_result_v2.get("predicted_damage_regions"),
                "tunnel_pair_scores": probability_result_v2.get("tunnel_pair_scores"),
                "formula_version": probability_result_v2.get("formula_version"),
                "pitcher_swstr_rate": probability_result_v2.get("pitcher_swstr_rate"),
                "pitcher_csw_rate": probability_result_v2.get("pitcher_csw_rate"),
                "pitcher_ball_rate": probability_result_v2.get("pitcher_ball_rate"),
                # The next six keys re-expose v2 subscores that are already
                # emitted above, under different column names.
                "arsenal_whiff_risk": probability_result_v2.get("arsenal_fit_subscore"),
                "family_zone_whiff_risk": probability_result_v2.get("family_zone_matchup_subscore"),
                "zone_whiff_risk": probability_result_v2.get("zone_matchup_subscore"),
                "trajectory_tunnel_score": probability_result_v2.get("tunneling_subscore"),
                "trajectory_release_consistency_score": probability_result_v2.get("release_consistency_subscore"),
                "sequencing_score": probability_result_v2.get("sequencing_subscore"),
                "applied_layers": probability_result_v2.get("applied_layers"),
                "skipped_layers": probability_result_v2.get("skipped_layers"),
                "model_k_prob_source": source,
                "model_k_prob_source_detail": probability_result_v2.get("applied_layers", ""),
                # Pitcher / starter resolution diagnostics.
                "resolved_pitcher_name": pitcher_name,
                "resolved_pitcher_source": resolved_pitcher_source,
                "projected_home_pitcher": projected_starter_context.get("projected_home_pitcher"),
                "projected_away_pitcher": projected_starter_context.get("projected_away_pitcher"),
                "projected_starter_available": projected_starter_context.get("projected_starter_available"),
                "projected_starter_source": projected_starter_context.get("projected_starter_source"),
                "projected_home_pitcher_source": projected_starter_context.get("projected_home_pitcher_source"),
                "projected_away_pitcher_source": projected_starter_context.get("projected_away_pitcher_source"),
                "starter_cache_source": projected_starter_context.get("starter_cache_source"),
                "fallback_used": projected_starter_context.get("fallback_used"),
                "projected_starter_match_status": projected_starter_match_status,
                "pitcher_resolution_status": pitcher_resolution_status,
                "pitcher_team": pitcher_team,
                "opponent_team": opponent_team,
                # Availability flags; the missing reason is only set when the
                # engine produced no fair probability.
                "has_model_probability": fair_prob is not None,
                "has_modeled_edge": edge is not None,
                "model_probability_status": probability_status,
                "modeled_row_available": fair_prob is not None,
                "modeled_row_missing_reason": None if fair_prob is not None else probability_status,
                # Baseline metadata looked up for the resolved pitcher.
                "baseline_mode": pitcher_baseline_meta.get("baseline_mode"),
                "prior_sample_size": pitcher_baseline_meta.get("prior_sample_size"),
                "season_2026_sample_size": pitcher_baseline_meta.get("season_2026_sample_size"),
                "prior_weight": pitcher_baseline_meta.get("prior_weight"),
                "season_2026_weight": pitcher_baseline_meta.get("season_2026_weight"),
                "baseline_driver": pitcher_baseline_meta.get("baseline_driver"),
                "rolling_overlay_active": pitcher_baseline_meta.get("rolling_overlay_active"),
            }
        )
        # The verdict uses the display confidence score stored just above.
        row_dict["verdict"] = _compute_verdict(
            bet_ev=bet_ev,
            edge=edge,
            confidence_score=row_dict.get("confidence_score"),
            is_modeled=is_modeled,
        )
        row_dict.update(build_strikeout_model_voice(row_dict))
        mapped_rows.append(row_dict)
    return pd.DataFrame(mapped_rows)
def map_no_home_run_props(
    props_df: pd.DataFrame,
) -> pd.DataFrame:
    """Return the "no_hr" prop rows annotated as tracked-only (unmodeled).

    No fair-probability model exists for this market, so each row only gets
    its market-implied probability plus placeholder model columns and a
    ``"tracked"`` verdict.

    Args:
        props_df: Sportsbook prop rows. Rows are selected where
            ``market_family`` equals "no_hr" (case-insensitive).

    Returns:
        A copy of the matching rows with the tracking columns added, or an
        empty DataFrame when ``props_df`` is empty, has no ``market_family``
        column, or contains no "no_hr" rows.
    """
    # Without a market_family column no row can be identified as no_hr.
    if props_df.empty or "market_family" not in props_df.columns:
        return pd.DataFrame()
    is_no_hr = props_df["market_family"].astype(str).str.lower() == "no_hr"
    no_hr_df = props_df[is_no_hr].copy()
    if no_hr_df.empty:
        return pd.DataFrame()

    row_count = len(no_hr_df)
    if "odds_american" in no_hr_df.columns:
        odds_values = no_hr_df["odds_american"].tolist()
    else:
        odds_values = [None] * row_count

    # Same best-effort policy as the HR and strikeout mappers: a price that
    # cannot be converted yields no implied probability instead of aborting
    # the whole mapping run.
    implied_probs = []
    for odds in odds_values:
        implied = None
        if odds is not None:
            try:
                implied = american_to_implied_prob(odds)
            except Exception:
                implied = None
        implied_probs.append(implied)

    # Whole-column assignment is positional, so it stays correct when the
    # index carries duplicate labels, and it stores the reasons list in every
    # cell (a list written to a single, not-yet-existing cell is unreliable).
    no_hr_df["selection_scope"] = "game"
    no_hr_df["implied_prob"] = implied_probs
    for placeholder in ("fair_prob", "edge", "bet_ev", "confidence_score", "confidence_bucket"):
        no_hr_df[placeholder] = None
    no_hr_df["confidence_reasons"] = [
        ["No-HR fair probability model not active yet"] for _ in range(row_count)
    ]
    no_hr_df["verdict"] = "tracked"
    no_hr_df["model_voice_for"] = "Market is tracked for future release"
    no_hr_df["model_voice_against"] = "No-HR fair probability model is not active yet"
    return no_hr_df
def map_props_to_models(
    props_df: pd.DataFrame,
    statcast_df: pd.DataFrame,
    pitcher_statcast_df: pd.DataFrame | None = None,
    probable_starters: dict | None = None,
) -> pd.DataFrame:
    """Run every prop mapper and stack their non-empty results.

    The HR mapper, the strikeout mapper and the no-HR mapper are invoked in
    that order on the same ``props_df``. Frames that come back empty are
    dropped; the rest are concatenated with a fresh index.

    Args:
        props_df: Sportsbook prop rows covering all markets.
        statcast_df: Statcast frame used as the batter-side data source.
        pitcher_statcast_df: Optional pitcher-side Statcast frame, forwarded
            to the HR and strikeout mappers.
        probable_starters: Optional probable-starter mapping, forwarded to the
            HR and strikeout mappers.

    Returns:
        The concatenated mapper outputs, or an empty DataFrame when every
        mapper returned an empty frame.
    """
    home_run_rows = map_hr_props_to_model(
        props_df,
        statcast_df,
        pitcher_statcast_df=pitcher_statcast_df,
        probable_starters=probable_starters,
    )
    strikeout_rows = map_strikeout_props_to_model(
        props_df,
        batter_statcast_df=statcast_df,
        pitcher_statcast_df=pitcher_statcast_df,
        probable_starters=probable_starters,
    )
    no_home_run_rows = map_no_home_run_props(props_df)

    # Keep mapper order (HR, K, no-HR) and skip anything that produced no rows.
    populated = [
        mapped
        for mapped in (home_run_rows, strikeout_rows, no_home_run_rows)
        if not mapped.empty
    ]
    if populated:
        return pd.concat(populated, ignore_index=True, sort=False)
    return pd.DataFrame()