2026_MLB_Model / analytics /props_mapper.py
Syntrex's picture
Fix pitcher resolution: add MLB roster fallback for batter team lookup
f2d8d50
raw
history blame
71.1 kB
"""
analytics/props_mapper.py
Maps sportsbook HR prop rows to the shared HR probability engine and computes
edge for the Props page.
"""
from __future__ import annotations
from typing import Any, Callable
import pandas as pd
from analytics.no_vig_props import american_to_implied_prob, compute_bet_ev, compute_edge
from analytics.model_voice import build_hr_model_voice, build_strikeout_model_voice
from data.mlb_starters import lookup_pitchers_for_game, lookup_batter_current_team
from data.odds_name_map import map_odds_name_to_model_name
from models.hr_probability_engine import build_hr_probability_result
from models.pitcher_adjustment import build_pitcher_feature_row
from visualization.cards.player_identity import normalize_for_matching, to_canonical_name
def build_strikeout_probability_result_v2(*args, **kwargs):
    """Forward a call to the v2 strikeout probability engine.

    The engine module is imported inside the function, so the import happens
    when this wrapper is called rather than when this module is loaded. All
    positional and keyword arguments are passed through unchanged and the
    engine's return value is returned as-is.
    """
    from models.strikeout_probability_engine_v2 import (
        build_strikeout_probability_result_v2 as _engine_entry_point,
    )

    return _engine_entry_point(*args, **kwargs)
def _build_statcast_name_index(statcast_df: pd.DataFrame) -> dict[str, str]:
if statcast_df.empty or "player_name" not in statcast_df.columns:
return {}
index: dict[str, str] = {}
for name in statcast_df["player_name"].astype(str).unique():
normalized = map_odds_name_to_model_name(name)
if normalized not in index:
index[normalized] = name
canonical = to_canonical_name(name)
if canonical != name:
canonical_norm = map_odds_name_to_model_name(canonical)
if canonical_norm not in index:
index[canonical_norm] = name
suffix_stripped = normalize_for_matching(canonical)
if suffix_stripped and suffix_stripped not in index:
index[suffix_stripped] = name
return index
def _build_game_context_from_row(row: Any) -> dict[str, Any]:
return {
"away_team": str(row.get("away_team", "") or "").strip(),
"home_team": str(row.get("home_team", "") or "").strip(),
"venue": str(
row.get("venue")
or row.get("stadium")
or row.get("venue_name")
or row.get("park")
or ""
).strip(),
"game_datetime_utc": str(
row.get("game_datetime_utc")
or row.get("commence_time")
or ""
).strip(),
"game_date": str(row.get("game_date", "") or "").strip(),
"lineup_slot": row.get("lineup_slot"),
"lineup_slot_source": row.get("lineup_slot_source"),
"team_total": row.get("team_total"),
"team_total_source": row.get("team_total_source"),
}
def _normalize_team_name(value: Any) -> str:
return " ".join(str(value or "").strip().lower().split())
def _to_display_name(value: Any) -> str:
return str(value or "").strip()
def _normalize_person_name(value: Any) -> str:
    """Return the matching key for a person name.

    The value is stringified (falsy -> ``""``) and stripped, passed through
    ``to_canonical_name``, and the result through ``normalize_for_matching``.
    """
    cleaned = str(value or "").strip()
    canonical = to_canonical_name(cleaned)
    return normalize_for_matching(canonical)
def _names_match(left: Any, right: Any) -> bool:
    """Return True when both names normalize to the same non-empty key."""
    normalized_left = _normalize_person_name(left)
    normalized_right = _normalize_person_name(right)
    if not normalized_left or not normalized_right:
        # An empty key on either side never counts as a match.
        return False
    return bool(normalized_left == normalized_right)
def _compute_verdict(
bet_ev: float | None,
edge: float | None,
confidence_score: float | None,
is_modeled: bool,
) -> str:
if not is_modeled:
return "tracked"
try:
ev = float(bet_ev if bet_ev is not None else -9.0)
ed = float(edge if edge is not None else -9.0)
conf = float(confidence_score if confidence_score is not None else 0.0)
except Exception:
return "pass"
if ev >= 0.05 and ed >= 0.01 and conf >= 62:
return "bet"
if ev >= -0.03 and ed >= -0.01 and conf >= 45:
return "watch"
return "pass"
def _confidence_display_remap(raw_score: float | None) -> float | None:
try:
raw = float(raw_score)
except Exception:
return None
if raw <= 40.0:
return max(1.0, min(100.0, raw))
return max(1.0, min(100.0, 40.0 + ((raw - 40.0) * 1.45)))
def _normalize_confidence_components(value: Any) -> list[dict[str, Any]]:
if not isinstance(value, list):
return []
normalized: list[dict[str, Any]] = []
for item in value:
if not isinstance(item, dict):
continue
label = str(item.get("label") or "").strip()
if not label:
continue
try:
component_value = float(item.get("value") or 0.0)
except Exception:
component_value = 0.0
normalized.append(
{
"label": label,
"value": round(component_value, 1),
"direction": str(item.get("direction") or "").strip().lower() or None,
}
)
return normalized
def _select_confidence_primary_driver(
penalties: list[dict[str, Any]],
bonuses: list[dict[str, Any]],
) -> dict[str, Any] | None:
penalty_candidates = [item for item in penalties if float(item.get("value") or 0.0) > 0.0]
bonus_candidates = [item for item in bonuses if float(item.get("value") or 0.0) > 0.0]
if penalty_candidates:
return max(penalty_candidates, key=lambda item: float(item.get("value") or 0.0))
if bonus_candidates:
return max(bonus_candidates, key=lambda item: float(item.get("value") or 0.0))
return None
def _build_strikeout_confidence_payload(
    probability_result: dict[str, Any],
) -> dict[str, Any]:
    """Assemble the confidence fields for a strikeout prop row.

    Reads the confidence keys from ``probability_result`` and returns a dict
    holding the raw and display-remapped scores (rounded to one decimal), the
    raw and display buckets, the normalized bonus/penalty components, the
    primary driver and its label, the source (default
    ``"strikeout_v2_live"``), and at most the first five reasons.

    The raw score comes from ``confidence_score_raw`` when that key is
    present, otherwise from ``confidence_score``. The display bucket is
    ``"high"`` at 75+, ``"medium"`` at 55+, ``"low"`` below that, and ``None``
    when no display score is available.
    """
    source = str(probability_result.get("confidence_source") or "strikeout_v2_live")
    fallback_score = probability_result.get("confidence_score")
    raw_score = probability_result.get("confidence_score_raw", fallback_score)
    raw_bucket = probability_result.get("confidence_bucket")
    reasons = list(probability_result.get("confidence_reasons") or [])
    bonuses = _normalize_confidence_components(
        probability_result.get("confidence_component_bonuses")
    )
    penalties = _normalize_confidence_components(
        probability_result.get("confidence_component_penalties")
    )

    raw_value = None if raw_score is None else float(raw_score)
    display_value = _confidence_display_remap(raw_value)

    if display_value is None:
        display_bucket = None
    elif display_value >= 75:
        display_bucket = "high"
    elif display_value >= 55:
        display_bucket = "medium"
    else:
        display_bucket = "low"

    primary_driver = _select_confidence_primary_driver(penalties, bonuses)
    driver_label = str((primary_driver or {}).get("label") or "").strip()

    payload: dict[str, Any] = {
        "confidence_score_raw": None if raw_value is None else round(raw_value, 1),
        "confidence_score_display": None if display_value is None else round(display_value, 1),
        "confidence_source": source,
        "confidence_component_bonuses": bonuses,
        "confidence_component_penalties": penalties,
        "confidence_primary_driver": primary_driver,
        "confidence_summary_label": driver_label or None,
        "confidence_bucket_raw": raw_bucket,
        "confidence_bucket_display": display_bucket,
        "confidence_reasons": reasons[:5],
    }
    return payload
def _classify_strikeout_probability_status(
*,
fair_prob: float | None,
implied: float | None,
pitcher_name: str,
probability_result: dict[str, Any],
) -> str:
if fair_prob is not None:
return "modeled_ok" if implied is not None else "missing_implied_prob"
if not str(pitcher_name or "").strip():
return "missing_pitcher_context"
if str(probability_result.get("pitcher_resolution_status") or "").strip().lower() == "unresolved":
return "missing_pitcher_context"
if str(probability_result.get("projected_starter_match_status") or "").strip().lower() == "resolved_pitcher_mismatch":
return "projected_starter_mismatch"
return "empty_probability_result"
def _classify_hr_probability_status(
*,
threshold_int: int,
is_modeled: bool,
model_prob: float | None,
implied: float | None,
probability_result: dict[str, Any],
statcast_df: pd.DataFrame | None,
pitcher_name: str,
) -> str:
if threshold_int != 1 or not is_modeled:
return "unmodeled_ladder"
if model_prob is not None:
return "modeled_ok" if implied is not None else "missing_implied_prob"
if statcast_df is None or statcast_df.empty:
return "missing_baseline"
baseline_prob = probability_result.get("baseline_hr_prob")
pitcher_status = str(probability_result.get("pitcher_resolution_status") or "").strip().lower()
skipped_layers = str(probability_result.get("skipped_layers") or "").strip().lower()
batter_rows_missing = baseline_prob is None
if batter_rows_missing:
return "missing_baseline"
if implied is None:
return "missing_implied_prob"
if not str(pitcher_name or "").strip():
return "missing_pitcher_context"
if pitcher_status in {"pitcher_missing", "unresolved", "matchup_incomplete"}:
return "missing_pitcher_context"
if "pitcher_missing" in skipped_layers or "matchup_incomplete" in skipped_layers:
return "missing_pitcher_context"
if baseline_prob is not None:
return "empty_probability_result"
return "unknown"
def _infer_batter_team(
batter_name: str,
batter_statcast_df: pd.DataFrame,
) -> str:
if (
batter_statcast_df is None
or batter_statcast_df.empty
or not batter_name
or "player_name" not in batter_statcast_df.columns
):
return ""
normalized_target = _normalize_person_name(batter_name)
player_rows = batter_statcast_df[
batter_statcast_df["player_name"].astype(str).map(_normalize_person_name) == normalized_target
].copy()
if player_rows.empty:
return ""
team_values: list[str] = []
if {"inning_topbot", "home_team", "away_team"}.issubset(player_rows.columns):
inning_half = player_rows["inning_topbot"].fillna("").astype(str).str.lower()
top_mask = inning_half.str.contains("top")
bottom_mask = inning_half.str.contains("bot|bottom")
if top_mask.any():
team_values.extend(
player_rows.loc[top_mask, "away_team"].dropna().astype(str).tolist()
)
if bottom_mask.any():
team_values.extend(
player_rows.loc[bottom_mask, "home_team"].dropna().astype(str).tolist()
)
for col in ["team", "batter_team", "team_name"]:
if col in player_rows.columns:
team_values.extend(player_rows[col].dropna().astype(str).tolist())
normalized = [_normalize_team_name(v) for v in team_values if str(v).strip()]
if not normalized:
return ""
return pd.Series(normalized).mode().iloc[0]
def _resolve_batter_team(
    row: Any,
    batter_name: str,
    batter_statcast_df: pd.DataFrame,
) -> tuple[str, str]:
    """Resolve which side of the game a batter plays for.

    Returns ``(team_display_name, source)``. Sources are tried in order:

    1. Team columns already on the row (``row_<key>``).
    2. The team inferred from the batter's Statcast rows with
       ``source_season == 2026`` (``current_season_statcast``).
    3. The team inferred from all of the batter's Statcast rows
       (``historical_statcast``).
    4. ``lookup_batter_current_team`` (``mlb_roster_lookup``).

    Statcast-inferred teams are only accepted when they match the row's away
    or home team. Falls back to ``("", "unresolved")``.
    """
    from_row = _resolve_batter_team_from_row_context(
        row=row,
        batter_name=batter_name,
    )
    if from_row[0]:
        return (from_row[0], from_row[1])

    away_team = _to_display_name(row.get("away_team"))
    home_team = _to_display_name(row.get("home_team"))
    away_norm = _normalize_team_name(away_team)
    home_norm = _normalize_team_name(home_team)

    def _game_side(team_norm: str) -> str:
        # Display name of the game side matching team_norm, or "" if neither.
        if not team_norm:
            return ""
        if team_norm == away_norm and away_team:
            return away_team
        if team_norm == home_norm and home_team:
            return home_team
        return ""

    has_statcast = (
        batter_statcast_df is not None
        and not batter_statcast_df.empty
        and bool(batter_name)
        and "player_name" in batter_statcast_df.columns
    )
    if has_statcast:
        target_key = _normalize_person_name(batter_name)
        name_keys = batter_statcast_df["player_name"].astype(str).map(_normalize_person_name)
        player_rows = batter_statcast_df[name_keys == target_key].copy()

        if not player_rows.empty:
            if "source_season" in player_rows.columns:
                season = pd.to_numeric(player_rows["source_season"], errors="coerce")
                current_rows = player_rows[season == 2026].copy()
                current_side = _game_side(
                    _infer_batter_team(batter_name=batter_name, batter_statcast_df=current_rows)
                )
                if current_side:
                    return (current_side, "current_season_statcast")

            historical_side = _game_side(
                _infer_batter_team(batter_name=batter_name, batter_statcast_df=player_rows)
            )
            if historical_side:
                return (historical_side, "historical_statcast")
            # The historical team matches neither side of this game (the
            # player changed teams); fall through to the roster lookup rather
            # than returning a stale team name.

    # Level 4: current-season MLB roster lookup (handles offseason moves and
    # new players).
    if batter_name:
        roster_team = lookup_batter_current_team(batter_name, away_team or "", home_team or "")
        if roster_team:
            return (roster_team, "mlb_roster_lookup")

    return ("", "unresolved")
def _resolve_batter_team_from_row_context(
    row: Any,
    batter_name: str,
) -> tuple[str, str]:
    """Resolve the batter's team from team columns already on the row.

    The ``batter_team``, ``player_team``, ``team`` and ``team_name`` values
    are checked in that order; the first one whose normalized form equals the
    row's away team (checked first) or home team wins. Returns the matching
    side's display name with source ``row_<key>``, or ``("", "unknown")``.
    ``batter_name`` is accepted but not used.
    """
    away_team = _to_display_name(row.get("away_team"))
    home_team = _to_display_name(row.get("home_team"))
    sides = (
        (away_team, _normalize_team_name(away_team)),
        (home_team, _normalize_team_name(home_team)),
    )

    for key in ("batter_team", "player_team", "team", "team_name"):
        candidate_norm = _normalize_team_name(_to_display_name(row.get(key)))
        for side_name, side_norm in sides:
            # A blank side can never be matched, even by a blank candidate.
            if side_name and candidate_norm == side_norm:
                return (side_name, f"row_{key}")

    return ("", "unknown")
def _infer_lineup_slot(
batter_name: str,
batter_statcast_df: pd.DataFrame,
) -> tuple[int | None, str]:
if (
batter_statcast_df is None
or batter_statcast_df.empty
or not batter_name
or "player_name" not in batter_statcast_df.columns
):
return (None, "unknown")
player_rows = batter_statcast_df[
batter_statcast_df["player_name"].astype(str).str.casefold() == batter_name.casefold()
].copy()
if player_rows.empty:
return (None, "unknown")
for col in ["lineup_slot", "lineup_position", "batting_order", "bat_order"]:
if col not in player_rows.columns:
continue
numeric = pd.to_numeric(player_rows[col], errors="coerce").dropna()
numeric = numeric[(numeric >= 1) & (numeric <= 9)]
if not numeric.empty:
mode = numeric.round().astype(int).mode()
if not mode.empty:
return (int(mode.iloc[0]), "projected")
return (None, "unknown")
def _resolve_pitcher_hand(
pitcher_name: str,
pitcher_statcast_df: pd.DataFrame | None,
) -> tuple[str, str]:
if not pitcher_name or pitcher_statcast_df is None or pitcher_statcast_df.empty:
return ("", "unavailable")
if {"player_name", "p_throws"}.issubset(pitcher_statcast_df.columns):
direct_rows = pitcher_statcast_df[
pitcher_statcast_df["player_name"].astype(str).map(_normalize_person_name) == _normalize_person_name(pitcher_name)
].copy()
if not direct_rows.empty:
direct_hand = str(direct_rows.iloc[0].get("p_throws") or "").strip().upper()
if direct_hand:
return (direct_hand, "statcast_direct")
try:
pitcher_row = build_pitcher_feature_row(
statcast_df=pitcher_statcast_df,
pitcher_name=pitcher_name,
)
hand = str(pitcher_row.get("p_throws") or "").strip().upper()
return (hand, "pitcher_feature_row" if hand else "unavailable")
except Exception:
return ("", "unavailable")
def _resolve_team_total(
row: Any,
batter_team: str,
) -> tuple[float | None, str]:
direct_keys = ["team_total", "implied_team_total", "batter_team_total"]
for key in direct_keys:
value = row.get(key)
try:
if value is not None and str(value).strip() not in {"", "nan", "None"}:
return (float(value), "projected")
except Exception:
continue
away_norm = _normalize_team_name(row.get("away_team"))
home_norm = _normalize_team_name(row.get("home_team"))
batter_team_norm = _normalize_team_name(batter_team)
if batter_team_norm and batter_team_norm == away_norm:
for key in ["away_team_total", "away_implied_total"]:
value = row.get(key)
try:
if value is not None and str(value).strip() not in {"", "nan", "None"}:
return (float(value), "projected")
except Exception:
continue
if batter_team_norm and batter_team_norm == home_norm:
for key in ["home_team_total", "home_implied_total"]:
value = row.get(key)
try:
if value is not None and str(value).strip() not in {"", "nan", "None"}:
return (float(value), "projected")
except Exception:
continue
return (None, "unknown")
def _resolve_pitcher_name(
row: Any,
batter_team: str,
probable_starters: dict | None,
) -> tuple[str, str, str]:
explicit_pitcher = str(
row.get("pitcher_name")
or row.get("pitcher")
or row.get("opposing_pitcher")
or ""
).strip()
away_team = str(row.get("away_team") or "").strip()
home_team = str(row.get("home_team") or "").strip()
if explicit_pitcher and (not away_team or not home_team or not probable_starters):
return (explicit_pitcher, "row_explicit", "resolved")
if not probable_starters:
return ("", "probable_starters_unavailable", "unresolved")
if not away_team or not home_team:
return ("", "matchup_incomplete", "unresolved")
starters = lookup_pitchers_for_game(
away_team=away_team,
home_team=home_team,
starters_map=probable_starters,
)
if not starters:
return ("", "matchup_not_found", "unresolved")
away_norm = _normalize_team_name(away_team)
home_norm = _normalize_team_name(home_team)
batter_team_norm = _normalize_team_name(batter_team)
home_pitcher = str(starters.get("home_pitcher") or "").strip()
away_pitcher = str(starters.get("away_pitcher") or "").strip()
if explicit_pitcher:
if _names_match(home_pitcher, explicit_pitcher) or _names_match(away_pitcher, explicit_pitcher):
return (explicit_pitcher, "row_explicit_validated", "resolved")
if batter_team_norm and batter_team_norm == away_norm:
return (
home_pitcher,
"probable_starters_matchup",
"resolved",
)
if batter_team_norm and batter_team_norm == home_norm:
return (
away_pitcher,
"probable_starters_matchup",
"resolved",
)
if home_pitcher and not away_pitcher:
return (home_pitcher, "probable_starters_single_side", "resolved")
if away_pitcher and not home_pitcher:
return (away_pitcher, "probable_starters_single_side", "resolved")
if explicit_pitcher:
return (explicit_pitcher, "row_explicit_unvalidated", "resolved")
return ("", "batter_team_unresolved", "unresolved")
def _lookup_projected_starter_context(
row: Any,
probable_starters: dict | None,
) -> dict[str, Any]:
away_team = str(row.get("away_team") or "").strip()
home_team = str(row.get("home_team") or "").strip()
out = {
"projected_home_pitcher": "",
"projected_away_pitcher": "",
"projected_starter_available": False,
"projected_starter_source": "probable_starters_unavailable" if not probable_starters else "matchup_incomplete",
"projected_home_pitcher_source": "",
"projected_away_pitcher_source": "",
"starter_cache_source": "probable_starters_unavailable" if not probable_starters else "matchup_incomplete",
"fallback_used": False,
}
if not probable_starters or not away_team or not home_team:
return out
starters = lookup_pitchers_for_game(
away_team=away_team,
home_team=home_team,
starters_map=probable_starters,
)
if not starters:
out["projected_starter_source"] = "matchup_not_found"
return out
projected_home = str(starters.get("home_pitcher") or "").strip()
projected_away = str(starters.get("away_pitcher") or "").strip()
out.update(
{
"projected_home_pitcher": projected_home,
"projected_away_pitcher": projected_away,
"projected_starter_available": bool(projected_home or projected_away),
"projected_starter_source": str(starters.get("starter_cache_source") or "probable_starters_matchup"),
"projected_home_pitcher_source": str(starters.get("home_pitcher_source") or ""),
"projected_away_pitcher_source": str(starters.get("away_pitcher_source") or ""),
"starter_cache_source": str(starters.get("starter_cache_source") or "probable_starters_matchup"),
"fallback_used": bool(starters.get("fallback_used")),
}
)
return out
def _projected_starter_match_status(
resolved_pitcher_name: str,
projected_home_pitcher: str,
projected_away_pitcher: str,
) -> str:
resolved = str(resolved_pitcher_name or "").strip()
if not projected_home_pitcher and not projected_away_pitcher:
return "projected_starter_unavailable"
if not resolved:
return "projected_starter_available_but_unresolved"
if _names_match(projected_home_pitcher, resolved):
return "matched_projected_home"
if _names_match(projected_away_pitcher, resolved):
return "matched_projected_away"
return "resolved_pitcher_mismatch"
def _resolve_pitcher_team_and_opponent(
row: Any,
pitcher_name: str,
probable_starters: dict | None,
) -> tuple[str, str]:
away_team = str(row.get("away_team") or "").strip()
home_team = str(row.get("home_team") or "").strip()
if not away_team or not home_team or not pitcher_name or not probable_starters:
return ("", "")
starters = lookup_pitchers_for_game(
away_team=away_team,
home_team=home_team,
starters_map=probable_starters,
)
if not starters:
return ("", "")
away_pitcher = str(starters.get("away_pitcher") or "").strip()
home_pitcher = str(starters.get("home_pitcher") or "").strip()
if _names_match(away_pitcher, pitcher_name):
return (away_team, home_team)
if _names_match(home_pitcher, pitcher_name):
return (home_team, away_team)
return ("", "")
def _resolve_strikeout_pitcher_name(
    row: Any,
    probable_starters: dict | None,
) -> tuple[str, str, str]:
    """Resolve the pitcher for a strikeout prop, as ``(name, source, status)``.

    The explicit pitcher is the row's ``player_name_raw`` (or ``player_name``).

    * No explicit pitcher and no starters map -> ``missing_pitcher_name``.
    * No starters map, a missing team, or no matchup found -> the explicit
      pitcher as ``row_explicit`` (unresolved when blank).
    * Explicit pitcher matches a projected starter
      -> ``row_explicit_validated``.
    * Exactly one projected starter is known -> that starter as
      ``probable_starters_single_side``.
    * Both projected starters are known but neither matched
      -> ``("", "row_explicit_mismatch", "unresolved")``.
    * Neither projected starter is known -> the explicit pitcher as
      ``row_explicit``.
    """
    explicit_pitcher = _to_display_name(row.get("player_name_raw") or row.get("player_name"))
    away_team = str(row.get("away_team") or "").strip()
    home_team = str(row.get("home_team") or "").strip()

    # Result used whenever the row's own name is all there is to go on.
    explicit_result = (
        explicit_pitcher,
        "row_explicit",
        "resolved" if explicit_pitcher else "unresolved",
    )

    if not explicit_pitcher and not probable_starters:
        return ("", "missing_pitcher_name", "unresolved")
    if not (probable_starters and away_team and home_team):
        return explicit_result

    starters = lookup_pitchers_for_game(
        away_team=away_team,
        home_team=home_team,
        starters_map=probable_starters,
    )
    if not starters:
        return explicit_result

    projected_home = str(starters.get("home_pitcher") or "").strip()
    projected_away = str(starters.get("away_pitcher") or "").strip()

    if explicit_pitcher and (
        _names_match(projected_home, explicit_pitcher)
        or _names_match(projected_away, explicit_pitcher)
    ):
        return (explicit_pitcher, "row_explicit_validated", "resolved")

    known_starters = [name for name in (projected_home, projected_away) if name]
    if len(known_starters) == 1:
        return (known_starters[0], "probable_starters_single_side", "resolved")
    if len(known_starters) == 2:
        return ("", "row_explicit_mismatch", "unresolved")
    return explicit_result
def _extract_team_batters_from_statcast(
team_name: str,
batter_statcast_df: pd.DataFrame | None,
max_players: int = 9,
) -> list[str]:
if (
not team_name
or batter_statcast_df is None
or batter_statcast_df.empty
or "player_name" not in batter_statcast_df.columns
):
return []
team_norm = _normalize_team_name(team_name)
if not team_norm:
return []
working = batter_statcast_df.copy()
if "source_season" in working.columns:
current_rows = working[pd.to_numeric(working["source_season"], errors="coerce") == 2026].copy()
if not current_rows.empty:
working = current_rows
players = (
working.get("player_name", pd.Series(dtype="object"))
.dropna()
.astype(str)
.tolist()
)
if not players:
return []
matched_names: list[str] = []
seen_norms: set[str] = set()
for player_name in players:
inferred_team = _infer_batter_team(player_name, working)
if inferred_team != team_norm:
continue
player_norm = _normalize_person_name(player_name)
if not player_norm or player_norm in seen_norms:
continue
seen_norms.add(player_norm)
matched_names.append(player_name)
if len(matched_names) >= max_players:
break
return matched_names
def _lookup_baseline_metadata(
statcast_df: pd.DataFrame | None,
player_name: str,
) -> dict[str, Any]:
default = {
"baseline_mode": None,
"prior_sample_size": None,
"season_2026_sample_size": None,
"prior_weight": None,
"season_2026_weight": None,
"baseline_driver": None,
"rolling_overlay_active": None,
}
if (
statcast_df is None
or statcast_df.empty
or not player_name
or "player_name" not in statcast_df.columns
):
return default
normalized_target = _normalize_person_name(player_name)
if not normalized_target:
return default
normalized_series = statcast_df["player_name"].astype(str).map(_normalize_person_name)
rows = statcast_df[normalized_series == normalized_target].copy()
if rows.empty:
return default
first_row = rows.iloc[0]
return {
"baseline_mode": first_row.get("baseline_mode"),
"prior_sample_size": first_row.get("prior_sample_size"),
"season_2026_sample_size": first_row.get("season_2026_sample_size"),
"prior_weight": first_row.get("prior_weight"),
"season_2026_weight": first_row.get("season_2026_weight"),
"baseline_driver": first_row.get("baseline_driver"),
"rolling_overlay_active": first_row.get("rolling_overlay_active"),
}
def get_player_hr_prob(
    player_name_normalized: str,
    statcast_df: pd.DataFrame,
    _name_index: dict[str, str] | None = None,
) -> tuple[float | None, str]:
    """
    Backward-compatible wrapper for callers expecting (prob, source).

    The normalized name is translated to its Statcast spelling through
    ``_name_index`` (built from ``statcast_df`` when not supplied); unknown
    names are passed through unchanged. The shared engine is run in
    ``"pregame"`` mode with batter data only. Returns
    ``(calibrated_hr_prob, "shared_pregame_engine")``, or
    ``(None, "unavailable")`` when the engine yields no calibrated value.
    """
    if _name_index is None:
        name_index = _build_statcast_name_index(statcast_df)
    else:
        name_index = _name_index

    statcast_name = name_index.get(player_name_normalized, player_name_normalized)
    engine_result = build_hr_probability_result(
        batter_statcast_df=statcast_df,
        batter_name=statcast_name,
        mode="pregame",
    )

    calibrated = engine_result.get("calibrated_hr_prob")
    if calibrated is not None:
        return (float(calibrated), "shared_pregame_engine")
    return (None, "unavailable")
def map_hr_props_to_model(
props_df: pd.DataFrame,
statcast_df: pd.DataFrame,
prob_fn: Callable[..., Any] | None = None,
pitcher_stats_df: pd.DataFrame | None = None,
pitcher_statcast_df: pd.DataFrame | None = None,
probable_starters: dict | None = None,
) -> pd.DataFrame:
"""
Join HR prop rows to shared-engine HR probabilities and compute edge.
Adds columns:
implied_prob, model_hr_prob, model_hr_prob_source, edge
and shared-engine diagnostics:
baseline_hr_prob, pregame_hr_prob, probability_mode,
component adjustment columns, applied_layers, skipped_layers
"""
del prob_fn
if props_df.empty:
return pd.DataFrame()
hr_df = props_df[props_df["market"] == "hr"].copy()
if hr_df.empty:
return pd.DataFrame()
pitcher_df = (
pitcher_statcast_df
if pitcher_statcast_df is not None
else pitcher_stats_df
if pitcher_stats_df is not None
else statcast_df
)
name_index = _build_statcast_name_index(statcast_df)
runtime_cache: dict[str, Any] = {"name_index": name_index}
projected_starter_cache: dict[tuple[str, str, str], dict[str, Any]] = {}
batter_team_cache: dict[tuple[str, str, str, str], tuple[str, str]] = {}
pitcher_resolution_cache: dict[tuple[str, str, str, str], tuple[str, str, str]] = {}
pitcher_hand_cache: dict[str, tuple[Any, Any]] = {}
baseline_meta_cache: dict[tuple[int, str], dict[str, Any]] = {}
lineup_slot_cache: dict[tuple[str, str, str], tuple[Any, Any]] = {}
team_total_cache: dict[tuple[str, str, str, str], tuple[Any, Any]] = {}
mapped_rows: list[dict[str, Any]] = []
for _, row in hr_df.iterrows():
odds = row.get("odds_american")
batter_name_normalized = str(row.get("player_name") or "").strip()
batter_name = name_index.get(batter_name_normalized, batter_name_normalized)
threshold = row.get("threshold")
try:
threshold_int = int(threshold) if threshold is not None and str(threshold).strip() not in {"", "nan", "None"} else 1
except Exception:
threshold_int = 1
is_modeled = bool(row.get("is_modeled")) if pd.notna(row.get("is_modeled")) else threshold_int == 1
batter_team_key = (
str(row.get("away_team") or "").strip().lower(),
str(row.get("home_team") or "").strip().lower(),
str(batter_name or "").strip().lower(),
str(row.get("event_id") or "").strip(),
)
if batter_team_key not in batter_team_cache:
batter_team_cache[batter_team_key] = _resolve_batter_team(
row=row,
batter_name=batter_name,
batter_statcast_df=statcast_df,
)
batter_team, batter_team_source = batter_team_cache[batter_team_key]
starter_key = (
str(row.get("away_team") or "").strip().lower(),
str(row.get("home_team") or "").strip().lower(),
str(row.get("event_id") or "").strip(),
)
if starter_key not in projected_starter_cache:
projected_starter_cache[starter_key] = _lookup_projected_starter_context(
row=row,
probable_starters=probable_starters,
)
projected_starter_context = projected_starter_cache[starter_key]
pitcher_resolution_key = (
starter_key[0],
starter_key[1],
str(batter_team or "").strip().lower(),
str(row.get("pitcher_name") or row.get("pitcher") or "").strip().lower(),
)
if pitcher_resolution_key not in pitcher_resolution_cache:
pitcher_resolution_cache[pitcher_resolution_key] = _resolve_pitcher_name(
row=row,
batter_team=batter_team,
probable_starters=probable_starters,
)
pitcher_name, resolved_pitcher_source, pitcher_resolution_status = pitcher_resolution_cache[pitcher_resolution_key]
projected_starter_match_status = _projected_starter_match_status(
resolved_pitcher_name=pitcher_name,
projected_home_pitcher=str(projected_starter_context.get("projected_home_pitcher") or ""),
projected_away_pitcher=str(projected_starter_context.get("projected_away_pitcher") or ""),
)
pitcher_hand_key = str(pitcher_name or "").strip().lower()
if pitcher_hand_key not in pitcher_hand_cache:
pitcher_hand_cache[pitcher_hand_key] = _resolve_pitcher_hand(
pitcher_name=pitcher_name,
pitcher_statcast_df=pitcher_df,
)
pitcher_hand, pitcher_hand_source = pitcher_hand_cache[pitcher_hand_key]
batter_meta_key = (id(statcast_df), str(batter_name or "").strip().lower())
if batter_meta_key not in baseline_meta_cache:
baseline_meta_cache[batter_meta_key] = _lookup_baseline_metadata(statcast_df, batter_name)
batter_baseline_meta = baseline_meta_cache[batter_meta_key]
pitcher_meta_key = (id(pitcher_df), str(pitcher_name or "").strip().lower())
if pitcher_meta_key not in baseline_meta_cache:
baseline_meta_cache[pitcher_meta_key] = _lookup_baseline_metadata(pitcher_df, pitcher_name)
pitcher_baseline_meta = baseline_meta_cache[pitcher_meta_key]
lineup_slot_key = (
str(batter_team or "").strip().lower(),
str(batter_name or "").strip().lower(),
str(pitcher_hand or "").strip().upper(),
)
if lineup_slot_key not in lineup_slot_cache:
lineup_slot, lineup_slot_source = _infer_lineup_slot(
batter_name=batter_name,
batter_statcast_df=statcast_df,
)
lineup_slot_cache[lineup_slot_key] = (lineup_slot, lineup_slot_source)
lineup_slot, lineup_slot_source = lineup_slot_cache[lineup_slot_key]
team_total_key = (
str(row.get("away_team") or "").strip().lower(),
str(row.get("home_team") or "").strip().lower(),
str(batter_team or "").strip().lower(),
str(row.get("event_id") or "").strip(),
str(row.get("sportsbook") or "").strip().lower(),
str(row.get("team_total") or row.get("away_team_total") or row.get("home_team_total") or "").strip(),
)
if team_total_key not in team_total_cache:
team_total_cache[team_total_key] = _resolve_team_total(row=row, batter_team=batter_team)
team_total, team_total_source = team_total_cache[team_total_key]
try:
implied = american_to_implied_prob(odds) if odds is not None else None
except Exception:
implied = None
if is_modeled:
probability_result = build_hr_probability_result(
batter_statcast_df=statcast_df,
batter_name=batter_name,
pitcher_statcast_df=pitcher_df,
pitcher_name=pitcher_name,
game_row={
**_build_game_context_from_row(row),
"lineup_slot": lineup_slot,
"lineup_slot_source": lineup_slot_source,
"team_total": team_total,
"team_total_source": team_total_source,
"projected_home_pitcher": projected_starter_context.get("projected_home_pitcher"),
"projected_away_pitcher": projected_starter_context.get("projected_away_pitcher"),
"projected_starter_available": projected_starter_context.get("projected_starter_available"),
"projected_starter_match_status": projected_starter_match_status,
},
weather_row=None,
mode="pregame",
runtime_cache=runtime_cache,
)
model_prob = probability_result.get("calibrated_hr_prob")
if model_prob is not None and implied is not None:
edge = compute_edge(model_prob, implied)
bet_ev = compute_bet_ev(model_prob, odds) if odds is not None else None
source = "shared_pregame_engine"
else:
edge = None
bet_ev = None
source = "unavailable"
else:
probability_result = {
"baseline_hr_prob": None,
"raw_hr_prob": None,
"calibrated_hr_prob": None,
"pregame_hr_prob": None,
"mode": "pregame",
"applied_layers": "",
"skipped_layers": "unmodeled_hr_ladder",
"confidence_score": None,
"confidence_bucket": None,
"confidence_reasons": [],
"opportunity_hr_adjustment": None,
"expected_pa": None,
"pa_multiplier": None,
"lineup_slot_used": lineup_slot,
"lineup_slot_source": lineup_slot_source,
"team_total_used": team_total,
"team_total_source": team_total_source,
"opportunity_mode": None,
"opportunity_reason": None,
"pregame_pitcher_context_adj": None,
"pregame_park_context_adj": None,
"pregame_weather_context_adj": None,
"pregame_context_applied": False,
"pitcher_hr_adjustment": None,
"trend_hr_adjustment": None,
"zone_hr_adjustment": None,
"family_zone_hr_adjustment": None,
"arsenal_hr_adjustment": None,
"pulled_contact_hr_adjustment": None,
"env_hr_adjustment": None,
"park_hr_adjustment": None,
"weather_hr_adjustment": None,
"platoon_hr_adjustment": None,
"trajectory_hr_adjustment": None,
"rolling_hr_adjustment": None,
"pitcher_reliability": None,
"trend_reliability": None,
"zone_reliability": None,
"family_zone_reliability": None,
"arsenal_reliability": None,
"pulled_contact_reliability": None,
"environment_reliability": None,
"trajectory_reliability": None,
"rolling_reliability": None,
"opportunity_reliability": None,
"matchup_platoon_multiplier": None,
"matchup_platoon_reason": "unmodeled_hr_ladder",
"pitcher_resolution_status": "unmodeled_hr_ladder",
"zone_status": "unmodeled_hr_ladder",
"family_zone_status": "unmodeled_hr_ladder",
"arsenal_status": "unmodeled_hr_ladder",
"reason_candidate_count": 0,
"zone_store_sample_size": None,
"family_zone_batter_sample_size": None,
"family_zone_pitcher_sample_size": None,
"arsenal_batter_sample_size": None,
"arsenal_pitcher_sample_size": None,
"model_voice_reason_candidates": [],
"model_voice_tags": [],
}
model_prob = None
edge = None
bet_ev = None
source = "unmodeled_hr_ladder"
probability_status = _classify_hr_probability_status(
threshold_int=threshold_int,
is_modeled=is_modeled,
model_prob=model_prob,
implied=implied,
probability_result=probability_result,
statcast_df=statcast_df,
pitcher_name=pitcher_name,
)
row_dict = row.to_dict()
row_dict.update(
{
"implied_prob": implied,
"model_hr_prob": model_prob,
"fair_prob": model_prob,
"model_hr_prob_source": source,
"model_hr_prob_source_detail": probability_result.get("applied_layers", ""),
"edge": edge,
"bet_ev": bet_ev,
"baseline_hr_prob": probability_result.get("baseline_hr_prob"),
"raw_hr_prob": probability_result.get("raw_hr_prob"),
"calibrated_hr_prob": probability_result.get("calibrated_hr_prob"),
"pregame_hr_prob": probability_result.get("pregame_hr_prob"),
"probability_mode": probability_result.get("mode"),
"formula_version": probability_result.get("formula_version"),
"is_modeled": is_modeled,
"threshold": threshold_int,
"confidence_score": probability_result.get("confidence_score"),
"confidence_bucket": probability_result.get("confidence_bucket"),
"confidence_reasons": probability_result.get("confidence_reasons"),
"opportunity_hr_adjustment": probability_result.get("opportunity_hr_adjustment"),
"expected_pa": probability_result.get("expected_pa"),
"pa_multiplier": probability_result.get("pa_multiplier"),
"lineup_slot_used": probability_result.get("lineup_slot_used", lineup_slot),
"lineup_slot_source": probability_result.get("lineup_slot_source", lineup_slot_source),
"team_total_used": probability_result.get("team_total_used", team_total),
"team_total_source": probability_result.get("team_total_source", team_total_source),
"opportunity_mode": probability_result.get("opportunity_mode"),
"opportunity_reason": probability_result.get("opportunity_reason"),
"pregame_pitcher_context_adj": probability_result.get("pregame_pitcher_context_adj"),
"pregame_park_context_adj": probability_result.get("pregame_park_context_adj"),
"pregame_weather_context_adj": probability_result.get("pregame_weather_context_adj"),
"pregame_context_applied": probability_result.get("pregame_context_applied", False),
"pitcher_hr_adjustment": probability_result.get("pitcher_hr_adjustment"),
"trend_hr_adjustment": probability_result.get("trend_hr_adjustment"),
"zone_hr_adjustment": probability_result.get("zone_hr_adjustment"),
"family_zone_hr_adjustment": probability_result.get("family_zone_hr_adjustment"),
"arsenal_hr_adjustment": probability_result.get("arsenal_hr_adjustment"),
"pulled_contact_hr_adjustment": probability_result.get("pulled_contact_hr_adjustment"),
"env_hr_adjustment": probability_result.get("env_hr_adjustment"),
"park_hr_adjustment": probability_result.get("park_hr_adjustment"),
"weather_hr_adjustment": probability_result.get("weather_hr_adjustment"),
"platoon_hr_adjustment": probability_result.get("platoon_hr_adjustment"),
"trajectory_hr_adjustment": probability_result.get("trajectory_hr_adjustment"),
"rolling_hr_adjustment": probability_result.get("rolling_hr_adjustment"),
"damage_zone_alignment_subscore": probability_result.get("damage_zone_alignment_subscore"),
"pitch_mix_exposure_subscore": probability_result.get("pitch_mix_exposure_subscore"),
"tunnel_damage_subscore": probability_result.get("tunnel_damage_subscore"),
"count_pattern_damage_subscore": probability_result.get("count_pattern_damage_subscore"),
"handedness_damage_subscore": probability_result.get("handedness_damage_subscore"),
"arsenal_fit_subscore": probability_result.get("arsenal_fit_subscore"),
"environment_amplification_subscore": probability_result.get("environment_amplification_subscore"),
"hr_opportunity_projection": probability_result.get("hr_opportunity_projection"),
"matchup_coverage_confidence": probability_result.get("matchup_coverage_confidence"),
"component_source_map": probability_result.get("component_source_map"),
"expected_pitch_mix_by_count": probability_result.get("expected_pitch_mix_by_count"),
"expected_zone_mix_by_count": probability_result.get("expected_zone_mix_by_count"),
"expected_pitch_zone_mix_by_count": probability_result.get("expected_pitch_zone_mix_by_count"),
"tunnel_pair_scores": probability_result.get("tunnel_pair_scores"),
"predicted_attack_regions": probability_result.get("predicted_attack_regions"),
"predicted_damage_regions": probability_result.get("predicted_damage_regions"),
"predicted_whiff_regions": probability_result.get("predicted_whiff_regions"),
"pitcher_reliability": probability_result.get("pitcher_reliability"),
"trend_reliability": probability_result.get("trend_reliability"),
"zone_reliability": probability_result.get("zone_reliability"),
"family_zone_reliability": probability_result.get("family_zone_reliability"),
"arsenal_reliability": probability_result.get("arsenal_reliability"),
"pulled_contact_reliability": probability_result.get("pulled_contact_reliability"),
"environment_reliability": probability_result.get("environment_reliability"),
"trajectory_reliability": probability_result.get("trajectory_reliability"),
"rolling_reliability": probability_result.get("rolling_reliability"),
"opportunity_reliability": probability_result.get("opportunity_reliability"),
"applied_layers": probability_result.get("applied_layers"),
"skipped_layers": probability_result.get("skipped_layers"),
"matchup_platoon_multiplier": probability_result.get("matchup_platoon_multiplier"),
"matchup_platoon_reason": probability_result.get("matchup_platoon_reason"),
"resolved_pitcher_name": pitcher_name,
"projected_home_pitcher": projected_starter_context.get("projected_home_pitcher"),
"projected_away_pitcher": projected_starter_context.get("projected_away_pitcher"),
"projected_starter_available": projected_starter_context.get("projected_starter_available"),
"projected_starter_source": projected_starter_context.get("projected_starter_source"),
"projected_home_pitcher_source": projected_starter_context.get("projected_home_pitcher_source"),
"projected_away_pitcher_source": projected_starter_context.get("projected_away_pitcher_source"),
"starter_cache_source": projected_starter_context.get("starter_cache_source"),
"fallback_used": projected_starter_context.get("fallback_used"),
"projected_starter_match_status": projected_starter_match_status,
"batter_team": batter_team,
"batter_team_source": batter_team_source,
"resolved_pitcher_source": resolved_pitcher_source,
"pitcher_resolution_status": probability_result.get("pitcher_resolution_status", pitcher_resolution_status),
"pitcher_hand": pitcher_hand,
"pitcher_hand_source": pitcher_hand_source,
"zone_status": probability_result.get("zone_status"),
"family_zone_status": probability_result.get("family_zone_status"),
"arsenal_status": probability_result.get("arsenal_status"),
"reason_candidate_count": probability_result.get("reason_candidate_count"),
"zone_store_sample_size": probability_result.get("zone_store_sample_size"),
"family_zone_batter_sample_size": probability_result.get("family_zone_batter_sample_size"),
"family_zone_pitcher_sample_size": probability_result.get("family_zone_pitcher_sample_size"),
"arsenal_batter_sample_size": probability_result.get("arsenal_batter_sample_size"),
"arsenal_pitcher_sample_size": probability_result.get("arsenal_pitcher_sample_size"),
"model_voice_reason_candidates": probability_result.get("model_voice_reason_candidates", []),
"model_voice_tags": probability_result.get("model_voice_tags", []),
"selection_scope": row.get("selection_scope") or "player",
"expected_modeled_hr_row": bool(threshold_int == 1 and str(row.get("market_family") or row.get("market") or "").strip().lower() == "hr"),
"has_model_probability": model_prob is not None,
"has_modeled_edge": edge is not None,
"model_probability_status": probability_status,
"modeled_row_available": model_prob is not None,
"modeled_row_missing_reason": None if model_prob is not None else probability_status,
"baseline_mode": batter_baseline_meta.get("baseline_mode"),
"prior_sample_size": batter_baseline_meta.get("prior_sample_size"),
"season_2026_sample_size": batter_baseline_meta.get("season_2026_sample_size"),
"prior_weight": batter_baseline_meta.get("prior_weight"),
"season_2026_weight": batter_baseline_meta.get("season_2026_weight"),
"baseline_driver": batter_baseline_meta.get("baseline_driver"),
"rolling_overlay_active": batter_baseline_meta.get("rolling_overlay_active"),
"pitcher_baseline_mode": pitcher_baseline_meta.get("baseline_mode"),
"pitcher_prior_sample_size": pitcher_baseline_meta.get("prior_sample_size"),
"pitcher_season_2026_sample_size": pitcher_baseline_meta.get("season_2026_sample_size"),
"pitcher_prior_weight": pitcher_baseline_meta.get("prior_weight"),
"pitcher_season_2026_weight": pitcher_baseline_meta.get("season_2026_weight"),
"pitcher_baseline_driver": pitcher_baseline_meta.get("baseline_driver"),
"pitcher_rolling_overlay_active": pitcher_baseline_meta.get("rolling_overlay_active"),
}
)
row_dict["verdict"] = _compute_verdict(
bet_ev=bet_ev,
edge=edge,
confidence_score=row_dict.get("confidence_score"),
is_modeled=is_modeled,
)
row_dict.update(build_hr_model_voice(row_dict))
mapped_rows.append(row_dict)
result = pd.DataFrame(mapped_rows)
if result.empty:
return result
has_edge = result["edge"].notna()
with_edge = result[has_edge].sort_values("edge", ascending=False)
without_edge = result[~has_edge]
ordered = pd.concat([with_edge, without_edge], ignore_index=True)
try:
from analytics.execution_layer import enrich_with_execution_layer
return enrich_with_execution_layer(ordered)
except Exception:
return ordered
def map_strikeout_props_to_model(
    props_df: pd.DataFrame,
    batter_statcast_df: pd.DataFrame,
    pitcher_statcast_df: pd.DataFrame | None = None,
    probable_starters: dict | None = None,
) -> pd.DataFrame:
    """Map pitcher strikeout ("k" market) prop rows onto the v2 strikeout engine.

    For every row whose ``market`` equals "k" (case-insensitive) this resolves
    the projected starters, the pitcher, the pitcher/opponent teams and the
    opponent batters, asks ``build_strikeout_probability_result_v2`` for a fair
    probability, and derives implied probability, edge, bet EV, confidence
    fields, a verdict and model-voice text.  Helper lookups are memoized in
    per-call dictionaries so rows that share a key reuse the earlier result.

    Args:
        props_df: Sportsbook prop rows; must contain a ``market`` column when
            it is non-empty.
        batter_statcast_df: Batter Statcast frame handed to the lineup lookup
            and to the engine.
        pitcher_statcast_df: Pitcher Statcast frame; ``batter_statcast_df`` is
            used in its place when this is None.
        probable_starters: Optional starter mapping, forwarded unchanged to
            the resolution helpers.

    Returns:
        One output row per strikeout prop row (the original columns plus the
        model fields), in input order.  An empty DataFrame is returned when
        ``props_df`` is empty or contains no strikeout rows.
    """
    if props_df.empty:
        return pd.DataFrame()
    is_strikeout_market = props_df["market"].astype(str).str.lower() == "k"
    k_df = props_df[is_strikeout_market].copy()
    if k_df.empty:
        return pd.DataFrame()

    pitcher_df = batter_statcast_df if pitcher_statcast_df is None else pitcher_statcast_df

    def _norm(value: Any) -> str:
        # Shared cache-key normalisation: falsy -> "", then trim and lowercase.
        return str(value or "").strip().lower()

    def _memoized(cache: dict, key: Any, compute: Callable[[], Any]) -> Any:
        # Return cache[key], computing and storing it on first use only.
        try:
            return cache[key]
        except KeyError:
            cache[key] = compute()
            return cache[key]

    # Keys copied verbatim (same name in, same name out).  Tuple order is the
    # output column order, so keep it stable.
    starter_keys = (
        "projected_home_pitcher",
        "projected_away_pitcher",
        "projected_starter_available",
        "projected_starter_source",
        "projected_home_pitcher_source",
        "projected_away_pitcher_source",
        "starter_cache_source",
        "fallback_used",
    )
    confidence_keys = (
        "confidence_reasons",
        "confidence_score_raw",
        "confidence_score_display",
        "confidence_source",
        "confidence_component_bonuses",
        "confidence_component_penalties",
        "confidence_primary_driver",
        "confidence_summary_label",
        "confidence_bucket_raw",
        "confidence_bucket_display",
    )
    engine_keys = (
        "expected_strikeouts",
        "expected_strikeouts_v2",
        "projected_pitch_count",
        "projected_batters_faced",
        "projected_innings",
        "pitches_per_bf",
        "opportunity_confidence",
        "opportunity_reasons",
        "projected_k_rate",
        "fair_prob_v2",
        "raw_k_prob_v2",
        "calibrated_k_prob_v2",
        "confidence_score_v2",
        "confidence_score_raw_v2",
        "confidence_score_display_v2",
        "confidence_source_v2",
        "confidence_bucket_v2",
        "confidence_reasons_v2",
        "confidence_component_bonuses_v2",
        "confidence_component_penalties_v2",
        "confidence_primary_driver_v2",
        "confidence_summary_label_v2",
        "k_rate_pitch_signal",
        "k_rate_anchor",
        "bb_rate_anchor",
        "command_efficiency_signal",
        "swing_miss_subscore",
        "called_strike_subscore",
        "command_efficiency_subscore",
        "lineup_whiff_subscore",
        "zone_matchup_subscore",
        "family_zone_matchup_subscore",
        "arsenal_fit_subscore",
        "tunneling_subscore",
        "release_consistency_subscore",
        "sequencing_subscore",
        "count_leverage_subscore",
        "leash_risk_subscore",
        "role_certainty_score",
        "times_through_order_penalty",
        "telemetry_path_status",
        "model_tier",
        "variance_band_low",
        "variance_band_high",
        "matchup_coverage_confidence",
        "component_source_map",
        "predicted_whiff_regions",
        "predicted_attack_regions",
        "predicted_damage_regions",
        "tunnel_pair_scores",
        "formula_version",
        "pitcher_swstr_rate",
        "pitcher_csw_rate",
        "pitcher_ball_rate",
    )
    # (output column, engine result key) pairs exposed under a second name.
    engine_aliases = (
        ("arsenal_whiff_risk", "arsenal_fit_subscore"),
        ("family_zone_whiff_risk", "family_zone_matchup_subscore"),
        ("zone_whiff_risk", "zone_matchup_subscore"),
        ("trajectory_tunnel_score", "tunneling_subscore"),
        ("trajectory_release_consistency_score", "release_consistency_subscore"),
        ("sequencing_score", "sequencing_subscore"),
    )
    baseline_keys = (
        "baseline_mode",
        "prior_sample_size",
        "season_2026_sample_size",
        "prior_weight",
        "season_2026_weight",
        "baseline_driver",
        "rolling_overlay_active",
    )

    runtime_cache: dict[str, Any] = {}
    starter_context_cache: dict[tuple[str, str, str], dict[str, Any]] = {}
    resolved_pitcher_cache: dict[tuple[str, str, str], tuple[str, str, str]] = {}
    team_pair_cache: dict[tuple[str, str, str], tuple[str, str]] = {}
    hand_cache: dict[str, tuple[Any, Any]] = {}
    baseline_cache: dict[tuple[int, str], dict[str, Any]] = {}
    opponent_lineup_cache: dict[str, list[str]] = {}
    engine_cache: dict[tuple[Any, ...], dict[str, Any]] = {}
    mapped_rows: list[dict[str, Any]] = []

    for _, row in k_df.iterrows():
        line = row.get("line")
        selection_side = _norm(row.get("selection_side"))

        # Odds that cannot be converted leave the row without an implied price.
        odds_american = row.get("odds_american")
        implied = None
        if odds_american is not None:
            try:
                implied = american_to_implied_prob(odds_american)
            except Exception:
                implied = None

        away_key = _norm(row.get("away_team"))
        home_key = _norm(row.get("home_team"))
        # The event id is trimmed but, unlike the team names, not lowercased.
        starter_key = (away_key, home_key, str(row.get("event_id") or "").strip())
        starter_context = _memoized(
            starter_context_cache,
            starter_key,
            lambda: _lookup_projected_starter_context(
                row=row,
                probable_starters=probable_starters,
            ),
        )

        pitcher_key = (
            away_key,
            home_key,
            _norm(row.get("pitcher_name") or row.get("pitcher") or row.get("player_name")),
        )
        pitcher_name, resolved_pitcher_source, pitcher_resolution_status = _memoized(
            resolved_pitcher_cache,
            pitcher_key,
            lambda: _resolve_strikeout_pitcher_name(
                row=row,
                probable_starters=probable_starters,
            ),
        )
        pitcher_team, opponent_team = _memoized(
            team_pair_cache,
            pitcher_key,
            lambda: _resolve_pitcher_team_and_opponent(
                row=row,
                pitcher_name=pitcher_name,
                probable_starters=probable_starters,
            ),
        )

        starter_match_status = _projected_starter_match_status(
            resolved_pitcher_name=pitcher_name,
            projected_home_pitcher=str(starter_context.get("projected_home_pitcher") or ""),
            projected_away_pitcher=str(starter_context.get("projected_away_pitcher") or ""),
        )

        pitcher_name_key = _norm(pitcher_name)
        # NOTE(review): the resolved hand is unpacked here but not read again in
        # this function; the lookup is kept so behaviour stays unchanged.
        _pitcher_hand, _ = _memoized(
            hand_cache,
            pitcher_name_key,
            lambda: _resolve_pitcher_hand(pitcher_name=pitcher_name, pitcher_statcast_df=pitcher_df),
        )
        pitcher_baseline = _memoized(
            baseline_cache,
            (id(pitcher_df), pitcher_name_key),
            lambda: _lookup_baseline_metadata(pitcher_df, pitcher_name),
        )
        opponent_key = _norm(opponent_team)
        opponent_batters = _memoized(
            opponent_lineup_cache,
            opponent_key,
            lambda: _extract_team_batters_from_statcast(
                team_name=opponent_team,
                batter_statcast_df=batter_statcast_df,
            ),
        )

        # Game context handed to the engine: row teams plus starter/pitcher resolution.
        game_context = _build_game_context_from_row(row)
        game_context.update((key, starter_context.get(key)) for key in starter_keys)
        game_context.update(
            {
                "projected_starter_match_status": starter_match_status,
                "resolved_pitcher_name": pitcher_name,
                "resolved_pitcher_source": resolved_pitcher_source,
                "pitcher_resolution_status": pitcher_resolution_status,
                "pitcher_team": pitcher_team,
                "opponent_team": opponent_team,
            }
        )

        if line is None or str(line).strip() in {"", "nan", "None"}:
            line_value = None
        else:
            line_value = float(line)

        engine_key = (
            pitcher_name_key,
            tuple(_norm(name) for name in opponent_batters),
            opponent_key,
            line_value,
            _norm(selection_side),
            _norm(game_context.get("away_team")),
            _norm(game_context.get("home_team")),
            _norm(game_context.get("projected_starter_match_status")),
        )
        engine_result = _memoized(
            engine_cache,
            engine_key,
            lambda: build_strikeout_probability_result_v2(
                pitcher_statcast_df=pitcher_df,
                pitcher_name=pitcher_name,
                batter_statcast_df=batter_statcast_df,
                opponent_batters=opponent_batters,
                opponent_team=opponent_team,
                line=line_value,
                selection_side=selection_side,
                game_row=game_context,
                runtime_cache=runtime_cache,
            ),
        )
        confidence_payload = _build_strikeout_confidence_payload(
            probability_result=engine_result,
        )
        fair_prob = engine_result.get("fair_prob")
        probability_status = _classify_strikeout_probability_status(
            fair_prob=fair_prob,
            implied=implied,
            pitcher_name=pitcher_name,
            probability_result={
                **engine_result,
                "pitcher_resolution_status": pitcher_resolution_status,
                "projected_starter_match_status": starter_match_status,
            },
        )

        # A row counts as modeled only when both sides of the edge exist.
        is_modeled = fair_prob is not None and implied is not None
        if is_modeled:
            edge = compute_edge(fair_prob, implied)
            bet_ev = compute_bet_ev(fair_prob, odds_american) if odds_american is not None else None
            source = "shared_strikeout_engine_v2"
        else:
            edge = None
            bet_ev = None
            source = "unavailable"

        has_fair_prob = fair_prob is not None
        model_fields: dict[str, Any] = {
            "selection_scope": row.get("selection_scope") or "pitcher",
            "is_modeled": is_modeled,
            "implied_prob": implied,
            "fair_prob": fair_prob,
            "model_k_prob": fair_prob,
            "bet_ev": bet_ev,
            "edge": edge,
            "confidence_score": confidence_payload.get("confidence_score_display"),
            "confidence_bucket": confidence_payload.get("confidence_bucket_display"),
        }
        model_fields.update((key, confidence_payload.get(key)) for key in confidence_keys)
        model_fields.update((key, engine_result.get(key)) for key in engine_keys)
        model_fields.update(
            (column, engine_result.get(engine_key_name)) for column, engine_key_name in engine_aliases
        )
        model_fields.update(
            {
                "applied_layers": engine_result.get("applied_layers"),
                "skipped_layers": engine_result.get("skipped_layers"),
                "model_k_prob_source": source,
                "model_k_prob_source_detail": engine_result.get("applied_layers", ""),
                "resolved_pitcher_name": pitcher_name,
                "resolved_pitcher_source": resolved_pitcher_source,
            }
        )
        model_fields.update((key, starter_context.get(key)) for key in starter_keys)
        model_fields.update(
            {
                "projected_starter_match_status": starter_match_status,
                "pitcher_resolution_status": pitcher_resolution_status,
                "pitcher_team": pitcher_team,
                "opponent_team": opponent_team,
                "has_model_probability": has_fair_prob,
                "has_modeled_edge": edge is not None,
                "model_probability_status": probability_status,
                "modeled_row_available": has_fair_prob,
                "modeled_row_missing_reason": None if has_fair_prob else probability_status,
            }
        )
        model_fields.update((key, pitcher_baseline.get(key)) for key in baseline_keys)

        row_dict = row.to_dict()
        row_dict.update(model_fields)
        row_dict["verdict"] = _compute_verdict(
            bet_ev=bet_ev,
            edge=edge,
            confidence_score=row_dict.get("confidence_score"),
            is_modeled=is_modeled,
        )
        row_dict.update(build_strikeout_model_voice(row_dict))
        mapped_rows.append(row_dict)

    return pd.DataFrame(mapped_rows)
def map_no_home_run_props(
    props_df: pd.DataFrame,
) -> pd.DataFrame:
    """Annotate No-HR market rows as tracked-only (no fair probability yet).

    Rows whose ``market_family`` equals "no_hr" (case-insensitive) are copied
    and given an implied probability from their American odds plus placeholder
    model columns, a "tracked" verdict and fixed model-voice text.

    Args:
        props_df: Sportsbook prop rows; must contain a ``market_family``
            column when it is non-empty.  The input frame is not modified.

    Returns:
        A copy of the No-HR rows (original index preserved) with the added
        columns, or an empty DataFrame when there are no such rows.
    """
    if props_df.empty:
        return pd.DataFrame()
    no_hr_df = props_df[props_df["market_family"].astype(str).str.lower() == "no_hr"].copy()
    if no_hr_df.empty:
        return pd.DataFrame()

    def _implied_or_none(odds: Any) -> Any:
        # Same best-effort policy as the strikeout mapper: a price that cannot
        # be converted leaves the row without an implied probability instead
        # of aborting the whole mapping.
        if odds is None:
            return None
        try:
            return american_to_implied_prob(odds)
        except Exception:
            return None

    row_count = len(no_hr_df)
    if "odds_american" in no_hr_df.columns:
        implied_values = [_implied_or_none(odds) for odds in no_hr_df["odds_american"]]
    else:
        implied_values = [None] * row_count

    # Whole-column assignment (rather than per-cell ``.at`` writes) keeps
    # ``confidence_reasons`` a one-item list in every row and does not depend
    # on the index labels being unique.
    not_modeled = float("nan")
    no_hr_df["selection_scope"] = "game"
    no_hr_df["implied_prob"] = pd.Series(implied_values, dtype="float64").to_numpy()
    # Numeric placeholders are stored as NaN so these columns stay numeric.
    for column in ("fair_prob", "edge", "bet_ev", "confidence_score"):
        no_hr_df[column] = not_modeled
    no_hr_df["confidence_bucket"] = None
    no_hr_df["confidence_reasons"] = [
        ["No-HR fair probability model not active yet"] for _ in range(row_count)
    ]
    no_hr_df["verdict"] = "tracked"
    no_hr_df["model_voice_for"] = "Market is tracked for future release"
    no_hr_df["model_voice_against"] = "No-HR fair probability model is not active yet"
    return no_hr_df
def map_props_to_models(
    props_df: pd.DataFrame,
    statcast_df: pd.DataFrame,
    pitcher_statcast_df: pd.DataFrame | None = None,
    probable_starters: dict | None = None,
) -> pd.DataFrame:
    """Run every market mapper over ``props_df`` and stack the results.

    The HR, strikeout and No-HR mappers are called in that order; each
    non-empty result is kept and the kept frames are concatenated with a
    fresh index and without sorting columns.

    Args:
        props_df: Sportsbook prop rows passed to all three mappers.
        statcast_df: Statcast frame given to the HR mapper and, as the batter
            frame, to the strikeout mapper.
        pitcher_statcast_df: Optional pitcher Statcast frame forwarded to the
            HR and strikeout mappers.
        probable_starters: Optional starter mapping forwarded to the HR and
            strikeout mappers.

    Returns:
        The concatenated mapped rows, or an empty DataFrame when every mapper
        returned an empty frame.
    """
    shared_kwargs = {
        "pitcher_statcast_df": pitcher_statcast_df,
        "probable_starters": probable_starters,
    }
    # Tuple order fixes both the call order and the row order of the output.
    market_frames = (
        map_hr_props_to_model(props_df, statcast_df, **shared_kwargs),
        map_strikeout_props_to_model(props_df, batter_statcast_df=statcast_df, **shared_kwargs),
        map_no_home_run_props(props_df),
    )
    populated = [frame for frame in market_frames if not frame.empty]
    if populated:
        return pd.concat(populated, ignore_index=True, sort=False)
    return pd.DataFrame()