2026_MLB_Model / analytics /recommendation_engine.py
Syntrex's picture
Merge remote: resolve mlb_starters.py conflict — keep conn param + remote team canonical map
abe5c7e
raw
history blame
9.96 kB
from __future__ import annotations
import pandas as pd
from analytics.confidence import compute_confidence
from analytics.recommendation_rules import apply_recommendation_rules
from analytics.no_vig_props import american_to_implied_prob, compute_bet_ev, compute_edge, kelly_fraction, remove_vig_single_side
from models.fair_odds import probability_to_american
from models.live_fair_simulator_v3 import build_upcoming_simulated_rows
from models.opportunity_model import estimate_plate_appearance_probability
from utils.logger import logger
def _lineup_distance_from_slot(slot: str) -> int:
s = str(slot or "").strip().lower()
if s in {"current", "current batter", "current_batter", "now"}:
return 0
if s in {"on deck", "on_deck", "ondeck", "on-deck"}:
return 1
if s in {"in hole", "in_hole", "inhole", "in-hole"}:
return 2
if s in {"3 away", "three away", "3_away", "three_away", "three-away", "3-away"}:
return 3
return 0
def _apply_opportunity_badges(recommendations: list[dict]) -> list[dict]:
if not recommendations:
return recommendations
rows = [dict(r) for r in recommendations]
for row in rows:
row["opportunity_badges"] = []
def _best_row_index(metric: str) -> int | None:
best_idx = None
best_val = None
for idx, row in enumerate(rows):
value = row.get(metric)
try:
numeric_value = float(value)
except Exception:
continue
if best_val is None or numeric_value > best_val:
best_val = numeric_value
best_idx = idx
return best_idx
best_overall_idx = _best_row_index("priority_score")
best_hr_idx = _best_row_index("hr_edge")
best_hit_idx = _best_row_index("hit_edge")
best_tb_idx = _best_row_index("tb2p_edge")
if best_overall_idx is not None:
rows[best_overall_idx]["opportunity_badges"].append("BEST OVERALL")
if best_hr_idx is not None:
rows[best_hr_idx]["opportunity_badges"].append("BEST HR EDGE")
if best_hit_idx is not None:
rows[best_hit_idx]["opportunity_badges"].append("BEST HIT EDGE")
if best_tb_idx is not None:
rows[best_tb_idx]["opportunity_badges"].append("BEST TB EDGE")
for row in rows:
tier = str(row.get("recommendation_tier", "") or "").strip().lower()
if tier == "bet":
row["opportunity_badges"].append("BET")
elif tier == "watch":
row["opportunity_badges"].append("WATCH")
# de-duplicate while preserving order
seen = set()
deduped = []
for badge in row["opportunity_badges"]:
if badge in seen:
continue
seen.add(badge)
deduped.append(badge)
row["opportunity_badges"] = deduped[:3]
return rows
def build_upcoming_hitter_recommendations(
    game_row: dict,
    statcast_df: pd.DataFrame,
    pitcher_statcast_df: pd.DataFrame | None = None,
    odds_df: pd.DataFrame | None = None,
    prop_odds_df: pd.DataFrame | None = None,
    weather_row: dict | None = None,
) -> list[dict]:
    """
    Decision-layer wrapper.
    Uses simulated fair rows, then applies:
    1) opportunity adjustment
    2) confidence
    3) recommendation tier
    Suppresses low-value PASS rows unless nothing else exists.

    Args:
        game_row: Live-game state dict; only ``outs`` is read directly here
            (default 0), the rest is passed through to the simulator and the
            confidence model.
        statcast_df: Batter statcast frame forwarded to the simulator.
        pitcher_statcast_df: Optional pitcher statcast frame for the simulator.
        odds_df: Unused in this function (kept for interface compatibility —
            NOTE(review): confirm callers still pass it).
        prop_odds_df: Optional live prop-odds frame; expected to carry
            ``market``, ``player_name`` and ``odds_american`` columns —
            assumed schema, verify against the odds fetcher.
        weather_row: Optional weather dict forwarded to the simulator.

    Returns:
        A list of recommendation dicts: only BET/WATCH rows when any exist,
        otherwise just the single top-of-lineup row.
    """
    rows = build_upcoming_simulated_rows(
        game_row=game_row,
        statcast_df=statcast_df,
        pitcher_statcast_df=pitcher_statcast_df,
        weather_row=weather_row,
    )
    # Build lookup: normalized_player_name → best HR american odds from real prop feed
    _prop_odds_lookup: dict[str, int] = {}
    if prop_odds_df is not None and not prop_odds_df.empty:
        try:
            from data.odds_name_map import map_odds_name_to_model_name
            # Filter to HR markets only when a market column exists;
            # otherwise assume the whole frame is HR props.
            hr_props = (
                prop_odds_df[prop_odds_df["market"].isin(["batter_home_runs", "hr"])]
                if "market" in prop_odds_df.columns
                else prop_odds_df
            )
            if not hr_props.empty and "odds_american" in hr_props.columns and "player_name" in hr_props.columns:
                # Explicit sort: MAX(odds_american) per player = best price for bettor
                best_hr = (
                    hr_props
                    .sort_values("odds_american", ascending=False)
                    .drop_duplicates(subset=["player_name"])
                )
                for _, prow in best_hr.iterrows():
                    norm_name = map_odds_name_to_model_name(str(prow.get("player_name") or ""))
                    odds_val = prow.get("odds_american")
                    if norm_name and odds_val is not None:
                        try:
                            # float() first so strings like "450.0" still parse.
                            _prop_odds_lookup[norm_name] = int(float(odds_val))
                        except (TypeError, ValueError):
                            pass
        except Exception as exc:
            # Best-effort: a broken odds feed must not block recommendations.
            logger.warning("[prop_odds_lookup] build failure: %s", exc)
    recommendations: list[dict] = []
    for row in rows:
        # Inject real book HR odds if available; fall back to simulator placeholder
        if _prop_odds_lookup:
            from data.odds_name_map import map_odds_name_to_model_name
            _norm_batter = map_odds_name_to_model_name(str(row.get("batter_name") or ""))
            _real_hr_odds = _prop_odds_lookup.get(_norm_batter)
            # Fallback: raw name match if normalized mapping misses
            if _real_hr_odds is None:
                _real_hr_odds = _prop_odds_lookup.get(str(row.get("batter_name") or ""))
                if _real_hr_odds is not None:
                    # Tag so downstream can see the name map needs an entry.
                    row["book_hr_odds_source"] = "live_feed_unmapped"
            if _real_hr_odds is not None:
                row["book_hr_odds"] = _real_hr_odds
                # setdefault keeps "live_feed_unmapped" when the raw fallback hit.
                row.setdefault("book_hr_odds_source", "live_feed")
            else:
                row.setdefault("book_hr_odds_source", "placeholder")
                if prop_odds_df is not None and not prop_odds_df.empty:
                    logger.warning(
                        "[prop_odds_mapping_miss] batter=%s",
                        row.get("batter_name"),
                    )
        else:
            row.setdefault("book_hr_odds_source", "placeholder")
        slot = row.get("slot", "Current")
        lineup_distance = _lineup_distance_from_slot(slot)
        opportunity = estimate_plate_appearance_probability(
            outs=game_row.get("outs", 0),
            lineup_distance=lineup_distance,
        )
        # "or 1.0" guards against an explicit None/0 in the opportunity dict.
        expected_pa = float(opportunity.get("expected_pa", 1.0) or 1.0)
        # Apply opportunity adjustment to the simulated probabilities
        for prob_col in ["hit_prob", "hr_prob", "tb2p_prob"]:
            if prob_col in row and row.get(prob_col) is not None:
                try:
                    raw_prob = float(row.get(prob_col))
                    # Scale by expected PAs, then clamp into (0.001, 0.95).
                    row[prob_col] = min(0.95, max(0.001, raw_prob * expected_pa))
                except Exception as e:
                    logger.warning(f"[prob_opportunity_adjust] failure: {e}", exc_info=True)
        # Recalculate fair odds and edges after probability adjustment
        if row.get("hit_prob") is not None:
            row["fair_hit_odds"] = probability_to_american(row["hit_prob"])
        if row.get("hr_prob") is not None:
            row["fair_hr_odds"] = probability_to_american(row["hr_prob"])
        if row.get("tb2p_prob") is not None:
            row["fair_tb2p_odds"] = probability_to_american(row["tb2p_prob"])
        # Each market's edge/EV is computed best-effort; a missing book price
        # or prob raises inside the try and only skips that one market.
        try:
            book_hit_odds = float(row.get("book_hit_odds"))
            row["hit_edge"] = compute_edge(row["hit_prob"], american_to_implied_prob(book_hit_odds))
            row["hit_bet_ev"] = compute_bet_ev(row["hit_prob"], int(book_hit_odds))
        except Exception as e:
            logger.warning(f"[hit_edge_compute] failure: {e}", exc_info=True)
        try:
            book_hr_odds = float(row.get("book_hr_odds"))
            # HR props are single-sided markets — de-vig with flat margin, not two-way normalization
            hr_book_prob_novig = remove_vig_single_side(american_to_implied_prob(book_hr_odds))
            row["hr_edge"] = compute_edge(row["hr_prob"], hr_book_prob_novig)
            row["hr_bet_ev"] = compute_bet_ev(row["hr_prob"], int(book_hr_odds))
            # Kelly sizing only for the HR market (the headline bet here).
            row["kelly_pct"] = kelly_fraction(row["hr_prob"], int(book_hr_odds))
        except Exception as e:
            logger.warning(f"[hr_edge_compute] failure: {e}", exc_info=True)
        try:
            book_tb2p_odds = float(row.get("book_tb2p_odds"))
            row["tb2p_edge"] = compute_edge(row["tb2p_prob"], american_to_implied_prob(book_tb2p_odds))
            row["tb2p_bet_ev"] = compute_bet_ev(row["tb2p_prob"], int(book_tb2p_odds))
        except Exception as e:
            logger.warning(f"[tb2p_edge_compute] failure: {e}", exc_info=True)
        # Carry diagnostics forward
        row["lineup_distance"] = lineup_distance
        row["pa_prob_this_inning"] = opportunity.get("pa_prob_this_inning")
        row["pa_prob_next_two_innings"] = opportunity.get("pa_prob_next_two_innings")
        row["expected_pa"] = expected_pa
        confidence_block = compute_confidence(row, game_row=game_row)
        row.update(confidence_block)
        rules_block = apply_recommendation_rules(row)
        row.update(rules_block)
        recommendations.append(row)
    # Preserve lineup-order display, but compute badges from model outputs.
    recommendations = sorted(
        recommendations,
        key=lambda x: _lineup_distance_from_slot(x.get("slot", "")),
    )
    recommendations = _apply_opportunity_badges(recommendations)
    # Surface only actionable tiers; if nothing is actionable, still return
    # the single nearest batter so the UI is never empty.
    surfaced = [
        row for row in recommendations
        if str(row.get("recommendation_tier", "")).lower() in {"bet", "watch"}
    ]
    if surfaced:
        return surfaced
    return recommendations[:1]