2026_MLB_Model / analytics /recommendation_engine.py
Syntrex's picture
Merge remote: resolve mlb_starters.py conflict — keep conn param + remote team canonical map
abe5c7e
raw
history blame
9.96 kB
from __future__ import annotations
import pandas as pd
from analytics.confidence import compute_confidence
from analytics.recommendation_rules import apply_recommendation_rules
from analytics.no_vig_props import american_to_implied_prob, compute_bet_ev, compute_edge, kelly_fraction, remove_vig_single_side
from models.fair_odds import probability_to_american
from models.live_fair_simulator_v3 import build_upcoming_simulated_rows
from models.opportunity_model import estimate_plate_appearance_probability
from utils.logger import logger
def _lineup_distance_from_slot(slot: str) -> int:
s = str(slot or "").strip().lower()
if s in {"current", "current batter", "current_batter", "now"}:
return 0
if s in {"on deck", "on_deck", "ondeck", "on-deck"}:
return 1
if s in {"in hole", "in_hole", "inhole", "in-hole"}:
return 2
if s in {"3 away", "three away", "3_away", "three_away", "three-away", "3-away"}:
return 3
return 0
def _apply_opportunity_badges(recommendations: list[dict]) -> list[dict]:
if not recommendations:
return recommendations
rows = [dict(r) for r in recommendations]
for row in rows:
row["opportunity_badges"] = []
def _best_row_index(metric: str) -> int | None:
best_idx = None
best_val = None
for idx, row in enumerate(rows):
value = row.get(metric)
try:
numeric_value = float(value)
except Exception:
continue
if best_val is None or numeric_value > best_val:
best_val = numeric_value
best_idx = idx
return best_idx
best_overall_idx = _best_row_index("priority_score")
best_hr_idx = _best_row_index("hr_edge")
best_hit_idx = _best_row_index("hit_edge")
best_tb_idx = _best_row_index("tb2p_edge")
if best_overall_idx is not None:
rows[best_overall_idx]["opportunity_badges"].append("BEST OVERALL")
if best_hr_idx is not None:
rows[best_hr_idx]["opportunity_badges"].append("BEST HR EDGE")
if best_hit_idx is not None:
rows[best_hit_idx]["opportunity_badges"].append("BEST HIT EDGE")
if best_tb_idx is not None:
rows[best_tb_idx]["opportunity_badges"].append("BEST TB EDGE")
for row in rows:
tier = str(row.get("recommendation_tier", "") or "").strip().lower()
if tier == "bet":
row["opportunity_badges"].append("BET")
elif tier == "watch":
row["opportunity_badges"].append("WATCH")
# de-duplicate while preserving order
seen = set()
deduped = []
for badge in row["opportunity_badges"]:
if badge in seen:
continue
seen.add(badge)
deduped.append(badge)
row["opportunity_badges"] = deduped[:3]
return rows
def build_upcoming_hitter_recommendations(
    game_row: dict,
    statcast_df: pd.DataFrame,
    pitcher_statcast_df: pd.DataFrame | None = None,
    odds_df: pd.DataFrame | None = None,
    prop_odds_df: pd.DataFrame | None = None,
    weather_row: dict | None = None,
) -> list[dict]:
    """
    Decision-layer wrapper.
    Uses simulated fair rows, then applies:
    1) opportunity adjustment
    2) confidence
    3) recommendation tier
    Suppresses low-value PASS rows unless nothing else exists.

    Args:
        game_row: Live-game state dict; only ``outs`` is read directly here
            (default 0), the rest is passed through to the simulator and the
            confidence model.
        statcast_df: Batter statcast frame forwarded to the simulator.
        pitcher_statcast_df: Optional pitcher statcast frame for the simulator.
        odds_df: Unused in this function (kept for interface compatibility —
            NOTE(review): confirm callers still pass it).
        prop_odds_df: Optional live prop-odds frame; expected to carry
            ``market``, ``player_name`` and ``odds_american`` columns —
            assumed schema, verify against the odds fetcher.
        weather_row: Optional weather dict forwarded to the simulator.

    Returns:
        A list of recommendation dicts: only BET/WATCH rows when any exist,
        otherwise just the single top-of-lineup row.
    """
    rows = build_upcoming_simulated_rows(
        game_row=game_row,
        statcast_df=statcast_df,
        pitcher_statcast_df=pitcher_statcast_df,
        weather_row=weather_row,
    )
    # Build lookup: normalized_player_name → best HR american odds from real prop feed
    _prop_odds_lookup: dict[str, int] = {}
    if prop_odds_df is not None and not prop_odds_df.empty:
        try:
            from data.odds_name_map import map_odds_name_to_model_name
            # Filter to HR markets only when a market column exists;
            # otherwise assume the whole frame is HR props.
            hr_props = (
                prop_odds_df[prop_odds_df["market"].isin(["batter_home_runs", "hr"])]
                if "market" in prop_odds_df.columns
                else prop_odds_df
            )
            if not hr_props.empty and "odds_american" in hr_props.columns and "player_name" in hr_props.columns:
                # Explicit sort: MAX(odds_american) per player = best price for bettor
                best_hr = (
                    hr_props
                    .sort_values("odds_american", ascending=False)
                    .drop_duplicates(subset=["player_name"])
                )
                for _, prow in best_hr.iterrows():
                    norm_name = map_odds_name_to_model_name(str(prow.get("player_name") or ""))
                    odds_val = prow.get("odds_american")
                    if norm_name and odds_val is not None:
                        try:
                            # float() first so strings like "450.0" still parse.
                            _prop_odds_lookup[norm_name] = int(float(odds_val))
                        except (TypeError, ValueError):
                            pass
        except Exception as exc:
            # Best-effort: a broken odds feed must not block recommendations.
            logger.warning("[prop_odds_lookup] build failure: %s", exc)
    recommendations: list[dict] = []
    for row in rows:
        # Inject real book HR odds if available; fall back to simulator placeholder
        if _prop_odds_lookup:
            from data.odds_name_map import map_odds_name_to_model_name
            _norm_batter = map_odds_name_to_model_name(str(row.get("batter_name") or ""))
            _real_hr_odds = _prop_odds_lookup.get(_norm_batter)
            # Fallback: raw name match if normalized mapping misses
            if _real_hr_odds is None:
                _real_hr_odds = _prop_odds_lookup.get(str(row.get("batter_name") or ""))
                if _real_hr_odds is not None:
                    # Tag so downstream can see the name map needs an entry.
                    row["book_hr_odds_source"] = "live_feed_unmapped"
            if _real_hr_odds is not None:
                row["book_hr_odds"] = _real_hr_odds
                # setdefault keeps "live_feed_unmapped" when the raw fallback hit.
                row.setdefault("book_hr_odds_source", "live_feed")
            else:
                row.setdefault("book_hr_odds_source", "placeholder")
                if prop_odds_df is not None and not prop_odds_df.empty:
                    logger.warning(
                        "[prop_odds_mapping_miss] batter=%s",
                        row.get("batter_name"),
                    )
        else:
            row.setdefault("book_hr_odds_source", "placeholder")
        slot = row.get("slot", "Current")
        lineup_distance = _lineup_distance_from_slot(slot)
        opportunity = estimate_plate_appearance_probability(
            outs=game_row.get("outs", 0),
            lineup_distance=lineup_distance,
        )
        # "or 1.0" guards against an explicit None/0 in the opportunity dict.
        expected_pa = float(opportunity.get("expected_pa", 1.0) or 1.0)
        # Apply opportunity adjustment to the simulated probabilities
        for prob_col in ["hit_prob", "hr_prob", "tb2p_prob"]:
            if prob_col in row and row.get(prob_col) is not None:
                try:
                    raw_prob = float(row.get(prob_col))
                    # Scale by expected PAs, then clamp into (0.001, 0.95).
                    row[prob_col] = min(0.95, max(0.001, raw_prob * expected_pa))
                except Exception as e:
                    logger.warning(f"[prob_opportunity_adjust] failure: {e}", exc_info=True)
        # Recalculate fair odds and edges after probability adjustment
        if row.get("hit_prob") is not None:
            row["fair_hit_odds"] = probability_to_american(row["hit_prob"])
        if row.get("hr_prob") is not None:
            row["fair_hr_odds"] = probability_to_american(row["hr_prob"])
        if row.get("tb2p_prob") is not None:
            row["fair_tb2p_odds"] = probability_to_american(row["tb2p_prob"])
        # Each market's edge/EV is computed best-effort; a missing book price
        # or prob raises inside the try and only skips that one market.
        try:
            book_hit_odds = float(row.get("book_hit_odds"))
            row["hit_edge"] = compute_edge(row["hit_prob"], american_to_implied_prob(book_hit_odds))
            row["hit_bet_ev"] = compute_bet_ev(row["hit_prob"], int(book_hit_odds))
        except Exception as e:
            logger.warning(f"[hit_edge_compute] failure: {e}", exc_info=True)
        try:
            book_hr_odds = float(row.get("book_hr_odds"))
            # HR props are single-sided markets — de-vig with flat margin, not two-way normalization
            hr_book_prob_novig = remove_vig_single_side(american_to_implied_prob(book_hr_odds))
            row["hr_edge"] = compute_edge(row["hr_prob"], hr_book_prob_novig)
            row["hr_bet_ev"] = compute_bet_ev(row["hr_prob"], int(book_hr_odds))
            # Kelly sizing only for the HR market (the headline bet here).
            row["kelly_pct"] = kelly_fraction(row["hr_prob"], int(book_hr_odds))
        except Exception as e:
            logger.warning(f"[hr_edge_compute] failure: {e}", exc_info=True)
        try:
            book_tb2p_odds = float(row.get("book_tb2p_odds"))
            row["tb2p_edge"] = compute_edge(row["tb2p_prob"], american_to_implied_prob(book_tb2p_odds))
            row["tb2p_bet_ev"] = compute_bet_ev(row["tb2p_prob"], int(book_tb2p_odds))
        except Exception as e:
            logger.warning(f"[tb2p_edge_compute] failure: {e}", exc_info=True)
        # Carry diagnostics forward
        row["lineup_distance"] = lineup_distance
        row["pa_prob_this_inning"] = opportunity.get("pa_prob_this_inning")
        row["pa_prob_next_two_innings"] = opportunity.get("pa_prob_next_two_innings")
        row["expected_pa"] = expected_pa
        confidence_block = compute_confidence(row, game_row=game_row)
        row.update(confidence_block)
        rules_block = apply_recommendation_rules(row)
        row.update(rules_block)
        recommendations.append(row)
    # Preserve lineup-order display, but compute badges from model outputs.
    recommendations = sorted(
        recommendations,
        key=lambda x: _lineup_distance_from_slot(x.get("slot", "")),
    )
    recommendations = _apply_opportunity_badges(recommendations)
    # Surface only actionable tiers; if nothing is actionable, still return
    # the single nearest batter so the UI is never empty.
    surfaced = [
        row for row in recommendations
        if str(row.get("recommendation_tier", "")).lower() in {"bet", "watch"}
    ]
    if surfaced:
        return surfaced
    return recommendations[:1]