File size: 9,964 Bytes
726e2cd
 
 
 
 
 
21151ce
b761d20
726e2cd
e2bfd69
95e27f5
726e2cd
26e7378
 
 
 
 
 
 
 
 
 
 
 
 
 
684b4f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
726e2cd
154aa0b
726e2cd
7b3d14f
726e2cd
dba351a
154aa0b
 
726e2cd
154aa0b
67f928e
c8d7762
 
 
 
f762454
726e2cd
 
 
 
7b3d14f
726e2cd
 
 
dba351a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154aa0b
726e2cd
 
dba351a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67f928e
c8d7762
 
 
 
 
 
 
 
 
67f928e
c8d7762
 
 
 
 
95e27f5
 
c8d7762
67f928e
 
 
 
 
 
 
 
 
 
95e27f5
67f928e
95e27f5
 
67f928e
 
 
21151ce
 
 
67f928e
21151ce
95e27f5
 
67f928e
 
 
95e27f5
67f928e
95e27f5
 
67f928e
 
c8d7762
 
 
 
 
726e2cd
 
 
 
 
 
 
 
247bd3d
154aa0b
 
247bd3d
154aa0b
 
247bd3d
 
f762454
 
 
 
 
 
 
 
7b3d14f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
from __future__ import annotations

import pandas as pd

from analytics.confidence import compute_confidence
from analytics.recommendation_rules import apply_recommendation_rules
from analytics.no_vig_props import american_to_implied_prob, compute_bet_ev, compute_edge, kelly_fraction, remove_vig_single_side
from models.fair_odds import probability_to_american
from models.live_fair_simulator_v3 import build_upcoming_simulated_rows
from models.opportunity_model import estimate_plate_appearance_probability
from utils.logger import logger

def _lineup_distance_from_slot(slot: str) -> int:
    s = str(slot or "").strip().lower()

    if s in {"current", "current batter", "current_batter", "now"}:
        return 0
    if s in {"on deck", "on_deck", "ondeck", "on-deck"}:
        return 1
    if s in {"in hole", "in_hole", "inhole", "in-hole"}:
        return 2
    if s in {"3 away", "three away", "3_away", "three_away", "three-away", "3-away"}:
        return 3

    return 0

def _apply_opportunity_badges(recommendations: list[dict]) -> list[dict]:
    if not recommendations:
        return recommendations

    rows = [dict(r) for r in recommendations]

    for row in rows:
        row["opportunity_badges"] = []

    def _best_row_index(metric: str) -> int | None:
        best_idx = None
        best_val = None

        for idx, row in enumerate(rows):
            value = row.get(metric)
            try:
                numeric_value = float(value)
            except Exception:
                continue

            if best_val is None or numeric_value > best_val:
                best_val = numeric_value
                best_idx = idx

        return best_idx

    best_overall_idx = _best_row_index("priority_score")
    best_hr_idx = _best_row_index("hr_edge")
    best_hit_idx = _best_row_index("hit_edge")
    best_tb_idx = _best_row_index("tb2p_edge")

    if best_overall_idx is not None:
        rows[best_overall_idx]["opportunity_badges"].append("BEST OVERALL")

    if best_hr_idx is not None:
        rows[best_hr_idx]["opportunity_badges"].append("BEST HR EDGE")

    if best_hit_idx is not None:
        rows[best_hit_idx]["opportunity_badges"].append("BEST HIT EDGE")

    if best_tb_idx is not None:
        rows[best_tb_idx]["opportunity_badges"].append("BEST TB EDGE")

    for row in rows:
        tier = str(row.get("recommendation_tier", "") or "").strip().lower()

        if tier == "bet":
            row["opportunity_badges"].append("BET")
        elif tier == "watch":
            row["opportunity_badges"].append("WATCH")

        # de-duplicate while preserving order
        seen = set()
        deduped = []
        for badge in row["opportunity_badges"]:
            if badge in seen:
                continue
            seen.add(badge)
            deduped.append(badge)

        row["opportunity_badges"] = deduped[:3]

    return rows

def build_upcoming_hitter_recommendations(
    game_row: dict,
    statcast_df: pd.DataFrame,
    pitcher_statcast_df: pd.DataFrame | None = None,
    odds_df: pd.DataFrame | None = None,
    prop_odds_df: pd.DataFrame | None = None,
    weather_row: dict | None = None,
) -> list[dict]:
    """
    Decision-layer wrapper.
    Uses simulated fair rows, then applies:
    1) opportunity adjustment
    2) confidence
    3) recommendation tier

    Suppresses low-value PASS rows unless nothing else exists.

    Args:
        game_row: Live game state; ``"outs"`` feeds the opportunity model and
            the whole dict is forwarded to the confidence scorer.
        statcast_df: Batter Statcast data passed to the simulator.
        pitcher_statcast_df: Optional pitcher Statcast data for the simulator.
        odds_df: Accepted for caller interface compatibility; not used here.
        prop_odds_df: Optional real sportsbook prop feed; best HR prices are
            injected into matching rows, replacing simulator placeholders.
        weather_row: Optional weather context forwarded to the simulator.

    Returns:
        Rows tiered BET/WATCH (in lineup order) when any exist; otherwise the
        single nearest-to-bat row so callers always have something to show.
    """
    rows = build_upcoming_simulated_rows(
        game_row=game_row,
        statcast_df=statcast_df,
        pitcher_statcast_df=pitcher_statcast_df,
        weather_row=weather_row,
    )

    # Build lookup: normalized_player_name → best HR american odds from real prop feed
    _prop_odds_lookup: dict[str, int] = {}
    # Bind the name mapper once here (instead of re-importing inside the
    # per-row loop); _prop_odds_lookup is only populated when this import
    # succeeded, so the loop below can rely on _map_name being set.
    _map_name = None
    if prop_odds_df is not None and not prop_odds_df.empty:
        try:
            # Local import keeps the odds-name-mapping dependency soft.
            from data.odds_name_map import map_odds_name_to_model_name
            _map_name = map_odds_name_to_model_name
            hr_props = (
                prop_odds_df[prop_odds_df["market"].isin(["batter_home_runs", "hr"])]
                if "market" in prop_odds_df.columns
                else prop_odds_df
            )
            if not hr_props.empty and "odds_american" in hr_props.columns and "player_name" in hr_props.columns:
                # Explicit sort: MAX(odds_american) per player = best price for bettor
                best_hr = (
                    hr_props
                    .sort_values("odds_american", ascending=False)
                    .drop_duplicates(subset=["player_name"])
                )
                for _, prow in best_hr.iterrows():
                    norm_name = _map_name(str(prow.get("player_name") or ""))
                    odds_val = prow.get("odds_american")
                    if norm_name and odds_val is not None:
                        try:
                            _prop_odds_lookup[norm_name] = int(float(odds_val))
                        except (TypeError, ValueError):
                            pass
        except Exception as exc:
            # Odds injection is best-effort: a bad feed must not kill the
            # recommendation pipeline.
            logger.warning("[prop_odds_lookup] build failure: %s", exc)

    recommendations: list[dict] = []

    for row in rows:
        # Inject real book HR odds if available; fall back to simulator placeholder
        if _prop_odds_lookup:
            _norm_batter = _map_name(str(row.get("batter_name") or ""))
            _real_hr_odds = _prop_odds_lookup.get(_norm_batter)

            # Fallback: raw name match if normalized mapping misses
            if _real_hr_odds is None:
                _real_hr_odds = _prop_odds_lookup.get(str(row.get("batter_name") or ""))
                if _real_hr_odds is not None:
                    row["book_hr_odds_source"] = "live_feed_unmapped"

            if _real_hr_odds is not None:
                row["book_hr_odds"] = _real_hr_odds
                row.setdefault("book_hr_odds_source", "live_feed")
            else:
                row.setdefault("book_hr_odds_source", "placeholder")
                if prop_odds_df is not None and not prop_odds_df.empty:
                    logger.warning(
                        "[prop_odds_mapping_miss] batter=%s",
                        row.get("batter_name"),
                    )
        else:
            row.setdefault("book_hr_odds_source", "placeholder")

        slot = row.get("slot", "Current")
        lineup_distance = _lineup_distance_from_slot(slot)

        opportunity = estimate_plate_appearance_probability(
            outs=game_row.get("outs", 0),
            lineup_distance=lineup_distance,
        )

        # `or 1.0` also covers an explicit 0/None expected_pa from the model.
        expected_pa = float(opportunity.get("expected_pa", 1.0) or 1.0)

        # Apply opportunity adjustment to the simulated probabilities,
        # clamped to [0.001, 0.95] to keep downstream odds math sane.
        for prob_col in ["hit_prob", "hr_prob", "tb2p_prob"]:
            if prob_col in row and row.get(prob_col) is not None:
                try:
                    raw_prob = float(row.get(prob_col))
                    row[prob_col] = min(0.95, max(0.001, raw_prob * expected_pa))
                except Exception as e:
                    logger.warning(f"[prob_opportunity_adjust] failure: {e}", exc_info=True)

        # Recalculate fair odds and edges after probability adjustment
        if row.get("hit_prob") is not None:
            row["fair_hit_odds"] = probability_to_american(row["hit_prob"])
        if row.get("hr_prob") is not None:
            row["fair_hr_odds"] = probability_to_american(row["hr_prob"])
        if row.get("tb2p_prob") is not None:
            row["fair_tb2p_odds"] = probability_to_american(row["tb2p_prob"])

        # Each market's edge/EV is independently best-effort: a missing or
        # unparseable book price for one market must not block the others.
        try:
            book_hit_odds = float(row.get("book_hit_odds"))
            row["hit_edge"] = compute_edge(row["hit_prob"], american_to_implied_prob(book_hit_odds))
            row["hit_bet_ev"] = compute_bet_ev(row["hit_prob"], int(book_hit_odds))
        except Exception as e:
            logger.warning(f"[hit_edge_compute] failure: {e}", exc_info=True)

        try:
            book_hr_odds = float(row.get("book_hr_odds"))
            # HR props are single-sided markets — de-vig with flat margin, not two-way normalization
            hr_book_prob_novig = remove_vig_single_side(american_to_implied_prob(book_hr_odds))
            row["hr_edge"] = compute_edge(row["hr_prob"], hr_book_prob_novig)
            row["hr_bet_ev"] = compute_bet_ev(row["hr_prob"], int(book_hr_odds))
            row["kelly_pct"] = kelly_fraction(row["hr_prob"], int(book_hr_odds))
        except Exception as e:
            logger.warning(f"[hr_edge_compute] failure: {e}", exc_info=True)

        try:
            book_tb2p_odds = float(row.get("book_tb2p_odds"))
            row["tb2p_edge"] = compute_edge(row["tb2p_prob"], american_to_implied_prob(book_tb2p_odds))
            row["tb2p_bet_ev"] = compute_bet_ev(row["tb2p_prob"], int(book_tb2p_odds))
        except Exception as e:
            logger.warning(f"[tb2p_edge_compute] failure: {e}", exc_info=True)

        # Carry diagnostics forward
        row["lineup_distance"] = lineup_distance
        row["pa_prob_this_inning"] = opportunity.get("pa_prob_this_inning")
        row["pa_prob_next_two_innings"] = opportunity.get("pa_prob_next_two_innings")
        row["expected_pa"] = expected_pa

        confidence_block = compute_confidence(row, game_row=game_row)
        row.update(confidence_block)

        rules_block = apply_recommendation_rules(row)
        row.update(rules_block)

        recommendations.append(row)

    # Preserve lineup-order display, but compute badges from model outputs.
    recommendations = sorted(
        recommendations,
        key=lambda x: _lineup_distance_from_slot(x.get("slot", "")),
    )

    recommendations = _apply_opportunity_badges(recommendations)

    surfaced = [
        row for row in recommendations
        if str(row.get("recommendation_tier", "")).lower() in {"bet", "watch"}
    ]

    if surfaced:
        return surfaced

    # Nothing cleared BET/WATCH: surface only the single nearest-to-bat row.
    return recommendations[:1]