2026_MLB_Model / analytics /recommendation_logger.py
Syntrex's picture
Batch 12.5D: XGBoost shadow model + evaluation pipeline + lineup_slot JOIN fix
71c16c1
raw
history blame
4.27 kB
from __future__ import annotations
from typing import Any
import pandas as pd
# Translates a recommendation's ``slot`` value into the snake_case key written
# to the ``lineup_slot`` column. Display labels and already-normalised keys
# are both listed, so applying the lookup twice is harmless.
_SLOT_LINEUP = {
    "On Deck": "on_deck",
    "on_deck": "on_deck",
    "In Hole": "in_hole",
    "in_hole": "in_hole",
    "3 Away": "three_away",
    "three_away": "three_away",
}


def _join_reason_tags(tags: Any) -> str:
    """Flatten a ``reason_tags`` value into a single ``" | "``-separated string."""
    if not tags:
        return ""
    if isinstance(tags, str):
        # A bare string is one tag; joining it would split it per character.
        return tags
    # Stringify every entry so a stray non-string tag cannot raise TypeError
    # and abort the whole log build; ``None`` entries carry no information.
    return " | ".join(str(tag) for tag in tags if tag is not None)


def build_recommendation_log_rows(
    recommendations: list[dict[str, Any]],
    game_row: dict[str, Any],
    created_at: str,
) -> pd.DataFrame:
    """
    Build one log row per recommendation, stamped with its game context.

    Args:
        recommendations: Recommendation dicts. Keys that are absent are logged
            as ``None`` (``xgb_shadow_active`` falls back to ``False``).
        game_row: Game-level dict. ``away_team``, ``home_team``, ``status`` and
            ``game_pk`` are read, coerced to ``str`` and stripped; missing or
            falsy values become ``""``.
        created_at: Timestamp copied verbatim onto every row.

    Returns:
        A DataFrame with one row per recommendation. When ``recommendations``
        is empty the frame has no rows and no columns.
    """
    # Game context is identical for every row, so resolve it once up front.
    away_team = str(game_row.get("away_team", "") or "").strip()
    home_team = str(game_row.get("home_team", "") or "").strip()
    status = str(game_row.get("status", "") or "").strip()
    game_pk = str(game_row.get("game_pk", "") or "").strip()

    rows: list[dict[str, Any]] = []
    for rec in recommendations:
        slot = rec.get("slot")
        rows.append(
            {
                "created_at": created_at,
                "game_pk": game_pk,
                "away_team": away_team,
                "home_team": home_team,
                "status": status,
                "slot": slot,
                "batter_name": rec.get("batter_name"),
                "pitcher_name": rec.get("pitcher_name"),
                "ev90": rec.get("ev90"),
                "hit_prob": rec.get("hit_prob"),
                "hr_prob": rec.get("hr_prob"),
                "tb2p_prob": rec.get("tb2p_prob"),
                "fair_hit_odds": rec.get("fair_hit_odds"),
                "fair_hr_odds": rec.get("fair_hr_odds"),
                "fair_tb2p_odds": rec.get("fair_tb2p_odds"),
                "book_hit_odds": rec.get("book_hit_odds"),
                "book_hr_odds": rec.get("book_hr_odds"),
                "book_tb2p_odds": rec.get("book_tb2p_odds"),
                "hit_edge": rec.get("hit_edge"),
                "hr_edge": rec.get("hr_edge"),
                "tb2p_edge": rec.get("tb2p_edge"),
                "adjusted_edge": rec.get("adjusted_edge"),
                "hit_bet_ev": rec.get("hit_bet_ev"),
                "hr_bet_ev": rec.get("hr_bet_ev"),
                "tb2p_bet_ev": rec.get("tb2p_bet_ev"),
                "confidence": rec.get("confidence"),
                "confidence_bucket": rec.get("confidence_bucket"),
                "recommendation_tier": rec.get("recommendation_tier"),
                "priority_score": rec.get("priority_score"),
                "reason_tags": _join_reason_tags(rec.get("reason_tags")),
                "starter_stays_next_batter_prob": rec.get("starter_stays_next_batter_prob"),
                "starter_stays_next_inning_prob": rec.get("starter_stays_next_inning_prob"),
                "bullpen_entry_prob": rec.get("bullpen_entry_prob"),
                "xgb_hr_delta": rec.get("xgb_hr_delta"),
                "xgb_hr_adjusted": rec.get("xgb_hr_adjusted"),
                "xgb_shadow_active": rec.get("xgb_shadow_active", False),
                # Unrecognised slot values pass through unchanged.
                "lineup_slot": _SLOT_LINEUP.get(slot, slot),
            }
        )
    return pd.DataFrame(rows)
def build_recommendation_outcome_rows(
    game_row: dict[str, Any],
    graded_at: str,
) -> pd.DataFrame:
    """
    Emit placeholder outcome rows for the upcoming batters of one game.

    One row is produced for each of the ``on_deck_name``, ``in_hole_name`` and
    ``three_away_name`` entries of ``game_row`` that holds a non-blank name.
    The ``realized_*`` columns and ``created_at`` are always ``None``; every
    row is tagged with ``outcome_source == "placeholder"`` and carries the
    supplied ``graded_at`` value. With no named batters the returned frame is
    empty and has no columns.
    """

    def _clean(field: str) -> str:
        # Missing or falsy values collapse to "" before stripping.
        return str(game_row.get(field, "") or "").strip()

    visitors = _clean("away_team")
    hosts = _clean("home_team")
    game_id = _clean("game_pk")

    # (key in game_row, human-readable slot label)
    upcoming = (
        ("on_deck_name", "On Deck"),
        ("in_hole_name", "In Hole"),
        ("three_away_name", "3 Away"),
    )

    records = []
    for name_field, label in upcoming:
        hitter = _clean(name_field)
        if hitter:
            record = {
                "created_at": None,
                "game_pk": game_id,
                "away_team": visitors,
                "home_team": hosts,
                "batter_name": hitter,
                "slot": label,
                "market": "hr",
            }
            # Outcome columns stay empty in this placeholder scaffold.
            for outcome_column in ("realized_hit", "realized_hr", "realized_tb2p"):
                record[outcome_column] = None
            record["graded_at"] = graded_at
            record["outcome_source"] = "placeholder"
            record["lineup_slot"] = name_field.replace("_name", "")
            records.append(record)

    return pd.DataFrame(records)