Spaces:
Running
Running
| from __future__ import annotations | |
| """ | |
| Batch 13 — Full Debug Dashboard | |
| Renders the Debug navigation page. All model-layer diagnostics, | |
| adjustment ladders, signal attribution, admin tools, and audit | |
| metrics are consolidated here, replacing the Debug expander that | |
| previously lived inside render_dashboard(). | |
| """ | |
| import json | |
| from typing import Any, Callable | |
| import pandas as pd | |
| import streamlit as st | |
| from analytics.evaluation_metrics import ( | |
| build_clv_by_tier_table, | |
| build_clv_table, | |
| build_confidence_table, | |
| build_edge_bucket_table, | |
| build_ere_by_confidence_bucket_table, | |
| build_ere_by_edge_bucket_table, | |
| build_ere_by_tier_table, | |
| build_ere_table, | |
| build_hr_calibration_table, | |
| build_tier_performance_table, | |
| ) | |
| from analytics.batter_audit_metrics import ( | |
| build_batter_hr_tier_table, | |
| build_batter_hr_confidence_table, | |
| build_batter_hr_edge_table, | |
| ) | |
| from analytics.props_view_model import build_hr_props_view_model | |
| from analytics.recommendation_engine import build_upcoming_hitter_recommendations | |
| import threading | |
| from data.mlb_starters import ( | |
| build_oddsapi_starter_fallback_map, | |
| merge_probable_starters_with_odds_fallback, | |
| ) | |
| from database.db import ( | |
| get_connection, | |
| read_batter_prop_audit_view, | |
| read_batter_prop_outcomes, | |
| read_cached_probable_starters, | |
| read_cached_probable_starters_meta, | |
| read_cached_upcoming_props_bundle, | |
| read_cached_schedule_for_date, | |
| read_cached_odds, | |
| read_game_outcomes, | |
| read_pitcher_resolution_log, | |
| read_recommendation_audit_view, | |
| read_recommendation_logs_recent, | |
| ) | |
| from models.live_fair_simulator_v3 import build_upcoming_simulated_rows | |
| from models.pitcher_adjustment import build_pitcher_feature_row | |
| from utils.dates import current_wbc_date_str | |
| from visualization.props_page import ( | |
| _build_props_market_debug_payload, | |
| _ensure_props_market_payloads, | |
| ) | |
def _load_upcoming_props_coverage_probe() -> dict[str, pd.DataFrame]:
    """Run the upcoming-props coverage probe with a fixed book/market selection.

    The probe function is imported at call time rather than at module import.
    It is asked about three sportsbooks and three prop markets, limited to
    five events, and its result is returned unchanged.
    """
    from data.live_prop_odds import fetch_upcoming_props_coverage_probe

    probe_books = ["draftkings", "fanduel", "betmgm"]
    probe_markets = ["batter_home_runs", "batter_hits", "pitcher_strikeouts"]
    return fetch_upcoming_props_coverage_probe(
        sportsbooks=probe_books,
        markets=probe_markets,
        max_events=5,
    )
def _load_debug_cached_source_bundle(date_str: str) -> dict[str, Any]:
    """Read the cached source data for the Debug page over its own connection.

    Parameters
    ----------
    date_str : date key passed to the cached-schedule reader.

    Returns
    -------
    dict with keys ``schedule_cached``, ``odds_cached``, ``starters_meta`` and
    ``props_cache`` (the latter read with ``cache_key="default"``).
    """
    db_conn = get_connection()
    try:
        cached_schedule = read_cached_schedule_for_date(db_conn, date_str)
        cached_odds = read_cached_odds(db_conn)
        starters_meta = read_cached_probable_starters_meta(db_conn)
        props_cache = read_cached_upcoming_props_bundle(db_conn, cache_key="default")
        return {
            "schedule_cached": cached_schedule,
            "odds_cached": cached_odds,
            "starters_meta": starters_meta,
            "props_cache": props_cache,
        }
    finally:
        # Closing is best-effort: a failure to close must not mask the result
        # (or the original exception) of the reads above.
        try:
            db_conn.close()
        except Exception:
            pass
def _load_debug_audit_bundle() -> dict[str, pd.DataFrame]:
    """Read the audit/outcome tables for the Debug page over its own connection.

    Returns
    -------
    dict with keys ``batter_prop_outcomes``, ``game_outcomes``,
    ``recommendation_logs`` (most recent 2000 rows), ``recommendation_audit``
    and ``batter_prop_audit``.
    """
    db_conn = get_connection()
    try:
        prop_outcomes = read_batter_prop_outcomes(db_conn)
        game_outcomes = read_game_outcomes(db_conn)
        recent_logs = read_recommendation_logs_recent(db_conn, limit=2000)
        recommendation_audit = read_recommendation_audit_view(db_conn)
        prop_audit = read_batter_prop_audit_view(db_conn)
        return {
            "batter_prop_outcomes": prop_outcomes,
            "game_outcomes": game_outcomes,
            "recommendation_logs": recent_logs,
            "recommendation_audit": recommendation_audit,
            "batter_prop_audit": prop_audit,
        }
    finally:
        # Closing is best-effort: never let a close failure replace the
        # result (or the original exception) of the reads above.
        try:
            db_conn.close()
        except Exception:
            pass
| def _props_modeled_bundle_signature(bundle: dict[str, Any] | None) -> tuple[Any, ...]: | |
| if not isinstance(bundle, dict) or not bundle: | |
| return tuple() | |
| signature: list[tuple[Any, ...]] = [] | |
| for market, payload in sorted(bundle.items()): | |
| payload = payload or {} | |
| mapped = payload.get("mapped", pd.DataFrame()) | |
| baseline_debug = payload.get("baseline_debug") or {} | |
| batter_df = payload.get("statcast_df", pd.DataFrame()) | |
| pitcher_df = payload.get("pitcher_statcast_df", pd.DataFrame()) | |
| signature.append( | |
| ( | |
| str(market or "").strip().lower(), | |
| int(len(mapped)) if isinstance(mapped, pd.DataFrame) else 0, | |
| int(len(batter_df)) if isinstance(batter_df, pd.DataFrame) else 0, | |
| int(len(pitcher_df)) if isinstance(pitcher_df, pd.DataFrame) else 0, | |
| str(baseline_debug.get("baseline_source") or ""), | |
| int(baseline_debug.get("requested_hitter_count") or 0), | |
| int(baseline_debug.get("requested_pitcher_count") or 0), | |
| ) | |
| ) | |
| return tuple(signature) | |
def _get_props_market_debug_bundle(props_modeled_market_bundle: dict[str, Any] | None) -> dict[str, Any]:
    """Return per-market debug payloads, rebuilding them only when the bundle changed.

    With no usable bundle, whatever is already stored under
    ``props_market_debug_bundle`` in the Streamlit session (or ``{}``) is
    returned. Otherwise the bundle signature is compared with the one stored
    in the session; on a match with a non-empty cached dict the cached dict is
    returned, and on a miss every market is rebuilt and both the signature and
    the rebuilt dict are written back to the session.
    """
    state = st.session_state
    has_bundle = isinstance(props_modeled_market_bundle, dict) and bool(props_modeled_market_bundle)
    if not has_bundle:
        return state.get("props_market_debug_bundle") or {}

    current_signature = _props_modeled_bundle_signature(props_modeled_market_bundle)
    stored_signature = state.get("_props_market_debug_signature")
    stored_bundle = state.get("props_market_debug_bundle")
    cache_is_fresh = (
        stored_signature == current_signature
        and isinstance(stored_bundle, dict)
        and bool(stored_bundle)
    )
    if cache_is_fresh:
        return stored_bundle

    rebuilt: dict[str, Any] = {}
    for market_key, market_payload in props_modeled_market_bundle.items():
        rebuilt[market_key] = _build_props_market_debug_payload(
            market_type=market_key,
            payload=market_payload,
        )
    state["_props_market_debug_signature"] = current_signature
    state["props_market_debug_bundle"] = rebuilt
    return rebuilt
def _get_combined_props_exec_df(
    props_modeled_market_bundle: dict[str, Any] | None,
    active_exec_df: pd.DataFrame | None,
) -> pd.DataFrame | None:
    """Concatenate every market's non-empty ``mapped`` frame into one frame.

    Parameters
    ----------
    props_modeled_market_bundle : mapping of market -> payload dict; each
        payload may carry a ``mapped`` DataFrame.
    active_exec_df : fallback frame returned when the bundle is missing or
        contributes no non-empty ``mapped`` frame.

    Returns
    -------
    The combined frame (cached in ``st.session_state`` under
    ``_props_exec_df_combined`` and keyed by the bundle signature stored under
    ``_props_exec_df_signature``), or ``active_exec_df`` when there is nothing
    to combine.
    """
    if not isinstance(props_modeled_market_bundle, dict) or not props_modeled_market_bundle:
        return active_exec_df

    mapped_frames: list[pd.DataFrame] = []
    for payload in props_modeled_market_bundle.values():
        mapped = payload.get("mapped") if isinstance(payload, dict) else None
        if isinstance(mapped, pd.DataFrame) and not mapped.empty:
            mapped_frames.append(mapped)

    if not mapped_frames:
        # Nothing to combine. Return the caller's *current* fallback and do not
        # cache it: the signature does not cover active_exec_df, so caching the
        # fallback would keep serving an outdated frame on later calls.
        return active_exec_df

    signature = _props_modeled_bundle_signature(props_modeled_market_bundle)
    cached_signature = st.session_state.get("_props_exec_df_signature")
    cached_exec = st.session_state.get("_props_exec_df_combined")
    if cached_signature == signature and isinstance(cached_exec, pd.DataFrame):
        return cached_exec

    # pd.concat builds a new frame, so the inputs need no defensive copy.
    exec_df = pd.concat(mapped_frames, ignore_index=True, sort=False)
    st.session_state["_props_exec_df_signature"] = signature
    st.session_state["_props_exec_df_combined"] = exec_df
    return exec_df
| # --------------------------------------------------------------------------- | |
| # Ladder definitions — HR / hit / TB2P probability checkpoint fields in output dict order | |
| # --------------------------------------------------------------------------- | |
| _LADDER_HR_FIELDS = [ | |
| ("Baseline", "snap_baseline_hr"), | |
| ("After Trend", "snap_after_trend_hr"), | |
| ("After Zone/Family Dedup", "snap_after_zone_dedup_hr"), | |
| ("After Arsenal", "snap_after_arsenal_hr"), | |
| ("After Pulled Contact", "snap_after_pulled_contact_hr"), | |
| ("After Env", "snap_after_env_hr"), | |
| ("After Platoon", "snap_after_platoon_hr"), | |
| ("After Trajectory", "snap_after_traj_hr"), | |
| ("After Rolling", "snap_after_rolling_hr"), | |
| ("After Opportunity", "snap_after_opportunity_hr"), | |
| ("After Drift", "snap_after_drift_hr"), | |
| ("Final (simulated)", "hr_prob"), | |
| ] | |
| _LADDER_HIT_FIELDS = [ | |
| ("Baseline", "snap_baseline_hit"), | |
| ("After Trend", "snap_after_trend_hit"), | |
| ("After Zone/Family Dedup", "snap_after_zone_dedup_hit"), | |
| ("After Arsenal", "snap_after_arsenal_hit"), | |
| ("After Pulled Contact", "snap_after_pulled_contact_hit"), | |
| ("After Env", "snap_after_env_hit"), | |
| ("After Platoon", "snap_after_platoon_hit"), | |
| ("After Trajectory", "snap_after_traj_hit"), | |
| ("After Rolling", "snap_after_rolling_hit"), | |
| ("After Opportunity", "snap_after_opportunity_hit"), | |
| ("After Drift", "snap_after_drift_hit"), | |
| ("Final (simulated)", "hit_prob"), | |
| ] | |
| _LADDER_TB2P_FIELDS = [ | |
| ("Baseline", "snap_baseline_tb2p"), | |
| ("After Trend", "snap_after_trend_tb2p"), | |
| ("After Zone/Family Dedup", "snap_after_zone_dedup_tb2p"), | |
| ("After Arsenal", "snap_after_arsenal_tb2p"), | |
| ("After Pulled Contact", "snap_after_pulled_contact_tb2p"), | |
| ("After Env", "snap_after_env_tb2p"), | |
| ("After Platoon", "snap_after_platoon_tb2p"), | |
| ("After Trajectory", "snap_after_traj_tb2p"), | |
| ("After Rolling", "snap_after_rolling_tb2p"), | |
| ("After Opportunity", "snap_after_opportunity_tb2p"), | |
| ("After Drift", "snap_after_drift_tb2p"), | |
| ("Final (simulated)", "tb2p_prob"), | |
| ] | |
| _MODEL_RUBRIC_WEIGHTS = { | |
| "shared_telemetry": 18, | |
| "explicit_opportunity": 16, | |
| "explicit_components": 18, | |
| "pitch_level_backbone": 12, | |
| "hr_damage_modeling": 12, | |
| "k_shadow_v2": 10, | |
| "provenance_debug": 8, | |
| "uncertainty_outputs": 6, | |
| } | |
| def _build_model_upgrade_rubric( | |
| props_hr_health_debug: dict[str, Any] | None, | |
| shared_component_debug: dict[str, Any] | None, | |
| ) -> tuple[pd.DataFrame, dict[str, Any]]: | |
| shared_component_debug = shared_component_debug or {} | |
| props_hr_health_debug = props_hr_health_debug or {} | |
| rows = pd.DataFrame(shared_component_debug.get("rows") or []) | |
| has_shared = not rows.empty | |
| has_hr_components = has_shared and any( | |
| col in rows.columns | |
| for col in [ | |
| "damage_zone_alignment_subscore", | |
| "pitch_mix_exposure_subscore", | |
| "tunnel_damage_subscore", | |
| "count_pattern_damage_subscore", | |
| ] | |
| ) | |
| has_k_v2 = has_shared and ( | |
| ("market_family" in rows.columns and rows["market_family"].astype(str).str.lower().eq("k").any()) | |
| or "expected_strikeouts_v2" in rows.columns | |
| or "expected_strikeouts" in rows.columns | |
| ) | |
| has_opportunity = has_shared and any( | |
| col in rows.columns | |
| for col in ["projected_pitch_count", "projected_batters_faced", "projected_innings"] | |
| ) | |
| has_uncertainty = has_shared and any( | |
| col in rows.columns | |
| for col in ["variance_band_low", "variance_band_high", "matchup_coverage_confidence"] | |
| ) | |
| has_provenance = has_shared and "component_source_map" in rows.columns | |
| has_pitch_backbone = has_shared and any( | |
| col in rows.columns | |
| for col in [ | |
| "zone_matchup_subscore", | |
| "family_zone_matchup_subscore", | |
| "arsenal_fit_subscore", | |
| "tunneling_subscore", | |
| "sequencing_subscore", | |
| ] | |
| ) | |
| has_explicit_components = has_shared and any( | |
| col in rows.columns | |
| for col in [ | |
| "damage_zone_alignment_subscore", | |
| "arsenal_fit_subscore", | |
| "zone_matchup_subscore", | |
| "count_leverage_subscore", | |
| ] | |
| ) | |
| grade_rows = [ | |
| { | |
| "category": "Shared telemetry framework", | |
| "weight": _MODEL_RUBRIC_WEIGHTS["shared_telemetry"], | |
| "old_system": 2, | |
| "current_system": _MODEL_RUBRIC_WEIGHTS["shared_telemetry"] if has_shared else 0, | |
| "status": "active" if has_shared else "missing", | |
| "evidence": "shared_matchup_engine + shared diagnostics" if has_shared else "not captured yet", | |
| }, | |
| { | |
| "category": "Explicit opportunity modeling", | |
| "weight": _MODEL_RUBRIC_WEIGHTS["explicit_opportunity"], | |
| "old_system": 3, | |
| "current_system": _MODEL_RUBRIC_WEIGHTS["explicit_opportunity"] if has_opportunity else 0, | |
| "status": "active" if has_opportunity else "missing", | |
| "evidence": "projected pitch count / BF / innings" if has_opportunity else "old heuristic only", | |
| }, | |
| { | |
| "category": "Explicit modeled components", | |
| "weight": _MODEL_RUBRIC_WEIGHTS["explicit_components"], | |
| "old_system": 8, | |
| "current_system": _MODEL_RUBRIC_WEIGHTS["explicit_components"] if has_explicit_components else 0, | |
| "status": "active" if has_explicit_components else "partial", | |
| "evidence": "named subscores emitted" if has_explicit_components else "implicit heuristics", | |
| }, | |
| { | |
| "category": "Pitch-level backbone", | |
| "weight": _MODEL_RUBRIC_WEIGHTS["pitch_level_backbone"], | |
| "old_system": 8, | |
| "current_system": _MODEL_RUBRIC_WEIGHTS["pitch_level_backbone"] if has_pitch_backbone else 0, | |
| "status": "active" if has_pitch_backbone else "missing", | |
| "evidence": "zone/family-zone/arsenal/tunnel/sequencing present" if has_pitch_backbone else "not surfaced", | |
| }, | |
| { | |
| "category": "HR damage-zone modeling", | |
| "weight": _MODEL_RUBRIC_WEIGHTS["hr_damage_modeling"], | |
| "old_system": 8, | |
| "current_system": _MODEL_RUBRIC_WEIGHTS["hr_damage_modeling"] if has_hr_components else 0, | |
| "status": "active" if has_hr_components else "missing", | |
| "evidence": "damage-zone and pitch-mix exposure subscores" if has_hr_components else "legacy HR layers only", | |
| }, | |
| { | |
| "category": "K shadow v2 readiness", | |
| "weight": _MODEL_RUBRIC_WEIGHTS["k_shadow_v2"], | |
| "old_system": 0, | |
| "current_system": _MODEL_RUBRIC_WEIGHTS["k_shadow_v2"] if has_k_v2 else 0, | |
| "status": "active" if has_k_v2 else "missing", | |
| "evidence": "strikeout v2 outputs captured" if has_k_v2 else "current K engine only", | |
| }, | |
| { | |
| "category": "Provenance and debug traceability", | |
| "weight": _MODEL_RUBRIC_WEIGHTS["provenance_debug"], | |
| "old_system": 4, | |
| "current_system": _MODEL_RUBRIC_WEIGHTS["provenance_debug"] if has_provenance else 0, | |
| "status": "active" if has_provenance else "missing", | |
| "evidence": "component_source_map exposed" if has_provenance else "limited traceability", | |
| }, | |
| { | |
| "category": "Uncertainty outputs", | |
| "weight": _MODEL_RUBRIC_WEIGHTS["uncertainty_outputs"], | |
| "old_system": 3, | |
| "current_system": _MODEL_RUBRIC_WEIGHTS["uncertainty_outputs"] if has_uncertainty else 0, | |
| "status": "active" if has_uncertainty else "missing", | |
| "evidence": "variance bands / coverage confidence" if has_uncertainty else "point estimate only", | |
| }, | |
| ] | |
| rubric_df = pd.DataFrame(grade_rows) | |
| summary = { | |
| "old_architecture_score": int(rubric_df["old_system"].sum()), | |
| "current_architecture_score": int(rubric_df["current_system"].sum()), | |
| "max_score": int(rubric_df["weight"].sum()), | |
| "modeled_hr_rows_total": int(props_hr_health_debug.get("modeled_hr_rows_total") or 0), | |
| } | |
| return rubric_df, summary | |
| # --------------------------------------------------------------------------- | |
| # Private diagnostic helpers | |
| # --------------------------------------------------------------------------- | |
def _query_db_inventory(conn) -> pd.DataFrame:
    """
    List all BASE TABLEs in the public schema with their row counts.

    Queries information_schema.tables, then runs COUNT(*) per table.
    Returns a DataFrame with columns [table_name, row_count], in table_name
    order. The columns are present even when no table is found or the listing
    query fails. A table whose COUNT(*) fails gets row_count = None.
    """
    from sqlalchemy import text as _t

    inventory_columns = ["table_name", "row_count"]
    try:
        names_df = pd.read_sql(
            _t("""
                SELECT table_name
                FROM information_schema.tables
                WHERE table_schema = 'public'
                  AND table_type = 'BASE TABLE'
                ORDER BY table_name
            """),
            conn,
        )
    except Exception:
        return pd.DataFrame(columns=inventory_columns)

    rows = []
    for tbl in names_df["table_name"].tolist():
        # information_schema reports the exact stored name, so quote it
        # (doubling embedded quotes) and qualify it with the schema it was
        # listed from; unquoted mixed-case or reserved names would not resolve.
        quoted = '"' + str(tbl).replace('"', '""') + '"'
        try:
            n = conn.execute(_t(f"SELECT COUNT(*) FROM public.{quoted}")).scalar()
        except Exception:
            # NOTE(review): on some drivers a failed statement leaves the
            # surrounding transaction unusable until rollback — confirm
            # whether later counts can be affected by an earlier failure.
            n = None
        rows.append({"table_name": tbl, "row_count": n})
    # Passing columns keeps the schema stable when `rows` is empty.
    return pd.DataFrame(rows, columns=inventory_columns)
def _get_table_columns(conn, table_name: str) -> set:
    """Look up a public-schema table's column names in information_schema.

    Returns the column names as a set. Any failure (including a missing
    table, which simply yields no rows) results in an empty set rather than
    an exception.
    """
    from sqlalchemy import text as _t

    found: set = set()
    try:
        column_frame = pd.read_sql(
            _t("""
                SELECT column_name
                FROM information_schema.columns
                WHERE table_schema = 'public'
                  AND table_name = :tbl
            """),
            conn,
            params={"tbl": table_name},
        )
        found = set(column_frame["column_name"].tolist())
    except Exception:
        found = set()
    return found
def _build_coverage_diagnostics(conn) -> list[dict]:
    """
    Collect per-table coverage metrics for a fixed list of key tables.

    Each table's columns are looked up at runtime first; a metric is only
    queried when the column it depends on exists. A table with no columns is
    reported with status "not found or empty schema". A failing row-count
    query is recorded as "error: <message>", any other failing metric as
    "error".

    Returns a list of dicts (one per table) for display.
    """
    from sqlalchemy import text as _t

    audited_tables = (
        "game_outcomes",
        "statcast_event_core",
        "live_pitch_mix_2026",
        "live_batter_game_log_2026",
        "batter_zone_events",
        "pitcher_inning_first_seed_events",
    )
    # (column that must exist, output key, SQL template) in report order.
    optional_metrics = (
        (
            "game_pk",
            "distinct_game_pks",
            "SELECT COUNT(DISTINCT game_pk) FROM {tbl} WHERE game_pk IS NOT NULL",
        ),
        ("game_date", "latest_game_date", "SELECT MAX(game_date) FROM {tbl}"),
        ("graded_at", "latest_graded_at", "SELECT MAX(graded_at) FROM {tbl}"),
        ("source_season", "latest_source_season", "SELECT MAX(source_season) FROM {tbl}"),
    )

    report: list[dict] = []
    for table in audited_tables:
        present = _get_table_columns(conn, table)
        if not present:
            report.append({"table": table, "status": "not found or empty schema"})
            continue

        entry: dict = {"table": table}
        # The row count does not depend on any particular column.
        try:
            entry["row_count"] = conn.execute(_t(f"SELECT COUNT(*) FROM {table}")).scalar()
        except Exception as exc:
            entry["row_count"] = f"error: {exc}"

        for required_column, output_key, sql_template in optional_metrics:
            if required_column not in present:
                continue
            try:
                entry[output_key] = conn.execute(_t(sql_template.format(tbl=table))).scalar()
            except Exception:
                entry[output_key] = "error"
        report.append(entry)
    return report
def _build_overlap_diagnostics(conn) -> dict:
    """
    Count how many game_outcomes game_pks also appear in a coverage table.

    Coverage tables are statcast_event_core, live_pitch_mix_2026 and
    live_batter_game_log_2026; game_outcomes.game_pk is cast to BIGINT for the
    comparison. An EXISTS clause is built only for coverage tables that expose
    a game_pk column, so absent tables are never referenced in the SQL.

    Returns {} when game_outcomes has no game_pk column or no coverage table
    qualifies, {"error": <message>} when a query fails, and otherwise the
    total / covered / missing counts of distinct non-empty game_pks.
    """
    from sqlalchemy import text as _t

    outcome_columns = _get_table_columns(conn, "game_outcomes")
    # (table, SQL alias, columns found) — looked up in this fixed order.
    coverage_sources = [
        ("statcast_event_core", "s", _get_table_columns(conn, "statcast_event_core")),
        ("live_pitch_mix_2026", "lpm", _get_table_columns(conn, "live_pitch_mix_2026")),
        ("live_batter_game_log_2026", "lpa", _get_table_columns(conn, "live_batter_game_log_2026")),
    ]
    if "game_pk" not in outcome_columns:
        return {}

    exists_clauses = [
        f"EXISTS (SELECT 1 FROM {table} {alias} WHERE {alias}.game_pk = g.game_pk::BIGINT)"
        for table, alias, found_columns in coverage_sources
        if "game_pk" in found_columns
    ]
    if not exists_clauses:
        return {}

    try:
        total_query = _t(
            "SELECT COUNT(DISTINCT game_pk) FROM game_outcomes WHERE game_pk IS NOT NULL AND game_pk != ''"
        )
        total = conn.execute(total_query).scalar()
        # A game counts as covered when ANY of the coverage tables has it.
        union_sql = " OR ".join(exists_clauses)
        covered_query = _t(f"""
            SELECT COUNT(DISTINCT g.game_pk)
            FROM game_outcomes g
            WHERE g.game_pk IS NOT NULL
              AND g.game_pk != ''
              AND ({union_sql})
        """)
        covered = conn.execute(covered_query).scalar()
        return {
            "total_game_outcomes_game_pks": total,
            "with_statcast_or_2026_coverage": covered,
            "missing_coverage": (total or 0) - (covered or 0),
        }
    except Exception as exc:
        return {"error": str(exc)}
| # --------------------------------------------------------------------------- | |
| # Public entry point | |
| # --------------------------------------------------------------------------- | |
| def render_debug( | |
| statcast_df: pd.DataFrame, | |
| pitcher_statcast_df: pd.DataFrame | None, | |
| odds_df: pd.DataFrame | None, | |
| conn: Any, | |
| live_games: pd.DataFrame, | |
| scores_df: pd.DataFrame, | |
| upcoming_props_debug: dict[str, pd.DataFrame] | None = None, | |
| baseline_bundle: dict[str, pd.DataFrame] | None = None, | |
| prepared_live_games_df: pd.DataFrame | None = None, | |
| grade_outcomes_fn: Callable | None = None, | |
| grade_props_fn: Callable | None = None, | |
| fill_realized_fn: Callable | None = None, | |
| debug_event_row_status: dict[str, dict[str, Any]] | None = None, | |
| ) -> None: | |
| """ | |
| Full Debug Dashboard page. | |
| Parameters | |
| ---------- | |
| statcast_df : blended batter baseline dataframe | |
| pitcher_statcast_df : blended pitcher baseline dataframe | |
| odds_df : odds dataframe (may be None / empty) | |
| conn : active DB connection | |
| live_games : raw live games DataFrame | |
| scores_df : scores feed DataFrame | |
| prepared_live_games_df : optional pre-enriched live games (avoids re-enrichment) | |
| grade_outcomes_fn : callable(scores_df) → grade final game outcomes | |
| grade_props_fn : callable() → grade batter prop outcomes from audit | |
| fill_realized_fn : callable(statcast_df) → fill realized batter outcomes | |
| """ | |
| st.header("Debug Dashboard") | |
| st.caption("Model diagnostics, adjustment ladders, signal attribution, and admin tools.") | |
| debug_source_bundle = _load_debug_cached_source_bundle(current_wbc_date_str()) | |
| _audit_result: list[dict] = [] | |
| def _fetch_audit() -> None: | |
| _audit_result.append(_load_debug_audit_bundle()) | |
| _audit_thread = threading.Thread(target=_fetch_audit, daemon=True) | |
| _audit_thread.start() | |
| _audit_thread.join(timeout=15) | |
| debug_audit_bundle: dict = _audit_result[0] if _audit_result else {} | |
| if upcoming_props_debug is not None: | |
| debug_source_bundle["props_cache"] = upcoming_props_debug | |
| # ------------------------------------------------------------------ | |
| # Resolve prepared live games | |
| # ------------------------------------------------------------------ | |
| if prepared_live_games_df is None or prepared_live_games_df.empty: | |
| prep_df = pd.DataFrame() | |
| else: | |
| prep_df = prepared_live_games_df | |
| # ------------------------------------------------------------------ | |
| # SECTION 1 — Filters | |
| # ------------------------------------------------------------------ | |
| st.subheader("Filters") | |
| col_game, col_player, col_team, col_edge = st.columns(4) | |
| with col_game: | |
| game_options: list[str] = [] | |
| if not prep_df.empty and "away_team" in prep_df.columns and "home_team" in prep_df.columns: | |
| game_options = [ | |
| f"{row.get('away_team','?')} @ {row.get('home_team','?')}" | |
| for _, row in prep_df.iterrows() | |
| ] | |
| selected_games = st.multiselect("Games", options=game_options, default=[]) | |
| with col_player: | |
| player_filter = st.text_input("Player filter", value="") | |
| with col_team: | |
| team_options: list[str] = [] | |
| if not prep_df.empty: | |
| for col in ("away_team", "home_team"): | |
| if col in prep_df.columns: | |
| team_options += prep_df[col].dropna().astype(str).unique().tolist() | |
| team_options = sorted(set(team_options)) | |
| selected_teams = st.multiselect("Teams", options=team_options, default=[]) | |
| with col_edge: | |
| edge_threshold = st.slider("Min HR edge (%)", min_value=0, max_value=30, value=0, step=1) | |
| # ------------------------------------------------------------------ | |
| # Run simulator for selected games | |
| # ------------------------------------------------------------------ | |
| all_sim_rows: list[dict] = [] | |
| if not prep_df.empty: | |
| for _, live_row in prep_df.iterrows(): | |
| game = live_row.to_dict() | |
| game_label = f"{game.get('away_team','?')} @ {game.get('home_team','?')}" | |
| # Apply game filter | |
| if selected_games and game_label not in selected_games: | |
| continue | |
| try: | |
| sim_rows = build_upcoming_simulated_rows( | |
| game_row=game, | |
| statcast_df=statcast_df, | |
| pitcher_statcast_df=pitcher_statcast_df, | |
| weather_row=None, | |
| ) | |
| except Exception as e: | |
| all_sim_rows.append({"game": game_label, "batter_name": "ERROR", "debug_note": str(e)}) | |
| continue | |
| for row in (sim_rows or []): | |
| if isinstance(row, dict): | |
| row["_game_label"] = game_label | |
| all_sim_rows.append(row) | |
| # Apply player / team filters | |
| filtered_rows = all_sim_rows | |
| if player_filter.strip(): | |
| pf = player_filter.strip().lower() | |
| filtered_rows = [r for r in filtered_rows if pf in str(r.get("batter_name", "")).lower()] | |
| if selected_teams: | |
| filtered_rows = [ | |
| r for r in filtered_rows | |
| if any(t in r.get("_game_label", "") for t in selected_teams) | |
| ] | |
| sim_df = pd.DataFrame(filtered_rows) if filtered_rows else pd.DataFrame() | |
| # ------------------------------------------------------------------ | |
| # SECTION 2 — Model Snapshot Table | |
| # ------------------------------------------------------------------ | |
| st.subheader("Model Snapshot") | |
| if sim_df.empty: | |
| st.info("No simulation rows available. Load live games and statcast data first.") | |
| else: | |
| snapshot_cols = [ | |
| c for c in [ | |
| "_game_label", "slot", "batter_name", "pitcher_name", | |
| "hit_prob", "hr_prob", "tb2p_prob", | |
| "fair_hr_odds", "book_hr_odds", "hr_edge", | |
| "pa_multiplier", "pitcher_quality_score", "opportunity_mode", | |
| "rolling_combined_form_score", "arsenal_drift_score", | |
| "bullpen_top_candidate", "bullpen_entry_prob", | |
| ] if c in sim_df.columns | |
| ] | |
| display_df = sim_df[snapshot_cols].copy() | |
| # Apply edge threshold filter | |
| if edge_threshold > 0 and "hr_edge" in display_df.columns: | |
| display_df = display_df[ | |
| pd.to_numeric(display_df["hr_edge"], errors="coerce").fillna(0) >= edge_threshold / 100.0 | |
| ] | |
| st.dataframe(display_df, use_container_width=True, hide_index=True) | |
| # ------------------------------------------------------------------ | |
| # SECTION 3 — Adjustment Ladder (per batter, exact checkpoints) | |
| # ------------------------------------------------------------------ | |
| st.subheader("Adjustment Ladder (HR probability)") | |
| if sim_df.empty: | |
| st.info("No simulation data loaded.") | |
| else: | |
| for _, brow in sim_df.iterrows(): | |
| batter = str(brow.get("batter_name", "?")) | |
| game = str(brow.get("_game_label", "")) | |
| label = f"{batter} — {game}" | |
| with st.expander(label, expanded=False): | |
| ladder_metric = st.selectbox( | |
| "Ladder metric", | |
| options=["HR", "Hit", "TB2P"], | |
| index=0, | |
| key=f"ladder_metric_{batter}", | |
| ) | |
| ladder_fields = ( | |
| _LADDER_HR_FIELDS if ladder_metric == "HR" | |
| else _LADDER_HIT_FIELDS if ladder_metric == "Hit" | |
| else _LADDER_TB2P_FIELDS | |
| ) | |
| ladder_rows = [] | |
| prev_val: float | None = None | |
| for layer_name, field in ladder_fields: | |
| val = brow.get(field) | |
| if val is not None: | |
| try: | |
| val_f = float(val) | |
| except (TypeError, ValueError): | |
| val_f = None | |
| else: | |
| val_f = None | |
| delta_str = "" | |
| if val_f is not None and prev_val is not None: | |
| delta = val_f - prev_val | |
| delta_str = f"{delta:+.4f}" | |
| elif val_f is not None and prev_val is None: | |
| delta_str = "—" | |
| ladder_rows.append({ | |
| "Layer": layer_name, | |
| "Delta": delta_str, | |
| f"Cumulative {ladder_metric} prob": f"{val_f:.4f}" if val_f is not None else "—", | |
| }) | |
| if val_f is not None: | |
| prev_val = val_f | |
| st.dataframe( | |
| pd.DataFrame(ladder_rows), | |
| use_container_width=True, | |
| hide_index=True, | |
| ) | |
| # Opportunity mode display | |
| opp_mode = brow.get("opportunity_mode") | |
| if opp_mode: | |
| st.caption( | |
| f"Opportunity mode: **{opp_mode}** | " | |
| f"pa_multiplier={brow.get('pa_multiplier', '?')} | " | |
| f"lineup_slot_used={brow.get('lineup_slot_used', 'None')} | " | |
| f"team_total_used={brow.get('team_total_used', 'None')}" | |
| ) | |
| # ------------------------------------------------------------------ | |
| # SECTION 4 — Full Feature Snapshot | |
| # ------------------------------------------------------------------ | |
| st.subheader("Feature Snapshot (per batter)") | |
| if not prep_df.empty and not sim_df.empty: | |
| batter_names = sim_df["batter_name"].dropna().unique().tolist() if "batter_name" in sim_df.columns else [] | |
| selected_batter = st.selectbox("Select batter", options=["—"] + batter_names) | |
| if selected_batter and selected_batter != "—": | |
| from models.batter_baseline import build_batter_feature_row # local import to avoid circular | |
| try: | |
| batter_features = build_batter_feature_row(statcast_df, selected_batter) | |
| except Exception: | |
| batter_features = {} | |
| # Get pitcher from first sim row for this batter | |
| batter_rows = sim_df[sim_df["batter_name"] == selected_batter] | |
| pitcher_name = batter_rows.iloc[0].get("pitcher_name", "") if not batter_rows.empty else "" | |
| try: | |
| pitcher_row = build_pitcher_feature_row(statcast_df, pitcher_name) | |
| except Exception: | |
| pitcher_row = {} | |
| col_b, col_p = st.columns(2) | |
| with col_b: | |
| with st.expander("Batter features", expanded=True): | |
| st.json({k: (v if v is not None else None) for k, v in batter_features.items()}) | |
| with col_p: | |
| with st.expander("Pitcher row", expanded=True): | |
| st.json({k: (v if v is not None else None) for k, v in pitcher_row.items()}) | |
| # ------------------------------------------------------------------ | |
| # SECTION 5 — Signal Attribution | |
| # ------------------------------------------------------------------ | |
| st.subheader("Signal Attribution") | |
| if not sim_df.empty: | |
| tag_rows = [] | |
| for _, srow in sim_df.iterrows(): | |
| batter = srow.get("batter_name", "?") | |
| game = srow.get("_game_label", "") | |
| for tag_field, source in [ | |
| ("rolling_adjustment_reason_tags", "Rolling"), | |
| ("arsenal_reason_tags", "Drift"), | |
| ("reason_tags", "Pitcher Live"), | |
| ]: | |
| tags_val = srow.get(tag_field, "") | |
| if isinstance(tags_val, list): | |
| tags = tags_val | |
| elif isinstance(tags_val, str) and tags_val: | |
| tags = [t.strip() for t in tags_val.split("|") if t.strip()] | |
| else: | |
| tags = [] | |
| for tag in tags: | |
| tag_rows.append({"Game": game, "Batter": batter, "Source": source, "Tag": tag}) | |
| if tag_rows: | |
| st.dataframe(pd.DataFrame(tag_rows), use_container_width=True, hide_index=True) | |
| else: | |
| st.info("No active signal tags for filtered batters.") | |
| # ------------------------------------------------------------------ | |
| # SECTION 5b — Bullpen Candidates | |
| # ------------------------------------------------------------------ | |
| if not sim_df.empty: | |
| with st.expander("Bullpen Candidates", expanded=False): | |
| bullpen_cols = [ | |
| "batter_name", "pitcher_name", | |
| "bullpen_top_candidate", "bullpen_top_candidate_availability", | |
| "bullpen_top_candidate_handedness_fit", "bullpen_top_candidate_role_fit", | |
| "bullpen_candidate_1", "bullpen_candidate_2", "bullpen_candidate_3", | |
| "bullpen_candidate_summary", "bullpen_selection_mode", | |
| "bullpen_availability_applied", "bullpen_entry_prob", | |
| "starter_stays_next_batter_prob", | |
| ] | |
| available_cols = [c for c in bullpen_cols if c in sim_df.columns] | |
| if available_cols: | |
| st.dataframe(sim_df[available_cols], use_container_width=True) | |
| else: | |
| st.info("Bullpen candidate data not available.") | |
| # ------------------------------------------------------------------ | |
| # SECTION 5c — Execution Layer | |
| # ------------------------------------------------------------------ | |
| active_exec_df = st.session_state.get("props_exec_df") | |
| props_raw_feed = st.session_state.get("props_raw_feed") | |
| props_prepared_bundle = st.session_state.get("props_prepared_bundle") or {} | |
| props_supported_markets = tuple(st.session_state.get("props_supported_markets") or []) | |
| props_modeled_market_bundle = st.session_state.get("props_modeled_market_bundle") or {} | |
| if ( | |
| isinstance(props_raw_feed, pd.DataFrame) | |
| and not props_raw_feed.empty | |
| and isinstance(props_prepared_bundle, dict) | |
| and props_prepared_bundle | |
| and props_supported_markets | |
| ): | |
| missing_markets = tuple( | |
| market for market in props_supported_markets | |
| if str(market).strip().lower() not in props_modeled_market_bundle | |
| ) | |
| if missing_markets: | |
| props_modeled_market_bundle = _ensure_props_market_payloads( | |
| raw=props_raw_feed, | |
| prepared_bundle=props_prepared_bundle, | |
| existing_payloads=props_modeled_market_bundle, | |
| markets=missing_markets, | |
| capture_debug=False, | |
| ) | |
| st.session_state["props_modeled_market_bundle"] = props_modeled_market_bundle | |
| props_market_debug_bundle = _get_props_market_debug_bundle(props_modeled_market_bundle) | |
| exec_df = _get_combined_props_exec_df(props_modeled_market_bundle, active_exec_df) | |
| with st.expander("Execution Layer (Props)", expanded=False): | |
| if exec_df is None or (isinstance(exec_df, pd.DataFrame) and exec_df.empty): | |
| st.info("No execution layer data. Visit the Props tab first.") | |
| else: | |
| exec_cols = [ | |
| "player_name", "sportsbook", | |
| "edge_raw", "edge_filtered", "execution_confidence_score", | |
| "execution_volatility_score", "execution_signal_strength_score", | |
| "market_width", "market_outlier_flag", "stale_book_flag", | |
| "timing_flag", "timing_reason", | |
| "correlation_flag", "correlation_direction", | |
| "final_recommendation_score", "edge_filter_flags", | |
| ] | |
| available = [c for c in exec_cols if c in exec_df.columns] | |
| if available: | |
| sort_col = "final_recommendation_score" | |
| display_exec = exec_df[available].copy() | |
| if sort_col in display_exec.columns: | |
| display_exec = display_exec.sort_values( | |
| sort_col, ascending=False, na_position="last" | |
| ) | |
| st.dataframe(display_exec, use_container_width=True, hide_index=True) | |
| else: | |
| st.info("Execution layer fields not present in props data.") | |
| st.markdown("### Props Page View Model") | |
| with st.container(): | |
| if exec_df is None or (isinstance(exec_df, pd.DataFrame) and exec_df.empty): | |
| st.info("No mapped props data is available. Visit the Props tab first.") | |
| else: | |
| props_vm = st.session_state.get("props_view_model_bundle") | |
| if not isinstance(props_vm, dict) or not props_vm: | |
| hr_payload = (props_modeled_market_bundle.get("hr") or {}) if isinstance(props_modeled_market_bundle, dict) else {} | |
| hr_exec_df = hr_payload.get("mapped", pd.DataFrame()) if isinstance(hr_payload, dict) else pd.DataFrame() | |
| if isinstance(hr_exec_df, pd.DataFrame) and not hr_exec_df.empty: | |
| props_vm = build_hr_props_view_model(hr_exec_df) | |
| else: | |
| hr_fallback_df = exec_df[ | |
| exec_df.get("market_family", pd.Series(index=exec_df.index, dtype="object")) | |
| .astype(str) | |
| .str.lower() | |
| .eq("hr") | |
| ].copy() | |
| props_vm = build_hr_props_view_model(hr_fallback_df) if not hr_fallback_df.empty else {} | |
| vm_tab_normalized, vm_tab_featured, vm_tab_grouped, vm_tab_details, vm_tab_layers = st.tabs( | |
| ["Normalized", "Featured", "Grouped", "Player Detail", "Matchup Layers"] | |
| ) | |
| normalized_cols = [ | |
| "event_id", | |
| "away_team", | |
| "home_team", | |
| "commence_time", | |
| "player_name_raw", | |
| "player_name", | |
| "sportsbook", | |
| "market_family", | |
| "market_variant", | |
| "selection_scope", | |
| "selection_side", | |
| "threshold", | |
| "display_label", | |
| "is_primary_line", | |
| "is_modeled", | |
| "player_event_market_key", | |
| "odds_american", | |
| "implied_prob", | |
| "raw_hr_prob", | |
| "calibrated_hr_prob", | |
| "model_hr_prob", | |
| "fair_prob", | |
| "bet_ev", | |
| "verdict", | |
| "model_voice", | |
| "model_voice_primary_reason", | |
| "model_voice_caveat", | |
| "model_voice_tags", | |
| "model_voice_for", | |
| "model_voice_against", | |
| "confidence_score", | |
| "confidence_bucket", | |
| "opportunity_hr_adjustment", | |
| "expected_pa", | |
| "lineup_slot_used", | |
| "lineup_slot_source", | |
| "team_total_used", | |
| "batter_team", | |
| "batter_team_source", | |
| "projected_home_pitcher", | |
| "projected_away_pitcher", | |
| "projected_starter_available", | |
| "projected_home_pitcher_source", | |
| "projected_away_pitcher_source", | |
| "starter_cache_source", | |
| "fallback_used", | |
| "projected_starter_match_status", | |
| "resolved_pitcher_name", | |
| "resolved_pitcher_source", | |
| "pitcher_resolution_status", | |
| "telemetry_path_status", | |
| "hr_model_tier", | |
| "shared_matchup_available", | |
| "modeled_row_available", | |
| "modeled_row_missing_reason", | |
| "pitcher_hand", | |
| "pitcher_hand_source", | |
| "zone_status", | |
| "family_zone_status", | |
| "arsenal_status", | |
| "zone_store_sample_size", | |
| "family_zone_batter_sample_size", | |
| "family_zone_pitcher_sample_size", | |
| "arsenal_batter_sample_size", | |
| "arsenal_pitcher_sample_size", | |
| "reason_candidate_count", | |
| "edge", | |
| "model_hr_prob_source", | |
| ] | |
| with vm_tab_normalized: | |
| st.write("Normalized props rows") | |
| st.dataframe( | |
| exec_df[[c for c in normalized_cols if c in exec_df.columns]], | |
| use_container_width=True, | |
| hide_index=True, | |
| ) | |
| featured_df = props_vm.get("featured_props_df", pd.DataFrame()) | |
| best_on_slate_debug = st.session_state.get("props_best_on_slate_debug") or {} | |
| best_on_slate_df = pd.DataFrame(best_on_slate_debug.get("rows") or []) | |
| with vm_tab_featured: | |
| st.write("Featured props input") | |
| if featured_df.empty: | |
| st.info("No featured props are currently available.") | |
| else: | |
| featured_cols = [ | |
| "event_id", | |
| "player_name_raw", | |
| "sportsbook", | |
| "display_label", | |
| "odds_american", | |
| "implied_prob", | |
| "raw_hr_prob", | |
| "model_hr_prob", | |
| "fair_prob", | |
| "bet_ev", | |
| "verdict", | |
| "confidence_score", | |
| "edge", | |
| "projected_starter_match_status", | |
| "resolved_pitcher_name", | |
| "pitcher_resolution_status", | |
| "telemetry_path_status", | |
| "hr_model_tier", | |
| "modeled_row_available", | |
| "modeled_row_missing_reason", | |
| "zone_status", | |
| "family_zone_status", | |
| "arsenal_status", | |
| "reason_candidate_count", | |
| "final_recommendation_score", | |
| "featured_value_score", | |
| ] | |
| st.dataframe( | |
| featured_df[[c for c in featured_cols if c in featured_df.columns]], | |
| use_container_width=True, | |
| hide_index=True, | |
| ) | |
| st.write("Best on slate input") | |
| slate_summary = pd.DataFrame([best_on_slate_debug.get("summary") or {}]) | |
| if not slate_summary.empty and slate_summary.notna().any(axis=None): | |
| st.dataframe(slate_summary, use_container_width=True, hide_index=True) | |
| if best_on_slate_df.empty: | |
| st.info("No slate-wide best-value props are currently available.") | |
| else: | |
| slate_cols = [ | |
| "event_id", | |
| "player_name_raw", | |
| "sportsbook", | |
| "market_family", | |
| "display_label", | |
| "odds_american", | |
| "implied_prob", | |
| "model_hr_prob", | |
| "fair_prob", | |
| "bet_ev", | |
| "verdict", | |
| "confidence_score", | |
| "edge", | |
| "final_recommendation_score", | |
| "featured_value_score", | |
| ] | |
| st.dataframe( | |
| best_on_slate_df[[c for c in slate_cols if c in best_on_slate_df.columns]], | |
| use_container_width=True, | |
| hide_index=True, | |
| ) | |
| games_summary_df = props_vm.get("games_summary_df", pd.DataFrame()) | |
| with vm_tab_grouped: | |
| st.write("By-game summary input") | |
| if games_summary_df.empty: | |
| st.info("No grouped game summaries are available.") | |
| else: | |
| st.dataframe(games_summary_df, use_container_width=True, hide_index=True) | |
| st.write("By-game grouped payload") | |
| game_map = props_vm.get("game_player_props_map", {}) | |
| if not game_map: | |
| st.info("No grouped game payload is available.") | |
| else: | |
| summary_rows: list[dict[str, Any]] = [] | |
| for game_key, payload in game_map.items(): | |
| for player_entry in payload.get("players") or []: | |
| summary_rows.append( | |
| { | |
| "game_key": game_key, | |
| "event_id": payload.get("event_id"), | |
| "matchup": f"{payload.get('away_team', '?')} @ {payload.get('home_team', '?')}", | |
| "player_name": player_entry.get("player_name_raw") or player_entry.get("player_name"), | |
| "best_display_label": player_entry.get("best_display_label"), | |
| "best_book": player_entry.get("best_book"), | |
| "best_odds_american": player_entry.get("best_odds_american"), | |
| "best_model_hr_prob": player_entry.get("best_model_hr_prob"), | |
| "best_bet_ev": player_entry.get("best_bet_ev"), | |
| "best_confidence_score": player_entry.get("best_confidence_score"), | |
| "best_verdict": player_entry.get("best_verdict"), | |
| "model_voice": player_entry.get("model_voice"), | |
| "model_voice_primary_reason": player_entry.get("model_voice_primary_reason"), | |
| "model_voice_caveat": player_entry.get("model_voice_caveat"), | |
| "model_voice_for": player_entry.get("model_voice_for"), | |
| "model_voice_against": player_entry.get("model_voice_against"), | |
| "best_edge": player_entry.get("best_edge"), | |
| "has_modeled_row": player_entry.get("has_modeled_row"), | |
| "has_alt_ladders": player_entry.get("has_alt_ladders"), | |
| } | |
| ) | |
| st.dataframe(pd.DataFrame(summary_rows), use_container_width=True, hide_index=True) | |
| with vm_tab_details: | |
| st.write("Player ladder details") | |
| player_detail_map = props_vm.get("player_prop_detail_map", {}) | |
| if not player_detail_map: | |
| st.info("No player detail payload is available.") | |
| else: | |
| detail_rows: list[dict[str, Any]] = [] | |
| for player_key, payload in player_detail_map.items(): | |
| detail_rows.append( | |
| { | |
| "player_key": player_key, | |
| "event_id": payload.get("event_id"), | |
| "player_name": payload.get("player_name_raw") or payload.get("player_name"), | |
| "has_modeled_row": payload.get("has_modeled_row"), | |
| "has_alt_ladders": payload.get("has_alt_ladders"), | |
| "best_book": payload.get("best_book"), | |
| "best_odds_american": payload.get("best_odds_american"), | |
| "best_bet_ev": payload.get("best_bet_ev"), | |
| "best_edge": payload.get("best_edge"), | |
| "primary_rows": len(payload.get("primary_rows") or []), | |
| "alt_rows": len(payload.get("alt_rows") or []), | |
| } | |
| ) | |
| st.dataframe(pd.DataFrame(detail_rows), use_container_width=True, hide_index=True) | |
| with vm_tab_layers: | |
| st.write("Props Matchup Layer Diagnostics") | |
| diag_cols = [ | |
| "player_name_raw", | |
| "sportsbook", | |
| "display_label", | |
| "baseline_mode", | |
| "prior_sample_size", | |
| "season_2026_sample_size", | |
| "prior_weight", | |
| "season_2026_weight", | |
| "baseline_driver", | |
| "rolling_overlay_active", | |
| "pitcher_baseline_mode", | |
| "pitcher_prior_sample_size", | |
| "pitcher_season_2026_sample_size", | |
| "pitcher_prior_weight", | |
| "pitcher_season_2026_weight", | |
| "pitcher_baseline_driver", | |
| "pitcher_rolling_overlay_active", | |
| "batter_team", | |
| "batter_team_source", | |
| "projected_home_pitcher", | |
| "projected_away_pitcher", | |
| "projected_starter_available", | |
| "projected_home_pitcher_source", | |
| "projected_away_pitcher_source", | |
| "starter_cache_source", | |
| "fallback_used", | |
| "projected_starter_match_status", | |
| "resolved_pitcher_name", | |
| "resolved_pitcher_source", | |
| "pitcher_resolution_status", | |
| "telemetry_path_status", | |
| "hr_model_tier", | |
| "shared_matchup_available", | |
| "modeled_row_available", | |
| "modeled_row_missing_reason", | |
| "pitcher_hand", | |
| "pitcher_hand_source", | |
| "applied_layers", | |
| "skipped_layers", | |
| "pitcher_hr_adjustment", | |
| "trend_hr_adjustment", | |
| "zone_hr_adjustment", | |
| "family_zone_hr_adjustment", | |
| "arsenal_hr_adjustment", | |
| "zone_status", | |
| "family_zone_status", | |
| "arsenal_status", | |
| "zone_store_sample_size", | |
| "family_zone_batter_sample_size", | |
| "family_zone_pitcher_sample_size", | |
| "arsenal_batter_sample_size", | |
| "arsenal_pitcher_sample_size", | |
| "reason_candidate_count", | |
| "model_voice_tags", | |
| ] | |
| st.dataframe( | |
| exec_df[[c for c in diag_cols if c in exec_df.columns]], | |
| use_container_width=True, | |
| hide_index=True, | |
| ) | |
| with st.expander("Strikeout Confidence Diagnostics", expanded=False): | |
| strikeout_df = exec_df[ | |
| exec_df.get("market_family", pd.Series(index=exec_df.index, dtype="object")) | |
| .astype(str) | |
| .str.lower() | |
| .eq("k") | |
| ].copy() | |
| if strikeout_df.empty: | |
| st.info("No strikeout props are currently available.") | |
| else: | |
| summary_cols = [ | |
| "player_name_raw", | |
| "sportsbook", | |
| "display_label", | |
| "selection_side", | |
| "fair_prob", | |
| "confidence_score", | |
| "confidence_score_raw", | |
| "confidence_score_display", | |
| "confidence_source", | |
| "confidence_bucket", | |
| "confidence_bucket_raw", | |
| "confidence_bucket_display", | |
| "confidence_summary_label", | |
| "confidence_reasons", | |
| "projected_pitch_count", | |
| "pitches_per_bf", | |
| "projected_batters_faced", | |
| "projected_innings", | |
| "expected_strikeouts", | |
| "opportunity_confidence", | |
| "opportunity_reasons", | |
| "telemetry_path_status", | |
| "model_tier", | |
| "projected_starter_match_status", | |
| "resolved_pitcher_name", | |
| ] | |
| st.write("Card-facing strikeout confidence rows") | |
| st.dataframe( | |
| strikeout_df[[c for c in summary_cols if c in strikeout_df.columns]], | |
| use_container_width=True, | |
| hide_index=True, | |
| ) | |
| component_rows: list[dict[str, Any]] = [] | |
| for _, row in strikeout_df.iterrows(): | |
| player_name = row.get("player_name_raw") or row.get("player_name") | |
| display_label = row.get("display_label") | |
| for item in row.get("confidence_component_bonuses") or []: | |
| component_rows.append( | |
| { | |
| "player_name": player_name, | |
| "display_label": display_label, | |
| "component_type": "bonus", | |
| "label": item.get("label"), | |
| "value": item.get("value"), | |
| "source": row.get("confidence_source"), | |
| } | |
| ) | |
| for item in row.get("confidence_component_penalties") or []: | |
| component_rows.append( | |
| { | |
| "player_name": player_name, | |
| "display_label": display_label, | |
| "component_type": "penalty", | |
| "label": item.get("label"), | |
| "value": item.get("value"), | |
| "source": row.get("confidence_source"), | |
| } | |
| ) | |
| if component_rows: | |
| st.write("Confidence component math") | |
| st.dataframe(pd.DataFrame(component_rows), use_container_width=True, hide_index=True) | |
| else: | |
| st.info("No confidence component rows are present yet.") | |
| with st.expander("Shared Baseline Diagnostics", expanded=False): | |
| baseline_summary_frames: list[pd.DataFrame] = [] | |
| batter_meta = (baseline_bundle or {}).get("batter_baseline_meta", pd.DataFrame()) | |
| pitcher_meta = (baseline_bundle or {}).get("pitcher_baseline_meta", pd.DataFrame()) | |
| snapshot_status = (baseline_bundle or {}).get("snapshot_status", pd.DataFrame()) | |
| hitter_rolling_snapshot = (baseline_bundle or {}).get("hitter_rolling_snapshot", pd.DataFrame()) | |
| pitcher_rolling_snapshot = (baseline_bundle or {}).get("pitcher_rolling_snapshot", pd.DataFrame()) | |
| source_status = str((baseline_bundle or {}).get("snapshot_source_status") or "unknown") | |
| runtime_fallback_used = bool((baseline_bundle or {}).get("runtime_fallback_used")) | |
| c1, c2 = st.columns(2) | |
| c1.metric("Baseline Source", source_status.replace("_", " ").title()) | |
| c2.metric("Runtime Fallback Used", "Yes" if runtime_fallback_used else "No") | |
| if isinstance(snapshot_status, pd.DataFrame) and not snapshot_status.empty: | |
| st.write("Snapshot Freshness") | |
| st.dataframe(snapshot_status, use_container_width=True, hide_index=True) | |
| if isinstance(batter_meta, pd.DataFrame) and not batter_meta.empty: | |
| batter_display = batter_meta.copy() | |
| batter_display["baseline_role"] = "batter" | |
| baseline_summary_frames.append( | |
| batter_display[ | |
| [ | |
| c for c in [ | |
| "baseline_role", | |
| "player_name", | |
| "baseline_mode", | |
| "prior_sample_size", | |
| "season_2026_sample_size", | |
| "prior_weight", | |
| "season_2026_weight", | |
| "baseline_driver", | |
| "rolling_overlay_active", | |
| ] if c in batter_display.columns | |
| ] | |
| ] | |
| ) | |
| if isinstance(pitcher_meta, pd.DataFrame) and not pitcher_meta.empty: | |
| pitcher_display = pitcher_meta.copy() | |
| pitcher_display["baseline_role"] = "pitcher" | |
| baseline_summary_frames.append( | |
| pitcher_display[ | |
| [ | |
| c for c in [ | |
| "baseline_role", | |
| "player_name", | |
| "baseline_mode", | |
| "prior_sample_size", | |
| "season_2026_sample_size", | |
| "prior_weight", | |
| "season_2026_weight", | |
| "baseline_driver", | |
| "rolling_overlay_active", | |
| ] if c in pitcher_display.columns | |
| ] | |
| ] | |
| ) | |
| if baseline_summary_frames: | |
| st.dataframe( | |
| pd.concat(baseline_summary_frames, ignore_index=True), | |
| use_container_width=True, | |
| hide_index=True, | |
| ) | |
| else: | |
| st.info("Shared baseline metadata is not loaded.") | |
| rolling_summary_frames: list[pd.DataFrame] = [] | |
| if isinstance(hitter_rolling_snapshot, pd.DataFrame) and not hitter_rolling_snapshot.empty: | |
| hitter_roll = hitter_rolling_snapshot.copy() | |
| hitter_roll["baseline_role"] = "batter" | |
| rolling_summary_frames.append( | |
| hitter_roll[ | |
| [ | |
| c for c in [ | |
| "baseline_role", | |
| "player_name", | |
| "batter_games_in_window_5g", | |
| "batter_games_in_window_10g", | |
| "batter_recent_form_available", | |
| "snapshot_built_at", | |
| "source_status", | |
| ] if c in hitter_roll.columns | |
| ] | |
| ] | |
| ) | |
| if isinstance(pitcher_rolling_snapshot, pd.DataFrame) and not pitcher_rolling_snapshot.empty: | |
| pitcher_roll = pitcher_rolling_snapshot.copy() | |
| pitcher_roll["baseline_role"] = "pitcher" | |
| rolling_summary_frames.append( | |
| pitcher_roll[ | |
| [ | |
| c for c in [ | |
| "baseline_role", | |
| "player_name", | |
| "pitcher_games_in_window_5g", | |
| "pitcher_games_in_window_10g", | |
| "pitcher_recent_form_available", | |
| "pitcher_rolling_confidence", | |
| "snapshot_built_at", | |
| "source_status", | |
| ] if c in pitcher_roll.columns | |
| ] | |
| ] | |
| ) | |
| if rolling_summary_frames: | |
| st.write("Rolling Snapshot Diagnostics") | |
| st.dataframe( | |
| pd.concat(rolling_summary_frames, ignore_index=True), | |
| use_container_width=True, | |
| hide_index=True, | |
| ) | |
| with st.expander("Props Baseline Diagnostics", expanded=False): | |
| baseline_debug_rows = [] | |
| starter_debug = st.session_state.get("props_starter_debug") or {} | |
| for market_key, payload in props_market_debug_bundle.items(): | |
| baseline_debug = (payload or {}).get("baseline_debug") or {} | |
| if baseline_debug: | |
| baseline_debug_rows.append( | |
| { | |
| "market_type": market_key, | |
| "baseline_source": baseline_debug.get("baseline_source"), | |
| "coverage_mode": baseline_debug.get("snapshot_coverage_mode"), | |
| "runtime_fallback_used": baseline_debug.get("runtime_fallback_used"), | |
| "request_patch_used": baseline_debug.get("request_patch_used"), | |
| "background_refresh_queued": baseline_debug.get("background_refresh_queued"), | |
| "requested_hitter_count": baseline_debug.get("requested_hitter_count"), | |
| "resolved_hitter_count": baseline_debug.get("resolved_hitter_count"), | |
| "requested_pitcher_count": baseline_debug.get("requested_pitcher_count"), | |
| "resolved_pitcher_count": baseline_debug.get("resolved_pitcher_count"), | |
| "slate_team_scope": ", ".join(baseline_debug.get("slate_team_scope") or []), | |
| "missing_hitter_names": ", ".join(baseline_debug.get("missing_hitter_names") or []), | |
| "missing_pitcher_names": ", ".join(baseline_debug.get("missing_pitcher_names") or []), | |
| } | |
| ) | |
| if baseline_debug_rows: | |
| baseline_debug_df = pd.DataFrame(baseline_debug_rows) | |
| c1, c2, c3, c4 = st.columns(4) | |
| c1.metric("Markets Captured", len(baseline_debug_rows)) | |
| c2.metric("Any Runtime Fallback", "Yes" if baseline_debug_df["runtime_fallback_used"].fillna(False).astype(bool).any() else "No") | |
| c3.metric("Any Request Patch", "Yes" if baseline_debug_df["request_patch_used"].fillna(False).astype(bool).any() else "No") | |
| c4.metric("Any Refresh Queued", "Yes" if baseline_debug_df["background_refresh_queued"].fillna(False).astype(bool).any() else "No") | |
| st.dataframe(baseline_debug_df, use_container_width=True, hide_index=True) | |
| if starter_debug: | |
| st.write("Starter / Lineup Cache Diagnostics") | |
| st.dataframe( | |
| pd.DataFrame( | |
| [ | |
| { | |
| "starter_cache_source": starter_debug.get("starter_cache_source"), | |
| "starter_cache_age_seconds": starter_debug.get("starter_cache_age_seconds"), | |
| "starter_refresh_mode": starter_debug.get("starter_refresh_mode"), | |
| "oddsapi_fallback_used_matchup_count": starter_debug.get("oddsapi_fallback_used_matchup_count"), | |
| } | |
| ] | |
| ), | |
| use_container_width=True, | |
| hide_index=True, | |
| ) | |
| else: | |
| st.info("Open the Props page in this session to capture Props baseline diagnostics.") | |
| with st.expander("Pitcher Resolution", expanded=False): | |
| props_prepared_bundle = st.session_state.get("props_prepared_bundle") or {} | |
| starter_bundle = props_prepared_bundle.get("starter_bundle") or {} | |
| merged_starters = starter_bundle.get("merged_starters") or {} | |
| # Always rebuild from the live props feed when available so odds API | |
| # pitchers are shown even if session state was populated by a prior | |
| # Props page visit (which only contains Stats API starters). | |
| if upcoming_props_debug is not None: | |
| _props_feed = upcoming_props_debug.get("merged_props_feed", pd.DataFrame()) | |
| if isinstance(_props_feed, pd.DataFrame) and not _props_feed.empty: | |
| try: | |
| _primary = read_cached_probable_starters(conn) | |
| except Exception: | |
| _primary = {} | |
| _fallback = build_oddsapi_starter_fallback_map( | |
| props_feed=_props_feed, | |
| primary_starters=_primary, | |
| pitcher_statcast_df=pitcher_statcast_df, | |
| ) | |
| merged_starters = merge_probable_starters_with_odds_fallback(_primary, _fallback) | |
| if merged_starters: | |
| resolution_rows = [] | |
| for (away_norm, home_norm), payload in merged_starters.items(): | |
| resolution_rows.append({ | |
| "matchup": f"{payload.get('away_team_raw') or away_norm} @ {payload.get('home_team_raw') or home_norm}", | |
| "away_pitcher": payload.get("away_pitcher") or "—", | |
| "away_source": payload.get("away_pitcher_source") or "unresolved", | |
| "home_pitcher": payload.get("home_pitcher") or "—", | |
| "home_source": payload.get("home_pitcher_source") or "unresolved", | |
| "cache_source": payload.get("starter_cache_source") or "unresolved", | |
| "fallback_used": bool(payload.get("fallback_used")), | |
| }) | |
| st.dataframe(pd.DataFrame(resolution_rows), use_container_width=True, hide_index=True) | |
| else: | |
| st.info("Props data is not yet available for pitcher resolution.") | |
| with st.expander("Props HR Health Diagnostics", expanded=False): | |
| props_hr_health_debug = ((props_market_debug_bundle.get("hr") or {}).get("hr_health_debug")) or {} | |
| if props_hr_health_debug: | |
| c1, c2, c3, c4, c5 = st.columns(5) | |
| c1.metric("Modeled 1+ HR Rows", int(props_hr_health_debug.get("modeled_hr_rows_total") or 0)) | |
| c2.metric("With HR%", int(props_hr_health_debug.get("modeled_hr_rows_with_probability") or 0)) | |
| c3.metric("With Edge", int(props_hr_health_debug.get("modeled_hr_rows_with_edge") or 0)) | |
| c4.metric("Missing HR%", int(props_hr_health_debug.get("modeled_hr_rows_missing_probability") or 0)) | |
| c5.metric("2+ HR Ladders", int(props_hr_health_debug.get("research_hr_ladder_rows_total") or 0)) | |
| context_df = pd.DataFrame( | |
| [ | |
| { | |
| "requested_hitter_count": props_hr_health_debug.get("requested_hitter_count"), | |
| "resolved_hitter_count": props_hr_health_debug.get("resolved_hitter_count"), | |
| "requested_pitcher_count": props_hr_health_debug.get("requested_pitcher_count"), | |
| "resolved_pitcher_count": props_hr_health_debug.get("resolved_pitcher_count"), | |
| } | |
| ] | |
| ) | |
| st.dataframe(context_df, use_container_width=True, hide_index=True) | |
| health_rows = pd.DataFrame(props_hr_health_debug.get("health_rows") or []) | |
| if not health_rows.empty: | |
| st.dataframe(health_rows, use_container_width=True, hide_index=True) | |
| else: | |
| st.info("No modeled 1+ HR health rows captured in this session.") | |
| else: | |
| st.info("Open the Props page in this session to capture HR health diagnostics.") | |
| with st.expander("Shared Matchup Component Diagnostics", expanded=False): | |
| shared_component_rows = [] | |
| executed_rows = [] | |
| gating_rows = [] | |
| failure_summary_rows = [] | |
| for market_key, payload in props_market_debug_bundle.items(): | |
| shared_component_debug = (payload or {}).get("shared_component_debug") or {} | |
| for row in shared_component_debug.get("rows") or []: | |
| shared_component_rows.append({"market_type": market_key, **row}) | |
| for row in shared_component_debug.get("executed_rows") or []: | |
| executed_rows.append({"market_type": market_key, **row}) | |
| for row in shared_component_debug.get("gating_rows") or []: | |
| gating_rows.append({"market_type": market_key, **row}) | |
| for row in shared_component_debug.get("failure_summary") or []: | |
| failure_summary_rows.append({"market_type": market_key, **row}) | |
| if shared_component_rows: | |
| summary_df = pd.DataFrame(failure_summary_rows) | |
| if not summary_df.empty: | |
| st.write("Failure Summary") | |
| st.dataframe(summary_df, use_container_width=True, hide_index=True) | |
| if gating_rows: | |
| st.write("Upstream Gating Failures") | |
| st.dataframe(pd.DataFrame(gating_rows), use_container_width=True, hide_index=True) | |
| if executed_rows: | |
| st.write("Executed Matchup Components") | |
| st.dataframe(pd.DataFrame(executed_rows), use_container_width=True, hide_index=True) | |
| else: | |
| st.info("No shared-component execution rows captured in this session.") | |
| else: | |
| st.info("Open the Props page in this session to capture shared matchup diagnostics.") | |
| with st.expander("Model Grading Rubric", expanded=False): | |
| props_hr_health_debug = ((props_market_debug_bundle.get("hr") or {}).get("hr_health_debug")) or {} | |
| combined_shared_component_debug = { | |
| "rows": [ | |
| row | |
| for payload in props_market_debug_bundle.values() | |
| for row in ((payload or {}).get("shared_component_debug") or {}).get("executed_rows", []) | |
| ] | |
| } | |
| rubric_df, rubric_summary = _build_model_upgrade_rubric( | |
| props_hr_health_debug=props_hr_health_debug, | |
| shared_component_debug=combined_shared_component_debug, | |
| ) | |
| c1, c2, c3 = st.columns(3) | |
| c1.metric( | |
| "Old Architecture Grade", | |
| f"{int(rubric_summary.get('old_architecture_score') or 0)}/{int(rubric_summary.get('max_score') or 100)}", | |
| ) | |
| c2.metric( | |
| "Current Architecture Grade", | |
| f"{int(rubric_summary.get('current_architecture_score') or 0)}/{int(rubric_summary.get('max_score') or 100)}", | |
| ) | |
| c3.metric( | |
| "Modeled 1+ HR Rows", | |
| int(rubric_summary.get("modeled_hr_rows_total") or 0), | |
| ) | |
| st.caption( | |
| "This is an architecture and model-readiness rubric, not a live ROI or hit-rate grade. " | |
| "Replace or augment it with rolling backtest metrics as the evaluation layer is built." | |
| ) | |
| st.dataframe(rubric_df, use_container_width=True, hide_index=True) | |
| with st.expander("Debug Event Row Read Status", expanded=False): | |
| read_status = debug_event_row_status or {} | |
| if read_status: | |
| status_rows = pd.DataFrame( | |
| [ | |
| { | |
| "section": key, | |
| "table_name": value.get("table_name"), | |
| "read_source": value.get("read_source"), | |
| "read_attempts": value.get("read_attempts"), | |
| "retry_used": value.get("retry_used"), | |
| "snapshot_built_at": value.get("snapshot_built_at"), | |
| "source_status": value.get("source_status"), | |
| "read_error": value.get("read_error"), | |
| } | |
| for key, value in read_status.items() | |
| ] | |
| ) | |
| st.dataframe(status_rows, use_container_width=True, hide_index=True) | |
| else: | |
| st.info("No debug event-row read status captured in this session.") | |
| with st.expander("Cached Source Freshness", expanded=False): | |
| freshness_rows: list[dict[str, Any]] = [] | |
| try: | |
| schedule_cached = debug_source_bundle.get("schedule_cached", pd.DataFrame()) | |
| freshness_rows.append( | |
| { | |
| "source": "cached_schedule", | |
| "row_count": int(len(schedule_cached)), | |
| "latest_fetched_at": ( | |
| pd.to_datetime(schedule_cached["fetched_at"], errors="coerce").max() | |
| if not schedule_cached.empty and "fetched_at" in schedule_cached.columns | |
| else None | |
| ), | |
| } | |
| ) | |
| except Exception: | |
| pass | |
| try: | |
| odds_cached = debug_source_bundle.get("odds_cached", pd.DataFrame()) | |
| freshness_rows.append( | |
| { | |
| "source": "cached_odds", | |
| "row_count": int(len(odds_cached)), | |
| "latest_fetched_at": ( | |
| pd.to_datetime(odds_cached["fetched_at"], errors="coerce").max() | |
| if not odds_cached.empty and "fetched_at" in odds_cached.columns | |
| else None | |
| ), | |
| } | |
| ) | |
| except Exception: | |
| pass | |
| try: | |
| starters_meta = debug_source_bundle.get("starters_meta", pd.DataFrame()) | |
| freshness_rows.append( | |
| { | |
| "source": "cached_probable_starters", | |
| "row_count": int(starters_meta.iloc[0]["matchup_count"]) if not starters_meta.empty else 0, | |
| "latest_fetched_at": starters_meta.iloc[0]["fetched_at"] if not starters_meta.empty else None, | |
| "refresh_mode": st.session_state.get("probable_starters_refresh_mode"), | |
| "cache_age_seconds": st.session_state.get("probable_starters_cache_age_seconds"), | |
| } | |
| ) | |
| except Exception: | |
| pass | |
| try: | |
| props_cache = debug_source_bundle.get("props_cache", {}) | |
| props_meta = props_cache.get("cache_meta", pd.DataFrame()) | |
| freshness_rows.append( | |
| { | |
| "source": "cached_upcoming_props_bundle", | |
| "row_count": int(props_meta.iloc[0]["merged_row_count"]) if not props_meta.empty else 0, | |
| "latest_fetched_at": props_meta.iloc[0]["fetched_at"] if not props_meta.empty else None, | |
| } | |
| ) | |
| except Exception: | |
| pass | |
| if freshness_rows: | |
| st.dataframe(pd.DataFrame(freshness_rows), use_container_width=True, hide_index=True) | |
| else: | |
| st.info("No cached source freshness rows available.") | |
| st.subheader("Upcoming Props Feed Diagnostics") | |
| props_debug = upcoming_props_debug or {} | |
| coverage_summary_df = props_debug.get("coverage_summary", pd.DataFrame()) | |
| coverage_summary_api_df = props_debug.get("coverage_summary_api", pd.DataFrame()) | |
| coverage_summary_scraper_added_df = props_debug.get("coverage_summary_scraper_added", pd.DataFrame()) | |
| coverage_summary_final_df = props_debug.get("coverage_summary_final", pd.DataFrame()) | |
| coverage_summary_hr_api_df = props_debug.get("coverage_summary_hr_api", pd.DataFrame()) | |
| coverage_summary_hr_supplemental_df = props_debug.get("coverage_summary_hr_supplemental", pd.DataFrame()) | |
| coverage_summary_hr_final_df = props_debug.get("coverage_summary_hr_final", pd.DataFrame()) | |
| missing_books_by_market_df = props_debug.get("missing_books_by_market", pd.DataFrame()) | |
| missing_event_books_by_market_df = props_debug.get("missing_event_books_by_market", pd.DataFrame()) | |
| missing_hr_books_global_df = props_debug.get("missing_hr_books_global", pd.DataFrame()) | |
| missing_hr_books_by_event_df = props_debug.get("missing_hr_books_by_event", pd.DataFrame()) | |
| odds_api_raw_df = props_debug.get("odds_api_raw", pd.DataFrame()) | |
| scraper_raw_df = props_debug.get("scraper_raw", pd.DataFrame()) | |
| merged_props_df = props_debug.get("merged_props_feed", pd.DataFrame()) | |
| props_cache_meta = props_debug.get("cache_meta", pd.DataFrame()) | |
| props_cache_source = str(props_debug.get("cache_source") or "unknown") | |
| hr_snapshot_completeness = dict(props_debug.get("hr_snapshot_completeness") or {}) | |
| hr_snapshot_state = str(props_debug.get("hr_snapshot_state") or "") | |
| current_hr_row_count = int(props_debug.get("current_hr_row_count") or 0) | |
| current_hr_event_count = int(props_debug.get("current_hr_event_count") or 0) | |
| last_known_good_hr_row_count = int(props_debug.get("last_known_good_hr_row_count") or 0) | |
| last_known_good_hr_built_at = str(props_debug.get("last_known_good_hr_built_at") or "") | |
| hr_refresh_overwrite_prevented = bool(props_debug.get("hr_refresh_overwrite_prevented")) | |
| adapter_status_by_book = dict(props_debug.get("adapter_status_by_book") or {}) | |
| adapter_error_by_book = dict(props_debug.get("adapter_error_by_book") or {}) | |
| adapter_rows_by_book = dict(props_debug.get("adapter_rows_by_book") or {}) | |
| adapter_last_attempted_at_by_book = dict(props_debug.get("adapter_last_attempted_at_by_book") or {}) | |
| adapter_retry_after_by_book = dict(props_debug.get("adapter_retry_after_by_book") or {}) | |
| scraper_candidate_count = int(props_debug.get("scraper_candidate_count") or 0) | |
| scraper_added_count = int(props_debug.get("scraper_added_count") or 0) | |
| scraper_duplicate_reject_count = int(props_debug.get("scraper_duplicate_reject_count") or 0) | |
| c1, c2 = st.columns(2) | |
| c1.metric("Props Cache Source", props_cache_source.replace("_", " ").title()) | |
| c2.metric( | |
| "Props Cached Rows", | |
| int(props_cache_meta.iloc[0]["merged_row_count"]) if isinstance(props_cache_meta, pd.DataFrame) and not props_cache_meta.empty and "merged_row_count" in props_cache_meta.columns else int(len(merged_props_df)), | |
| ) | |
| if isinstance(props_cache_meta, pd.DataFrame) and not props_cache_meta.empty: | |
| st.write("Props Bundle Cache Meta") | |
| st.dataframe(props_cache_meta, use_container_width=True, hide_index=True) | |
| coverage_metric_cols = st.columns(3) | |
| coverage_metric_cols[0].metric("Scraper Candidates", scraper_candidate_count) | |
| coverage_metric_cols[1].metric("Scraper Added", scraper_added_count) | |
| coverage_metric_cols[2].metric("Scraper Duplicate Rejects", scraper_duplicate_reject_count) | |
| if hr_snapshot_completeness: | |
| hr_metric_cols = st.columns(4) | |
| hr_metric_cols[0].metric("HR Books Requested", int(hr_snapshot_completeness.get("requested_count") or 0)) | |
| hr_metric_cols[1].metric("HR Books Present", int(hr_snapshot_completeness.get("present_count") or 0)) | |
| hr_metric_cols[2].metric("HR Books Missing", int(hr_snapshot_completeness.get("missing_count") or 0)) | |
| hr_metric_cols[3].metric("HR Snapshot Complete", "Yes" if hr_snapshot_completeness.get("is_complete") else "No") | |
| hr_state_cols = st.columns(5) | |
| hr_state_cols[0].metric("HR Snapshot State", hr_snapshot_state or "unknown") | |
| hr_state_cols[1].metric("Current HR Rows", current_hr_row_count) | |
| hr_state_cols[2].metric("Current HR Events", current_hr_event_count) | |
| hr_state_cols[3].metric("Last Known Good HR Rows", last_known_good_hr_row_count) | |
| hr_state_cols[4].metric("Overwrite Prevented", "Yes" if hr_refresh_overwrite_prevented else "No") | |
| if last_known_good_hr_built_at: | |
| st.caption(f"Last known good HR snapshot built at: {last_known_good_hr_built_at}") | |
| if not coverage_summary_df.empty: | |
| st.write("Coverage Summary") | |
| st.dataframe(coverage_summary_df, use_container_width=True, hide_index=True) | |
| else: | |
| st.caption("Coverage summary is empty.") | |
| if not coverage_summary_api_df.empty: | |
| st.write("API Rows by Market and Sportsbook") | |
| st.dataframe(coverage_summary_api_df, use_container_width=True, hide_index=True) | |
| if not coverage_summary_scraper_added_df.empty: | |
| st.write("Scraper-Added Rows by Market and Sportsbook") | |
| st.dataframe(coverage_summary_scraper_added_df, use_container_width=True, hide_index=True) | |
| if not coverage_summary_final_df.empty: | |
| st.write("Final Merged Rows by Market and Sportsbook") | |
| st.dataframe(coverage_summary_final_df, use_container_width=True, hide_index=True) | |
| if not coverage_summary_hr_api_df.empty: | |
| st.write("HR API Rows by Sportsbook") | |
| st.dataframe(coverage_summary_hr_api_df, use_container_width=True, hide_index=True) | |
| if not coverage_summary_hr_supplemental_df.empty: | |
| st.write("HR Supplemental Rows by Sportsbook") | |
| st.dataframe(coverage_summary_hr_supplemental_df, use_container_width=True, hide_index=True) | |
| if not coverage_summary_hr_final_df.empty: | |
| st.write("HR Final Rows by Sportsbook") | |
| st.dataframe(coverage_summary_hr_final_df, use_container_width=True, hide_index=True) | |
| adapter_rows = [] | |
| adapter_books = sorted( | |
| set(adapter_status_by_book) | set(adapter_error_by_book) | set(adapter_rows_by_book) | |
| ) | |
| for book_key in adapter_books: | |
| adapter_rows.append( | |
| { | |
| "sportsbook_key": book_key, | |
| "adapter_status": adapter_status_by_book.get(book_key, ""), | |
| "adapter_error": adapter_error_by_book.get(book_key, ""), | |
| "adapter_rows_returned": int(adapter_rows_by_book.get(book_key) or 0), | |
| "last_attempted_at": adapter_last_attempted_at_by_book.get(book_key, ""), | |
| "retry_after": adapter_retry_after_by_book.get(book_key, ""), | |
| } | |
| ) | |
| if adapter_rows: | |
| st.write("Supplemental Adapter Status by Sportsbook") | |
| st.dataframe(pd.DataFrame(adapter_rows), use_container_width=True, hide_index=True) | |
| if not missing_books_by_market_df.empty: | |
| st.write("Missing Books by Market After Reconciliation") | |
| st.dataframe(missing_books_by_market_df, use_container_width=True, hide_index=True) | |
| if not missing_event_books_by_market_df.empty: | |
| with st.expander("Missing Event Books by Market", expanded=False): | |
| st.dataframe(missing_event_books_by_market_df, use_container_width=True, hide_index=True) | |
| if not missing_hr_books_global_df.empty: | |
| st.write("Missing HR Books Global") | |
| st.dataframe(missing_hr_books_global_df, use_container_width=True, hide_index=True) | |
| if not missing_hr_books_by_event_df.empty: | |
| with st.expander("Missing HR Books by Event", expanded=False): | |
| st.dataframe(missing_hr_books_by_event_df, use_container_width=True, hide_index=True) | |
| if not merged_props_df.empty: | |
| market_series = merged_props_df.get("market", pd.Series([""] * len(merged_props_df), index=merged_props_df.index)).astype(str).str.strip().str.lower() | |
| sportsbook_series = merged_props_df.get("sportsbook", pd.Series([""] * len(merged_props_df), index=merged_props_df.index)).astype(str).str.strip() | |
| available_books_by_market = ( | |
| merged_props_df.assign( | |
| _market_family=market_series, | |
| _sportsbook=sportsbook_series, | |
| ) | |
| .groupby("_market_family", dropna=False)["_sportsbook"] | |
| .agg(lambda s: ", ".join(sorted({value for value in s.tolist() if str(value).strip()}))) | |
| .reset_index() | |
| .rename(columns={"_market_family": "market_family", "_sportsbook": "available_books"}) | |
| ) | |
| available_books_by_market["book_count"] = available_books_by_market["available_books"].apply( | |
| lambda text: len([part for part in str(text).split(", ") if part]) | |
| ) | |
| merged_rows_by_market_and_book = ( | |
| merged_props_df.assign(_market_family=market_series, _sportsbook=sportsbook_series) | |
| .groupby(["_market_family", "_sportsbook"], dropna=False) | |
| .agg( | |
| rows=("event_id", "size"), | |
| unique_events=("event_id", pd.Series.nunique), | |
| unique_players=("player_name", pd.Series.nunique), | |
| ) | |
| .reset_index() | |
| .rename(columns={"_market_family": "market_family", "_sportsbook": "sportsbook"}) | |
| .sort_values(["market_family", "rows"], ascending=[True, False], na_position="last") | |
| ) | |
| event_market_cols = [ | |
| c for c in ["event_id", "away_team", "home_team", "market"] if c in merged_props_df.columns | |
| ] | |
| if event_market_cols: | |
| available_books_by_event_market = ( | |
| merged_props_df.assign(_sportsbook=sportsbook_series) | |
| .groupby(event_market_cols, dropna=False)["_sportsbook"] | |
| .agg(lambda s: ", ".join(sorted({value for value in s.tolist() if str(value).strip()}))) | |
| .reset_index() | |
| .rename(columns={"_sportsbook": "available_books"}) | |
| ) | |
| available_books_by_event_market["book_count"] = available_books_by_event_market["available_books"].apply( | |
| lambda text: len([part for part in str(text).split(", ") if part]) | |
| ) | |
| else: | |
| available_books_by_event_market = pd.DataFrame() | |
| candidate_key_parts = [] | |
| for col in ("event_id", "player_name", "market", "line", "selection_side"): | |
| if col in merged_props_df.columns: | |
| candidate_key_parts.append(merged_props_df[col].astype(str).fillna("")) | |
| if candidate_key_parts: | |
| candidate_key = candidate_key_parts[0] | |
| for part in candidate_key_parts[1:]: | |
| candidate_key = candidate_key + "|" + part | |
| best_line_candidate_counts = ( | |
| pd.DataFrame( | |
| { | |
| "market_family": market_series, | |
| "candidate_key": candidate_key, | |
| } | |
| ) | |
| .drop_duplicates() | |
| .groupby("market_family", dropna=False) | |
| .agg(best_line_candidate_count=("candidate_key", "size")) | |
| .reset_index() | |
| .sort_values("best_line_candidate_count", ascending=False, na_position="last") | |
| ) | |
| else: | |
| best_line_candidate_counts = pd.DataFrame() | |
| hr_book_coverage = merged_rows_by_market_and_book[ | |
| merged_rows_by_market_and_book["market_family"].astype(str).str.lower() == "hr" | |
| ].copy() | |
| st.write("Available Books by Market") | |
| st.dataframe(available_books_by_market, use_container_width=True, hide_index=True) | |
| st.write("Merged Rows by Market and Sportsbook") | |
| st.dataframe(merged_rows_by_market_and_book, use_container_width=True, hide_index=True) | |
| st.write("Best-Line Candidate Counts by Market") | |
| if best_line_candidate_counts.empty: | |
| st.info("No best-line candidate counts are available.") | |
| else: | |
| st.dataframe(best_line_candidate_counts, use_container_width=True, hide_index=True) | |
| st.write("HR Book Coverage on Active Slate") | |
| if hr_book_coverage.empty: | |
| st.info("No HR rows are currently available in the merged cached feed.") | |
| else: | |
| st.dataframe(hr_book_coverage, use_container_width=True, hide_index=True) | |
| with st.expander("Available Books by Event and Market", expanded=False): | |
| if available_books_by_event_market.empty: | |
| st.info("No event-by-market coverage rows are available.") | |
| else: | |
| st.dataframe(available_books_by_event_market, use_container_width=True, hide_index=True) | |
| raw_cols = [ | |
| "provider", | |
| "row_source_type", | |
| "coverage_completion_status", | |
| "sportsbook", | |
| "sportsbook_key", | |
| "event_id", | |
| "commence_time", | |
| "away_team", | |
| "home_team", | |
| "market", | |
| "player_name_raw", | |
| "player_name", | |
| "odds_american", | |
| "line", | |
| ] | |
| with st.expander("Odds API raw", expanded=False): | |
| if odds_api_raw_df.empty: | |
| st.info("No Odds API upcoming props rows available.") | |
| else: | |
| st.dataframe( | |
| odds_api_raw_df[[c for c in raw_cols if c in odds_api_raw_df.columns]], | |
| use_container_width=True, | |
| hide_index=True, | |
| ) | |
| with st.expander("Scraper raw", expanded=False): | |
| if scraper_raw_df.empty: | |
| st.info("No scraper fallback rows were needed or returned.") | |
| else: | |
| st.dataframe( | |
| scraper_raw_df[[c for c in raw_cols if c in scraper_raw_df.columns]], | |
| use_container_width=True, | |
| hide_index=True, | |
| ) | |
| with st.expander("Merged Props Feed", expanded=False): | |
| if merged_props_df.empty: | |
| st.info("No merged upcoming props feed is available.") | |
| else: | |
| st.dataframe( | |
| merged_props_df[[c for c in raw_cols if c in merged_props_df.columns]], | |
| use_container_width=True, | |
| hide_index=True, | |
| ) | |
| st.subheader("Odds API Coverage Probe") | |
| st.caption( | |
| "Runs a small diagnostic against upcoming MLB events to show whether selected " | |
| "books have player-prop coverage for HR, hits, and pitcher strikeouts." | |
| ) | |
| if st.button("Run Odds API Coverage Probe", key="dbg_run_odds_coverage_probe"): | |
| st.session_state["odds_coverage_probe_loaded"] = True | |
| if st.session_state.get("odds_coverage_probe_loaded", False): | |
| with st.spinner("Running Odds API coverage probe..."): | |
| probe_bundle = _load_upcoming_props_coverage_probe() | |
| probe_summary_df = probe_bundle.get("coverage_probe_summary", pd.DataFrame()) | |
| probe_raw_df = probe_bundle.get("coverage_probe_raw", pd.DataFrame()) | |
| if probe_summary_df.empty: | |
| st.info("Coverage probe returned no rows.") | |
| else: | |
| st.dataframe(probe_summary_df, use_container_width=True, hide_index=True) | |
| with st.expander("Coverage Probe Raw Rows", expanded=False): | |
| if probe_raw_df.empty: | |
| st.info("No raw coverage-probe rows are available.") | |
| else: | |
| raw_probe_cols = [ | |
| "event_id", | |
| "away_team", | |
| "home_team", | |
| "commence_time", | |
| "sportsbook", | |
| "sportsbook_key", | |
| "market_key", | |
| "response_status", | |
| "bookmakers_returned", | |
| "outcomes_returned", | |
| "has_data", | |
| "returned_books", | |
| "error", | |
| ] | |
| st.dataframe( | |
| probe_raw_df[[c for c in raw_probe_cols if c in probe_raw_df.columns]], | |
| use_container_width=True, | |
| hide_index=True, | |
| ) | |
| # ------------------------------------------------------------------ | |
| # SECTION 5d — Pitcher Resolution Log | |
| # ------------------------------------------------------------------ | |
| st.subheader("Pitcher Resolution Log") | |
| st.caption( | |
| "Every pitcher name processed by the system — from probable starters (mlb_starters), " | |
| "live feed (mlb_live), and statcast lookup (pitcher_adjustment). " | |
| "Green = matched, Yellow = loose match, Red = failed, Gray = pre-resolution (api_fetch / live_feed)." | |
| ) | |
| with st.expander("Pitcher Resolution Log", expanded=True): | |
| _pr_col1, _pr_col2 = st.columns(2) | |
| with _pr_col1: | |
| _pr_date_filter = st.text_input( | |
| "Filter by game_date (YYYY-MM-DD, leave blank for all)", | |
| value="", | |
| key="pr_date_filter", | |
| ) | |
| with _pr_col2: | |
| _pr_method_filter = st.multiselect( | |
| "Filter by match_method", | |
| options=["api_fetch", "live_feed", "id", "exact", "loose", "failed"], | |
| default=[], | |
| key="pr_method_filter", | |
| ) | |
| try: | |
| pr_df = read_pitcher_resolution_log(conn, limit=1000) | |
| except Exception as _pr_exc: | |
| pr_df = pd.DataFrame() | |
| st.warning(f"Could not load pitcher_resolution_log: {_pr_exc}") | |
| if not pr_df.empty: | |
| if _pr_date_filter.strip(): | |
| if "game_date" in pr_df.columns: | |
| pr_df = pr_df[pr_df["game_date"].astype(str).str.startswith(_pr_date_filter.strip())] | |
| if _pr_method_filter: | |
| if "match_method" in pr_df.columns: | |
| pr_df = pr_df[pr_df["match_method"].isin(_pr_method_filter)] | |
| _pr_display_cols = [c for c in [ | |
| "game_date", "source", "input_name", "normalized_name", | |
| "matched_canonical", "match_method", "sample_size", "p_throws", "pitcher_id", | |
| ] if c in pr_df.columns] | |
| def _pr_row_style(row): | |
| method = str(row.get("match_method", "")).lower() | |
| size = row.get("sample_size", 0) | |
| try: | |
| size = int(size) | |
| except Exception: | |
| size = 0 | |
| if method == "failed": | |
| color = "background-color: #ffcccc" | |
| elif method == "loose": | |
| color = "background-color: #fff3cd" | |
| elif method in ("id", "exact") and size > 0: | |
| color = "background-color: #d4edda" | |
| else: | |
| color = "background-color: #f8f9fa" | |
| return [color] * len(row) | |
| _pr_summary_cols = { | |
| "total": len(pr_df), | |
| "matched": int((pr_df.get("match_method", pd.Series(dtype=str)).isin(["id", "exact"])).sum()) if "match_method" in pr_df.columns else 0, | |
| "loose": int((pr_df.get("match_method", pd.Series(dtype=str)) == "loose").sum()) if "match_method" in pr_df.columns else 0, | |
| "failed": int((pr_df.get("match_method", pd.Series(dtype=str)) == "failed").sum()) if "match_method" in pr_df.columns else 0, | |
| } | |
| st.caption( | |
| f"Showing {_pr_summary_cols['total']} rows — " | |
| f"matched: {_pr_summary_cols['matched']} " | |
| f"loose: {_pr_summary_cols['loose']} " | |
| f"failed: {_pr_summary_cols['failed']}" | |
| ) | |
| try: | |
| styled = pr_df[_pr_display_cols].style.apply(_pr_row_style, axis=1) | |
| st.dataframe(styled, use_container_width=True, hide_index=True) | |
| except Exception: | |
| st.dataframe(pr_df[_pr_display_cols], use_container_width=True, hide_index=True) | |
| else: | |
| st.info( | |
| "No pitcher resolution log entries yet. Resolution rows populate once the props " | |
| "or live game pipeline runs with an active DB connection." | |
| ) | |
| # ------------------------------------------------------------------ | |
| # SECTION 6 — Admin Tools | |
| # ------------------------------------------------------------------ | |
| st.subheader("Admin Tools") | |
| col_a, col_b2, col_c = st.columns(3) | |
| with col_a: | |
| if grade_outcomes_fn is not None: | |
| if st.button("Grade Final Game Outcomes", key="dbg_grade_final"): | |
| grade_outcomes_fn(scores_df) | |
| st.success("Grading attempted.") | |
| else: | |
| st.caption("grade_outcomes_fn not provided.") | |
| with col_b2: | |
| if grade_props_fn is not None: | |
| if st.button("Build Batter Prop Outcomes", key="dbg_grade_props"): | |
| grade_props_fn() | |
| st.success("Prop outcome build attempted.") | |
| else: | |
| st.caption("grade_props_fn not provided.") | |
| with col_c: | |
| if fill_realized_fn is not None: | |
| if st.button("Fill Realized Outcomes (Statcast)", key="dbg_fill_realized"): | |
| fill_realized_fn(statcast_df) | |
| st.success("Realized outcome fill attempted.") | |
| else: | |
| st.caption("fill_realized_fn not provided.") | |
| st.caption(f"Current WBC date: {current_wbc_date_str()}") | |
| # ------------------------------------------------------------------ | |
| # SECTION 6b — Data Inventory | |
| # ------------------------------------------------------------------ | |
| st.subheader("Data Inventory") | |
| if "dbg_lib_loaded" not in st.session_state: | |
| st.session_state["dbg_lib_loaded"] = False | |
| if st.button("Load Data Library", key="dbg_load_inventory"): | |
| st.session_state["dbg_lib_loaded"] = True | |
| if st.session_state["dbg_lib_loaded"]: | |
| with st.spinner("Loading data library..."): | |
| with st.expander("All database tables (row counts)", expanded=True): | |
| inv_df = _query_db_inventory(conn) | |
| if inv_df.empty: | |
| st.warning("Could not read table inventory — check DB connection.") | |
| else: | |
| st.dataframe(inv_df, use_container_width=True, hide_index=True) | |
| st.caption(f"{len(inv_df)} tables · {inv_df['row_count'].sum():,.0f} estimated rows") | |
| # ------------------------------------------------------------------ | |
| # SECTION 6c — Coverage Diagnostics | |
| # ------------------------------------------------------------------ | |
| st.subheader("Coverage Diagnostics") | |
| coverage_rows = _build_coverage_diagnostics(conn) | |
| if coverage_rows: | |
| cov_df = pd.DataFrame(coverage_rows) | |
| first_cols = [c for c in ["table", "row_count", "distinct_game_pks", | |
| "latest_game_date", "latest_graded_at", | |
| "latest_source_season", "status"] if c in cov_df.columns] | |
| st.dataframe(cov_df[first_cols], use_container_width=True, hide_index=True) | |
| else: | |
| st.info("No coverage data available.") | |
| overlap = _build_overlap_diagnostics(conn) | |
| if overlap and "error" not in overlap: | |
| st.write("**game_outcomes ↔ statcast / live_pitch_mix_2026 / live_batter_game_log_2026 overlap**") | |
| st.dataframe(pd.DataFrame([overlap]), use_container_width=True, hide_index=True) | |
| elif overlap and "error" in overlap: | |
| st.warning(f"Overlap query error: {overlap['error']}") | |
| # --- Batter prop outcomes --- | |
| with st.expander("Batter prop outcomes", expanded=False): | |
| batter_prop_outcomes_df = debug_audit_bundle.get("batter_prop_outcomes", pd.DataFrame()) | |
| st.write(f"Rows: {len(batter_prop_outcomes_df)}") | |
| if not batter_prop_outcomes_df.empty: | |
| display_cols = [c for c in [ | |
| "created_at", "graded_at", "game_pk", "slot", "batter_name", | |
| "fair_hr_odds", "book_hr_odds", "adjusted_edge", "confidence", | |
| "recommendation_tier", "realized_hit", "realized_hr", "realized_tb2p", | |
| "grade_status", "outcome_source", | |
| ] if c in batter_prop_outcomes_df.columns] | |
| st.dataframe(batter_prop_outcomes_df[display_cols].tail(30), use_container_width=True, hide_index=True) | |
| # --- Game outcomes --- | |
| with st.expander("Game outcomes", expanded=False): | |
| game_outcomes_df = debug_audit_bundle.get("game_outcomes", pd.DataFrame()) | |
| st.write(f"Rows: {len(game_outcomes_df)}") | |
| if not game_outcomes_df.empty: | |
| st.dataframe(game_outcomes_df.tail(20), use_container_width=True, hide_index=True) | |
| # --- Recommendation logs --- | |
| with st.expander("Recommendation logs", expanded=False): | |
| rec_logs_df = debug_audit_bundle.get("recommendation_logs", pd.DataFrame()) | |
| st.write(f"Rows: {len(rec_logs_df)}") | |
| if not rec_logs_df.empty: | |
| st.dataframe(rec_logs_df.tail(20), use_container_width=True, hide_index=True) | |
| # --- Recommendation audit --- | |
| with st.expander("Recommendation audit", expanded=False): | |
| audit_df = debug_audit_bundle.get("recommendation_audit", pd.DataFrame()) | |
| st.write(f"Rows: {len(audit_df)}") | |
| if not audit_df.empty: | |
| audit_display_cols = [c for c in [ | |
| "created_at", "game_pk", "away_team", "home_team", "slot", "batter_name", | |
| "fair_hr_odds", "book_hr_odds", "adjusted_edge", "confidence", | |
| "recommendation_tier", "realized_hr", "graded_at", "outcome_source", | |
| ] if c in audit_df.columns] | |
| st.dataframe(audit_df[audit_display_cols].tail(20), use_container_width=True, hide_index=True) | |
| # --- Batter prop audit --- | |
| with st.expander("Batter prop audit", expanded=False): | |
| batter_audit_df = debug_audit_bundle.get("batter_prop_audit", pd.DataFrame()) | |
| st.write(f"Rows: {len(batter_audit_df)}") | |
| if not batter_audit_df.empty: | |
| st.dataframe(batter_audit_df.tail(20), use_container_width=True, hide_index=True) | |
| else: | |
| st.caption("Please allow a few moments to load the data library..") | |
| # --- Simulator raw rows --- | |
| with st.expander("Simulator raw rows", expanded=False): | |
| if not prep_df.empty: | |
| sim_debug_rows: list[dict] = [] | |
| for _, live_row in prep_df.iterrows(): | |
| game = live_row.to_dict() | |
| try: | |
| sim_rows = build_upcoming_simulated_rows( | |
| game_row=game, | |
| statcast_df=statcast_df, | |
| pitcher_statcast_df=pitcher_statcast_df, | |
| weather_row=None, | |
| ) | |
| except Exception as e: | |
| sim_debug_rows.append({ | |
| "away_team": game.get("away_team"), "home_team": game.get("home_team"), | |
| "slot": "ERROR", "batter_name": None, "pitcher_name": game.get("pitcher_name"), | |
| "hit_prob": None, "hr_prob": None, "tb2p_prob": None, "debug_note": str(e), | |
| }) | |
| continue | |
| for row in (sim_rows or []): | |
| if isinstance(row, dict): | |
| sim_debug_rows.append({ | |
| "away_team": game.get("away_team"), | |
| "home_team": game.get("home_team"), | |
| "slot": row.get("slot"), | |
| "batter_name": row.get("batter_name"), | |
| "pitcher_name": row.get("pitcher_name"), | |
| "hit_prob": row.get("hit_prob"), | |
| "hr_prob": row.get("hr_prob"), | |
| "tb2p_prob": row.get("tb2p_prob"), | |
| "debug_note": None, | |
| }) | |
| if sim_debug_rows: | |
| st.dataframe(pd.DataFrame(sim_debug_rows), use_container_width=True, hide_index=True) | |
| else: | |
| st.info("No simulator rows available.") | |
| else: | |
| st.info("No prepared live games.") | |
| # ------------------------------------------------------------------ | |
| # SECTION 7 — Export | |
| # ------------------------------------------------------------------ | |
| st.subheader("Export") | |
| if not sim_df.empty: | |
| col_csv, col_json = st.columns(2) | |
| with col_csv: | |
| csv_data = sim_df.to_csv(index=False).encode("utf-8") | |
| st.download_button( | |
| label="Download CSV", | |
| data=csv_data, | |
| file_name="debug_sim_rows.csv", | |
| mime="text/csv", | |
| key="dbg_dl_csv", | |
| ) | |
| with col_json: | |
| json_data = json.dumps( | |
| [ | |
| {k: (v.item() if hasattr(v, "item") else v) for k, v in row.items()} | |
| for row in filtered_rows | |
| ], | |
| default=str, | |
| ).encode("utf-8") | |
| st.download_button( | |
| label="Download JSON", | |
| data=json_data, | |
| file_name="debug_sim_rows.json", | |
| mime="application/json", | |
| key="dbg_dl_json", | |
| ) | |
| else: | |
| st.info("No data to export.") | |
| # ------------------------------------------------------------------ | |
| # SECTION 8 — Audit Metadata (placeholders) | |
| # ------------------------------------------------------------------ | |
| st.subheader("Audit Metadata") | |
| st.json({ | |
| "model_version": "Batch 13", | |
| "feature_version": "rolling_form+opportunity+drift", | |
| "odds_snapshot_id": None, | |
| "data_timestamp": str(pd.Timestamp.now()), | |
| }) | |
| # ------------------------------------------------------------------ | |
| # SECTION 9 — Model Evaluation Metrics (CLV / ERE) | |
| # ------------------------------------------------------------------ | |
| st.subheader("Model Evaluation Metrics") | |
| if "dbg_eval_metrics_loaded" not in st.session_state: | |
| st.session_state["dbg_eval_metrics_loaded"] = False | |
| if st.button("Load Evaluation Metrics", key="dbg_load_eval_metrics"): | |
| st.session_state["dbg_eval_metrics_loaded"] = True | |
| if st.session_state["dbg_eval_metrics_loaded"]: | |
| audit_df = debug_audit_bundle.get("recommendation_audit", pd.DataFrame()) | |
| eval_tables = [ | |
| ("HR Probability Calibration", build_hr_calibration_table(audit_df)), | |
| ("Edge Bucket Performance", build_edge_bucket_table(audit_df)), | |
| ("Confidence Bucket", build_confidence_table(audit_df)), | |
| ("Recommendation Tier", build_tier_performance_table(audit_df)), | |
| ("Global ERE", build_ere_table(audit_df)), | |
| ("ERE by Edge Bucket", build_ere_by_edge_bucket_table(audit_df)), | |
| ("ERE by Confidence", build_ere_by_confidence_bucket_table(audit_df)), | |
| ("ERE by Tier", build_ere_by_tier_table(audit_df)), | |
| ("CLV Summary", build_clv_table(audit_df)), | |
| ("CLV by Tier", build_clv_by_tier_table(audit_df)), | |
| ] | |
| for title, tbl in eval_tables: | |
| if not tbl.empty: | |
| st.write(title) | |
| st.dataframe(tbl, use_container_width=True, hide_index=True) | |
| # Batter-specific metrics | |
| batter_audit_df_eval = debug_audit_bundle.get("batter_prop_audit", pd.DataFrame()) | |
| for title, fn in [ | |
| ("Batter HR Rate by Tier", build_batter_hr_tier_table), | |
| ("Batter HR Rate by Confidence", build_batter_hr_confidence_table), | |
| ("Batter HR Rate by Edge", build_batter_hr_edge_table), | |
| ]: | |
| try: | |
| tbl = fn(batter_audit_df_eval) | |
| except Exception: | |
| tbl = pd.DataFrame() | |
| if not tbl.empty: | |
| st.write(title) | |
| st.dataframe(tbl, use_container_width=True, hide_index=True) | |
| # Scores raw status (diagnostic) | |
| if not scores_df.empty and "status" in scores_df.columns: | |
| with st.expander("Raw score statuses", expanded=False): | |
| st.write(sorted(scores_df["status"].fillna("").astype(str).unique().tolist())) | |