Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import math | |
| import pandas as pd | |
def _resolve_expected_prob(df: pd.DataFrame) -> pd.Series | None:
    """Pick the expected-probability series from the first usable column.

    Columns are tried in priority order: ``fair_prob``, ``model_k_prob``,
    ``model_hr_prob``. A column is usable when it exists and at least one of
    its values parses as a number (unparseable values become NaN). When none
    qualifies, ``fair_hr_odds`` -- if present -- is converted element-wise with
    ``_american_to_prob``.

    Returns:
        The selected series, or ``None`` when no source column is available.
    """
    for name in ("fair_prob", "model_k_prob", "model_hr_prob"):
        if name not in df.columns:
            continue
        candidate = pd.to_numeric(df[name], errors="coerce")
        # An all-NaN column carries no information; fall through to the next.
        if candidate.notna().any():
            return candidate

    if "fair_hr_odds" in df.columns:
        return df["fair_hr_odds"].apply(_american_to_prob)
    return None
def _resolve_realized_outcome(df: pd.DataFrame) -> pd.Series | None:
    """Return the realized-outcome column as numbers, or ``None`` if absent.

    The first column present among ``realized_outcome``, ``realized_hr``,
    ``realized_k_over`` and ``realized_win`` is used; values that cannot be
    parsed as numbers become NaN.
    """
    candidates = ("realized_outcome", "realized_hr", "realized_k_over", "realized_win")
    chosen = next((name for name in candidates if name in df.columns), None)
    if chosen is None:
        return None
    return pd.to_numeric(df[chosen], errors="coerce")
def build_hr_calibration_table(audit_df: pd.DataFrame) -> pd.DataFrame:
    """
    Compare predicted HR probability vs realized HR rate.

    Rows with a numeric ``hr_prob`` are binned on fixed probability edges.
    For each bin the table reports the number of predictions, the mean
    predicted probability and the mean of ``realized_hr``. Every bin is
    present in the output; bins without rows get ``realized_hr_rate`` 0.
    ``hr_prob`` values outside [0, 1] fall in no bin and are dropped.

    Returns:
        One row per bin, or an empty DataFrame when the input is ``None``,
        empty, or lacks the ``hr_prob`` / ``realized_hr`` columns.
    """
    if audit_df is None or audit_df.empty:
        return pd.DataFrame()
    if "hr_prob" not in audit_df.columns or "realized_hr" not in audit_df.columns:
        return pd.DataFrame()

    df = audit_df.copy()
    # Coerce so stray strings become NaN instead of breaking pd.cut / mean.
    df["hr_prob"] = pd.to_numeric(df["hr_prob"], errors="coerce")
    df["realized_hr"] = pd.to_numeric(df["realized_hr"], errors="coerce")
    df = df[df["hr_prob"].notna()]

    bins = pd.cut(
        df["hr_prob"],
        bins=[0, .02, .04, .06, .08, .10, .15, .20, 1],
        include_lowest=True,
    )

    table = (
        # observed=False is explicit so empty bins are always kept; the
        # fillna(0) below relies on those rows being present.
        df.groupby(bins, observed=False)
        .agg(
            predictions=("hr_prob", "count"),
            avg_pred_prob=("hr_prob", "mean"),
            realized_hr_rate=("realized_hr", "mean"),
        )
        .reset_index()
    )

    table["realized_hr_rate"] = table["realized_hr_rate"].fillna(0)
    return table
def build_edge_bucket_table(audit_df: pd.DataFrame) -> pd.DataFrame:
    """Summarize realized HR rate per ``adjusted_edge`` bucket.

    Rows with a numeric ``adjusted_edge`` are binned on fixed edges spanning
    [-1, 1]; values outside that range fall in no bin and are dropped. For
    each bin the table reports the sample count, the mean edge and the mean
    of ``realized_hr``. Every bin is present in the output; bins without
    rows get ``hr_rate`` 0.

    Returns:
        One row per bin, or an empty DataFrame when the input is ``None``,
        empty, or lacks the ``adjusted_edge`` / ``realized_hr`` columns.
    """
    if audit_df is None or audit_df.empty:
        return pd.DataFrame()
    if "adjusted_edge" not in audit_df.columns or "realized_hr" not in audit_df.columns:
        return pd.DataFrame()

    df = audit_df.copy()
    # Coerce so stray strings become NaN instead of breaking pd.cut / mean.
    df["adjusted_edge"] = pd.to_numeric(df["adjusted_edge"], errors="coerce")
    df["realized_hr"] = pd.to_numeric(df["realized_hr"], errors="coerce")
    df = df[df["adjusted_edge"].notna()]

    bins = pd.cut(
        df["adjusted_edge"],
        bins=[-1, -.05, -.02, 0, .02, .04, .06, .10, 1],
        include_lowest=True,
    )

    table = (
        # observed=False is explicit so empty bins are always kept; the
        # fillna(0) below relies on those rows being present.
        df.groupby(bins, observed=False)
        .agg(
            samples=("adjusted_edge", "count"),
            avg_edge=("adjusted_edge", "mean"),
            hr_rate=("realized_hr", "mean"),
        )
        .reset_index()
    )

    table["hr_rate"] = table["hr_rate"].fillna(0)
    return table
def build_confidence_table(audit_df: pd.DataFrame) -> pd.DataFrame:
    """Summarize realized HR rate per ``confidence`` band.

    ``confidence`` is binned on the edges 0/40/55/70/85/100; values outside
    that range (or non-numeric) fall in no bin and are dropped. For each band
    the table reports the sample count and the mean of ``realized_hr``. Every
    band is present in the output; bands without rows get ``hr_rate`` 0.

    Returns:
        One row per band, or an empty DataFrame when the input is ``None``,
        empty, or lacks the ``confidence`` / ``realized_hr`` columns.
    """
    if audit_df is None or audit_df.empty:
        return pd.DataFrame()
    if "confidence" not in audit_df.columns or "realized_hr" not in audit_df.columns:
        return pd.DataFrame()

    df = audit_df.copy()
    # Coerce so stray strings become NaN instead of breaking pd.cut / mean.
    df["confidence"] = pd.to_numeric(df["confidence"], errors="coerce")
    df["realized_hr"] = pd.to_numeric(df["realized_hr"], errors="coerce")

    bins = pd.cut(
        df["confidence"],
        bins=[0, 40, 55, 70, 85, 100],
        include_lowest=True,
    )

    table = (
        # observed=False is explicit so empty bands are always kept; the
        # fillna(0) below relies on those rows being present.
        df.groupby(bins, observed=False)
        .agg(
            samples=("confidence", "count"),
            hr_rate=("realized_hr", "mean"),
        )
        .reset_index()
    )

    table["hr_rate"] = table["hr_rate"].fillna(0)
    return table
def build_tier_performance_table(audit_df: pd.DataFrame) -> pd.DataFrame:
    """Summarize outcomes per ``recommendation_tier``.

    For each tier the table reports the sample count, the mean of
    ``realized_hr`` (NaN replaced by 0), the mean ``adjusted_edge`` and the
    mean ``confidence``. Rows whose tier is missing are excluded by the
    groupby.

    Returns:
        One row per tier, or an empty DataFrame when the input is ``None``,
        empty, or lacks any of the four columns used.
    """
    if audit_df is None or audit_df.empty:
        return pd.DataFrame()

    required = ("recommendation_tier", "realized_hr", "adjusted_edge", "confidence")
    if any(col not in audit_df.columns for col in required):
        return pd.DataFrame()

    table = (
        # observed=False only matters if the tier column is categorical; it
        # then keeps unused tiers, which the fillna(0) below expects.
        audit_df.groupby("recommendation_tier", observed=False)
        .agg(
            samples=("recommendation_tier", "count"),
            hr_rate=("realized_hr", "mean"),
            avg_edge=("adjusted_edge", "mean"),
            avg_confidence=("confidence", "mean"),
        )
        .reset_index()
    )

    table["hr_rate"] = table["hr_rate"].fillna(0)
    return table
def _safe_float(value) -> float | None:
    """Convert ``value`` to ``float``, returning ``None`` when it is unusable.

    ``None``, blank strings, non-numeric text/objects and anything that
    converts to NaN (including spellings such as ``"-nan"``) yield ``None``.
    Infinite values are passed through unchanged.
    """
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / unsupported objects; ValueError: unparseable
        # text; OverflowError: integers too large for a float.
        return None
    # A NaN result means "missing" regardless of how the input spelled it.
    if math.isnan(number):
        return None
    return number
def _american_to_prob(odds) -> float | None:
    """Convert American odds to an implied probability.

    Positive odds map to ``100 / (odds + 100)`` and negative odds to
    ``|odds| / (|odds| + 100)``. Returns ``None`` when ``odds`` cannot be
    parsed by ``_safe_float`` or is neither positive nor negative (e.g. 0).
    """
    parsed = _safe_float(odds)
    if parsed is None:
        return None

    if parsed < 0:
        stake = abs(parsed)
        return stake / (stake + 100.0)
    if parsed > 0:
        return 100.0 / (parsed + 100.0)
    # Zero (or any value that compares neither way) is not a valid price.
    return None
def _bucket_series(values: pd.Series, edges: list[float], labels: list[str]) -> pd.Series:
    """Label each value with its bucket via ``pd.cut`` (lowest edge inclusive).

    Best-effort: if ``pd.cut`` raises for any reason, a series of ``None``
    with the same index as ``values`` is returned instead.
    """
    try:
        bucketed = pd.cut(values, bins=edges, labels=labels, include_lowest=True)
    except Exception:
        # Deliberate fallback: callers still get an index-aligned column.
        placeholders = [None for _ in range(len(values))]
        bucketed = pd.Series(placeholders, index=values.index)
    return bucketed
def build_ere_table(audit_df: pd.DataFrame) -> pd.DataFrame:
    """
    Global Edge Realization Efficiency.

    ERE = realized outcomes / expected outcomes

    Expected probabilities come from ``_resolve_expected_prob`` and realized
    outcomes from ``_resolve_realized_outcome``. Rows without an expected
    probability are dropped; a missing realized outcome counts as 0.

    Returns:
        A single-row DataFrame with ``bets``, ``expected_hr_total``,
        ``actual_hr_total`` and ``ere`` (``None`` when the expected total is
        not positive), or an empty DataFrame when the input is ``None``/empty,
        a source column cannot be resolved, or no row has an expected
        probability.
    """
    if audit_df is None or audit_df.empty:
        return pd.DataFrame()

    frame = audit_df.copy()
    outcome_series = _resolve_realized_outcome(frame)
    prob_series = _resolve_expected_prob(frame)
    if outcome_series is None or prob_series is None:
        return pd.DataFrame()

    frame["expected_prob"] = prob_series.apply(_safe_float)
    frame["realized_outcome"] = outcome_series.apply(_safe_float).fillna(0.0)

    scored = frame[frame["expected_prob"].notna()].copy()
    if scored.empty:
        return pd.DataFrame()

    expected_sum = float(scored["expected_prob"].sum())
    realized_sum = float(scored["realized_outcome"].sum())

    # The ratio is undefined unless something was expected to happen.
    if expected_sum > 0:
        ratio = round(realized_sum / expected_sum, 4)
    else:
        ratio = None

    row = {
        "bets": int(len(scored)),
        "expected_hr_total": round(expected_sum, 4),
        "actual_hr_total": round(realized_sum, 4),
        "ere": ratio,
    }
    return pd.DataFrame([row])
def build_ere_by_edge_bucket_table(audit_df: pd.DataFrame) -> pd.DataFrame:
    """ERE (realized / expected outcomes) per ``adjusted_edge`` bucket.

    Rows need both a numeric ``adjusted_edge`` and an expected probability;
    a missing realized outcome counts as 0. Buckets are right-closed
    intervals split at 0.02, 0.04, 0.06 and 0.08, and every bucket appears in
    the output even when it holds no rows.

    Returns:
        One row per bucket with ``bets``, ``expected_hr_total``,
        ``actual_hr_total`` and ``ere`` (NaN where the expected total is not
        positive), or an empty DataFrame when the input is ``None``/empty, a
        required column cannot be resolved, or no row is usable.
    """
    if audit_df is None or audit_df.empty:
        return pd.DataFrame()

    df = audit_df.copy()
    realized = _resolve_realized_outcome(df)
    if "adjusted_edge" not in df.columns or realized is None:
        return pd.DataFrame()
    expected = _resolve_expected_prob(df)
    if expected is None:
        return pd.DataFrame()

    # astype(float) maps the None results of _safe_float to NaN and keeps the
    # columns numeric even when every value is missing.
    df["expected_prob"] = expected.apply(_safe_float).astype(float)
    df["adjusted_edge"] = df["adjusted_edge"].apply(_safe_float).astype(float)
    df["realized_outcome"] = realized.apply(_safe_float).astype(float).fillna(0.0)

    df = df[df["adjusted_edge"].notna() & df["expected_prob"].notna()].copy()
    if df.empty:
        return pd.DataFrame()

    edges = [-math.inf, 0.02, 0.04, 0.06, 0.08, math.inf]
    labels = ["<2%", "2-4%", "4-6%", "6-8%", "8%+"]
    df["edge_bucket"] = _bucket_series(df["adjusted_edge"], edges, labels)

    grouped = (
        # observed=False is explicit so empty buckets are always reported.
        df.groupby("edge_bucket", dropna=False, observed=False)
        .agg(
            bets=("realized_outcome", "size"),
            expected_hr_total=("expected_prob", "sum"),
            actual_hr_total=("realized_outcome", "sum"),
        )
        .reset_index()
    )

    # Vectorized ratio: non-positive expected totals are masked to NaN, so
    # the column is always float and safe to round.
    expected_total = grouped["expected_hr_total"]
    grouped["ere"] = (
        grouped["actual_hr_total"] / expected_total.where(expected_total > 0)
    ).round(4)

    grouped["expected_hr_total"] = grouped["expected_hr_total"].round(4)
    grouped["actual_hr_total"] = grouped["actual_hr_total"].round(4)
    return grouped
def build_ere_by_confidence_bucket_table(audit_df: pd.DataFrame) -> pd.DataFrame:
    """ERE (realized / expected outcomes) per ``confidence`` bucket.

    Rows need both a numeric ``confidence`` and an expected probability; a
    missing realized outcome counts as 0. Buckets are right-closed intervals
    split at 0.4, 0.5, 0.6 and 0.7, and every bucket appears in the output
    even when it holds no rows.

    Returns:
        One row per bucket with ``bets``, ``expected_hr_total``,
        ``actual_hr_total`` and ``ere`` (NaN where the expected total is not
        positive), or an empty DataFrame when the input is ``None``/empty, a
        required column cannot be resolved, or no row is usable.
    """
    if audit_df is None or audit_df.empty:
        return pd.DataFrame()

    df = audit_df.copy()
    realized = _resolve_realized_outcome(df)
    if "confidence" not in df.columns or realized is None:
        return pd.DataFrame()
    expected = _resolve_expected_prob(df)
    if expected is None:
        return pd.DataFrame()

    # astype(float) maps the None results of _safe_float to NaN and keeps the
    # columns numeric even when every value is missing.
    df["expected_prob"] = expected.apply(_safe_float).astype(float)
    df["confidence"] = df["confidence"].apply(_safe_float).astype(float)
    df["realized_outcome"] = realized.apply(_safe_float).astype(float).fillna(0.0)

    df = df[df["confidence"].notna() & df["expected_prob"].notna()].copy()
    if df.empty:
        return pd.DataFrame()

    # NOTE(review): these edges assume confidence on a 0-1 scale, whereas
    # build_confidence_table bins the same column on 0-100. If confidence is
    # stored as 0-100, every row lands in "0.70+" -- confirm the scale.
    edges = [-math.inf, 0.4, 0.5, 0.6, 0.7, math.inf]
    labels = ["<0.40", "0.40-0.50", "0.50-0.60", "0.60-0.70", "0.70+"]
    df["confidence_bucket"] = _bucket_series(df["confidence"], edges, labels)

    grouped = (
        # observed=False is explicit so empty buckets are always reported.
        df.groupby("confidence_bucket", dropna=False, observed=False)
        .agg(
            bets=("realized_outcome", "size"),
            expected_hr_total=("expected_prob", "sum"),
            actual_hr_total=("realized_outcome", "sum"),
        )
        .reset_index()
    )

    # Vectorized ratio: non-positive expected totals are masked to NaN, so
    # the column is always float and safe to round.
    expected_total = grouped["expected_hr_total"]
    grouped["ere"] = (
        grouped["actual_hr_total"] / expected_total.where(expected_total > 0)
    ).round(4)

    grouped["expected_hr_total"] = grouped["expected_hr_total"].round(4)
    grouped["actual_hr_total"] = grouped["actual_hr_total"].round(4)
    return grouped
def build_ere_by_tier_table(audit_df: pd.DataFrame) -> pd.DataFrame:
    """ERE (realized / expected outcomes) per ``recommendation_tier``.

    Rows need an expected probability and a non-blank tier; a missing
    realized outcome counts as 0. Tiers are compared as strings.

    Returns:
        One row per tier with ``bets``, ``expected_hr_total``,
        ``actual_hr_total`` and ``ere`` (NaN where the expected total is not
        positive), or an empty DataFrame when the input is ``None``/empty, a
        required column cannot be resolved, or no row is usable.
    """
    if audit_df is None or audit_df.empty:
        return pd.DataFrame()

    df = audit_df.copy()
    realized = _resolve_realized_outcome(df)
    if "recommendation_tier" not in df.columns or realized is None:
        return pd.DataFrame()
    expected = _resolve_expected_prob(df)
    if expected is None:
        return pd.DataFrame()

    # astype(float) maps the None results of _safe_float to NaN and keeps the
    # columns numeric even when every value is missing.
    df["expected_prob"] = expected.apply(_safe_float).astype(float)
    df["realized_outcome"] = realized.apply(_safe_float).astype(float).fillna(0.0)
    df["recommendation_tier"] = df["recommendation_tier"].fillna("").astype(str)

    df = df[df["expected_prob"].notna() & df["recommendation_tier"].ne("")].copy()
    if df.empty:
        return pd.DataFrame()

    grouped = (
        df.groupby("recommendation_tier", dropna=False)
        .agg(
            bets=("realized_outcome", "size"),
            expected_hr_total=("expected_prob", "sum"),
            actual_hr_total=("realized_outcome", "sum"),
        )
        .reset_index()
    )

    # Vectorized ratio: non-positive expected totals are masked to NaN, so
    # the column is always float and safe to round.
    expected_total = grouped["expected_hr_total"]
    grouped["ere"] = (
        grouped["actual_hr_total"] / expected_total.where(expected_total > 0)
    ).round(4)

    grouped["expected_hr_total"] = grouped["expected_hr_total"].round(4)
    grouped["actual_hr_total"] = grouped["actual_hr_total"].round(4)
    return grouped
def build_clv_table(audit_df: pd.DataFrame) -> pd.DataFrame:
    """
    CLV table. Uses closing odds if available.

    Assumes:
    - book_hr_odds = bet-time odds
    - closing_hr_odds = closing odds (if present)

    Both odds columns are converted with ``_american_to_prob``; CLV is the
    closing probability minus the bet-time probability. Only rows where both
    probabilities are available are summarized.

    Returns:
        A single-row DataFrame with ``bets``, ``avg_bet_prob``,
        ``avg_close_prob``, ``avg_clv`` and ``beat_closing_pct`` (share of
        rows with CLV > 0), or an empty DataFrame when the input is
        ``None``/empty, an odds column is missing, or no row has both prices.
    """
    if audit_df is None or audit_df.empty:
        return pd.DataFrame()

    frame = audit_df.copy()
    has_odds = "book_hr_odds" in frame.columns and "closing_hr_odds" in frame.columns
    if not has_odds:
        return pd.DataFrame()

    frame["bet_prob"] = frame["book_hr_odds"].apply(_american_to_prob)
    frame["close_prob"] = frame["closing_hr_odds"].apply(_american_to_prob)
    frame["clv"] = frame["close_prob"] - frame["bet_prob"]

    both_priced = frame["bet_prob"].notna() & frame["close_prob"].notna()
    priced = frame[both_priced].copy()
    if priced.empty:
        return pd.DataFrame()

    clv_values = priced["clv"]
    summary = {"bets": int(len(priced))}
    summary["avg_bet_prob"] = round(float(priced["bet_prob"].mean()), 4)
    summary["avg_close_prob"] = round(float(priced["close_prob"].mean()), 4)
    summary["avg_clv"] = round(float(clv_values.mean()), 4)
    summary["beat_closing_pct"] = round(float((clv_values > 0).mean()), 4)
    return pd.DataFrame([summary])
def build_clv_by_tier_table(audit_df: pd.DataFrame) -> pd.DataFrame:
    """Summarize CLV per ``recommendation_tier``.

    ``book_hr_odds`` and ``closing_hr_odds`` are converted with
    ``_american_to_prob``; CLV is the closing probability minus the bet-time
    probability. Rows need both probabilities and a non-blank tier.

    Returns:
        One row per tier with ``bets``, ``avg_clv`` and ``beat_closing_pct``
        (share of rows with CLV > 0), or an empty DataFrame when the input is
        ``None``/empty, a required column is missing, or no row is usable.
    """
    if audit_df is None or audit_df.empty:
        return pd.DataFrame()

    frame = audit_df.copy()
    needed = ("book_hr_odds", "closing_hr_odds", "recommendation_tier")
    if not all(col in frame.columns for col in needed):
        return pd.DataFrame()

    frame["bet_prob"] = frame["book_hr_odds"].apply(_american_to_prob)
    frame["close_prob"] = frame["closing_hr_odds"].apply(_american_to_prob)
    frame["clv"] = frame["close_prob"] - frame["bet_prob"]
    frame["recommendation_tier"] = frame["recommendation_tier"].fillna("").astype(str)

    usable = (
        frame["bet_prob"].notna()
        & frame["close_prob"].notna()
        & frame["recommendation_tier"].ne("")
    )
    frame = frame[usable].copy()
    if frame.empty:
        return pd.DataFrame()

    def _share_positive(clv_values):
        # Fraction of bets in the tier whose CLV is strictly positive.
        return (clv_values > 0).mean()

    per_tier = (
        frame.groupby("recommendation_tier", dropna=False)
        .agg(
            bets=("clv", "size"),
            avg_clv=("clv", "mean"),
            beat_closing_pct=("clv", _share_positive),
        )
        .reset_index()
    )

    for column in ("avg_clv", "beat_closing_pct"):
        per_tier[column] = per_tier[column].round(4)
    return per_tier
def build_props_calibration_table(audit_df: pd.DataFrame, bins: list[float] | None = None) -> pd.DataFrame:
    """Compare predicted probability with realized rate per probability bin.

    Expected probabilities come from ``_resolve_expected_prob`` and realized
    outcomes from ``_resolve_realized_outcome``; rows missing either value
    are dropped. Every bin appears in the output even when it holds no rows.

    Args:
        audit_df: Audit rows to summarize.
        bins: Bin edges passed to ``pd.cut``. ``None`` or an empty list
            selects the default edges 0/0.05/0.10/0.20/0.35/0.50/0.65/0.80/1.

    Returns:
        One row per bin with ``samples``, ``avg_pred_prob`` and
        ``realized_rate``, or an empty DataFrame when the input is
        ``None``/empty, a source column cannot be resolved, or no row is
        usable.
    """
    if audit_df is None or audit_df.empty:
        return pd.DataFrame()

    df = audit_df.copy()
    expected = _resolve_expected_prob(df)
    realized = _resolve_realized_outcome(df)
    if expected is None or realized is None:
        return pd.DataFrame()

    # astype(float) maps the None results of _safe_float to NaN and keeps the
    # columns numeric even when every value is missing.
    df["expected_prob"] = expected.apply(_safe_float).astype(float)
    df["realized_outcome"] = realized.apply(_safe_float).astype(float)
    df = df[df["expected_prob"].notna() & df["realized_outcome"].notna()].copy()
    if df.empty:
        return pd.DataFrame()

    cut_bins = bins or [0, 0.05, 0.10, 0.20, 0.35, 0.50, 0.65, 0.80, 1.0]
    prob_bucket = pd.cut(df["expected_prob"], bins=cut_bins, include_lowest=True)

    grouped = (
        # observed=False is explicit so empty bins are always reported.
        df.groupby(prob_bucket, dropna=False, observed=False)
        .agg(
            samples=("expected_prob", "count"),
            avg_pred_prob=("expected_prob", "mean"),
            realized_rate=("realized_outcome", "mean"),
        )
        .reset_index()
    )

    grouped["realized_rate"] = grouped["realized_rate"].round(4)
    grouped["avg_pred_prob"] = grouped["avg_pred_prob"].round(4)
    return grouped