"""Ringside Predictor — interactive demo for the Ringside Analytics XGBoost model. Picks two wrestlers from a dropdown of the top-500-by-match-count, lets users set match context, and returns win probability with feature attribution. Inference runs against bundled snapshot files (`data/`) — no DB, no internet needed at request time. """ from __future__ import annotations from pathlib import Path import gradio as gr import joblib import numpy as np import pandas as pd from huggingface_hub import hf_hub_download # ─── Constants ──────────────────────────────────────────────────────── DATA_DIR = Path(__file__).parent / "data" MODEL_REPO = "datamatters24/ringside-match-winner" ALIGNMENT_INT = {"face": 0, "tweener": 1, "heel": 2} DEFAULT_ALIGNMENT = 1 # tweener / unknown — matches training distribution # 35-feature order, exact match to scaler.feature_names_in_ FEATURE_ORDER = [ "win_rate_30d", "win_rate_90d", "win_rate_365d", "current_win_streak", "current_loss_streak", "is_ppv", "is_title_match", "card_position", "event_tier", "match_type_win_rate", "is_singles", "is_tag_team", "is_triple_threat", "is_fatal_four_way", "is_ladder", "is_cage", "is_hell_in_a_cell", "is_royal_rumble", "is_champion", "num_defenses", "days_since_title_match", "years_active", "matches_last_90d", "days_since_last_match", "promotion_win_rate", "h2h_win_rate", "h2h_matches", "alignment", "is_face", "is_heel", "days_since_turn", "turns_12m", "face_heel_matchup", "avg_match_rating", "card_position_momentum", ] MATCH_TYPES = [ "singles", "tag_team", "triple_threat", "fatal_four_way", "ladder", "cage", "hell_in_a_cell", "royal_rumble", ] # ─── Load model + snapshot ──────────────────────────────────────────── def _load(): print("Downloading model from HF Hub...") xgb_path = hf_hub_download(repo_id=MODEL_REPO, filename="xgboost.joblib") scaler_path = hf_hub_download(repo_id=MODEL_REPO, filename="scaler.joblib") xgb = joblib.load(xgb_path) scaler = joblib.load(scaler_path) print(f"Loading snapshot from {DATA_DIR}...") stats = pd.read_parquet(DATA_DIR / "wrestler_stats.parquet") h2h = pd.read_parquet(DATA_DIR / "h2h.parquet") mt = pd.read_parquet(DATA_DIR / "match_type_stats.parquet") return xgb, scaler, stats, h2h, mt XGB, SCALER, STATS, H2H, MT = _load() WRESTLER_NAMES = STATS.sort_values("ring_name")["ring_name"].tolist() # ─── Feature builder ────────────────────────────────────────────────── def build_feature_row( focal: pd.Series, opponent: pd.Series, is_ppv: bool, is_title_match: bool, match_type: str, ) -> pd.DataFrame: """Construct one row of 35 features for the focal wrestler.""" # Match-type one-hots mt_flags = {f"is_{mt_key}": int(match_type == mt_key) for mt_key in MATCH_TYPES} # match_type_win_rate (focal in this match type) mtwr = MT[(MT["wrestler_id"] == focal["wrestler_id"]) & (MT["match_type"] == match_type)] match_type_win_rate = float(mtwr["win_rate"].iloc[0]) if len(mtwr) else float(focal["career_wr"]) # Head-to-head a, b = sorted([int(focal["wrestler_id"]), int(opponent["wrestler_id"])]) h_row = H2H[(H2H["w_a"] == a) & (H2H["w_b"] == b)] if len(h_row): h_total = int(h_row["h2h_matches"].iloc[0]) a_wr = float(h_row["a_win_rate"].iloc[0]) # If focal is the smaller id (a), that's their direct rate; else flip h_focal_wr = a_wr if focal["wrestler_id"] == a else (1.0 - a_wr) else: h_total, h_focal_wr = 0, 0.5 # Alignment encoding align_focal = ALIGNMENT_INT.get(focal["alignment"], DEFAULT_ALIGNMENT) align_oppo = ALIGNMENT_INT.get(opponent["alignment"], DEFAULT_ALIGNMENT) is_face = int(focal["alignment"] == "face") is_heel = int(focal["alignment"] == "heel") face_heel = int((focal["alignment"] == "face" and opponent["alignment"] == "heel") or (focal["alignment"] == "heel" and opponent["alignment"] == "face")) row = { "win_rate_30d": float(focal["win_rate_30d"]), "win_rate_90d": float(focal["win_rate_90d"]), "win_rate_365d": float(focal["win_rate_365d"]), "current_win_streak": int(focal["current_win_streak"]), "current_loss_streak": int(focal["current_loss_streak"]), "is_ppv": int(is_ppv), "is_title_match": int(is_title_match), "card_position": 5.0, # mid-card default "event_tier": int(is_ppv) * 2 + int(is_title_match), "match_type_win_rate": match_type_win_rate, **mt_flags, "is_champion": int(focal["is_champion"]), "num_defenses": int(focal["num_defenses"]), "days_since_title_match": int(focal["days_since_title_match"]), "years_active": float(focal["years_active"]), "matches_last_90d": int(focal["matches_last_90d"]), "days_since_last_match": int(focal["days_since_last_match"]), "promotion_win_rate": float(focal["promotion_win_rate"]), "h2h_win_rate": h_focal_wr, "h2h_matches": h_total, "alignment": align_focal, "is_face": is_face, "is_heel": is_heel, "days_since_turn": int(focal["days_since_turn"]), "turns_12m": int(focal["turns_12m"]), "face_heel_matchup": face_heel, "avg_match_rating": float(focal["avg_match_rating"]), "card_position_momentum": float(focal["card_position_momentum"]), } return pd.DataFrame([row])[FEATURE_ORDER] # ─── Inference ──────────────────────────────────────────────────────── def predict(wrestler_a: str, wrestler_b: str, is_ppv: bool, is_title_match: bool, match_type: str): if wrestler_a == wrestler_b: return ("⚠️ Pick two different wrestlers.", None, "") a = STATS[STATS["ring_name"] == wrestler_a].iloc[0] b = STATS[STATS["ring_name"] == wrestler_b].iloc[0] Xa = build_feature_row(a, b, is_ppv, is_title_match, match_type) Xb = build_feature_row(b, a, is_ppv, is_title_match, match_type) Xa_s = SCALER.transform(Xa) Xb_s = SCALER.transform(Xb) # Symmetric prediction: each wrestler's P(win) under the model, then renormalize pa = float(XGB.predict_proba(Xa_s)[0, 1]) pb = float(XGB.predict_proba(Xb_s)[0, 1]) norm = pa + pb pa_n, pb_n = pa / norm, pb / norm summary = ( f"### Predicted win probabilities\n\n" f"- **{wrestler_a}**: {pa_n:.0%}\n" f"- **{wrestler_b}**: {pb_n:.0%}\n\n" f"_Raw model outputs (independent calls): {pa:.2f} / {pb:.2f} — these are normalized to sum to 1._" ) # Top contributing features (XGBoost feature importances * focal feature deviation from mean) imp = pd.Series(XGB.feature_importances_, index=FEATURE_ORDER).sort_values(ascending=False) contrib = pd.DataFrame({ "feature": imp.index, "importance": imp.values, f"{wrestler_a}": Xa.iloc[0].values, f"{wrestler_b}": Xb.iloc[0].values, }).head(10) explanation = ( "### Top 10 features driving the model\n\n" f"For *any* prediction, these 10 features carry ~98% of XGBoost's signal. " f"Compare {wrestler_a}'s values to {wrestler_b}'s to see why the model leans the way it does — " f"streak features and recent activity dominate. **Booking momentum is the story.**\n\n" "⚠️ Reminder: pro wrestling outcomes are scripted. This model predicts who tends to be **booked** to win, not who would win an athletic contest. Not for betting." ) return (summary, contrib, explanation) # ─── UI ─────────────────────────────────────────────────────────────── DESCRIPTION = """ # 🤼 Ringside Predictor Live demo of the [Ringside Analytics match-winner model](https://huggingface.co/datamatters24/ringside-match-winner) trained on **482K pro wrestling matches** (1980–present). Pick two wrestlers, set the match context, and the XGBoost model (test AUC 0.718) returns a win probability with feature attribution. **How to read this:** the prediction reflects **booking patterns** the model learned from historical data — not athletic ability. Pro wrestling outcomes are scripted. See the [paper](https://tedrubin80.github.io/wrastlingfirst/paper.html) for a full discussion of the kayfabe problem. [Dataset](https://huggingface.co/datasets/datamatters24/ringside-analytics) · [Model](https://huggingface.co/datamatters24/ringside-match-winner) · [Source code](https://github.com/tedrubin80/wrastlingfirst) """ with gr.Blocks(title="Ringside Predictor", theme=gr.themes.Soft(primary_hue="red")) as demo: gr.Markdown(DESCRIPTION) with gr.Row(): a = gr.Dropdown( choices=WRESTLER_NAMES, value="John Cena", label="Wrestler A", filterable=True, ) b = gr.Dropdown( choices=WRESTLER_NAMES, value="Roman Reigns", label="Wrestler B", filterable=True, ) with gr.Row(): ppv = gr.Checkbox(label="PPV / Premium live event", value=False) title = gr.Checkbox(label="Title match", value=False) mt_in = gr.Dropdown( choices=MATCH_TYPES, value="singles", label="Match type", ) btn = gr.Button("🔮 Predict", variant="primary") out_summary = gr.Markdown() out_table = gr.Dataframe(label="Top features") out_caveat = gr.Markdown() btn.click( predict, inputs=[a, b, ppv, title, mt_in], outputs=[out_summary, out_table, out_caveat], ) gr.Examples( examples=[ ["John Cena", "Roman Reigns", False, False, "singles"], ["Stone Cold Steve Austin", "The Rock", True, True, "singles"], ["Hulk Hogan", "Andre the Giant", True, True, "singles"], ["Bret Hart", "Shawn Michaels", True, True, "cage"], ], inputs=[a, b, ppv, title, mt_in], ) if __name__ == "__main__": demo.launch()