"""Ringside Predictor — interactive demo for the Ringside Analytics XGBoost model.

Lets users pick two wrestlers from a dropdown of the top-500-by-match-count,
set the match context, and get a win probability with feature attribution.

Inference runs against bundled snapshot files (`data/`) — no DB, no internet
needed at request time.
"""
| from __future__ import annotations |
|
|
| from pathlib import Path |
|
|
| import gradio as gr |
| import joblib |
| import numpy as np |
| import pandas as pd |
| from huggingface_hub import hf_hub_download |
|
|
| |
# Where the demo finds its inputs: the bundled snapshot directory next to
# this file, and the Hugging Face Hub repo holding the model artefacts.
DATA_DIR = Path(__file__).parent.joinpath("data")
MODEL_REPO = "datamatters24/ringside-match-winner"

# Integer code per alignment label. DEFAULT_ALIGNMENT is the fallback code
# for labels missing from the mapping (same value as "tweener").
ALIGNMENT_INT = dict(face=0, tweener=1, heel=2)
DEFAULT_ALIGNMENT = 1

# Match types offered to the user. Each one has a matching ``is_<type>``
# flag column in FEATURE_ORDER, in this same order.
MATCH_TYPES = [
    "singles",
    "tag_team",
    "triple_threat",
    "fatal_four_way",
    "ladder",
    "cage",
    "hell_in_a_cell",
    "royal_rumble",
]

# Column order of the feature row handed to the scaler and the model.
# NOTE(review): presumably this must match the training column order — confirm.
FEATURE_ORDER = [
    "win_rate_30d",
    "win_rate_90d",
    "win_rate_365d",
    "current_win_streak",
    "current_loss_streak",
    "is_ppv",
    "is_title_match",
    "card_position",
    "event_tier",
    "match_type_win_rate",
    # One flag column per match type, derived so the two lists cannot drift.
    *[f"is_{match_type}" for match_type in MATCH_TYPES],
    "is_champion",
    "num_defenses",
    "days_since_title_match",
    "years_active",
    "matches_last_90d",
    "days_since_last_match",
    "promotion_win_rate",
    "h2h_win_rate",
    "h2h_matches",
    "alignment",
    "is_face",
    "is_heel",
    "days_since_turn",
    "turns_12m",
    "face_heel_matchup",
    "avg_match_rating",
    "card_position_momentum",
]
|
|
| |
def _load():
    """Fetch the model artefacts and read the bundled snapshot tables.

    Returns:
        A 5-tuple ``(xgb, scaler, stats, h2h, mt)``: the two objects
        deserialised from ``xgboost.joblib`` and ``scaler.joblib`` in
        ``MODEL_REPO``, followed by the ``wrestler_stats``, ``h2h`` and
        ``match_type_stats`` parquet tables from ``DATA_DIR`` as DataFrames.
    """
    print("Downloading model from HF Hub...")
    artefact_paths = [
        hf_hub_download(repo_id=MODEL_REPO, filename=artefact)
        for artefact in ("xgboost.joblib", "scaler.joblib")
    ]
    # joblib files are pickle-based, so loading them runs code shipped in the
    # repo — MODEL_REPO has to be a trusted source.
    xgb, scaler = (joblib.load(path) for path in artefact_paths)

    print(f"Loading snapshot from {DATA_DIR}...")
    stats, h2h, mt = (
        pd.read_parquet(DATA_DIR / table_file)
        for table_file in (
            "wrestler_stats.parquet",
            "h2h.parquet",
            "match_type_stats.parquet",
        )
    )
    return xgb, scaler, stats, h2h, mt
|
|
# Everything is loaded once, at import time, and then only read by the
# request handlers.
XGB, SCALER, STATS, H2H, MT = _load()
# Dropdown choices: every ring name in the snapshot, in sorted order.
WRESTLER_NAMES = STATS["ring_name"].sort_values().tolist()
|
|
|
|
| |
def build_feature_row(
    focal: pd.Series,
    opponent: pd.Series,
    is_ppv: bool,
    is_title_match: bool,
    match_type: str,
) -> pd.DataFrame:
    """Construct one row of 35 features for the focal wrestler.

    Args:
        focal: Snapshot row of the wrestler the prediction is made for.
        opponent: Snapshot row of the other wrestler; only its
            ``wrestler_id`` and ``alignment`` are read.
        is_ppv: Whether the match takes place on a PPV / premium event.
        is_title_match: Whether a title is on the line.
        match_type: One of ``MATCH_TYPES``. Any other value leaves every
            ``is_<type>`` flag at 0.

    Returns:
        A single-row DataFrame whose columns follow ``FEATURE_ORDER``.

    NOTE(review): the ``int(...)`` / ``float(...)`` conversions below raise on
    missing values — this assumes the snapshot has none in these columns;
    confirm against the snapshot builder.
    """
    focal_id = int(focal["wrestler_id"])
    opponent_id = int(opponent["wrestler_id"])

    # One-hot encode the match type.
    mt_flags = {f"is_{mt_key}": int(match_type == mt_key) for mt_key in MATCH_TYPES}

    # Focal wrestler's win rate in this match type; when the snapshot has no
    # row for the pair, fall back to the career win rate.
    mtwr = MT[(MT["wrestler_id"] == focal["wrestler_id"]) & (MT["match_type"] == match_type)]
    if mtwr.empty:
        match_type_win_rate = float(focal["career_wr"])
    else:
        match_type_win_rate = float(mtwr["win_rate"].iloc[0])

    # Head-to-head rows are looked up with the smaller id in ``w_a``; the
    # stored rate belongs to ``w_a``, so flip it when the focal is ``w_b``.
    id_a, id_b = sorted([focal_id, opponent_id])
    h_row = H2H[(H2H["w_a"] == id_a) & (H2H["w_b"] == id_b)]
    if h_row.empty:
        # No recorded history between the two: zero matches, even odds.
        h_total, h_focal_wr = 0, 0.5
    else:
        h_total = int(h_row["h2h_matches"].iloc[0])
        a_wr = float(h_row["a_win_rate"].iloc[0])
        h_focal_wr = a_wr if focal_id == id_a else (1.0 - a_wr)

    # Alignment features. Labels missing from ALIGNMENT_INT get the default.
    focal_alignment = focal["alignment"]
    align_focal = ALIGNMENT_INT.get(focal_alignment, DEFAULT_ALIGNMENT)
    is_face = int(focal_alignment == "face")
    is_heel = int(focal_alignment == "heel")
    # 1 only when one side is a face and the other a heel (either way round).
    face_heel = int({focal_alignment, opponent["alignment"]} == {"face", "heel"})

    row = {
        "win_rate_30d": float(focal["win_rate_30d"]),
        "win_rate_90d": float(focal["win_rate_90d"]),
        "win_rate_365d": float(focal["win_rate_365d"]),
        "current_win_streak": int(focal["current_win_streak"]),
        "current_loss_streak": int(focal["current_loss_streak"]),
        "is_ppv": int(is_ppv),
        "is_title_match": int(is_title_match),
        # Constant: this function takes no card-position input.
        "card_position": 5.0,
        # 0..3: PPV contributes 2, title match contributes 1.
        "event_tier": int(is_ppv) * 2 + int(is_title_match),
        "match_type_win_rate": match_type_win_rate,
        **mt_flags,
        "is_champion": int(focal["is_champion"]),
        "num_defenses": int(focal["num_defenses"]),
        "days_since_title_match": int(focal["days_since_title_match"]),
        "years_active": float(focal["years_active"]),
        "matches_last_90d": int(focal["matches_last_90d"]),
        "days_since_last_match": int(focal["days_since_last_match"]),
        "promotion_win_rate": float(focal["promotion_win_rate"]),
        "h2h_win_rate": h_focal_wr,
        "h2h_matches": h_total,
        "alignment": align_focal,
        "is_face": is_face,
        "is_heel": is_heel,
        "days_since_turn": int(focal["days_since_turn"]),
        "turns_12m": int(focal["turns_12m"]),
        "face_heel_matchup": face_heel,
        "avg_match_rating": float(focal["avg_match_rating"]),
        "card_position_momentum": float(focal["card_position_momentum"]),
    }
    # Select by FEATURE_ORDER so the column order is fixed regardless of the
    # dict's insertion order.
    return pd.DataFrame([row])[FEATURE_ORDER]
|
|
|
|
| |
def predict(
    wrestler_a: str,
    wrestler_b: str,
    is_ppv: bool,
    is_title_match: bool,
    match_type: str,
) -> tuple[str, pd.DataFrame | None, str]:
    """Predict the winner of a two-wrestler match and build the UI outputs.

    The model is called once per wrestler (each as the focal side) and the
    two positive-class probabilities are normalized so they sum to 1.

    Args:
        wrestler_a: Ring name of the first wrestler.
        wrestler_b: Ring name of the second wrestler.
        is_ppv: Whether the match takes place on a PPV / premium event.
        is_title_match: Whether a title is on the line.
        match_type: Match type, forwarded to ``build_feature_row``.

    Returns:
        ``(summary, contrib, explanation)``: a markdown summary, a table of
        the 10 most important features with both wrestlers' values, and a
        markdown explanation. On invalid input the table is ``None``, the
        explanation is empty and the summary carries the warning.
    """
    # A cleared dropdown delivers an empty value.
    if not wrestler_a or not wrestler_b:
        return ("⚠️ Pick two wrestlers.", None, "")
    if wrestler_a == wrestler_b:
        return ("⚠️ Pick two different wrestlers.", None, "")

    rows_a = STATS[STATS["ring_name"] == wrestler_a]
    rows_b = STATS[STATS["ring_name"] == wrestler_b]
    # Report unknown names instead of failing with IndexError on .iloc[0].
    missing = [
        name
        for name, rows in ((wrestler_a, rows_a), (wrestler_b, rows_b))
        if rows.empty
    ]
    if missing:
        return (f"⚠️ No snapshot data for: {', '.join(missing)}.", None, "")
    a, b = rows_a.iloc[0], rows_b.iloc[0]

    # One feature row per wrestler, each built from that wrestler's side.
    Xa = build_feature_row(a, b, is_ppv, is_title_match, match_type)
    Xb = build_feature_row(b, a, is_ppv, is_title_match, match_type)

    Xa_s = SCALER.transform(Xa)
    Xb_s = SCALER.transform(Xb)

    # The two calls are independent, so the raw outputs need not sum to 1.
    pa = float(XGB.predict_proba(Xa_s)[0, 1])
    pb = float(XGB.predict_proba(Xb_s)[0, 1])
    norm = pa + pb
    if norm > 0:
        pa_n, pb_n = pa / norm, pb / norm
    else:
        # Both outputs are zero: nothing to normalize, show even odds.
        pa_n = pb_n = 0.5

    summary = (
        "### Predicted win probabilities\n\n"
        f"- **{wrestler_a}**: {pa_n:.0%}\n"
        f"- **{wrestler_b}**: {pb_n:.0%}\n\n"
        f"_Raw model outputs (independent calls): {pa:.2f} / {pb:.2f} — these are normalized to sum to 1._"
    )

    # Rank features by global importance, then pick each wrestler's values by
    # feature *label* so every value sits next to the feature it belongs to
    # (the feature rows themselves are in FEATURE_ORDER, not importance order).
    imp = pd.Series(XGB.feature_importances_, index=FEATURE_ORDER).sort_values(ascending=False)
    top = imp.head(10)
    contrib = pd.DataFrame({
        "feature": top.index,
        "importance": top.to_numpy(),
        f"{wrestler_a}": Xa.iloc[0].loc[top.index].to_numpy(),
        f"{wrestler_b}": Xb.iloc[0].loc[top.index].to_numpy(),
    })

    explanation = (
        "### Top 10 features driving the model\n\n"
        "For *any* prediction, these 10 features carry ~98% of XGBoost's signal. "
        f"Compare {wrestler_a}'s values to {wrestler_b}'s to see why the model leans the way it does — "
        "streak features and recent activity dominate. **Booking momentum is the story.**\n\n"
        "⚠️ Reminder: pro wrestling outcomes are scripted. This model predicts who tends to be **booked** to win, not who would win an athletic contest. Not for betting."
    )

    return (summary, contrib, explanation)
|
|
|
|
| |
# Markdown rendered at the top of the demo page.
DESCRIPTION = """
# 🤼 Ringside Predictor

Live demo of the [Ringside Analytics match-winner model](https://huggingface.co/datamatters24/ringside-match-winner) trained on **482K pro wrestling matches** (1980–present).

Pick two wrestlers, set the match context, and the XGBoost model (test AUC 0.718) returns a win probability with feature attribution.

**How to read this:** the prediction reflects **booking patterns** the model learned from historical data — not athletic ability. Pro wrestling outcomes are scripted. See the [paper](https://tedrubin80.github.io/wrastlingfirst/paper.html) for a full discussion of the kayfabe problem.

[Dataset](https://huggingface.co/datasets/datamatters24/ringside-analytics) · [Model](https://huggingface.co/datamatters24/ringside-match-winner) · [Source code](https://github.com/tedrubin80/wrastlingfirst)
"""
|
|
# Page layout: description, two wrestler pickers, match-context controls,
# a predict button, and the three outputs returned by ``predict``.
with gr.Blocks(title="Ringside Predictor", theme=gr.themes.Soft(primary_hue="red")) as demo:
    gr.Markdown(DESCRIPTION)

    # NOTE(review): the default and example names below are assumed to be
    # present in WRESTLER_NAMES — confirm against the bundled snapshot.
    with gr.Row():
        a = gr.Dropdown(
            choices=WRESTLER_NAMES, value="John Cena",
            label="Wrestler A", filterable=True,
        )
        b = gr.Dropdown(
            choices=WRESTLER_NAMES, value="Roman Reigns",
            label="Wrestler B", filterable=True,
        )

    with gr.Row():
        ppv = gr.Checkbox(label="PPV / Premium live event", value=False)
        title = gr.Checkbox(label="Title match", value=False)
        mt_in = gr.Dropdown(
            choices=MATCH_TYPES, value="singles", label="Match type",
        )

    btn = gr.Button("🔮 Predict", variant="primary")

    out_summary = gr.Markdown()
    out_table = gr.Dataframe(label="Top features")
    out_caveat = gr.Markdown()

    # The button and the examples feed the same components, in the order of
    # ``predict``'s parameters.
    predict_inputs = [a, b, ppv, title, mt_in]

    btn.click(
        predict,
        inputs=predict_inputs,
        outputs=[out_summary, out_table, out_caveat],
    )

    gr.Examples(
        examples=[
            ["John Cena", "Roman Reigns", False, False, "singles"],
            ["Stone Cold Steve Austin", "The Rock", True, True, "singles"],
            ["Hulk Hogan", "Andre the Giant", True, True, "singles"],
            ["Bret Hart", "Shawn Michaels", True, True, "cage"],
        ],
        inputs=predict_inputs,
    )


# Start the Gradio server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|
|