datamatters24's picture
Initial Space โ€” Gradio demo for Ringside match-winner model
408e5ba verified
"""Ringside Predictor โ€” interactive demo for the Ringside Analytics XGBoost model.
Picks two wrestlers from a dropdown of the top-500-by-match-count, lets users
set match context, and returns win probability with feature attribution.
Inference runs against bundled snapshot files (`data/`) โ€” no DB, no internet
needed at request time.
"""
from __future__ import annotations
from pathlib import Path
import gradio as gr
import joblib
import numpy as np
import pandas as pd
from huggingface_hub import hf_hub_download
# โ”€โ”€โ”€ Constants โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
DATA_DIR = Path(__file__).parent / "data"
MODEL_REPO = "datamatters24/ringside-match-winner"
ALIGNMENT_INT = {"face": 0, "tweener": 1, "heel": 2}
DEFAULT_ALIGNMENT = 1 # tweener / unknown โ€” matches training distribution
# 35-feature order, exact match to scaler.feature_names_in_
FEATURE_ORDER = [
"win_rate_30d", "win_rate_90d", "win_rate_365d",
"current_win_streak", "current_loss_streak",
"is_ppv", "is_title_match", "card_position", "event_tier",
"match_type_win_rate",
"is_singles", "is_tag_team", "is_triple_threat", "is_fatal_four_way",
"is_ladder", "is_cage", "is_hell_in_a_cell", "is_royal_rumble",
"is_champion", "num_defenses", "days_since_title_match",
"years_active", "matches_last_90d", "days_since_last_match",
"promotion_win_rate", "h2h_win_rate", "h2h_matches",
"alignment", "is_face", "is_heel",
"days_since_turn", "turns_12m", "face_heel_matchup",
"avg_match_rating", "card_position_momentum",
]
MATCH_TYPES = [
"singles", "tag_team", "triple_threat", "fatal_four_way",
"ladder", "cage", "hell_in_a_cell", "royal_rumble",
]
# โ”€โ”€โ”€ Load model + snapshot โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
def _load():
print("Downloading model from HF Hub...")
xgb_path = hf_hub_download(repo_id=MODEL_REPO, filename="xgboost.joblib")
scaler_path = hf_hub_download(repo_id=MODEL_REPO, filename="scaler.joblib")
xgb = joblib.load(xgb_path)
scaler = joblib.load(scaler_path)
print(f"Loading snapshot from {DATA_DIR}...")
stats = pd.read_parquet(DATA_DIR / "wrestler_stats.parquet")
h2h = pd.read_parquet(DATA_DIR / "h2h.parquet")
mt = pd.read_parquet(DATA_DIR / "match_type_stats.parquet")
return xgb, scaler, stats, h2h, mt
XGB, SCALER, STATS, H2H, MT = _load()
WRESTLER_NAMES = STATS.sort_values("ring_name")["ring_name"].tolist()
# โ”€โ”€โ”€ Feature builder โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
def build_feature_row(
focal: pd.Series,
opponent: pd.Series,
is_ppv: bool,
is_title_match: bool,
match_type: str,
) -> pd.DataFrame:
"""Construct one row of 35 features for the focal wrestler."""
# Match-type one-hots
mt_flags = {f"is_{mt_key}": int(match_type == mt_key) for mt_key in MATCH_TYPES}
# match_type_win_rate (focal in this match type)
mtwr = MT[(MT["wrestler_id"] == focal["wrestler_id"]) & (MT["match_type"] == match_type)]
match_type_win_rate = float(mtwr["win_rate"].iloc[0]) if len(mtwr) else float(focal["career_wr"])
# Head-to-head
a, b = sorted([int(focal["wrestler_id"]), int(opponent["wrestler_id"])])
h_row = H2H[(H2H["w_a"] == a) & (H2H["w_b"] == b)]
if len(h_row):
h_total = int(h_row["h2h_matches"].iloc[0])
a_wr = float(h_row["a_win_rate"].iloc[0])
# If focal is the smaller id (a), that's their direct rate; else flip
h_focal_wr = a_wr if focal["wrestler_id"] == a else (1.0 - a_wr)
else:
h_total, h_focal_wr = 0, 0.5
# Alignment encoding
align_focal = ALIGNMENT_INT.get(focal["alignment"], DEFAULT_ALIGNMENT)
align_oppo = ALIGNMENT_INT.get(opponent["alignment"], DEFAULT_ALIGNMENT)
is_face = int(focal["alignment"] == "face")
is_heel = int(focal["alignment"] == "heel")
face_heel = int((focal["alignment"] == "face" and opponent["alignment"] == "heel")
or (focal["alignment"] == "heel" and opponent["alignment"] == "face"))
row = {
"win_rate_30d": float(focal["win_rate_30d"]),
"win_rate_90d": float(focal["win_rate_90d"]),
"win_rate_365d": float(focal["win_rate_365d"]),
"current_win_streak": int(focal["current_win_streak"]),
"current_loss_streak": int(focal["current_loss_streak"]),
"is_ppv": int(is_ppv),
"is_title_match": int(is_title_match),
"card_position": 5.0, # mid-card default
"event_tier": int(is_ppv) * 2 + int(is_title_match),
"match_type_win_rate": match_type_win_rate,
**mt_flags,
"is_champion": int(focal["is_champion"]),
"num_defenses": int(focal["num_defenses"]),
"days_since_title_match": int(focal["days_since_title_match"]),
"years_active": float(focal["years_active"]),
"matches_last_90d": int(focal["matches_last_90d"]),
"days_since_last_match": int(focal["days_since_last_match"]),
"promotion_win_rate": float(focal["promotion_win_rate"]),
"h2h_win_rate": h_focal_wr,
"h2h_matches": h_total,
"alignment": align_focal,
"is_face": is_face,
"is_heel": is_heel,
"days_since_turn": int(focal["days_since_turn"]),
"turns_12m": int(focal["turns_12m"]),
"face_heel_matchup": face_heel,
"avg_match_rating": float(focal["avg_match_rating"]),
"card_position_momentum": float(focal["card_position_momentum"]),
}
return pd.DataFrame([row])[FEATURE_ORDER]
# โ”€โ”€โ”€ Inference โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
def predict(wrestler_a: str, wrestler_b: str, is_ppv: bool, is_title_match: bool, match_type: str):
if wrestler_a == wrestler_b:
return ("โš ๏ธ Pick two different wrestlers.", None, "")
a = STATS[STATS["ring_name"] == wrestler_a].iloc[0]
b = STATS[STATS["ring_name"] == wrestler_b].iloc[0]
Xa = build_feature_row(a, b, is_ppv, is_title_match, match_type)
Xb = build_feature_row(b, a, is_ppv, is_title_match, match_type)
Xa_s = SCALER.transform(Xa)
Xb_s = SCALER.transform(Xb)
# Symmetric prediction: each wrestler's P(win) under the model, then renormalize
pa = float(XGB.predict_proba(Xa_s)[0, 1])
pb = float(XGB.predict_proba(Xb_s)[0, 1])
norm = pa + pb
pa_n, pb_n = pa / norm, pb / norm
summary = (
f"### Predicted win probabilities\n\n"
f"- **{wrestler_a}**: {pa_n:.0%}\n"
f"- **{wrestler_b}**: {pb_n:.0%}\n\n"
f"_Raw model outputs (independent calls): {pa:.2f} / {pb:.2f} โ€” these are normalized to sum to 1._"
)
# Top contributing features (XGBoost feature importances * focal feature deviation from mean)
imp = pd.Series(XGB.feature_importances_, index=FEATURE_ORDER).sort_values(ascending=False)
contrib = pd.DataFrame({
"feature": imp.index,
"importance": imp.values,
f"{wrestler_a}": Xa.iloc[0].values,
f"{wrestler_b}": Xb.iloc[0].values,
}).head(10)
explanation = (
"### Top 10 features driving the model\n\n"
f"For *any* prediction, these 10 features carry ~98% of XGBoost's signal. "
f"Compare {wrestler_a}'s values to {wrestler_b}'s to see why the model leans the way it does โ€” "
f"streak features and recent activity dominate. **Booking momentum is the story.**\n\n"
"โš ๏ธ Reminder: pro wrestling outcomes are scripted. This model predicts who tends to be **booked** to win, not who would win an athletic contest. Not for betting."
)
return (summary, contrib, explanation)
# โ”€โ”€โ”€ UI โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
DESCRIPTION = """
# ๐Ÿคผ Ringside Predictor
Live demo of the [Ringside Analytics match-winner model](https://huggingface.co/datamatters24/ringside-match-winner) trained on **482K pro wrestling matches** (1980โ€“present).
Pick two wrestlers, set the match context, and the XGBoost model (test AUC 0.718) returns a win probability with feature attribution.
**How to read this:** the prediction reflects **booking patterns** the model learned from historical data โ€” not athletic ability. Pro wrestling outcomes are scripted. See the [paper](https://tedrubin80.github.io/wrastlingfirst/paper.html) for a full discussion of the kayfabe problem.
[Dataset](https://huggingface.co/datasets/datamatters24/ringside-analytics) ยท [Model](https://huggingface.co/datamatters24/ringside-match-winner) ยท [Source code](https://github.com/tedrubin80/wrastlingfirst)
"""
with gr.Blocks(title="Ringside Predictor", theme=gr.themes.Soft(primary_hue="red")) as demo:
gr.Markdown(DESCRIPTION)
with gr.Row():
a = gr.Dropdown(
choices=WRESTLER_NAMES, value="John Cena",
label="Wrestler A", filterable=True,
)
b = gr.Dropdown(
choices=WRESTLER_NAMES, value="Roman Reigns",
label="Wrestler B", filterable=True,
)
with gr.Row():
ppv = gr.Checkbox(label="PPV / Premium live event", value=False)
title = gr.Checkbox(label="Title match", value=False)
mt_in = gr.Dropdown(
choices=MATCH_TYPES, value="singles", label="Match type",
)
btn = gr.Button("๐Ÿ”ฎ Predict", variant="primary")
out_summary = gr.Markdown()
out_table = gr.Dataframe(label="Top features")
out_caveat = gr.Markdown()
btn.click(
predict,
inputs=[a, b, ppv, title, mt_in],
outputs=[out_summary, out_table, out_caveat],
)
gr.Examples(
examples=[
["John Cena", "Roman Reigns", False, False, "singles"],
["Stone Cold Steve Austin", "The Rock", True, True, "singles"],
["Hulk Hogan", "Andre the Giant", True, True, "singles"],
["Bret Hart", "Shawn Michaels", True, True, "cage"],
],
inputs=[a, b, ppv, title, mt_in],
)
if __name__ == "__main__":
demo.launch()