Spaces:

datamatters24
/

ringside-predictor

Running

App Files Files Community

ringside-predictor / app.py

datamatters24

Initial Space — Gradio demo for Ringside match-winner model

408e5ba verified about 1 month ago

raw

history blame contribute delete

10.8 kB

	"""Ringside Predictor — interactive demo for the Ringside Analytics XGBoost model.

	Picks two wrestlers from a dropdown of the top-500-by-match-count, lets users
	set match context, and returns win probability with feature attribution.

	Inference runs against bundled snapshot files (`data/`) — no DB, no internet
	needed at request time.
	"""
	from __future__ import annotations

	from pathlib import Path

	import gradio as gr
	import joblib
	import numpy as np
	import pandas as pd
	from huggingface_hub import hf_hub_download

	# ─── Constants ────────────────────────────────────────────────────────
	# Snapshot files ship next to this module; the model itself lives on the Hub.
	DATA_DIR = Path(__file__).parent.joinpath("data")
	MODEL_REPO = "datamatters24/ringside-match-winner"

	# Integer codes for the `alignment` feature.
	ALIGNMENT_INT = dict(face=0, tweener=1, heel=2)
	# Fallback code for missing/unrecognised alignments (tweener / unknown —
	# matches training distribution).
	DEFAULT_ALIGNMENT = ALIGNMENT_INT["tweener"]

	# Match types offered in the UI; each one has an `is_<type>` one-hot feature.
	MATCH_TYPES = [
	    "singles",
	    "tag_team",
	    "triple_threat",
	    "fatal_four_way",
	    "ladder",
	    "cage",
	    "hell_in_a_cell",
	    "royal_rumble",
	]

	# 35-feature order, exact match to scaler.feature_names_in_.
	# Do not reorder: columns are selected positionally downstream.
	FEATURE_ORDER = [
	    # recent form
	    "win_rate_30d",
	    "win_rate_90d",
	    "win_rate_365d",
	    "current_win_streak",
	    "current_loss_streak",
	    # match context
	    "is_ppv",
	    "is_title_match",
	    "card_position",
	    "event_tier",
	    "match_type_win_rate",
	    # match-type one-hots (same order as MATCH_TYPES)
	    "is_singles",
	    "is_tag_team",
	    "is_triple_threat",
	    "is_fatal_four_way",
	    "is_ladder",
	    "is_cage",
	    "is_hell_in_a_cell",
	    "is_royal_rumble",
	    # title history
	    "is_champion",
	    "num_defenses",
	    "days_since_title_match",
	    # career / activity
	    "years_active",
	    "matches_last_90d",
	    "days_since_last_match",
	    "promotion_win_rate",
	    # head-to-head
	    "h2h_win_rate",
	    "h2h_matches",
	    # alignment
	    "alignment",
	    "is_face",
	    "is_heel",
	    "days_since_turn",
	    "turns_12m",
	    "face_heel_matchup",
	    # presentation
	    "avg_match_rating",
	    "card_position_momentum",
	]

	# ─── Load model + snapshot ────────────────────────────────────────────
	def _load():
	    """Fetch the model artifacts and read the bundled snapshot tables.

	    Downloads ``xgboost.joblib`` and ``scaler.joblib`` from ``MODEL_REPO``
	    via ``hf_hub_download``, deserialises both with ``joblib.load``, then
	    reads the three parquet tables under ``DATA_DIR``.

	    Returns:
	        Tuple ``(xgb, scaler, stats, h2h, mt)`` in that order.
	    """
	    print("Downloading model from HF Hub...")
	    model_file, scaler_file = (
	        hf_hub_download(repo_id=MODEL_REPO, filename=artifact)
	        for artifact in ("xgboost.joblib", "scaler.joblib")
	    )

	    # NOTE(review): joblib.load unpickles the downloaded files, which can
	    # execute arbitrary code — only point MODEL_REPO at a repo you trust.
	    model = joblib.load(model_file)
	    scaler = joblib.load(scaler_file)

	    print(f"Loading snapshot from {DATA_DIR}...")
	    stats, h2h, mt = (
	        pd.read_parquet(DATA_DIR / table)
	        for table in (
	            "wrestler_stats.parquet",
	            "h2h.parquet",
	            "match_type_stats.parquet",
	        )
	    )
	    return model, scaler, stats, h2h, mt

	# Model, scaler and snapshot tables are loaded once, at import time.
	XGB, SCALER, STATS, H2H, MT = _load()
	# Dropdown choices: every ring name in the snapshot, sorted ascending.
	WRESTLER_NAMES = STATS["ring_name"].sort_values().tolist()


	# ─── Feature builder ──────────────────────────────────────────────────
	def build_feature_row(
	    focal: pd.Series,
	    opponent: pd.Series,
	    is_ppv: bool,
	    is_title_match: bool,
	    match_type: str,
	) -> pd.DataFrame:
	    """Construct one row of 35 features for the focal wrestler.

	    The row describes the match from ``focal``'s point of view. ``opponent``
	    contributes only to the head-to-head record and the face/heel matchup
	    flag; every other feature is read from ``focal`` or the match context.

	    Args:
	        focal: Stats row of the wrestler being scored.
	        opponent: Stats row of the other wrestler.
	        is_ppv: Whether the match is on a PPV / premium live event.
	        is_title_match: Whether the match is a title match.
	        match_type: Match type key; a value outside ``MATCH_TYPES`` leaves
	            every match-type one-hot at 0.

	    Returns:
	        A single-row DataFrame whose columns follow ``FEATURE_ORDER``.
	    """
	    focal_id = focal["wrestler_id"]
	    focal_alignment = focal["alignment"]
	    oppo_alignment = opponent["alignment"]

	    # Match-type one-hots
	    mt_flags = {f"is_{mt_key}": int(match_type == mt_key) for mt_key in MATCH_TYPES}

	    # Focal's win rate in this match type; fall back to the career win rate
	    # when the wrestler has no record for it.
	    in_type = MT[(MT["wrestler_id"] == focal_id) & (MT["match_type"] == match_type)]
	    if len(in_type):
	        match_type_win_rate = float(in_type["win_rate"].iloc[0])
	    else:
	        match_type_win_rate = float(focal["career_wr"])

	    # Head-to-head: the pair is looked up with the smaller id as w_a.
	    low_id, high_id = sorted([int(focal_id), int(opponent["wrestler_id"])])
	    pair = H2H[(H2H["w_a"] == low_id) & (H2H["w_b"] == high_id)]
	    if len(pair):
	        h2h_matches = int(pair["h2h_matches"].iloc[0])
	        low_id_wr = float(pair["a_win_rate"].iloc[0])
	        # a_win_rate belongs to the smaller id; flip it when focal is the larger one.
	        h2h_win_rate = low_id_wr if focal_id == low_id else 1.0 - low_id_wr
	    else:
	        # No shared history: zero matches and a neutral 50% rate.
	        h2h_matches, h2h_win_rate = 0, 0.5

	    # Alignment encoding (unknown values map to DEFAULT_ALIGNMENT).
	    alignment_code = ALIGNMENT_INT.get(focal_alignment, DEFAULT_ALIGNMENT)
	    is_face = int(focal_alignment == "face")
	    is_heel = int(focal_alignment == "heel")
	    # 1 only for a face-vs-heel pairing, in either direction.
	    face_heel = int(
	        (focal_alignment == "face" and oppo_alignment == "heel")
	        or (focal_alignment == "heel" and oppo_alignment == "face")
	    )

	    row = {
	        "win_rate_30d": float(focal["win_rate_30d"]),
	        "win_rate_90d": float(focal["win_rate_90d"]),
	        "win_rate_365d": float(focal["win_rate_365d"]),
	        "current_win_streak": int(focal["current_win_streak"]),
	        "current_loss_streak": int(focal["current_loss_streak"]),
	        "is_ppv": int(is_ppv),
	        "is_title_match": int(is_title_match),
	        "card_position": 5.0,  # mid-card default
	        # 0..3: PPV contributes 2, title match contributes 1.
	        # NOTE(review): presumably mirrors the training-time definition — confirm.
	        "event_tier": int(is_ppv) * 2 + int(is_title_match),
	        "match_type_win_rate": match_type_win_rate,
	        **mt_flags,
	        "is_champion": int(focal["is_champion"]),
	        "num_defenses": int(focal["num_defenses"]),
	        "days_since_title_match": int(focal["days_since_title_match"]),
	        "years_active": float(focal["years_active"]),
	        "matches_last_90d": int(focal["matches_last_90d"]),
	        "days_since_last_match": int(focal["days_since_last_match"]),
	        "promotion_win_rate": float(focal["promotion_win_rate"]),
	        "h2h_win_rate": h2h_win_rate,
	        "h2h_matches": h2h_matches,
	        "alignment": alignment_code,
	        "is_face": is_face,
	        "is_heel": is_heel,
	        "days_since_turn": int(focal["days_since_turn"]),
	        "turns_12m": int(focal["turns_12m"]),
	        "face_heel_matchup": face_heel,
	        "avg_match_rating": float(focal["avg_match_rating"]),
	        "card_position_momentum": float(focal["card_position_momentum"]),
	    }
	    # Select columns explicitly so the order matches what the scaler expects.
	    return pd.DataFrame([row])[FEATURE_ORDER]


	# ─── Inference ────────────────────────────────────────────────────────
	def predict(wrestler_a: str, wrestler_b: str, is_ppv: bool, is_title_match: bool, match_type: str):
	    """Score a match-up and describe the model's most important features.

	    Each wrestler is scored independently as the focal wrestler, and the two
	    raw probabilities are renormalised so they sum to 1.

	    Args:
	        wrestler_a: Ring name of the first wrestler.
	        wrestler_b: Ring name of the second wrestler.
	        is_ppv: Whether the match is on a PPV / premium live event.
	        is_title_match: Whether the match is a title match.
	        match_type: Match type key passed through to ``build_feature_row``.

	    Returns:
	        Tuple ``(summary_markdown, feature_table, caveat_markdown)``. When the
	        inputs cannot be scored (same wrestler twice, or a name missing from
	        the snapshot) the tuple is ``(warning_markdown, None, "")``.
	    """
	    if wrestler_a == wrestler_b:
	        return ("⚠️ Pick two different wrestlers.", None, "")

	    # A name that is not in the snapshot (e.g. an empty selection) used to
	    # raise IndexError on .iloc[0]; report it to the user instead.
	    stat_rows = []
	    for name in (wrestler_a, wrestler_b):
	        matches = STATS[STATS["ring_name"] == name]
	        if matches.empty:
	            return (f"⚠️ Wrestler not found in snapshot: {name!r}.", None, "")
	        stat_rows.append(matches.iloc[0])
	    a, b = stat_rows

	    Xa = build_feature_row(a, b, is_ppv, is_title_match, match_type)
	    Xb = build_feature_row(b, a, is_ppv, is_title_match, match_type)

	    Xa_s = SCALER.transform(Xa)
	    Xb_s = SCALER.transform(Xb)

	    # Symmetric prediction: each wrestler's P(win) under the model, then renormalize
	    pa = float(XGB.predict_proba(Xa_s)[0, 1])
	    pb = float(XGB.predict_proba(Xb_s)[0, 1])
	    norm = pa + pb
	    if norm > 0:
	        pa_n, pb_n = pa / norm, pb / norm
	    else:
	        # Both raw outputs are zero: no signal either way, avoid dividing by zero.
	        pa_n = pb_n = 0.5

	    summary = (
	        f"### Predicted win probabilities\n\n"
	        f"- {wrestler_a}: {pa_n:.0%}\n"
	        f"- {wrestler_b}: {pb_n:.0%}\n\n"
	        f"_Raw model outputs (independent calls): {pa:.2f} / {pb:.2f} — these are normalized to sum to 1._"
	    )

	    # Ten globally most important features (XGBoost feature importances), shown
	    # next to each wrestler's unscaled value for that feature. The values must
	    # be re-ordered to follow the importance ranking: taking them in
	    # FEATURE_ORDER order would pair each value with the wrong feature name.
	    imp = pd.Series(XGB.feature_importances_, index=FEATURE_ORDER).sort_values(ascending=False)
	    top = imp.head(10)
	    contrib = pd.DataFrame({
	        "feature": top.index,
	        "importance": top.values,
	        str(wrestler_a): Xa.iloc[0].reindex(top.index).values,
	        str(wrestler_b): Xb.iloc[0].reindex(top.index).values,
	    })

	    explanation = (
	        "### Top 10 features driving the model\n\n"
	        f"For any prediction, these 10 features carry ~98% of XGBoost's signal. "
	        f"Compare {wrestler_a}'s values to {wrestler_b}'s to see why the model leans the way it does — "
	        f"streak features and recent activity dominate. Booking momentum is the story.\n\n"
	        "⚠️ Reminder: pro wrestling outcomes are scripted. This model predicts who tends to be booked to win, not who would win an athletic contest. Not for betting."
	    )

	    return (summary, contrib, explanation)


	# ─── UI ───────────────────────────────────────────────────────────────
	# Markdown shown at the top of the page (rendered with gr.Markdown(DESCRIPTION)).
	# This is user-facing copy: any edit here changes what visitors read.
	DESCRIPTION = """
	# 🤼 Ringside Predictor

	Live demo of the [Ringside Analytics match-winner model](https://huggingface.co/datamatters24/ringside-match-winner) trained on 482K pro wrestling matches (1980–present).

	Pick two wrestlers, set the match context, and the XGBoost model (test AUC 0.718) returns a win probability with feature attribution.

	How to read this: the prediction reflects booking patterns the model learned from historical data — not athletic ability. Pro wrestling outcomes are scripted. See the [paper](https://tedrubin80.github.io/wrastlingfirst/paper.html) for a full discussion of the kayfabe problem.

	[Dataset](https://huggingface.co/datasets/datamatters24/ringside-analytics) · [Model](https://huggingface.co/datamatters24/ringside-match-winner) · [Source code](https://github.com/tedrubin80/wrastlingfirst)
	"""

	# Page layout: description, two wrestler pickers, match context, a predict
	# button, and three output panes fed by predict(). Components are created
	# in display order.
	with gr.Blocks(title="Ringside Predictor", theme=gr.themes.Soft(primary_hue="red")) as demo:
	    gr.Markdown(DESCRIPTION)

	    # Wrestler pickers, side by side.
	    with gr.Row():
	        wrestler_a_in = gr.Dropdown(
	            label="Wrestler A",
	            choices=WRESTLER_NAMES,
	            value="John Cena",
	            filterable=True,
	        )
	        wrestler_b_in = gr.Dropdown(
	            label="Wrestler B",
	            choices=WRESTLER_NAMES,
	            value="Roman Reigns",
	            filterable=True,
	        )

	    # Match context controls.
	    with gr.Row():
	        ppv_in = gr.Checkbox(label="PPV / Premium live event", value=False)
	        title_in = gr.Checkbox(label="Title match", value=False)
	        match_type_in = gr.Dropdown(
	            label="Match type",
	            choices=MATCH_TYPES,
	            value="singles",
	        )

	    predict_btn = gr.Button("🔮 Predict", variant="primary")

	    # One output component per element of predict()'s return tuple.
	    summary_out = gr.Markdown()
	    table_out = gr.Dataframe(label="Top features")
	    caveat_out = gr.Markdown()

	    # Same component order as predict()'s positional parameters.
	    form_inputs = [wrestler_a_in, wrestler_b_in, ppv_in, title_in, match_type_in]

	    predict_btn.click(
	        predict,
	        inputs=form_inputs,
	        outputs=[summary_out, table_out, caveat_out],
	    )

	    # Preset match-ups; each row lines up with form_inputs.
	    gr.Examples(
	        examples=[
	            ["John Cena", "Roman Reigns", False, False, "singles"],
	            ["Stone Cold Steve Austin", "The Rock", True, True, "singles"],
	            ["Hulk Hogan", "Andre the Giant", True, True, "singles"],
	            ["Bret Hart", "Shawn Michaels", True, True, "cage"],
	        ],
	        inputs=form_inputs,
	    )


	# Launch the app only when this file is run as a script; importing the
	# module just builds `demo`.
	if __name__ == "__main__":
	    demo.launch()