tabbench / utils.py
alexandreabraham's picture
%↗ over XGBoost: baseline is xgboost_ensemble (the canonical "XGBoost")
52d607e
Raw
History Blame Contribute Delete
5.09 kB
import math
from collections import defaultdict
from typing import Dict, Iterable
import pandas as pd
import pandas as pd
from itertools import combinations
import scipy
def compute_pct_improvement_over_baseline(
    df: pd.DataFrame,
    baseline_model: str = "xgboost_ensemble",
    metric: str = "Accuracy",
) -> pd.DataFrame:
    """Compute the percentage improvement of each model over a baseline model.

    For every row the value is
    ``((model_metric - baseline_metric) / baseline_metric) * 100``, where
    ``baseline_metric`` is the score of ``baseline_model`` on the same dataset.

    Args:
        df: DataFrame with columns 'model', 'dataset_name', and the metric column.
        baseline_model: The model to use as baseline (default: "xgboost_ensemble").
        metric: The metric to compute improvement on (default: "Accuracy").

    Returns:
        A copy of ``df`` with a new '%↗ over XGBoost' column. The column name
        is fixed and does not change with ``baseline_model``. Rows whose
        dataset has no baseline score, or whose baseline score is 0, get a
        missing value. The input frame is never modified, including when it
        is empty.
    """
    result_col = "%↗ over XGBoost"
    df = df.copy()

    if df.empty:
        # Row-wise ``apply`` on an empty frame does not yield a Series, so the
        # column is added directly; this keeps the output schema identical to
        # the non-empty case.
        df[result_col] = pd.Series(index=df.index, dtype="float64")
        return df

    # Baseline score per dataset. If the baseline appears several times for
    # one dataset, the last occurrence wins (dict semantics).
    baseline_scores = (
        df.loc[df["model"] == baseline_model]
        .set_index("dataset_name")[metric]
        .to_dict()
    )

    def calc_pct_improvement(row):
        baseline = baseline_scores.get(row["dataset_name"])
        # No baseline for this dataset, or a zero baseline that would make
        # the ratio undefined: report a missing value instead.
        if baseline is None or baseline == 0:
            return None
        return ((row[metric] - baseline) / baseline) * 100

    df[result_col] = df.apply(calc_pct_improvement, axis=1)
    return df
def scores_to_battles(
    df: pd.DataFrame, metric: str = "Accuracy"
) -> "list[dict[str, str]]":
    """Turn per-dataset scores into pairwise "battles" between models.

    Within each ``dataset_name`` group, every unordered pair of rows produces
    one battle. Rows are ordered by descending ``metric`` first, so the model
    listed as ``model_a`` never has the lower score.

    Rows whose ``metric`` is missing (NaN/None) are left out of the pairing:
    a missing score compares False against everything, so it cannot be ranked
    against another model.

    Args:
        df: DataFrame with columns 'model', 'dataset_name', and the metric column.
        metric: Name of the score column; higher is treated as better.

    Returns:
        A list of dicts with keys 'model_a', 'model_b', 'winner' and
        'dataset'. 'winner' is "tie" when both scores are equal and
        "model_a" otherwise; 'dataset' is the group key.
    """
    battles = []
    for dataset, group in df.groupby("dataset_name"):
        # Drop unscored rows, then rank best-first.
        ranked = group.dropna(subset=[metric]).sort_values(by=metric, ascending=False)
        contenders = list(zip(ranked["model"], ranked[metric]))
        for (model_a, score_a), (model_b, score_b) in combinations(contenders, 2):
            battles.append({
                "model_a": model_a,
                "model_b": model_b,
                # Descending order guarantees score_a >= score_b, so the only
                # possible outcomes are a tie or a win for model_a.
                "winner": "tie" if score_a == score_b else "model_a",
                "dataset": dataset,
            })
    return battles
def _sigmoid(x: float, eps: float = 1e-7) -> float:
"""Stable sigmoid with clipped output."""
val = 0.5 * (1 + math.tanh(0.5 * x))
return max(eps, min(1.0 - eps, val))
def compute_bt_elo(
    battles: Iterable[Dict[str, str]],
    SCALE: float = 400.0,
    BASE: float = 10.0,
    INIT_RATING: float = 1000.0,
    lr: float = 0.05,
    n_iter: int = 1000,
    use_scipy: bool = True,
) -> Dict[str, float]:
    """Fit a Bradley--Terry model and return Elo-style ratings.

    ``BASE`` controls the link function scale. If ``BASE=10`` (the default),
    the win probability follows the usual Elo form
    ``P(win) = 1 / (1 + BASE ** ((rating_b - rating_a) / SCALE))``.

    With ``use_scipy=True`` the negative log-likelihood is minimised with
    :func:`scipy.optimize.minimize` (BFGS). If SciPy cannot be imported, or
    ``use_scipy=False``, a plain gradient-descent loop is used instead.

    Args:
        battles: Iterable of dicts with keys 'model_a', 'model_b' and
            'winner'. A 'winner' of "model_a" counts as a win for model_a,
            any value starting with "tie" counts as half a win, and anything
            else counts as a win for model_b. The iterable is consumed once,
            so generators are accepted.
        SCALE: Multiplier applied to the fitted strengths.
        BASE: Base of the exponential in the win-probability formula.
        INIT_RATING: Offset added to every scaled strength.
        lr: Step size of the gradient-descent fallback (unused with SciPy).
        n_iter: Number of gradient-descent iterations (unused with SciPy).
        use_scipy: Whether to try the SciPy optimiser first.

    Returns:
        Mapping ``model -> SCALE * strength + INIT_RATING``, with entries
        ordered from highest to lowest rating. Empty when ``battles`` is
        empty.
    """
    # Materialise before anything else: ``battles`` may be a one-shot
    # iterator, and it is scanned several times below.
    battles_list = list(battles)
    if not battles_list:
        return {}

    models = sorted(
        {b["model_a"] for b in battles_list} | {b["model_b"] for b in battles_list}
    )
    idx = {m: k for k, m in enumerate(models)}
    log_base = math.log(BASE)

    # Encode every battle once as (index_a, index_b, outcome for model_a).
    encoded = []
    for row in battles_list:
        winner = row["winner"]
        if str(winner).startswith("tie"):
            outcome = 0.5
        elif winner == "model_a":
            outcome = 1.0
        else:
            outcome = 0.0
        encoded.append((idx[row["model_a"]], idx[row["model_b"]], outcome))

    if use_scipy:
        try:
            from scipy.optimize import minimize
        except ImportError:  # pragma: no cover - SciPy not available
            use_scipy = False

    if use_scipy:

        def nll(theta_vec) -> float:
            # Binary cross entropy with targets in {0, 0.5, 1}.
            loss = 0.0
            for i, j, y in encoded:
                p = _sigmoid(log_base * (theta_vec[i] - theta_vec[j]))
                loss -= y * math.log(p) + (1 - y) * math.log(1 - p)
            return loss

        res = minimize(nll, [0.0] * len(models), method="BFGS")
        # The likelihood only depends on strength differences, so centre the
        # solution on zero to pin down the otherwise arbitrary offset.
        centered = res.x - sum(res.x) / len(res.x)
        theta = {m: float(centered[idx[m]]) for m in models}
    else:
        strengths = [0.0] * len(models)
        n_battles = len(battles_list)
        for _ in range(n_iter):
            grad = [0.0] * len(models)
            for i, j, y in encoded:
                p = _sigmoid(log_base * (strengths[i] - strengths[j]))
                diff = (p - y) * log_base
                grad[i] += diff
                grad[j] -= diff
            strengths = [t - lr * g / n_battles for t, g in zip(strengths, grad)]
        theta = dict(zip(models, strengths))

    ranked = sorted(models, key=lambda name: -theta[name])
    return {m: SCALE * theta[m] + INIT_RATING for m in ranked}