Spaces:
Running
Running
| import math | |
| from collections import defaultdict | |
| from typing import Dict, Iterable | |
| import pandas as pd | |
| import pandas as pd | |
| from itertools import combinations | |
| import scipy | |
def compute_pct_improvement_over_baseline(
    df: pd.DataFrame,
    baseline_model: str = "xgboost_ensemble",
    metric: str = "Accuracy",
) -> pd.DataFrame:
    """Compute the percentage improvement of each model over a baseline model.

    For every row the value is
    ``((model_metric - baseline_metric) / baseline_metric) * 100``, where
    ``baseline_metric`` is the score of ``baseline_model`` on the same dataset.

    Args:
        df: DataFrame with columns 'model', 'dataset_name', and the metric column.
        baseline_model: The model to use as baseline (default: "xgboost_ensemble").
        metric: The metric to compute improvement on (default: "Accuracy").

    Returns:
        A copy of ``df`` with a new ``'%↗ over XGBoost'`` column. The value is
        NaN for rows whose dataset has no baseline score or a baseline score
        of 0. An empty ``df`` is returned as-is, without the new column.
    """
    if df.empty:
        return df

    df = df.copy()

    # Baseline score per dataset. If the baseline appears more than once for
    # a dataset, the last occurrence wins (dict semantics of ``to_dict``).
    baseline_scores = (
        df[df["model"] == baseline_model]
        .set_index("dataset_name")[metric]
        .to_dict()
    )

    # Look up each row's baseline in one pass; datasets without a baseline
    # map to NaN. Casting to object first keeps this a plain key lookup even
    # when 'dataset_name' is stored as a categorical column.
    baseline = df["dataset_name"].astype(object).map(baseline_scores)

    # A zero baseline makes the ratio undefined: blank it out so the result
    # is NaN rather than +/-inf.
    baseline = baseline.where(baseline != 0)

    df["%↗ over XGBoost"] = ((df[metric] - baseline) / baseline) * 100
    return df
def scores_to_battles(df: pd.DataFrame, metric: str = "Accuracy") -> list:
    """Turn per-dataset scores into pairwise "battles" between models.

    Within each dataset every unordered pair of rows produces one battle.
    Rows are ranked by ``metric`` in descending order first, so ``model_a`` is
    always the higher-ranked (or equal) row of the pair.

    Args:
        df: DataFrame with columns 'model', 'dataset_name', and the metric column.
        metric: Column holding the score to compare (default: "Accuracy").

    Returns:
        A list of dicts with keys ``'model_a'``, ``'model_b'``, ``'winner'``
        (``'model_a'``, ``'model_b'`` or ``'tie'``) and ``'dataset'``.
        Rows whose score is NaN take part in no battle.
    """
    battles = []
    for dataset, group in df.groupby("dataset_name"):
        # A missing score cannot be compared. NaN fails both ``==`` and ``>``,
        # so leaving it in would record the model with no score as the winner.
        ranked = group.dropna(subset=[metric]).sort_values(by=metric, ascending=False)
        entries = zip(ranked["model"].tolist(), ranked[metric].tolist())

        for (name_a, score_a), (name_b, score_b) in combinations(entries, 2):
            if score_a == score_b:
                winner = "tie"
            elif score_a > score_b:
                winner = "model_a"
            else:
                # Not expected after the descending sort; kept as a guard.
                winner = "model_b"
            battles.append({
                "model_a": name_a,
                "model_b": name_b,
                "winner": winner,
                "dataset": dataset,
            })
    return battles
def _sigmoid(x: float, eps: float = 1e-7) -> float:
    """Return the logistic sigmoid of ``x``, clipped to ``[eps, 1 - eps]``.

    Computed as ``0.5 * (1 + tanh(x / 2))``. The clipping keeps both
    ``log(p)`` and ``log(1 - p)`` finite for callers.
    """
    val = 0.5 * (1 + math.tanh(0.5 * x))
    return max(eps, min(1.0 - eps, val))


def compute_bt_elo(
    battles: Iterable[Dict[str, str]],
    SCALE: float = 400.0,
    BASE: float = 10.0,
    INIT_RATING: float = 1000.0,
    lr: float = 0.05,
    n_iter: int = 1000,
    use_scipy: bool = True,
) -> Dict[str, float]:
    """Fit a Bradley--Terry model and return Elo-style ratings.

    ``BASE`` controls the link function scale. If ``BASE=10`` (the default),
    the win probability follows the usual Elo form
    ``P(win) = 1 / (1 + BASE ** ((rating_b - rating_a) / SCALE))``.

    The function uses :mod:`scipy.optimize` (BFGS on the negative
    log-likelihood) when ``use_scipy`` is true and SciPy can be imported.
    Otherwise it falls back to plain gradient descent.

    Args:
        battles: Iterable of dicts with keys ``'model_a'``, ``'model_b'`` and
            ``'winner'``. A winner of ``'model_a'`` counts as a win for
            ``model_a``, any value starting with ``'tie'`` counts as half a
            win, and anything else counts as a win for ``model_b``. One-shot
            iterators such as generators are accepted.
        SCALE: Multiplier applied to the fitted strengths.
        BASE: Base of the exponent in the win-probability formula.
        INIT_RATING: Offset added to every rating; the fitted strengths are
            centred on zero, so this is the mean rating.
        lr: Step size for the gradient-descent fallback.
        n_iter: Number of gradient-descent iterations in the fallback.
        use_scipy: Try the SciPy optimiser first.

    Returns:
        Mapping ``model -> rating`` ordered from highest to lowest rating.
        Empty when ``battles`` is empty.
    """
    # Materialise before anything else: ``battles`` may be a one-shot
    # iterator, and it is traversed several times below.
    battles_list = list(battles)
    if not battles_list:
        return {}

    models = sorted(
        {b["model_a"] for b in battles_list} | {b["model_b"] for b in battles_list}
    )
    log_base = math.log(BASE)

    def _outcome(winner) -> float:
        """Score of model_a in a battle: 1 for a win, 0.5 for a tie, else 0."""
        if str(winner).startswith("tie"):
            return 0.5
        return 1.0 if winner == "model_a" else 0.0

    # Decode every battle once instead of on each optimiser step.
    games = [
        (row["model_a"], row["model_b"], _outcome(row["winner"]))
        for row in battles_list
    ]

    if use_scipy:
        try:
            from scipy.optimize import minimize
        except Exception:  # pragma: no cover - SciPy not available
            # Deliberately broad: any import-time failure of SciPy should
            # fall back to gradient descent rather than abort.
            use_scipy = False

    if use_scipy:
        idx = {m: k for k, m in enumerate(models)}
        indexed_games = [(idx[a], idx[b], y) for a, b, y in games]

        def nll(theta_vec) -> float:
            """Negative log-likelihood of the battles under ``theta_vec``."""
            loss = 0.0
            for i, j, y in indexed_games:
                p = _sigmoid(log_base * (theta_vec[i] - theta_vec[j]))
                # Binary cross entropy with y in [0, 1]
                loss -= y * math.log(p) + (1 - y) * math.log(1 - p)
            return loss

        res = minimize(nll, [0.0] * len(models), method="BFGS")
        # The likelihood only depends on differences, so pin the mean to 0.
        theta_opt = res.x - sum(res.x) / len(res.x)
        theta = {m: float(theta_opt[idx[m]]) for m in models}
    else:
        theta = {m: 0.0 for m in models}
        n_games = len(games)
        for _ in range(n_iter):
            grad = {m: 0.0 for m in models}
            for a, b, y in games:
                p = _sigmoid(log_base * (theta[a] - theta[b]))
                diff = (p - y) * log_base
                grad[a] += diff
                grad[b] -= diff
            for m in models:
                theta[m] -= lr * grad[m] / n_games

    ranked = sorted(models, key=lambda m: -theta[m])
    return {m: SCALE * theta[m] + INIT_RATING for m in ranked}