Spaces:

Neuralk-AI
/

tabbench

Running

File size: 5,090 Bytes

import math
from collections import defaultdict
from typing import Dict, Iterable
import pandas as pd

import pandas as pd
from itertools import combinations
import scipy


def compute_pct_improvement_over_baseline(
    df: pd.DataFrame,
    baseline_model: str = "xgboost_ensemble",
    metric: str = "Accuracy"
) -> pd.DataFrame:
    """
    Add each row's relative gain (in percent) over the baseline model.

    The gain is evaluated per dataset as
    ``((model_metric - baseline_metric) / baseline_metric) * 100``.

    Args:
        df: DataFrame with columns 'model', 'dataset_name', and the metric column
        baseline_model: Value of the 'model' column whose scores act as the
            reference (default: "xgboost_ensemble")
        metric: Name of the score column to compare (default: "Accuracy")

    Returns:
        A copy of ``df`` with an extra '%↗ over XGBoost' column. The column
        name is fixed regardless of ``baseline_model``. Rows whose dataset has
        no baseline score, or a baseline score of 0, get a missing value.
        An empty ``df`` is returned as-is, without the extra column.
    """
    if df.empty:
        return df

    result = df.copy()

    # dataset_name -> baseline score lookup table
    baseline_rows = result[result["model"] == baseline_model]
    reference = baseline_rows.set_index("dataset_name")[metric].to_dict()

    def _relative_gain(record):
        ref = reference.get(record["dataset_name"])
        # A usable reference must exist and be non-zero (it is the divisor).
        if ref is not None and ref != 0:
            return ((record[metric] - ref) / ref) * 100
        return None

    result["%↗ over XGBoost"] = result.apply(_relative_gain, axis=1)

    return result


def scores_to_battles(df: pd.DataFrame, metric: str = "Accuracy") -> list:
    """
    Turn per-dataset scores into pairwise "battle" records.

    Within every dataset, each unordered pair of rows yields one battle. Rows
    are ranked by ``metric`` in descending order first, so ``model_a`` is the
    higher-ranked (or equally ranked) model of the pair.

    Args:
        df: DataFrame with columns 'model', 'dataset_name', and the metric column
        metric: Name of the score column used to decide each battle
            (default: "Accuracy")

    Returns:
        A list of dicts with keys 'model_a', 'model_b', 'winner' and 'dataset'.
        'winner' is "model_a", "model_b" or "tie". Rows whose metric is
        missing (NaN/None) take part in no battle.
    """
    battles = []

    for dataset, group in df.groupby("dataset_name"):
        # A missing score cannot decide a battle. NaN compares False against
        # every value, so keeping such rows would make them fall through to
        # the "model_b wins" branch and beat every validly scored model.
        scored = group.dropna(subset=[metric])

        # Sort classifiers in descending order of metric
        ranked = scored.sort_values(by=metric, ascending=False)
        contestants = list(zip(ranked["model"], ranked[metric]))

        for (name_a, score_a), (name_b, score_b) in combinations(contestants, 2):
            if score_a == score_b:
                winner = "tie"
            elif score_a > score_b:
                winner = "model_a"
            else:
                winner = "model_b"
            battles.append({
                "model_a": name_a,
                "model_b": name_b,
                "winner": winner,
                "dataset": dataset,
            })

    return battles


def _sigmoid(x: float, eps: float = 1e-7) -> float:
    """Stable sigmoid with clipped output."""
    val = 0.5 * (1 + math.tanh(0.5 * x))
    return max(eps, min(1.0 - eps, val))


def compute_bt_elo(
    battles: Iterable[Dict[str, str]],
    SCALE: float = 400.0,
    BASE: float = 10.0,
    INIT_RATING: float = 1000.0,
    lr: float = 0.05,
    n_iter: int = 1000,
    use_scipy: bool = True,
) -> Dict[str, float]:
    """Fit a Bradley--Terry model and express it as Elo-style ratings.

    ``BASE`` controls the link function scale.  If ``BASE=10`` (the default),
    the win probability follows the usual Elo form

    ``P(win) = 1 / (1 + BASE ** ((rating_b - rating_a) / SCALE))``.

    The function will use :mod:`scipy.optimize` if available for a fast
    optimisation of the negative log-likelihood.  If SciPy cannot be imported
    (or ``use_scipy`` is false), it falls back to a simple full-batch
    gradient-descent routine.

    Args:
        battles: Iterable of records with keys 'model_a', 'model_b' and
            'winner'. A winner of "model_a" counts as a win for model_a, any
            label starting with "tie" counts as half a win, and everything
            else counts as a win for model_b. One-shot iterators (e.g.
            generators) are supported.
        SCALE: Multiplier applied to the fitted strengths to obtain ratings.
        BASE: Base of the exponential in the win-probability formula.
        INIT_RATING: Offset added to every scaled strength.
        lr: Step size of the gradient-descent fallback.
        n_iter: Number of gradient-descent iterations of the fallback.
        use_scipy: Whether to try the SciPy BFGS optimiser first.

    Returns:
        Mapping from model name to rating, ordered from the highest to the
        lowest rating. Empty when ``battles`` is empty.
    """
    # Materialise before anything else: ``battles`` may be a one-shot
    # iterator, and it has to be traversed several times below.
    battles_list = list(battles)
    if not battles_list:
        # No evidence, no models; also avoids a division by zero further down.
        return {}

    models = sorted(
        {b["model_a"] for b in battles_list} | {b["model_b"] for b in battles_list}
    )
    idx = {m: k for k, m in enumerate(models)}
    log_base = math.log(BASE)

    # Decode every battle once into (index of model_a, index of model_b,
    # outcome for model_a) so neither optimiser repeats this work per step.
    encoded = []
    for row in battles_list:
        winner = row["winner"]
        if str(winner).startswith("tie"):
            outcome = 0.5
        elif winner == "model_a":
            outcome = 1.0
        else:
            outcome = 0.0
        encoded.append((idx[row["model_a"]], idx[row["model_b"]], outcome))

    if use_scipy:
        try:
            from scipy.optimize import minimize
        except Exception:  # pragma: no cover - SciPy not available
            # Deliberately broad: any import failure selects the fallback.
            use_scipy = False

    if use_scipy:

        def nll(theta_vec) -> float:
            """Negative log-likelihood of all battles for the given strengths."""
            loss = 0.0
            for i, j, y in encoded:
                p = _sigmoid(log_base * (theta_vec[i] - theta_vec[j]))
                # Binary cross entropy with y in [0, 1]
                loss -= y * math.log(p) + (1 - y) * math.log(1 - p)
            return loss

        res = minimize(nll, [0.0] * len(models), method="BFGS")
        # The likelihood only depends on strength differences, so pin the
        # solution by centring it on zero.
        theta_opt = res.x - sum(res.x) / len(res.x)
        theta = {m: theta_opt[idx[m]] for m in models}
    else:
        n_battles = len(battles_list)
        strengths = [0.0] * len(models)
        for _ in range(n_iter):
            grad = [0.0] * len(models)
            for i, j, y in encoded:
                p = _sigmoid(log_base * (strengths[i] - strengths[j]))
                diff = (p - y) * log_base
                # Equal and opposite contributions for the two contestants.
                grad[i] += diff
                grad[j] -= diff
            strengths = [
                t - lr * g / n_battles for t, g in zip(strengths, grad)
            ]
        theta = dict(zip(models, strengths))

    return {m: SCALE * theta[m] + INIT_RATING for m in sorted(models, key=lambda x: -theta[x])}