|
|
import pandas as pd |
|
|
|
|
|
# Marker prepended to the model name at leaderboard positions 0, 1 and 2
# (gold, silver and bronze medal emoji). Written as Unicode escapes so the
# literals survive re-encoding of the source file.
MEDALS = {0: "\U0001F947", 1: "\U0001F948", 2: "\U0001F949"}
|
|
|
|
|
|
|
|
def compute_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
    """Compute the average rank and the median value per model for each metric.

    Ranking procedure:
      1. Rank models within each (metric, subdataset, frequency, cutoff) group
         by ``value`` in ascending order; tied values share the lowest rank.
      2. Average ranks across cutoff dates for each
         (metric, subdataset, frequency, model).
      3. Average across all (subdataset, frequency) combos for each
         (metric, model).

    As a side effect, one table per metric is printed to stdout showing the
    step-2 ranks, with (subdataset, frequency) as rows and models as columns.

    Args:
        df: Long-format results providing ``metric``, ``subdataset``,
            ``frequency``, ``cutoff``, ``model`` and ``value``. The input is
            not modified.

    Returns:
        A dataframe with one row per model, ordered by the mean of the model's
        per-metric ranks (smallest first; ties keep alphabetical model order).
        The first three rows have the matching ``MEDALS`` marker prepended to
        the model name. Columns are ``model``, then the ``avg <METRIC>``
        columns, then the ``rank <METRIC>`` columns, each group sorted
        alphabetically and rounded to 3 decimals. Note that ``avg <METRIC>``
        holds the *median* of the raw values for that (metric, model).

    Raises:
        KeyError: Raised by pandas when one of the grouping keys or ``value``
            is missing from ``df``.
    """
    # Step 1: rank within each evaluation cell, on a copy so the caller's
    # frame does not gain a "rank" column.
    ranked = df.copy()
    ranked["rank"] = ranked.groupby(
        ["metric", "subdataset", "frequency", "cutoff"]
    )["value"].rank(method="min")

    # Step 2: average the ranks over cutoff dates.
    per_subdataset = (
        ranked.groupby(["metric", "subdataset", "frequency", "model"])["rank"]
        .mean()
        .reset_index()
    )

    # Diagnostic output: per-metric breakdown of the step-2 ranks.
    separator = "=" * 60
    for metric in sorted(per_subdataset["metric"].unique()):
        print(f"\n{separator}")
        print(f"Metric: {metric}")
        print(separator)
        sub = per_subdataset[per_subdataset["metric"] == metric]
        pivot = sub.pivot_table(
            index=["subdataset", "frequency"], columns="model", values="rank"
        )
        print(pivot.to_string())

    # Step 3: average over every (subdataset, frequency) combination.
    overall = per_subdataset.groupby(["metric", "model"])["rank"].mean().reset_index()

    leaderboard = overall.pivot(index="model", columns="metric", values="rank")
    leaderboard = leaderboard.rename(
        # str() keeps this working when metric labels are not strings.
        columns={m: f"rank {str(m).upper()}" for m in leaderboard.columns}
    )

    # Median of the raw metric values per model; the column keeps the
    # historical "avg " prefix that callers may rely on.
    avg_values = (
        df.groupby(["metric", "model"])["value"]
        .median()
        .reset_index()
        .pivot(index="model", columns="metric", values="value")
    )
    avg_values = avg_values.rename(
        columns={m: f"avg {str(m).upper()}" for m in avg_values.columns}
    )
    leaderboard = leaderboard.join(avg_values)

    # Order models by the mean of their per-metric ranks. A stable sort makes
    # the order of tied models (and therefore the medals) deterministic.
    rank_cols = [c for c in leaderboard.columns if c.startswith("rank ")]
    leaderboard["avg_rank"] = leaderboard[rank_cols].mean(axis=1)
    leaderboard = leaderboard.sort_values("avg_rank", kind="mergesort")
    # reset_index() turns "model" into a column and leaves a fresh 0..n-1
    # index, so the enumerate() below matches the final row positions.
    leaderboard = leaderboard.drop(columns="avg_rank").reset_index()

    for col in leaderboard.columns:
        if col.startswith(("rank ", "avg ")):
            leaderboard[col] = leaderboard[col].round(3)

    # Prefix the top three rows with their medal; other rows get no prefix.
    leaderboard["model"] = [
        f"{MEDALS.get(i, '')} {m}".strip() for i, m in enumerate(leaderboard["model"])
    ]

    avg_cols = sorted(c for c in leaderboard.columns if c.startswith("avg "))
    rank_cols = sorted(c for c in leaderboard.columns if c.startswith("rank "))
    leaderboard = leaderboard[["model"] + avg_cols + rank_cols]

    return leaderboard
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: load the results, build the leaderboard and print
    # it under a banner, without the dataframe index.
    from data import load_data

    banner = "=" * 60

    results = load_data()
    table = compute_leaderboard(results)

    print(f"\n{banner}")
    print("LEADERBOARD")
    print(banner)
    print(table.to_string(index=False))
|
|
|