ImpermanentLeaderboard / rank_through_time.py
geoalgo's picture
css
ff61dce
raw
history blame
8.78 kB
import matplotlib
matplotlib.use("Agg")
matplotlib.rcParams["figure.dpi"] = 150
import pathlib
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
def _add_ranks(df):
    """Return a copy of ``df`` with parsed cutoffs and a per-group ``rank``.

    ``cutoff`` is converted with ``pd.to_datetime``. ``rank`` is the rank of
    ``value`` within each (metric, subdataset, frequency, cutoff) group,
    ascending, with ties sharing the lowest rank (``method="min"``).
    The input frame is not modified.
    """
    group_keys = ["metric", "subdataset", "frequency", "cutoff"]
    # ``assign`` returns a new frame, so the caller's data stays untouched.
    ranked = df.assign(cutoff=pd.to_datetime(df["cutoff"]))
    ranked["rank"] = ranked.groupby(group_keys)["value"].rank(method="min")
    return ranked
def _style_rank_ax(ax, n_models):
    """Apply the shared styling for an axes that shows ranks.

    The y-limits run from ``n_models + 0.5`` down to ``0.5``, i.e. the axis
    is inverted so that rank 1 is drawn at the top, and a major tick is
    placed at every integer.
    """
    lowest_rank_edge = n_models + 0.5
    best_rank_edge = 0.5

    ax.set_ylabel("Rank")
    # Passing (larger, smaller) inverts the axis.
    ax.set_ylim(lowest_rank_edge, best_rank_edge)
    integer_ticks = mticker.MultipleLocator(1)
    ax.yaxis.set_major_locator(integer_ticks)

    ax.grid(True, alpha=0.3)
    ax.tick_params(axis="x", rotation=45)
def _style_value_ax(ax, metric):
    """Apply the shared styling for an axes that shows raw metric values.

    Labels the y-axis with ``metric``, rotates the x tick labels by 45
    degrees and turns on a faint grid.
    """
    ax.grid(True, alpha=0.3)
    ax.tick_params(axis="x", rotation=45)
    ax.set_ylabel(metric)
def _finish_fig(fig):
    """Add a single shared legend at the bottom and adjust layout.

    Legend entries are gathered from every axes of ``fig``; when the same
    label appears on several axes, the handle from the first axes that has
    it is used. If no axes carries a labelled artist, no legend is drawn.

    Args:
        fig: Figure whose axes have already been populated.
    """
    # Collect one handle per label across *all* axes: the first subplot may
    # not contain every series that appears in later subplots.
    entries = {}
    for ax in fig.axes:
        handles, labels = ax.get_legend_handles_labels()
        for handle, label in zip(handles, labels):
            entries.setdefault(label, handle)

    # Skip the legend entirely when there is nothing to show; this also
    # avoids passing ncol=0 to fig.legend.
    if entries:
        fig.legend(
            list(entries.values()),
            list(entries.keys()),
            loc="lower center",
            ncol=min(len(entries), 4),
            fontsize="small",
            bbox_to_anchor=(0.5, 0),
        )

    # NOTE(review): tight_layout recomputes the subplot parameters, so the
    # subplots_adjust call below probably has no lasting effect - confirm
    # before removing it.
    fig.subplots_adjust(bottom=0.18)
    # Reserve the bottom 8% of the figure for the legend.
    fig.tight_layout(rect=[0, 0.08, 1, 1])
# ── Public figure builders ───────────────────────────────────────────────────
def plot_rank_per_category(df, metric):
    """Grid of rank-over-time subplots, one per (subdataset, frequency).

    Args:
        df: Long-format results. The columns read here (directly or through
            ``_add_ranks``) are ``model``, ``metric``, ``subdataset``,
            ``frequency``, ``cutoff`` and ``value``.
        metric: Metric to plot; rows for other metrics are not drawn.

    Returns:
        The matplotlib figure, with one subplot row per distinct
        (subdataset, frequency) pair found in ``df`` and one line per model.
    """
    df = _add_ranks(df)
    models = sorted(df["model"].unique())
    n_models = len(models)
    categories = sorted(
        df[["subdataset", "frequency"]]
        .drop_duplicates()
        .itertuples(index=False, name=None)
    )

    # squeeze=False always yields a 2-D array of axes, so the single-category
    # case needs no special handling.
    fig, axes = plt.subplots(
        nrows=len(categories), ncols=1,
        figsize=(10, 4 * len(categories)),
        sharex=False, sharey=True,
        squeeze=False,
    )

    for ax, (subdataset, frequency) in zip(axes[:, 0], categories):
        sub = df[
            (df["metric"] == metric)
            & (df["subdataset"] == subdataset)
            & (df["frequency"] == frequency)
        ]
        pivot = sub.pivot_table(index="cutoff", columns="model", values="rank").sort_index()
        # Iterate the global model list so every subplot plots models in the
        # same order; models absent from this category are skipped.
        for model in models:
            if model in pivot.columns:
                ax.plot(pivot.index, pivot[model], marker="o", label=model)
        ax.set_title(f"{subdataset} / {frequency}")
        _style_rank_ax(ax, n_models)

    # The separator is a real em dash (the previous literal was mis-encoded).
    fig.suptitle(f"Rank through time — {metric.upper()}", fontsize=14)
    _finish_fig(fig)
    return fig
def plot_avg_rank(df, metric):
    """Average rank across all categories over time.

    Args:
        df: Long-format results. The columns read here (directly or through
            ``_add_ranks``) are ``model``, ``metric``, ``subdataset``,
            ``frequency``, ``cutoff`` and ``value``.
        metric: Metric whose ranks are averaged.

    Returns:
        The matplotlib figure with a single axes holding one line per model:
        the mean of that model's ranks at each cutoff.
    """
    df = _add_ranks(df)
    models = sorted(df["model"].unique())
    n_models = len(models)

    sub = df[df["metric"] == metric]
    # Mean rank per (model, cutoff) over every row of the selected metric.
    avg_rank = (
        sub.groupby(["model", "cutoff"])["rank"]
        .mean()
        .reset_index()
        .rename(columns={"rank": "avg_rank"})
    )
    pivot = avg_rank.pivot_table(index="cutoff", columns="model", values="avg_rank").sort_index()

    fig, ax = plt.subplots(figsize=(10, 5))
    for model in models:
        if model in pivot.columns:
            ax.plot(pivot.index, pivot[model], marker="o", label=model)

    # The separator is a real em dash (the previous literal was mis-encoded).
    ax.set_title(f"Average rank across all categories — {metric}", fontsize=14)
    ax.set_xlabel("Cutoff date")
    _style_rank_ax(ax, n_models)
    _finish_fig(fig)
    return fig
def plot_value_per_category(df, metric):
    """Draw the raw metric over time, one subplot per (subdataset, frequency).

    Works on a copy of ``df`` with ``cutoff`` parsed to datetimes. Each
    subplot shows one line per model for the requested ``metric``. Returns
    the matplotlib figure.
    """
    data = df.copy()
    data["cutoff"] = pd.to_datetime(data["cutoff"])
    model_names = sorted(data["model"].unique())

    pairs = data[["subdataset", "frequency"]].drop_duplicates()
    categories = sorted(pairs.itertuples(index=False, name=None))
    n_panels = len(categories)

    fig, axes = plt.subplots(
        nrows=n_panels, ncols=1,
        figsize=(10, 4 * n_panels),
        sharex=False,
    )
    # With a single row plt.subplots returns a bare Axes, not an array.
    panels = [axes] if n_panels == 1 else axes

    for ax, (subdataset, frequency) in zip(panels, categories):
        in_panel = (
            (data["metric"] == metric)
            & (data["subdataset"] == subdataset)
            & (data["frequency"] == frequency)
        )
        table = data[in_panel].pivot_table(index="cutoff", columns="model", values="value")
        table = table.sort_index()
        present = [name for name in model_names if name in table.columns]
        for name in present:
            ax.plot(table.index, table[name], marker="o", label=name)
        ax.set_title(f"{subdataset} / {frequency}")
        _style_value_ax(ax, metric)

    fig.suptitle(f"Model {metric.upper()} through time", fontsize=14)
    _finish_fig(fig)
    return fig
def plot_avg_value(df, metric):
    """Draw the mean raw metric per model over time, across all categories.

    Works on a copy of ``df`` with ``cutoff`` parsed to datetimes, keeps the
    rows for ``metric`` and averages ``value`` per (model, cutoff). Returns
    the matplotlib figure (a single axes, one line per model).
    """
    data = df.copy()
    data["cutoff"] = pd.to_datetime(data["cutoff"])
    model_names = sorted(data["model"].unique())

    metric_rows = data[data["metric"] == metric]
    means = metric_rows.groupby(["model", "cutoff"])["value"].mean()
    avg_val = means.reset_index().rename(columns={"value": "avg_value"})
    table = avg_val.pivot_table(index="cutoff", columns="model", values="avg_value")
    table = table.sort_index()

    fig, ax = plt.subplots(figsize=(10, 5))
    present = [name for name in model_names if name in table.columns]
    for name in present:
        ax.plot(table.index, table[name], marker="o", label=name)

    ax.set_title(f"Average {metric} across all categories", fontsize=14)
    ax.set_xlabel("Cutoff date")
    _style_value_ax(ax, metric)
    _finish_fig(fig)
    return fig
def plot_rank_for_subdataset(df, metric, subdataset):
    """Rank over time for a single subdataset (all frequencies as subplots).

    Args:
        df: Long-format results. The columns read here (directly or through
            ``_add_ranks``) are ``model``, ``metric``, ``subdataset``,
            ``frequency``, ``cutoff`` and ``value``.
        metric: Metric to plot; rows for other metrics are not drawn.
        subdataset: Value of the ``subdataset`` column to restrict to.

    Returns:
        The matplotlib figure, with one subplot row per frequency present
        for ``subdataset`` and one line per model.

    Raises:
        ValueError: If ``df`` contains no rows for ``subdataset``.
    """
    df = _add_ranks(df)
    models = sorted(df["model"].unique())
    n_models = len(models)
    frequencies = sorted(
        df[df["subdataset"] == subdataset]["frequency"].unique()
    )
    # Fail early with a clear message: plt.subplots(nrows=0) would otherwise
    # raise an opaque ValueError after already creating a figure.
    if not frequencies:
        raise ValueError(f"No rows found for subdataset {subdataset!r}")

    fig, axes = plt.subplots(
        nrows=len(frequencies), ncols=1,
        figsize=(10, 4 * len(frequencies)),
        sharex=False, sharey=True,
        squeeze=False,
    )

    for ax_row, frequency in zip(axes, frequencies):
        ax = ax_row[0]
        sub = df[
            (df["metric"] == metric)
            & (df["subdataset"] == subdataset)
            & (df["frequency"] == frequency)
        ]
        pivot = sub.pivot_table(index="cutoff", columns="model", values="rank").sort_index()
        for model in models:
            if model in pivot.columns:
                ax.plot(pivot.index, pivot[model], marker="o", label=model)
        ax.set_title(f"{subdataset} / {frequency}")
        _style_rank_ax(ax, n_models)

    # The separator is a real em dash (the previous literal was mis-encoded).
    fig.suptitle(f"Rank through time — {metric.upper()}", fontsize=14)
    _finish_fig(fig)
    return fig
def plot_value_for_subdataset(df, metric, subdataset):
    """Raw metric over time for a single subdataset (all frequencies as subplots).

    Args:
        df: Long-format results. The columns read here are ``model``,
            ``metric``, ``subdataset``, ``frequency``, ``cutoff`` and
            ``value``. The frame is copied, not modified.
        metric: Metric to plot; rows for other metrics are not drawn.
        subdataset: Value of the ``subdataset`` column to restrict to.

    Returns:
        The matplotlib figure, with one subplot row per frequency present
        for ``subdataset`` and one line per model.

    Raises:
        ValueError: If ``df`` contains no rows for ``subdataset``.
    """
    df = df.copy()
    df["cutoff"] = pd.to_datetime(df["cutoff"])
    models = sorted(df["model"].unique())
    frequencies = sorted(
        df[df["subdataset"] == subdataset]["frequency"].unique()
    )
    # Fail early with a clear message: plt.subplots(nrows=0) would otherwise
    # raise an opaque ValueError after already creating a figure.
    if not frequencies:
        raise ValueError(f"No rows found for subdataset {subdataset!r}")

    fig, axes = plt.subplots(
        nrows=len(frequencies), ncols=1,
        figsize=(10, 4 * len(frequencies)),
        sharex=False,
        squeeze=False,
    )

    for ax_row, frequency in zip(axes, frequencies):
        ax = ax_row[0]
        sub = df[
            (df["metric"] == metric)
            & (df["subdataset"] == subdataset)
            & (df["frequency"] == frequency)
        ]
        pivot = sub.pivot_table(index="cutoff", columns="model", values="value").sort_index()
        for model in models:
            if model in pivot.columns:
                ax.plot(pivot.index, pivot[model], marker="o", label=model)
        ax.set_title(f"{subdataset} / {frequency}")
        _style_value_ax(ax, metric)

    fig.suptitle(f"Model {metric.upper()} through time", fontsize=14)
    _finish_fig(fig)
    return fig
# ── CLI: save all figures to disk ────────────────────────────────────────────
if __name__ == "__main__":
    # Script entry point: render the four overview figures for every metric
    # in the results CSV and write them as PNG files under OUT.
    BUILDERS = (
        (plot_rank_per_category, "rank_per_category"),
        (plot_value_per_category, "value_per_category"),
        (plot_avg_rank, "avg_rank"),
        (plot_avg_value, "avg_value"),
    )

    OUT = pathlib.Path("figures/rank_through_time")
    OUT.mkdir(parents=True, exist_ok=True)

    results = pd.read_csv("mock_evaluation_results.csv")
    # Rows for the "zero_model" entry are excluded from every figure.
    results = results[results["model"] != "zero_model"]

    for metric in sorted(results["metric"].unique()):
        for build, prefix in BUILDERS:
            figure = build(results, metric)
            target = OUT / f"{prefix}_{metric}.png"
            figure.savefig(target, dpi=150, bbox_inches="tight")
            # Close each figure once saved so they do not pile up in pyplot.
            plt.close(figure)
            print(f"Saved {target}")

    print("Done.")