"""Figures of model rank and raw metric value through time.

The input frame is expected to carry the columns ``model``, ``metric``,
``subdataset``, ``frequency``, ``cutoff`` and ``value``; these are the only
columns the functions below read.
"""

import pathlib

import matplotlib

# The backend and DPI are set before pyplot is imported.
matplotlib.use("Agg")
matplotlib.rcParams["figure.dpi"] = 150

import pandas as pd  # noqa: E402
import matplotlib.pyplot as plt  # noqa: E402
import matplotlib.ticker as mticker  # noqa: E402


# ── Data helpers ─────────────────────────────────────────────────────────────


def _with_datetime_cutoff(df):
    """Return a copy of *df* whose ``cutoff`` column is parsed to datetimes."""
    df = df.copy()
    df["cutoff"] = pd.to_datetime(df["cutoff"])
    return df


def _add_ranks(df):
    """Return a copy of *df* with a ``rank`` column added.

    Ranks are computed on ``value`` within each
    (metric, subdataset, frequency, cutoff) group, ascending, with ties
    sharing the lowest rank (``method="min"``). The smallest value therefore
    gets rank 1.
    NOTE(review): this presumes lower metric values are better - confirm for
    every metric that is plotted.
    """
    df = _with_datetime_cutoff(df)
    group_keys = ["metric", "subdataset", "frequency", "cutoff"]
    df["rank"] = df.groupby(group_keys)["value"].rank(method="min")
    return df


def _sorted_models(df):
    """Return the sorted list of distinct values in the ``model`` column."""
    return sorted(df["model"].unique())


def _categories(df):
    """Return the sorted distinct (subdataset, frequency) pairs in *df*."""
    pairs = df[["subdataset", "frequency"]].drop_duplicates()
    return sorted(pairs.itertuples(index=False, name=None))


# ── Axes / figure helpers ────────────────────────────────────────────────────


def _style_rank_ax(ax, n_models):
    """Style *ax* as a rank axis with one integer tick per rank."""
    ax.set_ylabel("Rank")
    # The limits are passed high-to-low so that rank 1 is drawn at the top.
    ax.set_ylim(n_models + 0.5, 0.5)
    ax.yaxis.set_major_locator(mticker.MultipleLocator(1))
    ax.tick_params(axis="x", rotation=45)
    ax.grid(True, alpha=0.3)


def _style_value_ax(ax, metric):
    """Style *ax* as a raw-value axis labelled with *metric*."""
    ax.set_ylabel(metric)
    ax.tick_params(axis="x", rotation=45)
    ax.grid(True, alpha=0.3)


def _finish_fig(fig, label_order=None):
    """Add a single shared legend at the bottom and adjust layout.

    Legend entries are merged across every axes of *fig* (first handle seen
    for each label wins), so a model missing from the first subplot is still
    listed. If *label_order* is given, entries are ordered by their position
    in it; labels not listed there keep first-appearance order at the end.
    No legend is drawn when nothing was plotted.
    """
    merged = {}
    for ax in fig.axes:
        handles, labels = ax.get_legend_handles_labels()
        for handle, label in zip(handles, labels):
            merged.setdefault(label, handle)

    labels = list(merged)
    if label_order is not None:
        # Artist labels are stored as strings, so compare against str(model).
        position = {str(lbl): i for i, lbl in enumerate(label_order)}
        labels.sort(key=lambda lbl: position.get(lbl, len(position)))

    if labels:
        fig.legend(
            [merged[lbl] for lbl in labels],
            labels,
            loc="lower center",
            ncol=min(len(labels), 4),
            fontsize="small",
            bbox_to_anchor=(0.5, 0),
        )
    # Reserve the bottom 8% of the figure for the legend.
    fig.tight_layout(rect=[0, 0.08, 1, 1])


def _plot_models(ax, data, value_col, models):
    """Plot one line per model of the mean *value_col* against ``cutoff``.

    The colour of a model is fixed by its index in *models* (``"C<i>"``), so
    a model keeps the same colour in every subplot even when some models are
    absent from *data*.
    """
    if data.empty:
        return
    pivot = data.pivot_table(
        index="cutoff", columns="model", values=value_col, aggfunc="mean"
    ).sort_index()
    for idx, model in enumerate(models):
        if model in pivot.columns:
            ax.plot(
                pivot.index,
                pivot[model],
                marker="o",
                label=model,
                color=f"C{idx}",
            )


def _plot_grid(data, categories, value_col, models, style_ax, suptitle, sharey):
    """Build a one-column grid with one subplot per (subdataset, frequency).

    *data* must already be restricted to the metric being plotted.
    *style_ax* is called with each axes after its lines are drawn.

    Raises:
        ValueError: if *categories* is empty (there is nothing to plot).
    """
    if not categories:
        raise ValueError(
            "Nothing to plot: no rows match the requested metric/subdataset."
        )
    fig, axes = plt.subplots(
        nrows=len(categories),
        ncols=1,
        figsize=(10, 4 * len(categories)),
        sharex=False,
        sharey=sharey,
        squeeze=False,  # always a 2-D array, even for a single row
    )
    for ax, (subdataset, frequency) in zip(axes[:, 0], categories):
        in_category = (data["subdataset"] == subdataset) & (
            data["frequency"] == frequency
        )
        _plot_models(ax, data[in_category], value_col, models)
        ax.set_title(f"{subdataset} / {frequency}")
        style_ax(ax)
    fig.suptitle(suptitle, fontsize=14)
    _finish_fig(fig, label_order=models)
    return fig


def _plot_average(data, value_col, models, title, style_ax):
    """Build a single-axes figure of mean *value_col* per model and cutoff.

    *data* must already be restricted to the metric being plotted; the mean
    is taken over every row sharing a (model, cutoff) pair.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    _plot_models(ax, data, value_col, models)
    ax.set_title(title, fontsize=14)
    ax.set_xlabel("Cutoff date")
    style_ax(ax)
    _finish_fig(fig, label_order=models)
    return fig


# ── Public figure builders ───────────────────────────────────────────────────


def plot_rank_per_category(df, metric):
    """Grid of rank-over-time subplots, one per (subdataset, frequency).

    Only categories that have rows for *metric* get a subplot.

    Returns:
        The matplotlib figure.

    Raises:
        ValueError: if *df* has no rows for *metric*.
    """
    df = _add_ranks(df)
    models = _sorted_models(df)
    selected = df[df["metric"] == metric]
    return _plot_grid(
        selected,
        _categories(selected),
        "rank",
        models,
        style_ax=lambda ax: _style_rank_ax(ax, len(models)),
        suptitle=f"Rank through time — {metric.upper()}",
        sharey=True,
    )


def plot_avg_rank(df, metric):
    """Average rank across all categories over time.

    Returns:
        The matplotlib figure (without lines or legend if *df* has no rows
        for *metric*).
    """
    df = _add_ranks(df)
    models = _sorted_models(df)
    return _plot_average(
        df[df["metric"] == metric],
        "rank",
        models,
        title=f"Average rank across all categories — {metric}",
        style_ax=lambda ax: _style_rank_ax(ax, len(models)),
    )


def plot_value_per_category(df, metric):
    """Grid of raw-metric-over-time subplots, one per (subdataset, frequency).

    Only categories that have rows for *metric* get a subplot.

    Returns:
        The matplotlib figure.

    Raises:
        ValueError: if *df* has no rows for *metric*.
    """
    df = _with_datetime_cutoff(df)
    models = _sorted_models(df)
    selected = df[df["metric"] == metric]
    return _plot_grid(
        selected,
        _categories(selected),
        "value",
        models,
        style_ax=lambda ax: _style_value_ax(ax, metric),
        suptitle=f"Model {metric.upper()} through time",
        sharey=False,
    )


def plot_avg_value(df, metric):
    """Average raw metric across all categories over time.

    Returns:
        The matplotlib figure (without lines or legend if *df* has no rows
        for *metric*).
    """
    df = _with_datetime_cutoff(df)
    models = _sorted_models(df)
    return _plot_average(
        df[df["metric"] == metric],
        "value",
        models,
        title=f"Average {metric} across all categories",
        style_ax=lambda ax: _style_value_ax(ax, metric),
    )


def plot_rank_for_subdataset(df, metric, subdataset):
    """Rank over time for a single subdataset (all frequencies as subplots).

    Ranks are computed on the full frame before it is restricted to
    *subdataset*. Only frequencies with rows for *metric* get a subplot.

    Returns:
        The matplotlib figure.

    Raises:
        ValueError: if *df* has no rows for *metric* in *subdataset*.
    """
    df = _add_ranks(df)
    models = _sorted_models(df)
    selected = df[(df["metric"] == metric) & (df["subdataset"] == subdataset)]
    return _plot_grid(
        selected,
        _categories(selected),
        "rank",
        models,
        style_ax=lambda ax: _style_rank_ax(ax, len(models)),
        suptitle=f"Rank through time — {metric.upper()}",
        sharey=True,
    )


def plot_value_for_subdataset(df, metric, subdataset):
    """Raw metric over time for a single subdataset (all frequencies as subplots).

    Only frequencies with rows for *metric* get a subplot.

    Returns:
        The matplotlib figure.

    Raises:
        ValueError: if *df* has no rows for *metric* in *subdataset*.
    """
    df = _with_datetime_cutoff(df)
    models = _sorted_models(df)
    selected = df[(df["metric"] == metric) & (df["subdataset"] == subdataset)]
    return _plot_grid(
        selected,
        _categories(selected),
        "value",
        models,
        style_ax=lambda ax: _style_value_ax(ax, metric),
        suptitle=f"Model {metric.upper()} through time",
        sharey=False,
    )


# ── CLI: save all figures to disk ────────────────────────────────────────────


def main():
    """Render the four per-metric figures for every metric and save as PNG."""
    out_dir = pathlib.Path("figures/rank_through_time")
    out_dir.mkdir(parents=True, exist_ok=True)

    raw = pd.read_csv("mock_evaluation_results.csv")
    # Rows for the model named "zero_model" are excluded from every figure.
    raw = raw[raw["model"] != "zero_model"]

    builders = [
        (plot_rank_per_category, "rank_per_category"),
        (plot_value_per_category, "value_per_category"),
        (plot_avg_rank, "avg_rank"),
        (plot_avg_value, "avg_value"),
    ]
    for metric in sorted(raw["metric"].unique()):
        for build, prefix in builders:
            fig = build(raw, metric)
            path = out_dir / f"{prefix}_{metric}.png"
            fig.savefig(path, dpi=150, bbox_inches="tight")
            # Close explicitly so figures do not accumulate across the loop.
            plt.close(fig)
            print(f"Saved {path}")

    print("Done.")


if __name__ == "__main__":
    main()