geoalgo committed on
Commit
d64ffef
·
0 Parent(s):

first version

Browse files
Files changed (9) hide show
  1. .gitignore +10 -0
  2. .python-version +1 -0
  3. README.md +0 -0
  4. leaderboard.py +84 -0
  5. main.py +117 -0
  6. mock_evaluation_results.csv +449 -0
  7. pyproject.toml +12 -0
  8. rank_through_time.py +266 -0
  9. uv.lock +0 -0
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.12
README.md ADDED
File without changes
leaderboard.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+
4
def compute_leaderboard(df: pd.DataFrame, *, verbose: bool = True) -> pd.DataFrame:
    """Compute the average rank and average value per model for each metric.

    Ranking procedure:
    1. Rank models within each (metric, subdataset, frequency, cutoff) group;
       the smallest value gets rank 1 and ties share the lowest rank.
    2. Average ranks across cutoff dates for each
       (metric, subdataset, frequency, model).
    3. Average across all (subdataset, frequency) combos for each
       (metric, model).

    Args:
        df: Long-format results with the columns ``metric``, ``subdataset``,
            ``frequency``, ``cutoff``, ``model`` and ``value``. It is copied,
            never modified.
        verbose: When true (the default), print the per-(subdataset,
            frequency) average ranks of every metric for manual inspection.

    Returns:
        A dataframe with a ``model`` column, then one ``rank <METRIC>`` column
        per metric (rounded to 2 decimals), then one ``avg <METRIC>`` column
        per metric (rounded to 4 decimals). Rows are ordered by the mean of
        the rank columns, lowest first.
    """
    unit_keys = ["metric", "subdataset", "frequency"]

    # Step 1: rank inside every evaluation cell.
    ranked = df.copy()
    ranked["rank"] = ranked.groupby([*unit_keys, "cutoff"])["value"].rank(
        method="min"
    )

    # Step 2: average ranks across cutoffs per (metric, subdataset, frequency, model)
    per_subdataset = (
        ranked.groupby([*unit_keys, "model"])["rank"].mean().reset_index()
    )

    if verbose:
        # Print per-subdataset ranks for manual inspection
        separator = "=" * 60
        for metric in sorted(per_subdataset["metric"].unique()):
            print(f"\n{separator}")
            print(f"Metric: {metric}")
            print(separator)
            sub = per_subdataset[per_subdataset["metric"] == metric]
            pivot = sub.pivot_table(
                index=["subdataset", "frequency"], columns="model", values="rank"
            )
            print(pivot.to_string())

    # Step 3: average across all (subdataset, frequency) combos
    overall = (
        per_subdataset.groupby(["metric", "model"])["rank"].mean().reset_index()
    )

    # Pivot so each metric becomes a column
    leaderboard = overall.pivot(index="model", columns="metric", values="rank")
    leaderboard = leaderboard.rename(
        columns={m: f"rank {m.upper()}" for m in leaderboard.columns}
    )

    # Average metric values: mean across all (subdataset, frequency, cutoff)
    # per (metric, model)
    avg_values = (
        df.groupby(["metric", "model"])["value"]
        .mean()
        .reset_index()
        .pivot(index="model", columns="metric", values="value")
    )
    avg_values = avg_values.rename(
        columns={m: f"avg {m.upper()}" for m in avg_values.columns}
    )
    leaderboard = leaderboard.join(avg_values)

    # Order by the average of the rank columns. A stable sort keeps tied
    # models in the pivot's index order instead of an arbitrary one.
    rank_cols = [c for c in leaderboard.columns if c.startswith("rank ")]
    leaderboard["avg_rank"] = leaderboard[rank_cols].mean(axis=1)
    leaderboard = leaderboard.sort_values("avg_rank", kind="stable")
    leaderboard = leaderboard.drop(columns="avg_rank").reset_index()

    # Round for display
    decimals = {}
    for col in leaderboard.columns:
        if col.startswith("rank "):
            decimals[col] = 2
        elif col.startswith("avg "):
            decimals[col] = 4
    return leaderboard.round(decimals)
76
+
77
+
78
if __name__ == "__main__":
    # Script entry point: load the mock results and print the leaderboard.
    results = pd.read_csv("mock_evaluation_results.csv")
    board = compute_leaderboard(results)
    banner = "=" * 60
    print("\n" + banner)
    print("LEADERBOARD")
    print(banner)
    print(board.to_string(index=False))
main.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import matplotlib
3
+ matplotlib.use("Agg")
4
+ import matplotlib.pyplot as plt
5
+ import pandas as pd
6
+
7
+ from leaderboard import compute_leaderboard
8
+ from rank_through_time import (
9
+ plot_rank_for_subdataset,
10
+ plot_value_for_subdataset,
11
+ )
12
+
13
# Long-format evaluation results shared by every callback in this module.
df = pd.read_csv("mock_evaluation_results.csv")

# Sorted distinct values used as the dropdown choices in the UI below.
ALL_METRICS, ALL_SUBDATASETS, ALL_MODELS = (
    sorted(df[column].unique().tolist())
    for column in ("metric", "subdataset", "model")
)
18
+
19
+
20
def build_table(metric, subdataset, models):
    """Build the wide results table shown in the "All results" tab.

    Args:
        metric: Metric name to keep; matched against the ``metric`` column.
        subdataset: Subdataset name to keep, or ``"All"`` for no filtering.
        models: Model names to keep. A falsy value (``None`` or an empty
            selection) applies no model filter.

    Returns:
        A dataframe with one row per (subdataset, frequency, cutoff), sorted
        by those keys, and one column per model holding the metric value.
    """
    mask = df["metric"] == metric
    if subdataset != "All":
        mask &= df["subdataset"] == subdataset
    if models:
        mask &= df["model"].isin(models)

    # ``frequency`` is part of the row key: without it, results of the same
    # subdataset evaluated at two frequencies on the same cutoff would be
    # silently averaged together by ``pivot_table``.
    pivot = df[mask].pivot_table(
        index=["subdataset", "frequency", "cutoff"],
        columns="model",
        values="value",
    )
    return pivot.sort_index().reset_index()
32
+
33
+
34
def build_plots(metric, subdataset):
    """Return the (rank figure, value figure) pair for one metric and subdataset.

    Both figures are produced from the module-level ``df`` by the plotting
    helpers imported from ``rank_through_time``.
    """
    # Gradio expects the figure objects directly
    return (
        plot_rank_for_subdataset(df, metric, subdataset),
        plot_value_for_subdataset(df, metric, subdataset),
    )
40
+
41
+
42
# Gradio UI: three tabs (leaderboard, raw results table, plots over time).
with gr.Blocks(title="Impermanent Leaderboard") as app:
    gr.Markdown("# Impermanent Leaderboard")

    with gr.Tab("Leaderboard"):
        # Computed once when the module is loaded.
        leaderboard_table = gr.Dataframe(
            label="Leaderboard",
            value=compute_leaderboard(df),
        )

    with gr.Tab("All results"):
        with gr.Row():
            metric_dd = gr.Dropdown(
                label="Metric",
                choices=ALL_METRICS,
                value=ALL_METRICS[0],
            )
            subdataset_dd = gr.Dropdown(
                label="Subdataset",
                choices=["All"] + ALL_SUBDATASETS,
                value="All",
            )
            models_dd = gr.Dropdown(
                label="Models",
                choices=ALL_MODELS,
                value=ALL_MODELS,
                multiselect=True,
            )

        results_table = gr.Dataframe(
            label="Results",
            value=build_table(ALL_METRICS[0], "All", ALL_MODELS),
        )

        # Any filter change rebuilds the table from all three filters.
        table_controls = [metric_dd, subdataset_dd, models_dd]
        for control in table_controls:
            control.change(
                fn=build_table,
                inputs=[metric_dd, subdataset_dd, models_dd],
                outputs=results_table,
            )

    with gr.Tab("Results over time"):
        with gr.Row():
            time_metric_dd = gr.Dropdown(
                label="Metric",
                choices=ALL_METRICS,
                value=ALL_METRICS[0],
            )
            time_subdataset_dd = gr.Dropdown(
                label="Subdataset",
                choices=ALL_SUBDATASETS,
                value=ALL_SUBDATASETS[0],
            )

        rank_plot = gr.Plot(label="Rank over time")
        value_plot = gr.Plot(label="Metric value over time")

        def update_plots(metric, subdataset):
            """Return the rank and value figures for the current selection."""
            rank_figure, value_figure = build_plots(metric, subdataset)
            return rank_figure, value_figure

        # Initial render
        app.load(
            fn=update_plots,
            inputs=[time_metric_dd, time_subdataset_dd],
            outputs=[rank_plot, value_plot],
        )

        # Redraw both plots whenever either selector changes.
        time_controls = [time_metric_dd, time_subdataset_dd]
        for control in time_controls:
            control.change(
                fn=update_plots,
                inputs=[time_metric_dd, time_subdataset_dd],
                outputs=[rank_plot, value_plot],
            )

if __name__ == "__main__":
    app.launch()
mock_evaluation_results.csv ADDED
@@ -0,0 +1,449 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset,subdataset,frequency,cutoff,metric,model,value
2
+ gh-archive,stars,daily,2026-01-08,mase,zero_model,2.841
3
+ gh-archive,stars,daily,2026-01-08,mase,seasonal_naive,1.012
4
+ gh-archive,stars,daily,2026-01-08,mase,auto_arima,0.874
5
+ gh-archive,stars,daily,2026-01-08,mase,auto_ets,0.891
6
+ gh-archive,stars,daily,2026-01-08,mase,auto_lgbm,0.782
7
+ gh-archive,stars,daily,2026-01-08,mase,chronos,0.643
8
+ gh-archive,stars,daily,2026-01-08,mase,moirai,0.701
9
+ gh-archive,stars,daily,2026-01-08,mase,timesfm,0.668
10
+ gh-archive,stars,daily,2026-01-08,scaled_crps,zero_model,0.421
11
+ gh-archive,stars,daily,2026-01-08,scaled_crps,seasonal_naive,0.183
12
+ gh-archive,stars,daily,2026-01-08,scaled_crps,auto_arima,0.142
13
+ gh-archive,stars,daily,2026-01-08,scaled_crps,auto_ets,0.149
14
+ gh-archive,stars,daily,2026-01-08,scaled_crps,auto_lgbm,0.121
15
+ gh-archive,stars,daily,2026-01-08,scaled_crps,chronos,0.089
16
+ gh-archive,stars,daily,2026-01-08,scaled_crps,moirai,0.098
17
+ gh-archive,stars,daily,2026-01-08,scaled_crps,timesfm,0.093
18
+ gh-archive,stars,daily,2026-01-15,mase,zero_model,2.793
19
+ gh-archive,stars,daily,2026-01-15,mase,seasonal_naive,1.034
20
+ gh-archive,stars,daily,2026-01-15,mase,auto_arima,0.862
21
+ gh-archive,stars,daily,2026-01-15,mase,auto_ets,0.879
22
+ gh-archive,stars,daily,2026-01-15,mase,auto_lgbm,0.801
23
+ gh-archive,stars,daily,2026-01-15,mase,chronos,0.651
24
+ gh-archive,stars,daily,2026-01-15,mase,moirai,0.694
25
+ gh-archive,stars,daily,2026-01-15,mase,timesfm,0.672
26
+ gh-archive,stars,daily,2026-01-15,scaled_crps,zero_model,0.415
27
+ gh-archive,stars,daily,2026-01-15,scaled_crps,seasonal_naive,0.187
28
+ gh-archive,stars,daily,2026-01-15,scaled_crps,auto_arima,0.139
29
+ gh-archive,stars,daily,2026-01-15,scaled_crps,auto_ets,0.146
30
+ gh-archive,stars,daily,2026-01-15,scaled_crps,auto_lgbm,0.125
31
+ gh-archive,stars,daily,2026-01-15,scaled_crps,chronos,0.091
32
+ gh-archive,stars,daily,2026-01-15,scaled_crps,moirai,0.096
33
+ gh-archive,stars,daily,2026-01-15,scaled_crps,timesfm,0.094
34
+ gh-archive,stars,daily,2026-01-22,mase,zero_model,2.867
35
+ gh-archive,stars,daily,2026-01-22,mase,seasonal_naive,0.987
36
+ gh-archive,stars,daily,2026-01-22,mase,auto_arima,0.851
37
+ gh-archive,stars,daily,2026-01-22,mase,auto_ets,0.870
38
+ gh-archive,stars,daily,2026-01-22,mase,auto_lgbm,0.769
39
+ gh-archive,stars,daily,2026-01-22,mase,chronos,0.634
40
+ gh-archive,stars,daily,2026-01-22,mase,moirai,0.687
41
+ gh-archive,stars,daily,2026-01-22,mase,timesfm,0.659
42
+ gh-archive,stars,daily,2026-01-22,scaled_crps,zero_model,0.428
43
+ gh-archive,stars,daily,2026-01-22,scaled_crps,seasonal_naive,0.178
44
+ gh-archive,stars,daily,2026-01-22,scaled_crps,auto_arima,0.136
45
+ gh-archive,stars,daily,2026-01-22,scaled_crps,auto_ets,0.143
46
+ gh-archive,stars,daily,2026-01-22,scaled_crps,auto_lgbm,0.118
47
+ gh-archive,stars,daily,2026-01-22,scaled_crps,chronos,0.086
48
+ gh-archive,stars,daily,2026-01-22,scaled_crps,moirai,0.094
49
+ gh-archive,stars,daily,2026-01-22,scaled_crps,timesfm,0.090
50
+ gh-archive,stars,daily,2026-01-29,mase,zero_model,2.912
51
+ gh-archive,stars,daily,2026-01-29,mase,seasonal_naive,1.005
52
+ gh-archive,stars,daily,2026-01-29,mase,auto_arima,0.883
53
+ gh-archive,stars,daily,2026-01-29,mase,auto_ets,0.898
54
+ gh-archive,stars,daily,2026-01-29,mase,auto_lgbm,0.793
55
+ gh-archive,stars,daily,2026-01-29,mase,chronos,0.657
56
+ gh-archive,stars,daily,2026-01-29,mase,moirai,0.712
57
+ gh-archive,stars,daily,2026-01-29,mase,timesfm,0.681
58
+ gh-archive,stars,daily,2026-01-29,scaled_crps,zero_model,0.434
59
+ gh-archive,stars,daily,2026-01-29,scaled_crps,seasonal_naive,0.185
60
+ gh-archive,stars,daily,2026-01-29,scaled_crps,auto_arima,0.145
61
+ gh-archive,stars,daily,2026-01-29,scaled_crps,auto_ets,0.151
62
+ gh-archive,stars,daily,2026-01-29,scaled_crps,auto_lgbm,0.128
63
+ gh-archive,stars,daily,2026-01-29,scaled_crps,chronos,0.092
64
+ gh-archive,stars,daily,2026-01-29,scaled_crps,moirai,0.101
65
+ gh-archive,stars,daily,2026-01-29,scaled_crps,timesfm,0.096
66
+ gh-archive,prs_opened,daily,2026-01-08,mase,zero_model,3.214
67
+ gh-archive,prs_opened,daily,2026-01-08,mase,seasonal_naive,1.087
68
+ gh-archive,prs_opened,daily,2026-01-08,mase,auto_arima,0.952
69
+ gh-archive,prs_opened,daily,2026-01-08,mase,auto_ets,0.971
70
+ gh-archive,prs_opened,daily,2026-01-08,mase,auto_lgbm,0.845
71
+ gh-archive,prs_opened,daily,2026-01-08,mase,chronos,0.712
72
+ gh-archive,prs_opened,daily,2026-01-08,mase,moirai,0.768
73
+ gh-archive,prs_opened,daily,2026-01-08,mase,timesfm,0.734
74
+ gh-archive,prs_opened,daily,2026-01-08,scaled_crps,zero_model,0.478
75
+ gh-archive,prs_opened,daily,2026-01-08,scaled_crps,seasonal_naive,0.201
76
+ gh-archive,prs_opened,daily,2026-01-08,scaled_crps,auto_arima,0.162
77
+ gh-archive,prs_opened,daily,2026-01-08,scaled_crps,auto_ets,0.168
78
+ gh-archive,prs_opened,daily,2026-01-08,scaled_crps,auto_lgbm,0.139
79
+ gh-archive,prs_opened,daily,2026-01-08,scaled_crps,chronos,0.104
80
+ gh-archive,prs_opened,daily,2026-01-08,scaled_crps,moirai,0.115
81
+ gh-archive,prs_opened,daily,2026-01-08,scaled_crps,timesfm,0.108
82
+ gh-archive,prs_opened,daily,2026-01-15,mase,zero_model,3.178
83
+ gh-archive,prs_opened,daily,2026-01-15,mase,seasonal_naive,1.065
84
+ gh-archive,prs_opened,daily,2026-01-15,mase,auto_arima,0.941
85
+ gh-archive,prs_opened,daily,2026-01-15,mase,auto_ets,0.958
86
+ gh-archive,prs_opened,daily,2026-01-15,mase,auto_lgbm,0.861
87
+ gh-archive,prs_opened,daily,2026-01-15,mase,chronos,0.723
88
+ gh-archive,prs_opened,daily,2026-01-15,mase,moirai,0.751
89
+ gh-archive,prs_opened,daily,2026-01-15,mase,timesfm,0.729
90
+ gh-archive,prs_opened,daily,2026-01-15,scaled_crps,zero_model,0.471
91
+ gh-archive,prs_opened,daily,2026-01-15,scaled_crps,seasonal_naive,0.196
92
+ gh-archive,prs_opened,daily,2026-01-15,scaled_crps,auto_arima,0.158
93
+ gh-archive,prs_opened,daily,2026-01-15,scaled_crps,auto_ets,0.164
94
+ gh-archive,prs_opened,daily,2026-01-15,scaled_crps,auto_lgbm,0.142
95
+ gh-archive,prs_opened,daily,2026-01-15,scaled_crps,chronos,0.107
96
+ gh-archive,prs_opened,daily,2026-01-15,scaled_crps,moirai,0.112
97
+ gh-archive,prs_opened,daily,2026-01-15,scaled_crps,timesfm,0.105
98
+ gh-archive,prs_opened,daily,2026-01-22,mase,zero_model,3.251
99
+ gh-archive,prs_opened,daily,2026-01-22,mase,seasonal_naive,1.098
100
+ gh-archive,prs_opened,daily,2026-01-22,mase,auto_arima,0.963
101
+ gh-archive,prs_opened,daily,2026-01-22,mase,auto_ets,0.982
102
+ gh-archive,prs_opened,daily,2026-01-22,mase,auto_lgbm,0.837
103
+ gh-archive,prs_opened,daily,2026-01-22,mase,chronos,0.698
104
+ gh-archive,prs_opened,daily,2026-01-22,mase,moirai,0.759
105
+ gh-archive,prs_opened,daily,2026-01-22,mase,timesfm,0.721
106
+ gh-archive,prs_opened,daily,2026-01-22,scaled_crps,zero_model,0.483
107
+ gh-archive,prs_opened,daily,2026-01-22,scaled_crps,seasonal_naive,0.205
108
+ gh-archive,prs_opened,daily,2026-01-22,scaled_crps,auto_arima,0.165
109
+ gh-archive,prs_opened,daily,2026-01-22,scaled_crps,auto_ets,0.171
110
+ gh-archive,prs_opened,daily,2026-01-22,scaled_crps,auto_lgbm,0.136
111
+ gh-archive,prs_opened,daily,2026-01-22,scaled_crps,chronos,0.101
112
+ gh-archive,prs_opened,daily,2026-01-22,scaled_crps,moirai,0.113
113
+ gh-archive,prs_opened,daily,2026-01-22,scaled_crps,timesfm,0.106
114
+ gh-archive,prs_opened,daily,2026-01-29,mase,zero_model,3.192
115
+ gh-archive,prs_opened,daily,2026-01-29,mase,seasonal_naive,1.078
116
+ gh-archive,prs_opened,daily,2026-01-29,mase,auto_arima,0.947
117
+ gh-archive,prs_opened,daily,2026-01-29,mase,auto_ets,0.965
118
+ gh-archive,prs_opened,daily,2026-01-29,mase,auto_lgbm,0.852
119
+ gh-archive,prs_opened,daily,2026-01-29,mase,chronos,0.731
120
+ gh-archive,prs_opened,daily,2026-01-29,mase,moirai,0.774
121
+ gh-archive,prs_opened,daily,2026-01-29,mase,timesfm,0.745
122
+ gh-archive,prs_opened,daily,2026-01-29,scaled_crps,zero_model,0.475
123
+ gh-archive,prs_opened,daily,2026-01-29,scaled_crps,seasonal_naive,0.199
124
+ gh-archive,prs_opened,daily,2026-01-29,scaled_crps,auto_arima,0.160
125
+ gh-archive,prs_opened,daily,2026-01-29,scaled_crps,auto_ets,0.166
126
+ gh-archive,prs_opened,daily,2026-01-29,scaled_crps,auto_lgbm,0.141
127
+ gh-archive,prs_opened,daily,2026-01-29,scaled_crps,chronos,0.109
128
+ gh-archive,prs_opened,daily,2026-01-29,scaled_crps,moirai,0.117
129
+ gh-archive,prs_opened,daily,2026-01-29,scaled_crps,timesfm,0.111
130
+ gh-archive,issues_opened,daily,2026-01-08,mase,zero_model,3.567
131
+ gh-archive,issues_opened,daily,2026-01-08,mase,seasonal_naive,1.142
132
+ gh-archive,issues_opened,daily,2026-01-08,mase,auto_arima,1.023
133
+ gh-archive,issues_opened,daily,2026-01-08,mase,auto_ets,1.041
134
+ gh-archive,issues_opened,daily,2026-01-08,mase,auto_lgbm,0.912
135
+ gh-archive,issues_opened,daily,2026-01-08,mase,chronos,0.789
136
+ gh-archive,issues_opened,daily,2026-01-08,mase,moirai,0.834
137
+ gh-archive,issues_opened,daily,2026-01-08,mase,timesfm,0.801
138
+ gh-archive,issues_opened,daily,2026-01-08,scaled_crps,zero_model,0.512
139
+ gh-archive,issues_opened,daily,2026-01-08,scaled_crps,seasonal_naive,0.218
140
+ gh-archive,issues_opened,daily,2026-01-08,scaled_crps,auto_arima,0.179
141
+ gh-archive,issues_opened,daily,2026-01-08,scaled_crps,auto_ets,0.185
142
+ gh-archive,issues_opened,daily,2026-01-08,scaled_crps,auto_lgbm,0.152
143
+ gh-archive,issues_opened,daily,2026-01-08,scaled_crps,chronos,0.119
144
+ gh-archive,issues_opened,daily,2026-01-08,scaled_crps,moirai,0.128
145
+ gh-archive,issues_opened,daily,2026-01-08,scaled_crps,timesfm,0.122
146
+ gh-archive,issues_opened,daily,2026-01-15,mase,zero_model,3.521
147
+ gh-archive,issues_opened,daily,2026-01-15,mase,seasonal_naive,1.128
148
+ gh-archive,issues_opened,daily,2026-01-15,mase,auto_arima,1.008
149
+ gh-archive,issues_opened,daily,2026-01-15,mase,auto_ets,1.029
150
+ gh-archive,issues_opened,daily,2026-01-15,mase,auto_lgbm,0.925
151
+ gh-archive,issues_opened,daily,2026-01-15,mase,chronos,0.798
152
+ gh-archive,issues_opened,daily,2026-01-15,mase,moirai,0.841
153
+ gh-archive,issues_opened,daily,2026-01-15,mase,timesfm,0.812
154
+ gh-archive,issues_opened,daily,2026-01-15,scaled_crps,zero_model,0.508
155
+ gh-archive,issues_opened,daily,2026-01-15,scaled_crps,seasonal_naive,0.214
156
+ gh-archive,issues_opened,daily,2026-01-15,scaled_crps,auto_arima,0.175
157
+ gh-archive,issues_opened,daily,2026-01-15,scaled_crps,auto_ets,0.181
158
+ gh-archive,issues_opened,daily,2026-01-15,scaled_crps,auto_lgbm,0.155
159
+ gh-archive,issues_opened,daily,2026-01-15,scaled_crps,chronos,0.121
160
+ gh-archive,issues_opened,daily,2026-01-15,scaled_crps,moirai,0.131
161
+ gh-archive,issues_opened,daily,2026-01-15,scaled_crps,timesfm,0.124
162
+ gh-archive,issues_opened,daily,2026-01-22,mase,zero_model,3.601
163
+ gh-archive,issues_opened,daily,2026-01-22,mase,seasonal_naive,1.156
164
+ gh-archive,issues_opened,daily,2026-01-22,mase,auto_arima,1.035
165
+ gh-archive,issues_opened,daily,2026-01-22,mase,auto_ets,1.052
166
+ gh-archive,issues_opened,daily,2026-01-22,mase,auto_lgbm,0.898
167
+ gh-archive,issues_opened,daily,2026-01-22,mase,chronos,0.774
168
+ gh-archive,issues_opened,daily,2026-01-22,mase,moirai,0.821
169
+ gh-archive,issues_opened,daily,2026-01-22,mase,timesfm,0.793
170
+ gh-archive,issues_opened,daily,2026-01-22,scaled_crps,zero_model,0.519
171
+ gh-archive,issues_opened,daily,2026-01-22,scaled_crps,seasonal_naive,0.221
172
+ gh-archive,issues_opened,daily,2026-01-22,scaled_crps,auto_arima,0.182
173
+ gh-archive,issues_opened,daily,2026-01-22,scaled_crps,auto_ets,0.188
174
+ gh-archive,issues_opened,daily,2026-01-22,scaled_crps,auto_lgbm,0.148
175
+ gh-archive,issues_opened,daily,2026-01-22,scaled_crps,chronos,0.116
176
+ gh-archive,issues_opened,daily,2026-01-22,scaled_crps,moirai,0.126
177
+ gh-archive,issues_opened,daily,2026-01-22,scaled_crps,timesfm,0.119
178
+ gh-archive,issues_opened,daily,2026-01-29,mase,zero_model,3.548
179
+ gh-archive,issues_opened,daily,2026-01-29,mase,seasonal_naive,1.134
180
+ gh-archive,issues_opened,daily,2026-01-29,mase,auto_arima,1.018
181
+ gh-archive,issues_opened,daily,2026-01-29,mase,auto_ets,1.038
182
+ gh-archive,issues_opened,daily,2026-01-29,mase,auto_lgbm,0.921
183
+ gh-archive,issues_opened,daily,2026-01-29,mase,chronos,0.805
184
+ gh-archive,issues_opened,daily,2026-01-29,mase,moirai,0.847
185
+ gh-archive,issues_opened,daily,2026-01-29,mase,timesfm,0.818
186
+ gh-archive,issues_opened,daily,2026-01-29,scaled_crps,zero_model,0.514
187
+ gh-archive,issues_opened,daily,2026-01-29,scaled_crps,seasonal_naive,0.216
188
+ gh-archive,issues_opened,daily,2026-01-29,scaled_crps,auto_arima,0.177
189
+ gh-archive,issues_opened,daily,2026-01-29,scaled_crps,auto_ets,0.183
190
+ gh-archive,issues_opened,daily,2026-01-29,scaled_crps,auto_lgbm,0.153
191
+ gh-archive,issues_opened,daily,2026-01-29,scaled_crps,chronos,0.123
192
+ gh-archive,issues_opened,daily,2026-01-29,scaled_crps,moirai,0.132
193
+ gh-archive,issues_opened,daily,2026-01-29,scaled_crps,timesfm,0.125
194
+ gh-archive,pushes,daily,2026-01-08,mase,zero_model,2.634
195
+ gh-archive,pushes,daily,2026-01-08,mase,seasonal_naive,0.967
196
+ gh-archive,pushes,daily,2026-01-08,mase,auto_arima,0.812
197
+ gh-archive,pushes,daily,2026-01-08,mase,auto_ets,0.831
198
+ gh-archive,pushes,daily,2026-01-08,mase,auto_lgbm,0.723
199
+ gh-archive,pushes,daily,2026-01-08,mase,chronos,0.589
200
+ gh-archive,pushes,daily,2026-01-08,mase,moirai,0.641
201
+ gh-archive,pushes,daily,2026-01-08,mase,timesfm,0.612
202
+ gh-archive,pushes,daily,2026-01-08,scaled_crps,zero_model,0.392
203
+ gh-archive,pushes,daily,2026-01-08,scaled_crps,seasonal_naive,0.168
204
+ gh-archive,pushes,daily,2026-01-08,scaled_crps,auto_arima,0.128
205
+ gh-archive,pushes,daily,2026-01-08,scaled_crps,auto_ets,0.134
206
+ gh-archive,pushes,daily,2026-01-08,scaled_crps,auto_lgbm,0.108
207
+ gh-archive,pushes,daily,2026-01-08,scaled_crps,chronos,0.078
208
+ gh-archive,pushes,daily,2026-01-08,scaled_crps,moirai,0.087
209
+ gh-archive,pushes,daily,2026-01-08,scaled_crps,timesfm,0.082
210
+ gh-archive,pushes,daily,2026-01-15,mase,zero_model,2.601
211
+ gh-archive,pushes,daily,2026-01-15,mase,seasonal_naive,0.951
212
+ gh-archive,pushes,daily,2026-01-15,mase,auto_arima,0.798
213
+ gh-archive,pushes,daily,2026-01-15,mase,auto_ets,0.819
214
+ gh-archive,pushes,daily,2026-01-15,mase,auto_lgbm,0.735
215
+ gh-archive,pushes,daily,2026-01-15,mase,chronos,0.598
216
+ gh-archive,pushes,daily,2026-01-15,mase,moirai,0.634
217
+ gh-archive,pushes,daily,2026-01-15,mase,timesfm,0.608
218
+ gh-archive,pushes,daily,2026-01-15,scaled_crps,zero_model,0.387
219
+ gh-archive,pushes,daily,2026-01-15,scaled_crps,seasonal_naive,0.164
220
+ gh-archive,pushes,daily,2026-01-15,scaled_crps,auto_arima,0.124
221
+ gh-archive,pushes,daily,2026-01-15,scaled_crps,auto_ets,0.131
222
+ gh-archive,pushes,daily,2026-01-15,scaled_crps,auto_lgbm,0.111
223
+ gh-archive,pushes,daily,2026-01-15,scaled_crps,chronos,0.081
224
+ gh-archive,pushes,daily,2026-01-15,scaled_crps,moirai,0.085
225
+ gh-archive,pushes,daily,2026-01-15,scaled_crps,timesfm,0.079
226
+ gh-archive,pushes,daily,2026-01-22,mase,zero_model,2.687
227
+ gh-archive,pushes,daily,2026-01-22,mase,seasonal_naive,0.978
228
+ gh-archive,pushes,daily,2026-01-22,mase,auto_arima,0.823
229
+ gh-archive,pushes,daily,2026-01-22,mase,auto_ets,0.841
230
+ gh-archive,pushes,daily,2026-01-22,mase,auto_lgbm,0.712
231
+ gh-archive,pushes,daily,2026-01-22,mase,chronos,0.581
232
+ gh-archive,pushes,daily,2026-01-22,mase,moirai,0.628
233
+ gh-archive,pushes,daily,2026-01-22,mase,timesfm,0.601
234
+ gh-archive,pushes,daily,2026-01-22,scaled_crps,zero_model,0.398
235
+ gh-archive,pushes,daily,2026-01-22,scaled_crps,seasonal_naive,0.171
236
+ gh-archive,pushes,daily,2026-01-22,scaled_crps,auto_arima,0.131
237
+ gh-archive,pushes,daily,2026-01-22,scaled_crps,auto_ets,0.137
238
+ gh-archive,pushes,daily,2026-01-22,scaled_crps,auto_lgbm,0.105
239
+ gh-archive,pushes,daily,2026-01-22,scaled_crps,chronos,0.075
240
+ gh-archive,pushes,daily,2026-01-22,scaled_crps,moirai,0.083
241
+ gh-archive,pushes,daily,2026-01-22,scaled_crps,timesfm,0.078
242
+ gh-archive,pushes,daily,2026-01-29,mase,zero_model,2.718
243
+ gh-archive,pushes,daily,2026-01-29,mase,seasonal_naive,0.992
244
+ gh-archive,pushes,daily,2026-01-29,mase,auto_arima,0.834
245
+ gh-archive,pushes,daily,2026-01-29,mase,auto_ets,0.852
246
+ gh-archive,pushes,daily,2026-01-29,mase,auto_lgbm,0.741
247
+ gh-archive,pushes,daily,2026-01-29,mase,chronos,0.604
248
+ gh-archive,pushes,daily,2026-01-29,mase,moirai,0.651
249
+ gh-archive,pushes,daily,2026-01-29,mase,timesfm,0.623
250
+ gh-archive,pushes,daily,2026-01-29,scaled_crps,zero_model,0.403
251
+ gh-archive,pushes,daily,2026-01-29,scaled_crps,seasonal_naive,0.174
252
+ gh-archive,pushes,daily,2026-01-29,scaled_crps,auto_arima,0.134
253
+ gh-archive,pushes,daily,2026-01-29,scaled_crps,auto_ets,0.140
254
+ gh-archive,pushes,daily,2026-01-29,scaled_crps,auto_lgbm,0.112
255
+ gh-archive,pushes,daily,2026-01-29,scaled_crps,chronos,0.082
256
+ gh-archive,pushes,daily,2026-01-29,scaled_crps,moirai,0.090
257
+ gh-archive,pushes,daily,2026-01-29,scaled_crps,timesfm,0.085
258
+ gh-archive,stars,weekly,2026-01-12,mase,zero_model,2.512
259
+ gh-archive,stars,weekly,2026-01-12,mase,seasonal_naive,0.934
260
+ gh-archive,stars,weekly,2026-01-12,mase,auto_arima,0.798
261
+ gh-archive,stars,weekly,2026-01-12,mase,auto_ets,0.812
262
+ gh-archive,stars,weekly,2026-01-12,mase,auto_lgbm,0.701
263
+ gh-archive,stars,weekly,2026-01-12,mase,chronos,0.578
264
+ gh-archive,stars,weekly,2026-01-12,mase,moirai,0.623
265
+ gh-archive,stars,weekly,2026-01-12,mase,timesfm,0.598
266
+ gh-archive,stars,weekly,2026-01-12,scaled_crps,zero_model,0.378
267
+ gh-archive,stars,weekly,2026-01-12,scaled_crps,seasonal_naive,0.159
268
+ gh-archive,stars,weekly,2026-01-12,scaled_crps,auto_arima,0.121
269
+ gh-archive,stars,weekly,2026-01-12,scaled_crps,auto_ets,0.128
270
+ gh-archive,stars,weekly,2026-01-12,scaled_crps,auto_lgbm,0.103
271
+ gh-archive,stars,weekly,2026-01-12,scaled_crps,chronos,0.074
272
+ gh-archive,stars,weekly,2026-01-12,scaled_crps,moirai,0.082
273
+ gh-archive,stars,weekly,2026-01-12,scaled_crps,timesfm,0.077
274
+ gh-archive,stars,weekly,2026-01-19,mase,zero_model,2.478
275
+ gh-archive,stars,weekly,2026-01-19,mase,seasonal_naive,0.921
276
+ gh-archive,stars,weekly,2026-01-19,mase,auto_arima,0.784
277
+ gh-archive,stars,weekly,2026-01-19,mase,auto_ets,0.801
278
+ gh-archive,stars,weekly,2026-01-19,mase,auto_lgbm,0.689
279
+ gh-archive,stars,weekly,2026-01-19,mase,chronos,0.563
280
+ gh-archive,stars,weekly,2026-01-19,mase,moirai,0.612
281
+ gh-archive,stars,weekly,2026-01-19,mase,timesfm,0.584
282
+ gh-archive,stars,weekly,2026-01-19,scaled_crps,zero_model,0.371
283
+ gh-archive,stars,weekly,2026-01-19,scaled_crps,seasonal_naive,0.154
284
+ gh-archive,stars,weekly,2026-01-19,scaled_crps,auto_arima,0.117
285
+ gh-archive,stars,weekly,2026-01-19,scaled_crps,auto_ets,0.124
286
+ gh-archive,stars,weekly,2026-01-19,scaled_crps,auto_lgbm,0.098
287
+ gh-archive,stars,weekly,2026-01-19,scaled_crps,chronos,0.071
288
+ gh-archive,stars,weekly,2026-01-19,scaled_crps,moirai,0.079
289
+ gh-archive,stars,weekly,2026-01-19,scaled_crps,timesfm,0.074
290
+ gh-archive,stars,weekly,2026-01-26,mase,zero_model,2.549
291
+ gh-archive,stars,weekly,2026-01-26,mase,seasonal_naive,0.945
292
+ gh-archive,stars,weekly,2026-01-26,mase,auto_arima,0.811
293
+ gh-archive,stars,weekly,2026-01-26,mase,auto_ets,0.825
294
+ gh-archive,stars,weekly,2026-01-26,mase,auto_lgbm,0.714
295
+ gh-archive,stars,weekly,2026-01-26,mase,chronos,0.587
296
+ gh-archive,stars,weekly,2026-01-26,mase,moirai,0.638
297
+ gh-archive,stars,weekly,2026-01-26,mase,timesfm,0.608
298
+ gh-archive,stars,weekly,2026-01-26,scaled_crps,zero_model,0.384
299
+ gh-archive,stars,weekly,2026-01-26,scaled_crps,seasonal_naive,0.162
300
+ gh-archive,stars,weekly,2026-01-26,scaled_crps,auto_arima,0.124
301
+ gh-archive,stars,weekly,2026-01-26,scaled_crps,auto_ets,0.131
302
+ gh-archive,stars,weekly,2026-01-26,scaled_crps,auto_lgbm,0.106
303
+ gh-archive,stars,weekly,2026-01-26,scaled_crps,chronos,0.076
304
+ gh-archive,stars,weekly,2026-01-26,scaled_crps,moirai,0.085
305
+ gh-archive,stars,weekly,2026-01-26,scaled_crps,timesfm,0.080
306
+ gh-archive,prs_opened,weekly,2026-01-12,mase,zero_model,2.891
307
+ gh-archive,prs_opened,weekly,2026-01-12,mase,seasonal_naive,1.012
308
+ gh-archive,prs_opened,weekly,2026-01-12,mase,auto_arima,0.878
309
+ gh-archive,prs_opened,weekly,2026-01-12,mase,auto_ets,0.894
310
+ gh-archive,prs_opened,weekly,2026-01-12,mase,auto_lgbm,0.768
311
+ gh-archive,prs_opened,weekly,2026-01-12,mase,chronos,0.641
312
+ gh-archive,prs_opened,weekly,2026-01-12,mase,moirai,0.694
313
+ gh-archive,prs_opened,weekly,2026-01-12,mase,timesfm,0.662
314
+ gh-archive,prs_opened,weekly,2026-01-12,scaled_crps,zero_model,0.431
315
+ gh-archive,prs_opened,weekly,2026-01-12,scaled_crps,seasonal_naive,0.178
316
+ gh-archive,prs_opened,weekly,2026-01-12,scaled_crps,auto_arima,0.141
317
+ gh-archive,prs_opened,weekly,2026-01-12,scaled_crps,auto_ets,0.148
318
+ gh-archive,prs_opened,weekly,2026-01-12,scaled_crps,auto_lgbm,0.119
319
+ gh-archive,prs_opened,weekly,2026-01-12,scaled_crps,chronos,0.088
320
+ gh-archive,prs_opened,weekly,2026-01-12,scaled_crps,moirai,0.097
321
+ gh-archive,prs_opened,weekly,2026-01-12,scaled_crps,timesfm,0.091
322
+ gh-archive,prs_opened,weekly,2026-01-19,mase,zero_model,2.856
323
+ gh-archive,prs_opened,weekly,2026-01-19,mase,seasonal_naive,0.998
324
+ gh-archive,prs_opened,weekly,2026-01-19,mase,auto_arima,0.865
325
+ gh-archive,prs_opened,weekly,2026-01-19,mase,auto_ets,0.882
326
+ gh-archive,prs_opened,weekly,2026-01-19,mase,auto_lgbm,0.754
327
+ gh-archive,prs_opened,weekly,2026-01-19,mase,chronos,0.628
328
+ gh-archive,prs_opened,weekly,2026-01-19,mase,moirai,0.681
329
+ gh-archive,prs_opened,weekly,2026-01-19,mase,timesfm,0.649
330
+ gh-archive,prs_opened,weekly,2026-01-19,scaled_crps,zero_model,0.425
331
+ gh-archive,prs_opened,weekly,2026-01-19,scaled_crps,seasonal_naive,0.173
332
+ gh-archive,prs_opened,weekly,2026-01-19,scaled_crps,auto_arima,0.137
333
+ gh-archive,prs_opened,weekly,2026-01-19,scaled_crps,auto_ets,0.144
334
+ gh-archive,prs_opened,weekly,2026-01-19,scaled_crps,auto_lgbm,0.115
335
+ gh-archive,prs_opened,weekly,2026-01-19,scaled_crps,chronos,0.084
336
+ gh-archive,prs_opened,weekly,2026-01-19,scaled_crps,moirai,0.093
337
+ gh-archive,prs_opened,weekly,2026-01-19,scaled_crps,timesfm,0.087
338
+ gh-archive,prs_opened,weekly,2026-01-26,mase,zero_model,2.923
339
+ gh-archive,prs_opened,weekly,2026-01-26,mase,seasonal_naive,1.024
340
+ gh-archive,prs_opened,weekly,2026-01-26,mase,auto_arima,0.891
341
+ gh-archive,prs_opened,weekly,2026-01-26,mase,auto_ets,0.907
342
+ gh-archive,prs_opened,weekly,2026-01-26,mase,auto_lgbm,0.781
343
+ gh-archive,prs_opened,weekly,2026-01-26,mase,chronos,0.652
344
+ gh-archive,prs_opened,weekly,2026-01-26,mase,moirai,0.708
345
+ gh-archive,prs_opened,weekly,2026-01-26,mase,timesfm,0.674
346
+ gh-archive,prs_opened,weekly,2026-01-26,scaled_crps,zero_model,0.437
347
+ gh-archive,prs_opened,weekly,2026-01-26,scaled_crps,seasonal_naive,0.182
348
+ gh-archive,prs_opened,weekly,2026-01-26,scaled_crps,auto_arima,0.145
349
+ gh-archive,prs_opened,weekly,2026-01-26,scaled_crps,auto_ets,0.151
350
+ gh-archive,prs_opened,weekly,2026-01-26,scaled_crps,auto_lgbm,0.122
351
+ gh-archive,prs_opened,weekly,2026-01-26,scaled_crps,chronos,0.091
352
+ gh-archive,prs_opened,weekly,2026-01-26,scaled_crps,moirai,0.100
353
+ gh-archive,prs_opened,weekly,2026-01-26,scaled_crps,timesfm,0.094
354
+ gh-archive,issues_opened,weekly,2026-01-12,mase,zero_model,3.189
355
+ gh-archive,issues_opened,weekly,2026-01-12,mase,seasonal_naive,1.068
356
+ gh-archive,issues_opened,weekly,2026-01-12,mase,auto_arima,0.945
357
+ gh-archive,issues_opened,weekly,2026-01-12,mase,auto_ets,0.962
358
+ gh-archive,issues_opened,weekly,2026-01-12,mase,auto_lgbm,0.834
359
+ gh-archive,issues_opened,weekly,2026-01-12,mase,chronos,0.712
360
+ gh-archive,issues_opened,weekly,2026-01-12,mase,moirai,0.758
361
+ gh-archive,issues_opened,weekly,2026-01-12,mase,timesfm,0.731
362
+ gh-archive,issues_opened,weekly,2026-01-12,scaled_crps,zero_model,0.468
363
+ gh-archive,issues_opened,weekly,2026-01-12,scaled_crps,seasonal_naive,0.195
364
+ gh-archive,issues_opened,weekly,2026-01-12,scaled_crps,auto_arima,0.158
365
+ gh-archive,issues_opened,weekly,2026-01-12,scaled_crps,auto_ets,0.164
366
+ gh-archive,issues_opened,weekly,2026-01-12,scaled_crps,auto_lgbm,0.131
367
+ gh-archive,issues_opened,weekly,2026-01-12,scaled_crps,chronos,0.101
368
+ gh-archive,issues_opened,weekly,2026-01-12,scaled_crps,moirai,0.110
369
+ gh-archive,issues_opened,weekly,2026-01-12,scaled_crps,timesfm,0.104
370
+ gh-archive,issues_opened,weekly,2026-01-19,mase,zero_model,3.147
371
+ gh-archive,issues_opened,weekly,2026-01-19,mase,seasonal_naive,1.051
372
+ gh-archive,issues_opened,weekly,2026-01-19,mase,auto_arima,0.932
373
+ gh-archive,issues_opened,weekly,2026-01-19,mase,auto_ets,0.948
374
+ gh-archive,issues_opened,weekly,2026-01-19,mase,auto_lgbm,0.821
375
+ gh-archive,issues_opened,weekly,2026-01-19,mase,chronos,0.698
376
+ gh-archive,issues_opened,weekly,2026-01-19,mase,moirai,0.745
377
+ gh-archive,issues_opened,weekly,2026-01-19,mase,timesfm,0.718
378
+ gh-archive,issues_opened,weekly,2026-01-19,scaled_crps,zero_model,0.461
379
+ gh-archive,issues_opened,weekly,2026-01-19,scaled_crps,seasonal_naive,0.191
380
+ gh-archive,issues_opened,weekly,2026-01-19,scaled_crps,auto_arima,0.154
381
+ gh-archive,issues_opened,weekly,2026-01-19,scaled_crps,auto_ets,0.160
382
+ gh-archive,issues_opened,weekly,2026-01-19,scaled_crps,auto_lgbm,0.127
383
+ gh-archive,issues_opened,weekly,2026-01-19,scaled_crps,chronos,0.097
384
+ gh-archive,issues_opened,weekly,2026-01-19,scaled_crps,moirai,0.106
385
+ gh-archive,issues_opened,weekly,2026-01-19,scaled_crps,timesfm,0.100
386
+ gh-archive,issues_opened,weekly,2026-01-26,mase,zero_model,3.221
387
+ gh-archive,issues_opened,weekly,2026-01-26,mase,seasonal_naive,1.082
388
+ gh-archive,issues_opened,weekly,2026-01-26,mase,auto_arima,0.958
389
+ gh-archive,issues_opened,weekly,2026-01-26,mase,auto_ets,0.974
390
+ gh-archive,issues_opened,weekly,2026-01-26,mase,auto_lgbm,0.847
391
+ gh-archive,issues_opened,weekly,2026-01-26,mase,chronos,0.724
392
+ gh-archive,issues_opened,weekly,2026-01-26,mase,moirai,0.771
393
+ gh-archive,issues_opened,weekly,2026-01-26,mase,timesfm,0.743
394
+ gh-archive,issues_opened,weekly,2026-01-26,scaled_crps,zero_model,0.474
395
+ gh-archive,issues_opened,weekly,2026-01-26,scaled_crps,seasonal_naive,0.198
396
+ gh-archive,issues_opened,weekly,2026-01-26,scaled_crps,auto_arima,0.161
397
+ gh-archive,issues_opened,weekly,2026-01-26,scaled_crps,auto_ets,0.167
398
+ gh-archive,issues_opened,weekly,2026-01-26,scaled_crps,auto_lgbm,0.134
399
+ gh-archive,issues_opened,weekly,2026-01-26,scaled_crps,chronos,0.104
400
+ gh-archive,issues_opened,weekly,2026-01-26,scaled_crps,moirai,0.113
401
+ gh-archive,issues_opened,weekly,2026-01-26,scaled_crps,timesfm,0.107
402
+ gh-archive,pushes,weekly,2026-01-12,mase,zero_model,2.312
403
+ gh-archive,pushes,weekly,2026-01-12,mase,seasonal_naive,0.891
404
+ gh-archive,pushes,weekly,2026-01-12,mase,auto_arima,0.745
405
+ gh-archive,pushes,weekly,2026-01-12,mase,auto_ets,0.762
406
+ gh-archive,pushes,weekly,2026-01-12,mase,auto_lgbm,0.651
407
+ gh-archive,pushes,weekly,2026-01-12,mase,chronos,0.523
408
+ gh-archive,pushes,weekly,2026-01-12,mase,moirai,0.571
409
+ gh-archive,pushes,weekly,2026-01-12,mase,timesfm,0.548
410
+ gh-archive,pushes,weekly,2026-01-12,scaled_crps,zero_model,0.351
411
+ gh-archive,pushes,weekly,2026-01-12,scaled_crps,seasonal_naive,0.148
412
+ gh-archive,pushes,weekly,2026-01-12,scaled_crps,auto_arima,0.112
413
+ gh-archive,pushes,weekly,2026-01-12,scaled_crps,auto_ets,0.118
414
+ gh-archive,pushes,weekly,2026-01-12,scaled_crps,auto_lgbm,0.092
415
+ gh-archive,pushes,weekly,2026-01-12,scaled_crps,chronos,0.065
416
+ gh-archive,pushes,weekly,2026-01-12,scaled_crps,moirai,0.073
417
+ gh-archive,pushes,weekly,2026-01-12,scaled_crps,timesfm,0.068
418
+ gh-archive,pushes,weekly,2026-01-19,mase,zero_model,2.278
419
+ gh-archive,pushes,weekly,2026-01-19,mase,seasonal_naive,0.878
420
+ gh-archive,pushes,weekly,2026-01-19,mase,auto_arima,0.731
421
+ gh-archive,pushes,weekly,2026-01-19,mase,auto_ets,0.749
422
+ gh-archive,pushes,weekly,2026-01-19,mase,auto_lgbm,0.638
423
+ gh-archive,pushes,weekly,2026-01-19,mase,chronos,0.512
424
+ gh-archive,pushes,weekly,2026-01-19,mase,moirai,0.558
425
+ gh-archive,pushes,weekly,2026-01-19,mase,timesfm,0.534
426
+ gh-archive,pushes,weekly,2026-01-19,scaled_crps,zero_model,0.345
427
+ gh-archive,pushes,weekly,2026-01-19,scaled_crps,seasonal_naive,0.144
428
+ gh-archive,pushes,weekly,2026-01-19,scaled_crps,auto_arima,0.108
429
+ gh-archive,pushes,weekly,2026-01-19,scaled_crps,auto_ets,0.114
430
+ gh-archive,pushes,weekly,2026-01-19,scaled_crps,auto_lgbm,0.088
431
+ gh-archive,pushes,weekly,2026-01-19,scaled_crps,chronos,0.062
432
+ gh-archive,pushes,weekly,2026-01-19,scaled_crps,moirai,0.070
433
+ gh-archive,pushes,weekly,2026-01-19,scaled_crps,timesfm,0.065
434
+ gh-archive,pushes,weekly,2026-01-26,mase,zero_model,2.351
435
+ gh-archive,pushes,weekly,2026-01-26,mase,seasonal_naive,0.904
436
+ gh-archive,pushes,weekly,2026-01-26,mase,auto_arima,0.758
437
+ gh-archive,pushes,weekly,2026-01-26,mase,auto_ets,0.774
438
+ gh-archive,pushes,weekly,2026-01-26,mase,auto_lgbm,0.664
439
+ gh-archive,pushes,weekly,2026-01-26,mase,chronos,0.535
440
+ gh-archive,pushes,weekly,2026-01-26,mase,moirai,0.584
441
+ gh-archive,pushes,weekly,2026-01-26,mase,timesfm,0.558
442
+ gh-archive,pushes,weekly,2026-01-26,scaled_crps,zero_model,0.357
443
+ gh-archive,pushes,weekly,2026-01-26,scaled_crps,seasonal_naive,0.151
444
+ gh-archive,pushes,weekly,2026-01-26,scaled_crps,auto_arima,0.115
445
+ gh-archive,pushes,weekly,2026-01-26,scaled_crps,auto_ets,0.121
446
+ gh-archive,pushes,weekly,2026-01-26,scaled_crps,auto_lgbm,0.095
447
+ gh-archive,pushes,weekly,2026-01-26,scaled_crps,chronos,0.068
448
+ gh-archive,pushes,weekly,2026-01-26,scaled_crps,moirai,0.076
449
+ gh-archive,pushes,weekly,2026-01-26,scaled_crps,timesfm,0.071
pyproject.toml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "impermanent-leaderboard"
3
+ version = "0.1.0"
4
+ description = "Leaderboard and rank-through-time plots for forecasting model evaluation results"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "gradio>=6.5.1",
9
+ "ipython>=9.10.0",
10
+ "matplotlib>=3.10.8",
11
+ "pandas>=3.0.0",
12
+ ]
rank_through_time.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib
2
+ matplotlib.use("Agg")
3
+ import pathlib
4
+ import pandas as pd
5
+ import matplotlib.pyplot as plt
6
+ import matplotlib.ticker as mticker
7
+
8
+
9
+ def _add_ranks(df):
10
+ df = df.copy()
11
+ df["cutoff"] = pd.to_datetime(df["cutoff"])
12
+ df["rank"] = df.groupby(["metric", "subdataset", "frequency", "cutoff"])[
13
+ "value"
14
+ ].rank(method="min")
15
+ return df
16
+
17
+
18
def _style_rank_ax(ax, n_models):
    """Apply the shared look of a rank-over-time axes.

    ``ax`` is the matplotlib axes to style and ``n_models`` is the number of
    models being ranked, which sets the extent of the y axis.
    """
    best_edge = 0.5
    worst_edge = n_models + 0.5
    ax.set_ylabel("Rank")
    # Passing the larger limit first inverts the axis so rank 1 sits on top.
    ax.set_ylim(worst_edge, best_edge)
    # One major tick per integer rank.
    one_tick_per_rank = mticker.MultipleLocator(1)
    ax.yaxis.set_major_locator(one_tick_per_rank)
    x_tick_style = {"axis": "x", "rotation": 45}
    ax.tick_params(**x_tick_style)
    ax.grid(True, alpha=0.3)
24
+
25
+
26
+ def _style_value_ax(ax, metric):
27
+ ax.set_ylabel(metric)
28
+ ax.tick_params(axis="x", rotation=45)
29
+ ax.grid(True, alpha=0.3)
30
+
31
+
32
+ def _finish_fig(fig):
33
+ """Add a single shared legend at the bottom and adjust layout."""
34
+ handles, labels = fig.axes[0].get_legend_handles_labels()
35
+ fig.legend(
36
+ handles, labels,
37
+ loc="lower center",
38
+ ncol=min(len(labels), 4),
39
+ fontsize="small",
40
+ bbox_to_anchor=(0.5, 0),
41
+ )
42
+ fig.subplots_adjust(bottom=0.18)
43
+ fig.tight_layout(rect=[0, 0.08, 1, 1])
44
+
45
+
46
+ # ── Public figure builders ───────────────────────────────────────────────────
47
+
48
+
49
def plot_rank_per_category(df, metric):
    """Grid of rank-over-time subplots, one per (subdataset, frequency).

    Args:
        df: Long-format results with the columns ``metric``, ``subdataset``,
            ``frequency``, ``cutoff``, ``model`` and ``value``.
        metric: Value of the ``metric`` column to plot.

    Returns:
        The matplotlib figure, with one row of axes per category.

    Raises:
        ValueError: If ``df`` contains no (subdataset, frequency) pair.
    """
    models = sorted(df["model"].unique())
    n_models = len(models)
    categories = sorted(
        df[["subdataset", "frequency"]]
        .drop_duplicates()
        .itertuples(index=False, name=None)
    )
    if not categories:
        # Fail with a clear message before ranking or creating a figure.
        raise ValueError("df contains no (subdataset, frequency) categories to plot")

    df = _add_ranks(df)
    # Pin one colour per model so a model keeps its colour in every subplot,
    # even when another model is missing from a category; the shared legend
    # relies on that.
    colors = {model: f"C{i}" for i, model in enumerate(models)}

    fig, axes = plt.subplots(
        nrows=len(categories), ncols=1,
        figsize=(10, 4 * len(categories)),
        sharex=False, sharey=True,
        squeeze=False,  # always a 2-D array, also for a single category
    )

    for ax, (subdataset, frequency) in zip(axes[:, 0], categories):
        sub = df[
            (df["metric"] == metric)
            & (df["subdataset"] == subdataset)
            & (df["frequency"] == frequency)
        ]
        pivot = sub.pivot_table(index="cutoff", columns="model", values="rank").sort_index()
        for model in models:
            if model in pivot.columns:
                ax.plot(
                    pivot.index, pivot[model],
                    marker="o", label=model, color=colors[model],
                )
        ax.set_title(f"{subdataset} / {frequency}")
        _style_rank_ax(ax, n_models)

    fig.suptitle(f"Model rank through time — {metric}", fontsize=14)
    _finish_fig(fig)
    return fig
84
+
85
+
86
def plot_avg_rank(df, metric):
    """Average rank across all categories over time.

    Ranks are computed per (metric, subdataset, frequency, cutoff) group and
    then averaged per (model, cutoff) over every category that has rows for
    ``metric``.

    Args:
        df: Long-format results with the columns ``metric``, ``subdataset``,
            ``frequency``, ``cutoff``, ``model`` and ``value``.
        metric: Value of the ``metric`` column to plot.

    Returns:
        The matplotlib figure holding a single axes.
    """
    df = _add_ranks(df)
    models = sorted(df["model"].unique())
    n_models = len(models)
    sub = df[df["metric"] == metric]
    avg_rank = (
        sub.groupby(["model", "cutoff"])["rank"]
        .mean()
        .reset_index()
        .rename(columns={"rank": "avg_rank"})
    )
    pivot = avg_rank.pivot_table(index="cutoff", columns="model", values="avg_rank").sort_index()

    fig, ax = plt.subplots(figsize=(10, 5))
    for model in models:
        # Models without any row for this metric have no column to draw.
        if model in pivot.columns:
            ax.plot(pivot.index, pivot[model], marker="o", label=model)
    ax.set_title(f"Average rank across all categories — {metric}", fontsize=14)
    ax.set_xlabel("Cutoff date")
    _style_rank_ax(ax, n_models)
    _finish_fig(fig)
    return fig
109
+
110
+
111
def plot_value_per_category(df, metric):
    """Grid of raw-metric-over-time subplots, one per (subdataset, frequency).

    Args:
        df: Long-format results with the columns ``metric``, ``subdataset``,
            ``frequency``, ``cutoff``, ``model`` and ``value``.
        metric: Value of the ``metric`` column to plot.

    Returns:
        The matplotlib figure, with one row of axes per category.

    Raises:
        ValueError: If ``df`` contains no (subdataset, frequency) pair.
    """
    df = df.copy()  # the caller's frame must not be modified
    categories = sorted(
        df[["subdataset", "frequency"]]
        .drop_duplicates()
        .itertuples(index=False, name=None)
    )
    if not categories:
        # Fail with a clear message before creating a figure.
        raise ValueError("df contains no (subdataset, frequency) categories to plot")

    df["cutoff"] = pd.to_datetime(df["cutoff"])
    models = sorted(df["model"].unique())
    # Pin one colour per model so a model keeps its colour in every subplot,
    # even when another model is missing from a category; the shared legend
    # relies on that.
    colors = {model: f"C{i}" for i, model in enumerate(models)}

    fig, axes = plt.subplots(
        nrows=len(categories), ncols=1,
        figsize=(10, 4 * len(categories)),
        sharex=False,
        squeeze=False,  # always a 2-D array, also for a single category
    )

    for ax, (subdataset, frequency) in zip(axes[:, 0], categories):
        sub = df[
            (df["metric"] == metric)
            & (df["subdataset"] == subdataset)
            & (df["frequency"] == frequency)
        ]
        pivot = sub.pivot_table(index="cutoff", columns="model", values="value").sort_index()
        for model in models:
            if model in pivot.columns:
                ax.plot(
                    pivot.index, pivot[model],
                    marker="o", label=model, color=colors[model],
                )
        ax.set_title(f"{subdataset} / {frequency}")
        _style_value_ax(ax, metric)

    fig.suptitle(f"Model {metric} through time", fontsize=14)
    _finish_fig(fig)
    return fig
146
+
147
+
148
def plot_avg_value(df, metric):
    """Plot each model's mean raw ``metric`` value per cutoff date.

    Values are averaged per (model, cutoff) over every category that has
    rows for ``metric``. The input frame is not modified; the finished
    matplotlib figure is returned.
    """
    frame = df.copy()
    frame["cutoff"] = pd.to_datetime(frame["cutoff"])
    models = sorted(frame["model"].unique())

    metric_rows = frame[frame["metric"] == metric]
    mean_per_cutoff = metric_rows.groupby(["model", "cutoff"])["value"].mean()
    avg_val = mean_per_cutoff.reset_index().rename(columns={"value": "avg_value"})
    pivot = avg_val.pivot_table(
        index="cutoff", columns="model", values="avg_value"
    ).sort_index()

    fig, ax = plt.subplots(figsize=(10, 5))
    # Only models that have data for this metric get a line.
    drawable = [name for name in models if name in pivot.columns]
    for name in drawable:
        ax.plot(pivot.index, pivot[name], marker="o", label=name)
    ax.set_title(f"Average {metric} across all categories", fontsize=14)
    ax.set_xlabel("Cutoff date")
    _style_value_ax(ax, metric)
    _finish_fig(fig)
    return fig
171
+
172
+
173
def plot_rank_for_subdataset(df, metric, subdataset):
    """Rank over time for a single subdataset (all frequencies as subplots).

    Args:
        df: Long-format results with the columns ``metric``, ``subdataset``,
            ``frequency``, ``cutoff``, ``model`` and ``value``.
        metric: Value of the ``metric`` column to plot.
        subdataset: Value of the ``subdataset`` column to plot.

    Returns:
        The matplotlib figure, with one row of axes per frequency.

    Raises:
        ValueError: If ``df`` has no rows for ``subdataset``.
    """
    frequencies = sorted(
        df[df["subdataset"] == subdataset]["frequency"].unique()
    )
    if not frequencies:
        # Fail with a clear message before ranking or creating a figure.
        raise ValueError(f"df contains no rows for subdataset {subdataset!r}")

    df = _add_ranks(df)
    models = sorted(df["model"].unique())
    n_models = len(models)
    # Pin one colour per model so a model keeps its colour in every subplot,
    # even when another model is missing for a frequency; the shared legend
    # relies on that.
    colors = {model: f"C{i}" for i, model in enumerate(models)}

    fig, axes = plt.subplots(
        nrows=len(frequencies), ncols=1,
        figsize=(10, 4 * len(frequencies)),
        sharex=False, sharey=True,
        squeeze=False,
    )

    for ax, frequency in zip(axes[:, 0], frequencies):
        sub = df[
            (df["metric"] == metric)
            & (df["subdataset"] == subdataset)
            & (df["frequency"] == frequency)
        ]
        pivot = sub.pivot_table(index="cutoff", columns="model", values="rank").sort_index()
        for model in models:
            if model in pivot.columns:
                ax.plot(
                    pivot.index, pivot[model],
                    marker="o", label=model, color=colors[model],
                )
        ax.set_title(f"{subdataset} / {frequency}")
        _style_rank_ax(ax, n_models)

    fig.suptitle(f"Model rank through time — {metric}", fontsize=14)
    _finish_fig(fig)
    return fig
206
+
207
+
208
def plot_value_for_subdataset(df, metric, subdataset):
    """Raw metric over time for a single subdataset (all frequencies as subplots).

    Args:
        df: Long-format results with the columns ``metric``, ``subdataset``,
            ``frequency``, ``cutoff``, ``model`` and ``value``.
        metric: Value of the ``metric`` column to plot.
        subdataset: Value of the ``subdataset`` column to plot.

    Returns:
        The matplotlib figure, with one row of axes per frequency.

    Raises:
        ValueError: If ``df`` has no rows for ``subdataset``.
    """
    frequencies = sorted(
        df[df["subdataset"] == subdataset]["frequency"].unique()
    )
    if not frequencies:
        # Fail with a clear message before creating a figure.
        raise ValueError(f"df contains no rows for subdataset {subdataset!r}")

    df = df.copy()  # the caller's frame must not be modified
    df["cutoff"] = pd.to_datetime(df["cutoff"])
    models = sorted(df["model"].unique())
    # Pin one colour per model so a model keeps its colour in every subplot,
    # even when another model is missing for a frequency; the shared legend
    # relies on that.
    colors = {model: f"C{i}" for i, model in enumerate(models)}

    fig, axes = plt.subplots(
        nrows=len(frequencies), ncols=1,
        figsize=(10, 4 * len(frequencies)),
        sharex=False,
        squeeze=False,
    )

    for ax, frequency in zip(axes[:, 0], frequencies):
        sub = df[
            (df["metric"] == metric)
            & (df["subdataset"] == subdataset)
            & (df["frequency"] == frequency)
        ]
        pivot = sub.pivot_table(index="cutoff", columns="model", values="value").sort_index()
        for model in models:
            if model in pivot.columns:
                ax.plot(
                    pivot.index, pivot[model],
                    marker="o", label=model, color=colors[model],
                )
        ax.set_title(f"{subdataset} / {frequency}")
        _style_value_ax(ax, metric)

    fig.suptitle(f"Model {metric} through time", fontsize=14)
    _finish_fig(fig)
    return fig
241
+
242
+
243
+ # ── CLI: save all figures to disk ────────────────────────────────────────────
244
+
245
if __name__ == "__main__":
    # Destination folder for the PNG files; created on demand.
    OUT = pathlib.Path("figures/rank_through_time")
    OUT.mkdir(parents=True, exist_ok=True)

    # Load the results and drop the rows of the "zero_model" entry.
    raw = pd.read_csv("mock_evaluation_results.csv")
    raw = raw.loc[raw["model"] != "zero_model"]
    metrics = sorted(raw["metric"].unique())

    # File-name prefix -> figure builder, in the order the files are written.
    builders = {
        "rank_per_category": plot_rank_per_category,
        "value_per_category": plot_value_per_category,
        "avg_rank": plot_avg_rank,
        "avg_value": plot_avg_value,
    }

    for metric in metrics:
        for prefix, build in builders.items():
            path = OUT / f"{prefix}_{metric}.png"
            fig = build(raw, metric)
            fig.savefig(path, dpi=150, bbox_inches="tight")
            # Close the figure once it is saved so figures do not accumulate.
            plt.close(fig)
            print(f"Saved {path}")

    print("Done.")
uv.lock ADDED
The diff for this file is too large to render. See raw diff