geoalgo committed on
Commit
43752dc
·
1 Parent(s): d64ffef
Files changed (3) hide show
  1. leaderboard.py +16 -0
  2. main.py +54 -41
  3. rank_through_time.py +1 -0
leaderboard.py CHANGED
@@ -1,5 +1,7 @@
1
  import pandas as pd
2
 
 
 
3
 
4
  def compute_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
5
  """Compute average rank per model for each metric.
@@ -72,9 +74,23 @@ def compute_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
72
  elif col.startswith("avg "):
73
  leaderboard[col] = leaderboard[col].round(4)
74
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  return leaderboard
76
 
77
 
 
 
78
  if __name__ == "__main__":
79
  df = pd.read_csv("mock_evaluation_results.csv")
80
  lb = compute_leaderboard(df)
 
1
  import pandas as pd
2
 
3
+ MEDALS = {0: "🥇", 1: "🥈", 2: "🥉"}
4
+
5
 
6
  def compute_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
7
  """Compute average rank per model for each metric.
 
74
  elif col.startswith("avg "):
75
  leaderboard[col] = leaderboard[col].round(4)
76
 
77
+ # Add medals to model names
78
+ leaderboard = leaderboard.reset_index(drop=True)
79
+ leaderboard["model"] = [
80
+ f"{MEDALS.get(i, '')} {m}".strip()
81
+ for i, m in enumerate(leaderboard["model"])
82
+ ]
83
+
84
+ # Reorder: model, avg columns, rank columns
85
+ avg_cols = sorted(c for c in leaderboard.columns if c.startswith("avg "))
86
+ rank_cols = sorted(c for c in leaderboard.columns if c.startswith("rank "))
87
+ leaderboard = leaderboard[["model"] + avg_cols + rank_cols]
88
+
89
  return leaderboard
90
 
91
 
92
+
93
+
94
  if __name__ == "__main__":
95
  df = pd.read_csv("mock_evaluation_results.csv")
96
  lb = compute_leaderboard(df)
main.py CHANGED
@@ -34,52 +34,34 @@ def build_table(metric, subdataset, models):
34
  def build_plots(metric, subdataset):
35
  fig_rank = plot_rank_for_subdataset(df, metric, subdataset)
36
  fig_value = plot_value_for_subdataset(df, metric, subdataset)
37
- # Gradio expects the figure objects directly
38
- ret = fig_rank, fig_value
39
- return ret
40
 
41
 
 
 
 
 
 
 
42
  with gr.Blocks(title="Impermanent Leaderboard") as app:
43
  gr.Markdown("# Impermanent Leaderboard")
 
 
 
 
 
 
44
 
45
- with gr.Tab("Leaderboard"):
46
- leaderboard_table = gr.Dataframe(
47
- value=compute_leaderboard(df),
48
- label="Leaderboard",
49
- )
50
-
51
- with gr.Tab("All results"):
52
- with gr.Row():
53
- metric_dd = gr.Dropdown(
54
- choices=ALL_METRICS,
55
- value=ALL_METRICS[0],
56
- label="Metric",
57
- )
58
- subdataset_dd = gr.Dropdown(
59
- choices=["All"] + ALL_SUBDATASETS,
60
- value="All",
61
- label="Subdataset",
62
- )
63
- models_dd = gr.Dropdown(
64
- choices=ALL_MODELS,
65
- value=ALL_MODELS,
66
- multiselect=True,
67
- label="Models",
68
- )
69
-
70
- results_table = gr.Dataframe(
71
- value=build_table(ALL_METRICS[0], "All", ALL_MODELS),
72
- label="Results",
73
  )
74
 
75
- for control in [metric_dd, subdataset_dd, models_dd]:
76
- control.change(
77
- fn=build_table,
78
- inputs=[metric_dd, subdataset_dd, models_dd],
79
- outputs=results_table,
80
- )
81
-
82
- with gr.Tab("Results over time"):
83
  with gr.Row():
84
  time_metric_dd = gr.Dropdown(
85
  choices=ALL_METRICS,
@@ -99,7 +81,6 @@ with gr.Blocks(title="Impermanent Leaderboard") as app:
99
  fig_rank, fig_value = build_plots(metric, subdataset)
100
  return fig_rank, fig_value
101
 
102
- # Initial render
103
  app.load(
104
  fn=update_plots,
105
  inputs=[time_metric_dd, time_subdataset_dd],
@@ -113,5 +94,37 @@ with gr.Blocks(title="Impermanent Leaderboard") as app:
113
  outputs=[rank_plot, value_plot],
114
  )
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  if __name__ == "__main__":
117
- app.launch()
 
34
  def build_plots(metric, subdataset):
35
  fig_rank = plot_rank_for_subdataset(df, metric, subdataset)
36
  fig_value = plot_value_for_subdataset(df, metric, subdataset)
37
+ return fig_rank, fig_value
 
 
38
 
39
 
40
+ HEADER_CSS = """\
41
+ .table-wrap thead th {
42
+ background-color: #e2e8f0 !important;
43
+ }
44
+ """
45
+
46
  with gr.Blocks(title="Impermanent Leaderboard") as app:
47
  gr.Markdown("# Impermanent Leaderboard")
48
+ gr.Markdown(
49
+ "A **live** time-series forecasting benchmark designed to avoid data contamination. "
50
+ "Automated pipelines continuously fetch fresh data from GitHub — including the number of "
51
+ "open issues, opened PRs, pushes, and stars — ensuring that models are always evaluated "
52
+ "on data they could not have seen during training."
53
+ )
54
 
55
+ with gr.Tab("Leaderboard 🏆"):
56
+ lb = compute_leaderboard(df)
57
+ gr.Dataframe(
58
+ value=lb,
59
+ #label="Leaderboard",
60
+ interactive=False,
61
+ headers=[f"**{c}**" for c in lb.columns],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  )
63
 
64
+ with gr.Tab("Results over time 📈"):
 
 
 
 
 
 
 
65
  with gr.Row():
66
  time_metric_dd = gr.Dropdown(
67
  choices=ALL_METRICS,
 
81
  fig_rank, fig_value = build_plots(metric, subdataset)
82
  return fig_rank, fig_value
83
 
 
84
  app.load(
85
  fn=update_plots,
86
  inputs=[time_metric_dd, time_subdataset_dd],
 
94
  outputs=[rank_plot, value_plot],
95
  )
96
 
97
+ with gr.Tab("All results 📋"):
98
+ with gr.Row():
99
+ metric_dd = gr.Dropdown(
100
+ choices=ALL_METRICS,
101
+ value=ALL_METRICS[0],
102
+ label="Metric",
103
+ )
104
+ subdataset_dd = gr.Dropdown(
105
+ choices=["All"] + ALL_SUBDATASETS,
106
+ value="All",
107
+ label="Subdataset",
108
+ )
109
+ models_dd = gr.Dropdown(
110
+ choices=ALL_MODELS,
111
+ value=ALL_MODELS,
112
+ multiselect=True,
113
+ label="Models",
114
+ )
115
+
116
+ results_table = gr.Dataframe(
117
+ value=build_table(ALL_METRICS[0], "All", ALL_MODELS),
118
+ label="Results",
119
+ interactive=False,
120
+ )
121
+
122
+ for control in [metric_dd, subdataset_dd, models_dd]:
123
+ control.change(
124
+ fn=build_table,
125
+ inputs=[metric_dd, subdataset_dd, models_dd],
126
+ outputs=results_table,
127
+ )
128
+
129
  if __name__ == "__main__":
130
+ app.launch(css=HEADER_CSS)
rank_through_time.py CHANGED
@@ -1,5 +1,6 @@
1
  import matplotlib
2
  matplotlib.use("Agg")
 
3
  import pathlib
4
  import pandas as pd
5
  import matplotlib.pyplot as plt
 
1
  import matplotlib
2
  matplotlib.use("Agg")
3
+ matplotlib.rcParams["figure.dpi"] = 150
4
  import pathlib
5
  import pandas as pd
6
  import matplotlib.pyplot as plt