edit
Browse files- leaderboard.py +16 -0
- main.py +54 -41
- rank_through_time.py +1 -0
leaderboard.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
|
|
|
|
|
|
|
| 3 |
|
| 4 |
def compute_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
|
| 5 |
"""Compute average rank per model for each metric.
|
|
@@ -72,9 +74,23 @@ def compute_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 72 |
elif col.startswith("avg "):
|
| 73 |
leaderboard[col] = leaderboard[col].round(4)
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
return leaderboard
|
| 76 |
|
| 77 |
|
|
|
|
|
|
|
| 78 |
if __name__ == "__main__":
|
| 79 |
df = pd.read_csv("mock_evaluation_results.csv")
|
| 80 |
lb = compute_leaderboard(df)
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
|
| 3 |
+
MEDALS = {0: "🥇", 1: "🥈", 2: "🥉"}
|
| 4 |
+
|
| 5 |
|
| 6 |
def compute_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
|
| 7 |
"""Compute average rank per model for each metric.
|
|
|
|
| 74 |
elif col.startswith("avg "):
|
| 75 |
leaderboard[col] = leaderboard[col].round(4)
|
| 76 |
|
| 77 |
+
# Add medals to model names
|
| 78 |
+
leaderboard = leaderboard.reset_index(drop=True)
|
| 79 |
+
leaderboard["model"] = [
|
| 80 |
+
f"{MEDALS.get(i, '')} {m}".strip()
|
| 81 |
+
for i, m in enumerate(leaderboard["model"])
|
| 82 |
+
]
|
| 83 |
+
|
| 84 |
+
# Reorder: model, avg columns, rank columns
|
| 85 |
+
avg_cols = sorted(c for c in leaderboard.columns if c.startswith("avg "))
|
| 86 |
+
rank_cols = sorted(c for c in leaderboard.columns if c.startswith("rank "))
|
| 87 |
+
leaderboard = leaderboard[["model"] + avg_cols + rank_cols]
|
| 88 |
+
|
| 89 |
return leaderboard
|
| 90 |
|
| 91 |
|
| 92 |
+
|
| 93 |
+
|
| 94 |
if __name__ == "__main__":
|
| 95 |
df = pd.read_csv("mock_evaluation_results.csv")
|
| 96 |
lb = compute_leaderboard(df)
|
main.py
CHANGED
|
@@ -34,52 +34,34 @@ def build_table(metric, subdataset, models):
|
|
| 34 |
def build_plots(metric, subdataset):
|
| 35 |
fig_rank = plot_rank_for_subdataset(df, metric, subdataset)
|
| 36 |
fig_value = plot_value_for_subdataset(df, metric, subdataset)
|
| 37 |
-
|
| 38 |
-
ret = fig_rank, fig_value
|
| 39 |
-
return ret
|
| 40 |
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
with gr.Blocks(title="Impermanent Leaderboard") as app:
|
| 43 |
gr.Markdown("# Impermanent Leaderboard")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
-
with gr.Tab("Leaderboard"):
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
with gr.Row():
|
| 53 |
-
metric_dd = gr.Dropdown(
|
| 54 |
-
choices=ALL_METRICS,
|
| 55 |
-
value=ALL_METRICS[0],
|
| 56 |
-
label="Metric",
|
| 57 |
-
)
|
| 58 |
-
subdataset_dd = gr.Dropdown(
|
| 59 |
-
choices=["All"] + ALL_SUBDATASETS,
|
| 60 |
-
value="All",
|
| 61 |
-
label="Subdataset",
|
| 62 |
-
)
|
| 63 |
-
models_dd = gr.Dropdown(
|
| 64 |
-
choices=ALL_MODELS,
|
| 65 |
-
value=ALL_MODELS,
|
| 66 |
-
multiselect=True,
|
| 67 |
-
label="Models",
|
| 68 |
-
)
|
| 69 |
-
|
| 70 |
-
results_table = gr.Dataframe(
|
| 71 |
-
value=build_table(ALL_METRICS[0], "All", ALL_MODELS),
|
| 72 |
-
label="Results",
|
| 73 |
)
|
| 74 |
|
| 75 |
-
|
| 76 |
-
control.change(
|
| 77 |
-
fn=build_table,
|
| 78 |
-
inputs=[metric_dd, subdataset_dd, models_dd],
|
| 79 |
-
outputs=results_table,
|
| 80 |
-
)
|
| 81 |
-
|
| 82 |
-
with gr.Tab("Results over time"):
|
| 83 |
with gr.Row():
|
| 84 |
time_metric_dd = gr.Dropdown(
|
| 85 |
choices=ALL_METRICS,
|
|
@@ -99,7 +81,6 @@ with gr.Blocks(title="Impermanent Leaderboard") as app:
|
|
| 99 |
fig_rank, fig_value = build_plots(metric, subdataset)
|
| 100 |
return fig_rank, fig_value
|
| 101 |
|
| 102 |
-
# Initial render
|
| 103 |
app.load(
|
| 104 |
fn=update_plots,
|
| 105 |
inputs=[time_metric_dd, time_subdataset_dd],
|
|
@@ -113,5 +94,37 @@ with gr.Blocks(title="Impermanent Leaderboard") as app:
|
|
| 113 |
outputs=[rank_plot, value_plot],
|
| 114 |
)
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
if __name__ == "__main__":
|
| 117 |
-
app.launch()
|
|
|
|
| 34 |
def build_plots(metric, subdataset):
|
| 35 |
fig_rank = plot_rank_for_subdataset(df, metric, subdataset)
|
| 36 |
fig_value = plot_value_for_subdataset(df, metric, subdataset)
|
| 37 |
+
return fig_rank, fig_value
|
|
|
|
|
|
|
| 38 |
|
| 39 |
|
| 40 |
+
HEADER_CSS = """\
|
| 41 |
+
.table-wrap thead th {
|
| 42 |
+
background-color: #e2e8f0 !important;
|
| 43 |
+
}
|
| 44 |
+
"""
|
| 45 |
+
|
| 46 |
with gr.Blocks(title="Impermanent Leaderboard") as app:
|
| 47 |
gr.Markdown("# Impermanent Leaderboard")
|
| 48 |
+
gr.Markdown(
|
| 49 |
+
"A **live** time-series forecasting benchmark designed to avoid data contamination. "
|
| 50 |
+
"Automated pipelines continuously fetch fresh data from GitHub β including the number of "
|
| 51 |
+
"open issues, opened PRs, pushes, and stars β ensuring that models are always evaluated "
|
| 52 |
+
"on data they could not have seen during training."
|
| 53 |
+
)
|
| 54 |
|
| 55 |
+
with gr.Tab("Leaderboard 🏆"):
|
| 56 |
+
lb = compute_leaderboard(df)
|
| 57 |
+
gr.Dataframe(
|
| 58 |
+
value=lb,
|
| 59 |
+
#label="Leaderboard",
|
| 60 |
+
interactive=False,
|
| 61 |
+
headers=[f"**{c}**" for c in lb.columns],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
)
|
| 63 |
|
| 64 |
+
with gr.Tab("Results over time 📈"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
with gr.Row():
|
| 66 |
time_metric_dd = gr.Dropdown(
|
| 67 |
choices=ALL_METRICS,
|
|
|
|
| 81 |
fig_rank, fig_value = build_plots(metric, subdataset)
|
| 82 |
return fig_rank, fig_value
|
| 83 |
|
|
|
|
| 84 |
app.load(
|
| 85 |
fn=update_plots,
|
| 86 |
inputs=[time_metric_dd, time_subdataset_dd],
|
|
|
|
| 94 |
outputs=[rank_plot, value_plot],
|
| 95 |
)
|
| 96 |
|
| 97 |
+
with gr.Tab("All results 📊"):
|
| 98 |
+
with gr.Row():
|
| 99 |
+
metric_dd = gr.Dropdown(
|
| 100 |
+
choices=ALL_METRICS,
|
| 101 |
+
value=ALL_METRICS[0],
|
| 102 |
+
label="Metric",
|
| 103 |
+
)
|
| 104 |
+
subdataset_dd = gr.Dropdown(
|
| 105 |
+
choices=["All"] + ALL_SUBDATASETS,
|
| 106 |
+
value="All",
|
| 107 |
+
label="Subdataset",
|
| 108 |
+
)
|
| 109 |
+
models_dd = gr.Dropdown(
|
| 110 |
+
choices=ALL_MODELS,
|
| 111 |
+
value=ALL_MODELS,
|
| 112 |
+
multiselect=True,
|
| 113 |
+
label="Models",
|
| 114 |
+
)
|
| 115 |
+
|
| 116 |
+
results_table = gr.Dataframe(
|
| 117 |
+
value=build_table(ALL_METRICS[0], "All", ALL_MODELS),
|
| 118 |
+
label="Results",
|
| 119 |
+
interactive=False,
|
| 120 |
+
)
|
| 121 |
+
|
| 122 |
+
for control in [metric_dd, subdataset_dd, models_dd]:
|
| 123 |
+
control.change(
|
| 124 |
+
fn=build_table,
|
| 125 |
+
inputs=[metric_dd, subdataset_dd, models_dd],
|
| 126 |
+
outputs=results_table,
|
| 127 |
+
)
|
| 128 |
+
|
| 129 |
if __name__ == "__main__":
|
| 130 |
+
app.launch(css=HEADER_CSS)
|
rank_through_time.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import matplotlib
|
| 2 |
matplotlib.use("Agg")
|
|
|
|
| 3 |
import pathlib
|
| 4 |
import pandas as pd
|
| 5 |
import matplotlib.pyplot as plt
|
|
|
|
| 1 |
import matplotlib
|
| 2 |
matplotlib.use("Agg")
|
| 3 |
+
matplotlib.rcParams["figure.dpi"] = 150
|
| 4 |
import pathlib
|
| 5 |
import pandas as pd
|
| 6 |
import matplotlib.pyplot as plt
|