eval-leaderboard

Sleeping

xeon27 committed on Feb 12

Commit

1f6d554

1 Parent(s): b1accaf

Fix bug

Files changed (2) hide show

app.py CHANGED Viewed

@@ -64,6 +64,9 @@ def init_leaderboard(dataframe, benchmark_type):
     if benchmark_type == "agentic":
         # Include agent column
         non_task_cols.append("Agent")
     AutoEvalColumnSubset = [c for c in fields(AutoEvalColumn) if ((c.name in non_task_cols) or (TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "")==benchmark_type))]
     # styler = dataframe.style.apply(bold_max, subset=pd.IndexSlice[:, dataframe.columns[1:]])

     if benchmark_type == "agentic":
         # Include agent column
         non_task_cols.append("Agent")
+    elif benchmark_type == "base":
+        # Drop agent column
+        dataframe = dataframe.drop(columns=["Agent"])
     AutoEvalColumnSubset = [c for c in fields(AutoEvalColumn) if ((c.name in non_task_cols) or (TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "")==benchmark_type))]
     # styler = dataframe.style.apply(bold_max, subset=pd.IndexSlice[:, dataframe.columns[1:]])

src/populate.py CHANGED Viewed

@@ -46,7 +46,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     df = df[cols].round(decimals=2)
     # subset for model and benchmark cols
-    df = df[[AutoEvalColumn.model.name] + benchmark_cols]
     # drop rows for which all benchmark cols are empty
     df = df.dropna(subset=benchmark_cols, axis=0, how="all")

     df = df[cols].round(decimals=2)
     # subset for model and benchmark cols
+    df = df[[AutoEvalColumn.model.name, AutoEvalColumn.agent.name] + benchmark_cols]
     # drop rows for which all benchmark cols are empty
     df = df.dropna(subset=benchmark_cols, axis=0, how="all")