Spaces:
Sleeping
Sleeping
xeon27
committed on
Commit
·
1f6d554
1
Parent(s):
b1accaf
Fix bug
Browse files
- app.py +3 -0
- src/populate.py +1 -1
app.py
CHANGED
|
@@ -64,6 +64,9 @@ def init_leaderboard(dataframe, benchmark_type):
|
|
| 64 |
if benchmark_type == "agentic":
|
| 65 |
# Include agent column
|
| 66 |
non_task_cols.append("Agent")
|
|
|
|
|
|
|
|
|
|
| 67 |
AutoEvalColumnSubset = [c for c in fields(AutoEvalColumn) if ((c.name in non_task_cols) or (TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "")==benchmark_type))]
|
| 68 |
|
| 69 |
# styler = dataframe.style.apply(bold_max, subset=pd.IndexSlice[:, dataframe.columns[1:]])
|
|
|
|
| 64 |
if benchmark_type == "agentic":
|
| 65 |
# Include agent column
|
| 66 |
non_task_cols.append("Agent")
|
| 67 |
+
elif benchmark_type == "base":
|
| 68 |
+
# Drop agent column
|
| 69 |
+
dataframe = dataframe.drop(columns=["Agent"])
|
| 70 |
AutoEvalColumnSubset = [c for c in fields(AutoEvalColumn) if ((c.name in non_task_cols) or (TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "")==benchmark_type))]
|
| 71 |
|
| 72 |
# styler = dataframe.style.apply(bold_max, subset=pd.IndexSlice[:, dataframe.columns[1:]])
|
src/populate.py
CHANGED
|
@@ -46,7 +46,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
| 46 |
df = df[cols].round(decimals=2)
|
| 47 |
|
| 48 |
# subset for model and benchmark cols
|
| 49 |
-
df = df[[AutoEvalColumn.model.name] + benchmark_cols]
|
| 50 |
|
| 51 |
# drop rows for which all benchmark cols are empty
|
| 52 |
df = df.dropna(subset=benchmark_cols, axis=0, how="all")
|
|
|
|
| 46 |
df = df[cols].round(decimals=2)
|
| 47 |
|
| 48 |
# subset for model and benchmark cols
|
| 49 |
+
df = df[[AutoEvalColumn.model.name, AutoEvalColumn.agent.name] + benchmark_cols]
|
| 50 |
|
| 51 |
# drop rows for which all benchmark cols are empty
|
| 52 |
df = df.dropna(subset=benchmark_cols, axis=0, how="all")
|