xeon27
committed on
Commit
·
b1accaf
1
Parent(s):
bad4049
Fix bug
Browse files
- app.py +1 -2
- src/leaderboard/read_evals.py +2 -0
app.py
CHANGED
|
@@ -62,8 +62,7 @@ def init_leaderboard(dataframe, benchmark_type):
|
|
| 62 |
|
| 63 |
non_task_cols = ["Model"]
|
| 64 |
if benchmark_type == "agentic":
|
| 65 |
-
#
|
| 66 |
-
dataframe["Agent"] = ["[Basic Agent](https://inspect.ai-safety-institute.org.uk/agents.html#sec-basic-agent)"]*(dataframe.shape[0])
|
| 67 |
non_task_cols.append("Agent")
|
| 68 |
AutoEvalColumnSubset = [c for c in fields(AutoEvalColumn) if ((c.name in non_task_cols) or (TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "")==benchmark_type))]
|
| 69 |
|
|
|
|
| 62 |
|
| 63 |
non_task_cols = ["Model"]
|
| 64 |
if benchmark_type == "agentic":
|
| 65 |
+
# Include agent column
|
|
|
|
| 66 |
non_task_cols.append("Agent")
|
| 67 |
AutoEvalColumnSubset = [c for c in fields(AutoEvalColumn) if ((c.name in non_task_cols) or (TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "")==benchmark_type))]
|
| 68 |
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -118,6 +118,8 @@ class EvalResult:
|
|
| 118 |
data_dict = {
|
| 119 |
"eval_name": self.eval_name, # not a column, just a save name,
|
| 120 |
AutoEvalColumn.model.name: make_clickable_model(self.model_version, self.revision),
|
|
|
|
|
|
|
| 121 |
}
|
| 122 |
|
| 123 |
for task in Tasks:
|
|
|
|
| 118 |
data_dict = {
|
| 119 |
"eval_name": self.eval_name, # not a column, just a save name,
|
| 120 |
AutoEvalColumn.model.name: make_clickable_model(self.model_version, self.revision),
|
| 121 |
+
# As of now all models use the basic inspect agent
|
| 122 |
+
AutoEvalColumn.agent.name: "[Basic Agent](https://inspect.ai-safety-institute.org.uk/agents.html#sec-basic-agent)"
|
| 123 |
}
|
| 124 |
|
| 125 |
for task in Tasks:
|