xeon27
committed on
Commit
·
36244aa
1
Parent(s):
15e5347
Make task names clickable and link to inspect-evals repo
Browse files
- src/display/utils.py +1 -1
- src/populate.py +4 -0
src/display/utils.py
CHANGED
|
@@ -28,7 +28,7 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
|
|
| 28 |
#Scores
|
| 29 |
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
|
| 30 |
for task in Tasks:
|
| 31 |
-
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(
|
| 32 |
# # Model information
|
| 33 |
# auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
|
| 34 |
# auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
|
|
|
| 28 |
#Scores
|
| 29 |
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
|
| 30 |
for task in Tasks:
|
| 31 |
+
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "markdown", True)])
|
| 32 |
# # Model information
|
| 33 |
# auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
|
| 34 |
# auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
src/populate.py
CHANGED
|
@@ -14,6 +14,7 @@ for task in Tasks:
|
|
| 14 |
TASK_NAME_INVERSE_MAP[task.value.col_name] = {
|
| 15 |
"name": task.value.benchmark,
|
| 16 |
"type": task.value.type,
|
|
|
|
| 17 |
}
|
| 18 |
|
| 19 |
|
|
@@ -45,6 +46,9 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
| 45 |
for col in benchmark_cols:
|
| 46 |
df[col] = df[[AutoEvalColumn.model.name, col]].apply(lambda x: f"[{x[col]}]({get_inspect_log_url(model_name=x[AutoEvalColumn.model.name].split('>')[1].split('<')[0], benchmark_name=TASK_NAME_INVERSE_MAP[col]['name'])})", axis=1)
|
| 47 |
|
|
|
|
|
|
|
|
|
|
| 48 |
return df
|
| 49 |
|
| 50 |
|
|
|
|
| 14 |
TASK_NAME_INVERSE_MAP[task.value.col_name] = {
|
| 15 |
"name": task.value.benchmark,
|
| 16 |
"type": task.value.type,
|
| 17 |
+
"source": task.value.source,
|
| 18 |
}
|
| 19 |
|
| 20 |
|
|
|
|
| 46 |
for col in benchmark_cols:
|
| 47 |
df[col] = df[[AutoEvalColumn.model.name, col]].apply(lambda x: f"[{x[col]}]({get_inspect_log_url(model_name=x[AutoEvalColumn.model.name].split('>')[1].split('<')[0], benchmark_name=TASK_NAME_INVERSE_MAP[col]['name'])})", axis=1)
|
| 48 |
|
| 49 |
+
# make task names clickable and link to inspect-evals repository
|
| 50 |
+
df = df.rename(columns={col: f"[{col}]({TASK_NAME_INVERSE_MAP[col]['source']})" for col in benchmark_cols})
|
| 51 |
+
|
| 52 |
return df
|
| 53 |
|
| 54 |
|