Commit 979e0a3 · committed by ycy · 1 parent: ad93dc4

test

Files changed:
- src/about.py (+1, -1)
- src/display/utils.py (+2, -2)
- src/leaderboard/read_evals.py (+2, -2)
- src/populate.py (+1, -1)
src/about.py CHANGED

@@ -13,7 +13,7 @@ class Task:
 #TODO metrics
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("Score_avg", "score", "Score_Avg")
+    task0 = Task("Score_avg", "score", "Score_Avg ⬆️")
     task1 = Task("Score_gpt", "score", "Score_GPT")
     task2 = Task("Score_cog", "score", "Score_COG")
     task3 = Task("Score_cpm", "score", "Score_CPM")
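For context, a minimal, self-contained sketch of the Task/Tasks pattern this file uses; the Task dataclass fields (benchmark, metric, col_name) follow the standard Hugging Face leaderboard template and are an assumption here, since the diff only shows the enum entries:

from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str  # task_key in the results JSON (assumed from the template)
    metric: str     # metric_key in the results JSON
    col_name: str   # header displayed in the leaderboard table

class Tasks(Enum):
    # After this commit, task0's display name carries the sort arrow.
    task0 = Task("Score_avg", "score", "Score_Avg ⬆️")
    task1 = Task("Score_gpt", "score", "Score_GPT")

# task.value.col_name is what src/display/utils.py reads when building columns.
for task in Tasks:
    print(task.name, "->", task.value.col_name)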
src/display/utils.py CHANGED

@@ -26,8 +26,8 @@ auto_eval_column_dict = []
 # Init
 auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
-#Scores
-auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
+# #Scores
+# auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "float", True)])
 # Model information
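The practical effect of commenting out the "average" entry is that the column dataclass built from this list loses its .average attribute, which is why the other files in this commit also stop referencing it. A rough sketch of that mechanism, assuming ColumnContent's fields and the make_dataclass step from the usual leaderboard template (neither appears in this diff):

from dataclasses import dataclass, make_dataclass

@dataclass(frozen=True)
class ColumnContent:
    # Assumed template definition: display name, dtype, shown-by-default, hidden flags.
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

auto_eval_column_dict = []
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
# The "average" entry is no longer appended here after this commit.

# The template collapses the [attr_name, type, default] triples into a
# frozen dataclass, so columns are addressed as AutoEvalColumn.model, etc.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

print(AutoEvalColumn.model.name)           # -> "Model"
print(hasattr(AutoEvalColumn, "average"))  # -> False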
src/leaderboard/read_evals.py CHANGED

@@ -111,7 +111,7 @@ class EvalResult:
         """Converts the Eval Result to a dict compatible with our dataframe display"""
 
         # The first one is the average
-        average = next(iter(self.results.values()))
+        #average = next(iter(self.results.values()))
         data_dict = {
             "eval_name": self.eval_name,  # not a column, just a save name,
             AutoEvalColumn.precision.name: self.precision.value.name,
@@ -121,7 +121,7 @@ class EvalResult:
             AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
             AutoEvalColumn.revision.name: self.revision,
-            AutoEvalColumn.average.name: average,
+            #AutoEvalColumn.average.name: average,
             AutoEvalColumn.license.name: self.license,
             AutoEvalColumn.likes.name: self.likes,
             AutoEvalColumn.params.name: self.num_params,
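The removed assignment depended on dict insertion order: since Python 3.7, next(iter(d.values())) returns the value of the first key inserted, so it picked up the score of the first task in Tasks ("Score_avg"). A tiny illustration with hypothetical scores:

# Hypothetical results dict in the shape self.results uses
# (benchmark name -> score); the values are made up.
results = {"Score_avg": 71.3, "Score_gpt": 68.0, "Score_cog": 65.5}

# What the old line computed: the first inserted value, i.e. Score_avg.
average = next(iter(results.values()))
print(average)  # -> 71.3

With the average column gone from AutoEvalColumn (see utils.py above), the AutoEvalColumn.average.name lookup would raise an AttributeError, so both the computation and the dict entry are commented out rather than deleted.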
src/populate.py CHANGED

@@ -54,7 +54,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
 
     df = pd.DataFrame.from_records(all_data_json)
 
-    df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
+    df = df.sort_values(by=[AutoEvalColumn.task0.name], ascending=False)
     df = df[cols].round(decimals=2)
 
     # filter out if any of the benchmarks have not been produced
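Since task0's display name is now "Score_Avg ⬆️" (see src/about.py above), the leaderboard still sorts by the average-style score, just through a per-task column instead of the removed average column. A small pandas sketch with hypothetical records:

import pandas as pd

# Hypothetical records in the shape get_leaderboard_df receives; the real
# sort key comes from AutoEvalColumn.task0.name.
all_data_json = [
    {"Model": "model-a", "Score_Avg ⬆️": 70.123, "Score_GPT": 68.456},
    {"Model": "model-b", "Score_Avg ⬆️": 75.987, "Score_GPT": 74.321},
]

df = pd.DataFrame.from_records(all_data_json)
df = df.sort_values(by=["Score_Avg ⬆️"], ascending=False)  # task0's col_name
df = df.round(decimals=2)
print(df)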