Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
- src/leaderboard/read_evals.py +87 -16
src/leaderboard/read_evals.py
CHANGED
|
@@ -108,22 +108,93 @@ class EvalResult:
|
|
| 108 |
def to_dict(self):
|
| 109 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
| 110 |
average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
| 111 |
-
data_dict = {
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
for task in Tasks:
|
| 129 |
data_dict[task.value.col_name] = self.results[task.value.benchmark]
|
|
|
|
| 108 |
def to_dict(self):
|
| 109 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
| 110 |
average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
| 111 |
+
# data_dict = {
|
| 112 |
+
# "eval_name": self.eval_name, # not a column, just a save name,
|
| 113 |
+
# AutoEvalColumn.precision.name: self.precision.value.name,
|
| 114 |
+
# AutoEvalColumn.model_type.name: self.model_type.value.name,
|
| 115 |
+
# AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
|
| 116 |
+
# AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
| 117 |
+
# AutoEvalColumn.architecture.name: self.architecture,
|
| 118 |
+
# AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
| 119 |
+
# AutoEvalColumn.dummy.name: self.full_model,
|
| 120 |
+
# AutoEvalColumn.revision.name: self.revision,
|
| 121 |
+
# AutoEvalColumn.average.name: average,
|
| 122 |
+
# AutoEvalColumn.license.name: self.license,
|
| 123 |
+
# AutoEvalColumn.likes.name: self.likes,
|
| 124 |
+
# AutoEvalColumn.params.name: self.num_params,
|
| 125 |
+
# AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
| 126 |
+
# }
|
| 127 |
+
try:
|
| 128 |
+
data_dict["eval_name"] = self.eval_name
|
| 129 |
+
except KeyError:
|
| 130 |
+
print(f"Could not find eval name")
|
| 131 |
+
|
| 132 |
+
try:
|
| 133 |
+
data_dict[AutoEvalColumn.precision.name] = self.precision.value.name
|
| 134 |
+
except KeyError:
|
| 135 |
+
print(f"Could not find precision")
|
| 136 |
+
|
| 137 |
+
try:
|
| 138 |
+
data_dict[AutoEvalColumn.model_type.name] = self.model_type.value.name
|
| 139 |
+
except KeyError:
|
| 140 |
+
print(f"Could not find model type")
|
| 141 |
+
|
| 142 |
+
try:
|
| 143 |
+
data_dict[AutoEvalColumn.model_type_symbol.name] = self.model_type.value.symbol
|
| 144 |
+
except KeyError:
|
| 145 |
+
print(f"Could not find model type symbol")
|
| 146 |
+
|
| 147 |
+
try:
|
| 148 |
+
data_dict[AutoEvalColumn.weight_type.name] = self.weight_type.value.name
|
| 149 |
+
except KeyError:
|
| 150 |
+
print(f"Could not find weight type")
|
| 151 |
+
|
| 152 |
+
try:
|
| 153 |
+
data_dict[AutoEvalColumn.architecture.name] = self.architecture
|
| 154 |
+
except KeyError:
|
| 155 |
+
print(f"Could not find architecture")
|
| 156 |
+
|
| 157 |
+
try:
|
| 158 |
+
data_dict[AutoEvalColumn.model.name] = make_clickable_model(self.full_model)
|
| 159 |
+
except KeyError:
|
| 160 |
+
print(f"Could not find model")
|
| 161 |
+
|
| 162 |
+
try:
|
| 163 |
+
data_dict[AutoEvalColumn.dummy.name] = self.full_model
|
| 164 |
+
except KeyError:
|
| 165 |
+
print(f"Could not find dummy")
|
| 166 |
+
|
| 167 |
+
try:
|
| 168 |
+
data_dict[AutoEvalColumn.revision.name] = self.revision
|
| 169 |
+
except KeyError:
|
| 170 |
+
print(f"Could not find revision")
|
| 171 |
+
|
| 172 |
+
try:
|
| 173 |
+
data_dict[AutoEvalColumn.average.name] = average
|
| 174 |
+
except KeyError:
|
| 175 |
+
print(f"Could not find average")
|
| 176 |
+
|
| 177 |
+
try:
|
| 178 |
+
data_dict[AutoEvalColumn.license.name] = self.license
|
| 179 |
+
except KeyError:
|
| 180 |
+
print(f"Could not find license")
|
| 181 |
+
|
| 182 |
+
try:
|
| 183 |
+
data_dict[AutoEvalColumn.likes.name] = self.likes
|
| 184 |
+
except KeyError:
|
| 185 |
+
print(f"Could not find likes")
|
| 186 |
+
|
| 187 |
+
try:
|
| 188 |
+
data_dict[AutoEvalColumn.params.name] = self.num_params
|
| 189 |
+
except KeyError:
|
| 190 |
+
print(f"Could not find params")
|
| 191 |
+
|
| 192 |
+
try:
|
| 193 |
+
data_dict[AutoEvalColumn.still_on_hub.name] = self.still_on_hub
|
| 194 |
+
except KeyError:
|
| 195 |
+
print(f"Could not find still on hub")
|
| 196 |
+
|
| 197 |
+
|
| 198 |
|
| 199 |
for task in Tasks:
|
| 200 |
data_dict[task.value.col_name] = self.results[task.value.benchmark]
|