Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
59399bc
1
Parent(s):
4717ca8
Make model text exhibit precision if there's more than one eval or precision is not float16 or bfloat16
Browse files- src/display/formatting.py +12 -3
- src/leaderboard/read_evals.py +14 -1
src/display/formatting.py
CHANGED
|
@@ -24,7 +24,7 @@ def make_requests_clickable_model(model_name, json_path=None):
|
|
| 24 |
|
| 25 |
return model_hyperlink(link, model_name) + " " + model_hyperlink(details_link, "π")
|
| 26 |
|
| 27 |
-
def make_clickable_model(model_name, json_path=None, revision=None):
|
| 28 |
link = f"https://huggingface.co/{model_name}"
|
| 29 |
|
| 30 |
#details_model_name = model_name.replace("/", "__")
|
|
@@ -35,11 +35,20 @@ def make_clickable_model(model_name, json_path=None, revision=None):
|
|
| 35 |
if json_path is not None:
|
| 36 |
details_link = f"https://huggingface.co/datasets/{RESULTS_REPO}/blob/main/{model_name}/{json_path}"
|
| 37 |
|
|
|
|
| 38 |
if revision is not None and revision != "" and revision != "main":
|
| 39 |
if len(revision) > 12:
|
| 40 |
revision = revision[:7]
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
|
| 45 |
def styled_error(error):
|
|
|
|
| 24 |
|
| 25 |
return model_hyperlink(link, model_name) + " " + model_hyperlink(details_link, "π")
|
| 26 |
|
| 27 |
+
def make_clickable_model(model_name, json_path=None, revision=None, precision=None, num_evals_same_model=1):
|
| 28 |
link = f"https://huggingface.co/{model_name}"
|
| 29 |
|
| 30 |
#details_model_name = model_name.replace("/", "__")
|
|
|
|
| 35 |
if json_path is not None:
|
| 36 |
details_link = f"https://huggingface.co/datasets/{RESULTS_REPO}/blob/main/{model_name}/{json_path}"
|
| 37 |
|
| 38 |
+
posfix = ""
|
| 39 |
if revision is not None and revision != "" and revision != "main":
|
| 40 |
if len(revision) > 12:
|
| 41 |
revision = revision[:7]
|
| 42 |
+
posfix += f" (rev: {revision})"
|
| 43 |
+
if precision is not None:
|
| 44 |
+
if num_evals_same_model == 1 and precision in ['float16', 'bfloat16']:
|
| 45 |
+
pass
|
| 46 |
+
else:
|
| 47 |
+
#if precision not in model_name:
|
| 48 |
+
posfix += f" [{precision}]"
|
| 49 |
+
posfix = posfix.strip()
|
| 50 |
+
|
| 51 |
+
return model_hyperlink(link, model_name) + " " + model_hyperlink(details_link, "π") + " " + posfix
|
| 52 |
|
| 53 |
|
| 54 |
def styled_error(error):
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -43,6 +43,7 @@ class EvalResult:
|
|
| 43 |
eval_time: float = 0.0
|
| 44 |
original_benchmark_average: float = None
|
| 45 |
hidden: bool = False # Do not show on the leaderboard
|
|
|
|
| 46 |
|
| 47 |
@classmethod
|
| 48 |
def init_from_json_file(self, json_filepath, is_original=False):
|
|
@@ -188,7 +189,7 @@ class EvalResult:
|
|
| 188 |
AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
|
| 189 |
AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
| 190 |
AutoEvalColumn.architecture.name: self.architecture,
|
| 191 |
-
AutoEvalColumn.model.name: make_clickable_model(self.full_model, self.json_filename, revision=self.revision),
|
| 192 |
AutoEvalColumn.dummy.name: self.full_model,
|
| 193 |
AutoEvalColumn.revision.name: self.revision,
|
| 194 |
AutoEvalColumn.average.name: average,
|
|
@@ -263,6 +264,7 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
|
|
| 263 |
with open(dynamic_path) as f:
|
| 264 |
dynamic_data = json.load(f)
|
| 265 |
|
|
|
|
| 266 |
eval_results = {}
|
| 267 |
for model_result_filepath in model_result_filepaths:
|
| 268 |
# Creation of result
|
|
@@ -279,10 +281,21 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
|
|
| 279 |
else:
|
| 280 |
eval_results[eval_name] = eval_result
|
| 281 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
results = []
|
| 283 |
for v in eval_results.values():
|
| 284 |
try:
|
| 285 |
if v.status in ["FINISHED", "PENDING_NEW_EVAL" if SHOW_INCOMPLETE_EVALS else "FINISHED"] and not v.hidden:
|
|
|
|
|
|
|
|
|
|
| 286 |
v.to_dict() # we test if the dict version is complete
|
| 287 |
results.append(v)
|
| 288 |
except KeyError as e: # not all eval values present
|
|
|
|
| 43 |
eval_time: float = 0.0
|
| 44 |
original_benchmark_average: float = None
|
| 45 |
hidden: bool = False # Do not show on the leaderboard
|
| 46 |
+
num_evals_model_rev: int = 1
|
| 47 |
|
| 48 |
@classmethod
|
| 49 |
def init_from_json_file(self, json_filepath, is_original=False):
|
|
|
|
| 189 |
AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
|
| 190 |
AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
| 191 |
AutoEvalColumn.architecture.name: self.architecture,
|
| 192 |
+
AutoEvalColumn.model.name: make_clickable_model(self.full_model, self.json_filename, revision=self.revision, precision=self.precision.value.name, num_evals_same_model=self.num_evals_model_rev),
|
| 193 |
AutoEvalColumn.dummy.name: self.full_model,
|
| 194 |
AutoEvalColumn.revision.name: self.revision,
|
| 195 |
AutoEvalColumn.average.name: average,
|
|
|
|
| 264 |
with open(dynamic_path) as f:
|
| 265 |
dynamic_data = json.load(f)
|
| 266 |
|
| 267 |
+
count_model_rev = {}
|
| 268 |
eval_results = {}
|
| 269 |
for model_result_filepath in model_result_filepaths:
|
| 270 |
# Creation of result
|
|
|
|
| 281 |
else:
|
| 282 |
eval_results[eval_name] = eval_result
|
| 283 |
|
| 284 |
+
#count model_revision to display precision if duplicate
|
| 285 |
+
if eval_result.status in ["FINISHED", "PENDING_NEW_EVAL" if SHOW_INCOMPLETE_EVALS else "FINISHED"] and not eval_result.hidden:
|
| 286 |
+
model_rev_key = f"{eval_result.full_model}_{eval_result.revision}"
|
| 287 |
+
if model_rev_key not in count_model_rev:
|
| 288 |
+
count_model_rev[model_rev_key] = 1
|
| 289 |
+
else:
|
| 290 |
+
count_model_rev[model_rev_key] += 1
|
| 291 |
+
|
| 292 |
results = []
|
| 293 |
for v in eval_results.values():
|
| 294 |
try:
|
| 295 |
if v.status in ["FINISHED", "PENDING_NEW_EVAL" if SHOW_INCOMPLETE_EVALS else "FINISHED"] and not v.hidden:
|
| 296 |
+
model_rev_key = f"{v.full_model}_{v.revision}"
|
| 297 |
+
v.num_evals_model_rev = count_model_rev[model_rev_key]
|
| 298 |
+
|
| 299 |
v.to_dict() # we test if the dict version is complete
|
| 300 |
results.append(v)
|
| 301 |
except KeyError as e: # not all eval values present
|