Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
a3b0a0f
1
Parent(s):
5a3de19
Permit different revision
Browse files
- src/display/formatting.py +5 -1
- src/display/utils.py +4 -1
- src/leaderboard/read_evals.py +21 -7
- src/submission/submit.py +7 -1
src/display/formatting.py
CHANGED
|
@@ -24,7 +24,7 @@ def make_requests_clickable_model(model_name, json_path=None):
|
|
| 24 |
|
| 25 |
return model_hyperlink(link, model_name) + " " + model_hyperlink(details_link, "📑")
|
| 26 |
|
| 27 |
-
def make_clickable_model(model_name, json_path=None):
|
| 28 |
link = f"https://huggingface.co/{model_name}"
|
| 29 |
|
| 30 |
#details_model_name = model_name.replace("/", "__")
|
|
@@ -35,6 +35,10 @@ def make_clickable_model(model_name, json_path=None):
|
|
| 35 |
if json_path is not None:
|
| 36 |
details_link = f"https://huggingface.co/datasets/{RESULTS_REPO}/blob/main/{model_name}/{json_path}"
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
return model_hyperlink(link, model_name) + " " + model_hyperlink(details_link, "📑")
|
| 39 |
|
| 40 |
|
|
|
|
| 24 |
|
| 25 |
return model_hyperlink(link, model_name) + " " + model_hyperlink(details_link, "📑")
|
| 26 |
|
| 27 |
+
def make_clickable_model(model_name, json_path=None, revision=None):
|
| 28 |
link = f"https://huggingface.co/{model_name}"
|
| 29 |
|
| 30 |
#details_model_name = model_name.replace("/", "__")
|
|
|
|
| 35 |
if json_path is not None:
|
| 36 |
details_link = f"https://huggingface.co/datasets/{RESULTS_REPO}/blob/main/{model_name}/{json_path}"
|
| 37 |
|
| 38 |
+
if revision is not None and revision != "main":
|
| 39 |
+
if len(revision) > 12:
|
| 40 |
+
revision = revision[:7]
|
| 41 |
+
model_name += f" (rev: {revision})"
|
| 42 |
return model_hyperlink(link, model_name) + " " + model_hyperlink(details_link, "📑")
|
| 43 |
|
| 44 |
|
src/display/utils.py
CHANGED
|
@@ -61,7 +61,8 @@ auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub Licen
|
|
| 61 |
auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
|
| 62 |
auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
|
| 63 |
auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False, hidden=True)])
|
| 64 |
-
auto_eval_column_dict.append(["
|
|
|
|
| 65 |
auto_eval_column_dict.append(["flagged", ColumnContent, ColumnContent("Flagged", "bool", False, hidden=True)])
|
| 66 |
auto_eval_column_dict.append(["moe", ColumnContent, ColumnContent("MoE", "bool", False, hidden=True)])
|
| 67 |
auto_eval_column_dict.append(["eval_time", ColumnContent, ColumnContent("Evaluation Time (s)", "number", False)])
|
|
@@ -88,6 +89,7 @@ class EvalQueueColumn: # Queue column
|
|
| 88 |
baseline_row = {
|
| 89 |
AutoEvalColumn.model.name: "<p>Baseline</p>",
|
| 90 |
AutoEvalColumn.revision.name: "N/A",
|
|
|
|
| 91 |
AutoEvalColumn.precision.name: "?",
|
| 92 |
AutoEvalColumn.merged.name: False,
|
| 93 |
#AutoEvalColumn.average.name: 31.0,
|
|
@@ -131,6 +133,7 @@ if GET_ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS:
|
|
| 131 |
human_baseline_row = {
|
| 132 |
AutoEvalColumn.model.name: "<p>Human performance</p>",
|
| 133 |
AutoEvalColumn.revision.name: "N/A",
|
|
|
|
| 134 |
AutoEvalColumn.precision.name: "?",
|
| 135 |
#AutoEvalColumn.average.name: 92.75,
|
| 136 |
AutoEvalColumn.merged.name: False,
|
|
|
|
| 61 |
auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
|
| 62 |
auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
|
| 63 |
auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False, hidden=True)])
|
| 64 |
+
auto_eval_column_dict.append(["model_sha", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
| 65 |
+
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Revision", "str", False, False)])
|
| 66 |
auto_eval_column_dict.append(["flagged", ColumnContent, ColumnContent("Flagged", "bool", False, hidden=True)])
|
| 67 |
auto_eval_column_dict.append(["moe", ColumnContent, ColumnContent("MoE", "bool", False, hidden=True)])
|
| 68 |
auto_eval_column_dict.append(["eval_time", ColumnContent, ColumnContent("Evaluation Time (s)", "number", False)])
|
|
|
|
| 89 |
baseline_row = {
|
| 90 |
AutoEvalColumn.model.name: "<p>Baseline</p>",
|
| 91 |
AutoEvalColumn.revision.name: "N/A",
|
| 92 |
+
AutoEvalColumn.model_sha.name: "N/A",
|
| 93 |
AutoEvalColumn.precision.name: "?",
|
| 94 |
AutoEvalColumn.merged.name: False,
|
| 95 |
#AutoEvalColumn.average.name: 31.0,
|
|
|
|
| 133 |
human_baseline_row = {
|
| 134 |
AutoEvalColumn.model.name: "<p>Human performance</p>",
|
| 135 |
AutoEvalColumn.revision.name: "N/A",
|
| 136 |
+
AutoEvalColumn.model_sha.name: "N/A",
|
| 137 |
AutoEvalColumn.precision.name: "?",
|
| 138 |
#AutoEvalColumn.average.name: 92.75,
|
| 139 |
AutoEvalColumn.merged.name: False,
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -22,7 +22,8 @@ class EvalResult:
|
|
| 22 |
full_model: str # org/model (path on hub)
|
| 23 |
org: str
|
| 24 |
model: str
|
| 25 |
-
|
|
|
|
| 26 |
results: dict
|
| 27 |
precision: Precision = Precision.Unknown
|
| 28 |
model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
|
|
@@ -120,8 +121,9 @@ class EvalResult:
|
|
| 120 |
org=org,
|
| 121 |
model=model,
|
| 122 |
results=results,
|
| 123 |
-
precision=precision,
|
| 124 |
-
|
|
|
|
| 125 |
json_filename=json_filename,
|
| 126 |
eval_time=config.get("total_evaluation_time_seconds", 0.0),
|
| 127 |
num_params=num_params
|
|
@@ -129,7 +131,7 @@ class EvalResult:
|
|
| 129 |
|
| 130 |
def update_with_request_file(self, requests_path):
|
| 131 |
"""Finds the relevant request file for the current model and updates info with it"""
|
| 132 |
-
request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
|
| 133 |
|
| 134 |
try:
|
| 135 |
with open(request_file, "r") as f:
|
|
@@ -144,7 +146,7 @@ class EvalResult:
|
|
| 144 |
self.main_language = request.get("main_language", "?")
|
| 145 |
except Exception as e:
|
| 146 |
self.status = "FAILED"
|
| 147 |
-
print(f"Could not find request file for {self.org}/{self.model}")
|
| 148 |
|
| 149 |
def update_with_dynamic_file_dict(self, file_dict):
|
| 150 |
self.license = file_dict.get("license", "?")
|
|
@@ -174,6 +176,12 @@ class EvalResult:
|
|
| 174 |
average = round(sum(average)/len(average), 2)
|
| 175 |
npm = round(sum(npm)/len(npm), 2)
|
| 176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
data_dict = {
|
| 178 |
"eval_name": self.eval_name, # not a column, just a save name,
|
| 179 |
AutoEvalColumn.precision.name: self.precision.value.name,
|
|
@@ -181,7 +189,7 @@ class EvalResult:
|
|
| 181 |
AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
|
| 182 |
AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
| 183 |
AutoEvalColumn.architecture.name: self.architecture,
|
| 184 |
-
AutoEvalColumn.model.name: make_clickable_model(self.full_model, self.json_filename),
|
| 185 |
AutoEvalColumn.dummy.name: self.full_model,
|
| 186 |
AutoEvalColumn.revision.name: self.revision,
|
| 187 |
AutoEvalColumn.average.name: average,
|
|
@@ -207,7 +215,7 @@ class EvalResult:
|
|
| 207 |
return data_dict
|
| 208 |
|
| 209 |
|
| 210 |
-
def get_request_file_for_model(requests_path, model_name, precision):
|
| 211 |
"""Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
|
| 212 |
request_files = os.path.join(
|
| 213 |
requests_path,
|
|
@@ -215,15 +223,21 @@ def get_request_file_for_model(requests_path, model_name, precision):
|
|
| 215 |
)
|
| 216 |
request_files = glob.glob(request_files)
|
| 217 |
|
|
|
|
|
|
|
|
|
|
| 218 |
# Select correct request file (precision)
|
| 219 |
request_file = ""
|
| 220 |
request_files = sorted(request_files, reverse=True)
|
| 221 |
for tmp_request_file in request_files:
|
| 222 |
with open(tmp_request_file, "r") as f:
|
| 223 |
req_content = json.load(f)
|
|
|
|
|
|
|
| 224 |
if (
|
| 225 |
req_content["status"] in ["FINISHED", "PENDING_NEW_EVAL" if SHOW_INCOMPLETE_EVALS else "FINISHED"]
|
| 226 |
and req_content["precision"] == precision.split(".")[-1]
|
|
|
|
| 227 |
):
|
| 228 |
request_file = tmp_request_file
|
| 229 |
return request_file
|
|
|
|
| 22 |
full_model: str # org/model (path on hub)
|
| 23 |
org: str
|
| 24 |
model: str
|
| 25 |
+
model_sha: str # commit hash, "" if main
|
| 26 |
+
revision: str = "main"
|
| 27 |
results: dict
|
| 28 |
precision: Precision = Precision.Unknown
|
| 29 |
model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
|
|
|
|
| 121 |
org=org,
|
| 122 |
model=model,
|
| 123 |
results=results,
|
| 124 |
+
precision=precision,
|
| 125 |
+
model_sha=config.get("model_sha", ""),
|
| 126 |
+
revision=config.get("model_revision", "main"),
|
| 127 |
json_filename=json_filename,
|
| 128 |
eval_time=config.get("total_evaluation_time_seconds", 0.0),
|
| 129 |
num_params=num_params
|
|
|
|
| 131 |
|
| 132 |
def update_with_request_file(self, requests_path):
|
| 133 |
"""Finds the relevant request file for the current model and updates info with it"""
|
| 134 |
+
request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name, self.revision)
|
| 135 |
|
| 136 |
try:
|
| 137 |
with open(request_file, "r") as f:
|
|
|
|
| 146 |
self.main_language = request.get("main_language", "?")
|
| 147 |
except Exception as e:
|
| 148 |
self.status = "FAILED"
|
| 149 |
+
print(f"Could not find request file for {self.org}/{self.model}, precision {self.precision.value.name}")
|
| 150 |
|
| 151 |
def update_with_dynamic_file_dict(self, file_dict):
|
| 152 |
self.license = file_dict.get("license", "?")
|
|
|
|
| 176 |
average = round(sum(average)/len(average), 2)
|
| 177 |
npm = round(sum(npm)/len(npm), 2)
|
| 178 |
|
| 179 |
+
rev_name = None
|
| 180 |
+
if self.revision != "main":
|
| 181 |
+
rev_name = self.revision
|
| 182 |
+
if len(rev_name) > 10:
|
| 183 |
+
rev_name = rev_name[:7]
|
| 184 |
+
|
| 185 |
data_dict = {
|
| 186 |
"eval_name": self.eval_name, # not a column, just a save name,
|
| 187 |
AutoEvalColumn.precision.name: self.precision.value.name,
|
|
|
|
| 189 |
AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
|
| 190 |
AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
| 191 |
AutoEvalColumn.architecture.name: self.architecture,
|
| 192 |
+
AutoEvalColumn.model.name: make_clickable_model(self.full_model, self.json_filename, revision=rev_name),
|
| 193 |
AutoEvalColumn.dummy.name: self.full_model,
|
| 194 |
AutoEvalColumn.revision.name: self.revision,
|
| 195 |
AutoEvalColumn.average.name: average,
|
|
|
|
| 215 |
return data_dict
|
| 216 |
|
| 217 |
|
| 218 |
+
def get_request_file_for_model(requests_path, model_name, precision, revision):
|
| 219 |
"""Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
|
| 220 |
request_files = os.path.join(
|
| 221 |
requests_path,
|
|
|
|
| 223 |
)
|
| 224 |
request_files = glob.glob(request_files)
|
| 225 |
|
| 226 |
+
if revision is None or revision == "":
|
| 227 |
+
revision = "main"
|
| 228 |
+
|
| 229 |
# Select correct request file (precision)
|
| 230 |
request_file = ""
|
| 231 |
request_files = sorted(request_files, reverse=True)
|
| 232 |
for tmp_request_file in request_files:
|
| 233 |
with open(tmp_request_file, "r") as f:
|
| 234 |
req_content = json.load(f)
|
| 235 |
+
if req_content["revision"] is None or req_content["revision"] == "":
|
| 236 |
+
req_content["revision"] = "main"
|
| 237 |
if (
|
| 238 |
req_content["status"] in ["FINISHED", "PENDING_NEW_EVAL" if SHOW_INCOMPLETE_EVALS else "FINISHED"]
|
| 239 |
and req_content["precision"] == precision.split(".")[-1]
|
| 240 |
+
and req_content["revision"] == revision
|
| 241 |
):
|
| 242 |
request_file = tmp_request_file
|
| 243 |
return request_file
|
src/submission/submit.py
CHANGED
|
@@ -146,7 +146,13 @@ def add_new_eval(
|
|
| 146 |
print("Creating eval file")
|
| 147 |
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
|
| 148 |
os.makedirs(OUT_DIR, exist_ok=True)
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
with open(out_path, "w", encoding="utf-8") as f:
|
| 152 |
json.dump(eval_entry, f, indent=4, ensure_ascii=False)
|
|
|
|
| 146 |
print("Creating eval file")
|
| 147 |
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
|
| 148 |
os.makedirs(OUT_DIR, exist_ok=True)
|
| 149 |
+
if revision == "main":
|
| 150 |
+
out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}.json"
|
| 151 |
+
else:
|
| 152 |
+
rev_name = revision
|
| 153 |
+
if len(rev_name) > 30:
|
| 154 |
+
rev_name = rev_name[:7]
|
| 155 |
+
out_path = f"{OUT_DIR}/{model_path}_eval_request_{rev_name}_{private}_{precision}_{weight_type}.json"
|
| 156 |
|
| 157 |
with open(out_path, "w", encoding="utf-8") as f:
|
| 158 |
json.dump(eval_entry, f, indent=4, ensure_ascii=False)
|