Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Alina Lozovskaia
committed on
Commit
·
79b2cd5
1
Parent(s):
dadbd30
wip improvement
Browse files- src/leaderboard/read_evals.py +27 -25
src/leaderboard/read_evals.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import json
|
| 2 |
from pathlib import Path
|
| 3 |
-
import
|
| 4 |
from json import JSONDecodeError
|
| 5 |
import logging
|
| 6 |
import math
|
|
@@ -189,39 +189,40 @@ def get_request_file_for_model(requests_path, model_name, precision):
|
|
| 189 |
# Return empty string if no file found that matches criteria
|
| 190 |
return request_file
|
| 191 |
|
| 192 |
-
def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: str) -> list[EvalResult]:
|
| 193 |
-
"""From the path of the results folder root, extract all needed info for results"""
|
| 194 |
-
model_result_filepaths = []
|
| 195 |
-
|
| 196 |
-
results_path = Path(results_path)
|
| 197 |
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
# Sort the files by extracting the datetime from filenames assumed to be of the form "results_YYYYMMDD.json"
|
| 207 |
try:
|
| 208 |
-
|
| 209 |
-
except
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
| 211 |
|
| 212 |
-
for file in files:
|
| 213 |
-
# Construct file path correctly, ensuring no duplication of path parts
|
| 214 |
-
model_result_filepath = file.resolve()
|
| 215 |
-
model_result_filepaths.append(model_result_filepath)
|
| 216 |
|
|
|
|
|
|
|
| 217 |
with open(dynamic_path) as f:
|
| 218 |
dynamic_data = json.load(f)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
|
| 220 |
eval_results = {}
|
| 221 |
-
for model_result_filepath in
|
| 222 |
# Creation of result
|
| 223 |
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|
| 224 |
eval_result.update_with_request_file(requests_path)
|
|
|
|
| 225 |
if eval_result.full_model in dynamic_data:
|
| 226 |
eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
|
| 227 |
# Hardcoding because of gating problem
|
|
@@ -236,12 +237,13 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
|
|
| 236 |
eval_results[eval_name] = eval_result
|
| 237 |
|
| 238 |
results = []
|
| 239 |
-
for v in eval_results.
|
| 240 |
try:
|
| 241 |
if v.status == "FINISHED":
|
| 242 |
v.to_dict() # we test if the dict version is complete
|
| 243 |
results.append(v)
|
| 244 |
-
except KeyError
|
|
|
|
| 245 |
continue
|
| 246 |
|
| 247 |
return results
|
|
|
|
| 1 |
import json
|
| 2 |
from pathlib import Path
|
| 3 |
+
from datetime import datetime
|
| 4 |
from json import JSONDecodeError
|
| 5 |
import logging
|
| 6 |
import math
|
|
|
|
| 189 |
# Return empty string if no file found that matches criteria
|
| 190 |
return request_file
|
| 191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
+
def parse_datetime(datetime_str: str) -> datetime:
    """Parse a timestamp string, tolerating several time-separator styles.

    The date part must be ``YYYY-MM-DD`` followed by a literal ``T``. The
    time part may use dashes, colons or spaces between hours, minutes and
    seconds, with or without a fractional-seconds suffix.

    Args:
        datetime_str: Timestamp text, e.g. ``"2024-03-01T12-30-45.123456"``.

    Returns:
        The parsed naive ``datetime``. If no known format matches, an error
        is logged and ``datetime(1970, 1, 1)`` is returned as a sentinel.
    """
    formats = [
        "%Y-%m-%dT%H-%M-%S.%f",  # Format with dashes
        "%Y-%m-%dT%H:%M:%S.%f",  # Standard format with colons
        "%Y-%m-%dT%H %M %S.%f",  # Spaces as separator
        # Same three layouts without fractional seconds, so otherwise valid
        # timestamps are not collapsed onto the 1970 sentinel.
        "%Y-%m-%dT%H-%M-%S",
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%dT%H %M %S",
    ]

    for fmt in formats:
        try:
            return datetime.strptime(datetime_str, fmt)
        except ValueError:
            # This layout did not match; try the next one.
            continue

    # in rare cases set unix start time for files with incorrect time (legacy files)
    logging.error("No valid date format found for: %s", datetime_str)
    return datetime(1970, 1, 1)
|
| 208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
+
def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: str) -> list[EvalResult]:
|
| 211 |
+
"""From the path of the results folder root, extract all needed info for results"""
|
| 212 |
with open(dynamic_path) as f:
|
| 213 |
dynamic_data = json.load(f)
|
| 214 |
+
|
| 215 |
+
results_path = Path(results_path)
|
| 216 |
+
|
| 217 |
+
model_files = list(results_path.rglob('results_*.json'))
|
| 218 |
+
model_files.sort(key=lambda file: parse_datetime(file.stem.removeprefix("results_")))
|
| 219 |
|
| 220 |
eval_results = {}
|
| 221 |
+
for model_result_filepath in model_files:
|
| 222 |
# Creation of result
|
| 223 |
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|
| 224 |
eval_result.update_with_request_file(requests_path)
|
| 225 |
+
|
| 226 |
if eval_result.full_model in dynamic_data:
|
| 227 |
eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
|
| 228 |
# Hardcoding because of gating problem
|
|
|
|
| 237 |
eval_results[eval_name] = eval_result
|
| 238 |
|
| 239 |
results = []
|
| 240 |
+
for k, v in eval_results.items():
|
| 241 |
try:
|
| 242 |
if v.status == "FINISHED":
|
| 243 |
v.to_dict() # we test if the dict version is complete
|
| 244 |
results.append(v)
|
| 245 |
+
except KeyError as e:
|
| 246 |
+
logging.error(f"Error while checking model {k} dict, no key: {e}") # not all eval values present
|
| 247 |
continue
|
| 248 |
|
| 249 |
return results
|