Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Alina Lozovskaia
committed on
Commit
·
6b9cbbe
1
Parent(s):
2293858
Updated populate.py
Browse files — src/populate.py: +55, −46
src/populate.py
CHANGED
|
@@ -1,65 +1,74 @@
|
|
| 1 |
import json
|
| 2 |
import os
|
| 3 |
-
|
| 4 |
import pandas as pd
|
| 5 |
-
|
| 6 |
from src.display.formatting import has_no_nan_values, make_clickable_model
|
| 7 |
from src.display.utils import AutoEvalColumn, EvalQueueColumn, baseline_row
|
| 8 |
from src.leaderboard.filter_models import filter_models_flags
|
| 9 |
from src.leaderboard.read_evals import get_raw_eval_results
|
| 10 |
|
| 11 |
|
| 12 |
-
def
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
| 19 |
|
| 20 |
-
df = pd.DataFrame.from_records(all_data_json)
|
| 21 |
-
df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
|
| 22 |
-
df = df[cols].round(decimals=2)
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
-
def get_evaluation_queue_df(save_path
|
| 30 |
-
|
| 31 |
all_evals = []
|
| 32 |
-
|
| 33 |
for entry in entries:
|
| 34 |
-
if ".
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
-
|
| 43 |
-
elif ".md" not in entry:
|
| 44 |
-
# this is a folder
|
| 45 |
-
sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if not e.startswith(".")]
|
| 46 |
-
for sub_entry in sub_entries:
|
| 47 |
-
file_path = os.path.join(save_path, entry, sub_entry)
|
| 48 |
-
with open(file_path) as fp:
|
| 49 |
-
try:
|
| 50 |
-
data = json.load(fp)
|
| 51 |
-
except json.JSONDecodeError:
|
| 52 |
-
print(f"Error reading {file_path}")
|
| 53 |
-
continue
|
| 54 |
|
| 55 |
-
data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
|
| 56 |
-
data[EvalQueueColumn.revision.name] = data.get("revision", "main")
|
| 57 |
-
all_evals.append(data)
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import json
|
| 2 |
import os
|
|
|
|
| 3 |
import pandas as pd
|
|
|
|
| 4 |
from src.display.formatting import has_no_nan_values, make_clickable_model
|
| 5 |
from src.display.utils import AutoEvalColumn, EvalQueueColumn, baseline_row
|
| 6 |
from src.leaderboard.filter_models import filter_models_flags
|
| 7 |
from src.leaderboard.read_evals import get_raw_eval_results
|
| 8 |
|
| 9 |
|
| 10 |
+
def _load_json_data(file_path):
    """Safely load JSON data from a file.

    Args:
        file_path: Path to the JSON file to read.

    Returns:
        The parsed JSON object, or ``None`` when the file does not
        contain valid JSON (an error message is printed instead of
        raising, so callers can skip the bad entry).
    """
    try:
        # Explicit encoding: request files are UTF-8; the platform default
        # (e.g. cp1252 on Windows) would mis-decode non-ASCII model names.
        with open(file_path, "r", encoding="utf-8") as file:
            return json.load(file)
    except json.JSONDecodeError:
        print(f"Error reading JSON from {file_path}")
        return None  # Caller treats None as "skip this entry"
|
| 18 |
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
+
def _process_model_data(entry, model_name_key="model", revision_key="revision"):
    """Enrich model data with clickable links and revisions.

    Mutates *entry* in place (adding the display-model and revision
    columns) and returns the same dict so the call can be used inline.
    """
    entry.update(
        {
            EvalQueueColumn.model.name: make_clickable_model(entry.get(model_name_key, "")),
            EvalQueueColumn.revision.name: entry.get(revision_key, "main"),
        }
    )
    return entry
|
| 25 |
|
| 26 |
|
| 27 |
+
def get_evaluation_queue_df(save_path, cols):
    """Generate dataframes for finished, running, and pending evaluation entries.

    Scans ``save_path`` (and one level of sub-directories) for JSON request
    files, enriches each record via ``_process_model_data``, and buckets the
    records by their ``status`` field.

    Args:
        save_path: Directory containing evaluation request JSON files.
        cols: Column names for the returned dataframes.

    Returns:
        Tuple of ``(finished_df, running_df, pending_df)``.
    """
    all_evals = []
    for entry in os.listdir(save_path):
        # Skip hidden files and documentation.
        if entry.startswith(".") or entry.endswith(".md"):
            continue
        file_path = os.path.join(save_path, entry)
        if os.path.isfile(file_path):
            data = _load_json_data(file_path)
            if data:
                all_evals.append(_process_model_data(data))
        else:
            # One level of nesting: org folders containing request files.
            for sub_entry in os.listdir(file_path):
                # Skip hidden files here too (e.g. .gitattributes) — the
                # pre-refactor code filtered these and the filter was lost.
                if sub_entry.startswith("."):
                    continue
                sub_file_path = os.path.join(file_path, sub_entry)
                if os.path.isfile(sub_file_path):
                    data = _load_json_data(sub_file_path)
                    if data:
                        all_evals.append(_process_model_data(data))

    # Organizing data by status; PENDING_NEW_EVAL rows surface as FINISHED.
    status_map = {
        "PENDING": ["PENDING", "RERUN"],
        "RUNNING": ["RUNNING"],
        "FINISHED": ["FINISHED", "PENDING_NEW_EVAL"],
    }
    status_dfs = {status: [] for status in status_map}
    for eval_data in all_evals:
        # .get avoids a KeyError on malformed entries missing "status";
        # such entries simply land in no bucket.
        entry_status = eval_data.get("status")
        for status, accepted_statuses in status_map.items():
            if entry_status in accepted_statuses:
                status_dfs[status].append(eval_data)

    return tuple(pd.DataFrame(status_dfs[status], columns=cols) for status in ["FINISHED", "RUNNING", "PENDING"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
+
def get_leaderboard_df(results_path, requests_path, dynamic_path, cols, benchmark_cols):
    """Retrieve and process leaderboard data.

    Reads raw evaluation results, appends the baseline row, applies the
    flagged-model filter, then builds a dataframe sorted by average score,
    restricted to *cols*, rounded to two decimals, and stripped of rows
    with missing benchmark values.

    Returns:
        ``(raw_data, df)`` — the raw result objects and the display dataframe.
    """
    raw_data = get_raw_eval_results(results_path, requests_path, dynamic_path)
    records = [model.to_dict() for model in raw_data]
    records.append(baseline_row)
    filter_models_flags(records)

    leaderboard = pd.DataFrame.from_records(records).sort_values(
        by=[AutoEvalColumn.average.name], ascending=False
    )
    leaderboard = leaderboard[cols].round(decimals=2)
    # Drop rows missing any benchmark score.
    leaderboard = leaderboard[has_no_nan_values(leaderboard, benchmark_cols)]
    return raw_data, leaderboard
|