Spaces:
Running
Running
Nikhil Raghavan committed on
Commit ·
304343a
1
Parent(s): d400470
CHANGES
Browse files
- src/display/utils.py +2 -1
- src/envs.py +2 -0
- src/populate.py +24 -11
src/display/utils.py
CHANGED
|
@@ -28,7 +28,7 @@ for task in Tasks:
|
|
| 28 |
auto_eval_column_dict.append([task.name, ColumnContent, field(default_factory=lambda t=_task: ColumnContent(t.value.col_name, "number", True))])
|
| 29 |
|
| 30 |
# We use make dataclass to dynamically fill the scores from Tasks
|
| 31 |
-
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
| 32 |
|
| 33 |
## For the queue columns in the submission tab
|
| 34 |
@dataclass(frozen=True)
|
|
@@ -80,6 +80,7 @@ class Precision(Enum):
|
|
| 80 |
bfloat16 = ModelDetails("bfloat16")
|
| 81 |
Unknown = ModelDetails("?")
|
| 82 |
|
|
|
|
| 83 |
def from_str(precision):
|
| 84 |
if precision in ["torch.float16", "float16"]:
|
| 85 |
return Precision.float16
|
|
|
|
| 28 |
auto_eval_column_dict.append([task.name, ColumnContent, field(default_factory=lambda t=_task: ColumnContent(t.value.col_name, "number", True))])
|
| 29 |
|
| 30 |
# We use make dataclass to dynamically fill the scores from Tasks
|
| 31 |
+
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)()
|
| 32 |
|
| 33 |
## For the queue columns in the submission tab
|
| 34 |
@dataclass(frozen=True)
|
|
|
|
| 80 |
bfloat16 = ModelDetails("bfloat16")
|
| 81 |
Unknown = ModelDetails("?")
|
| 82 |
|
| 83 |
+
@staticmethod
|
| 84 |
def from_str(precision):
|
| 85 |
if precision in ["torch.float16", "float16"]:
|
| 86 |
return Precision.float16
|
src/envs.py
CHANGED
|
@@ -17,5 +17,7 @@ CACHE_PATH=os.getenv("HF_HOME", ".")
|
|
| 17 |
|
| 18 |
# Local caches
|
| 19 |
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
|
|
|
|
|
|
|
| 20 |
|
| 21 |
API = HfApi(token=TOKEN)
|
|
|
|
| 17 |
|
| 18 |
# Local caches
|
| 19 |
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
|
| 20 |
+
EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
|
| 21 |
+
QUEUE_REPO = f"{OWNER}/requests"
|
| 22 |
|
| 23 |
API = HfApi(token=TOKEN)
|
src/populate.py
CHANGED
|
@@ -1,7 +1,17 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
def get_leaderboard_df(results_path: str, requests_path: str = None, cols: list = None, benchmark_cols: list = None) -> pd.DataFrame:
|
| 7 |
"""Creates a dataframe from all the individual experiment results"""
|
|
@@ -9,16 +19,17 @@ def get_leaderboard_df(results_path: str, requests_path: str = None, cols: list
|
|
| 9 |
all_data_json = [v.to_dict() for v in raw_data]
|
| 10 |
|
| 11 |
df = pd.DataFrame.from_records(all_data_json)
|
| 12 |
-
df = df.sort_values(by=[AutoEvalColumn.
|
| 13 |
-
|
|
|
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
return df
|
| 18 |
|
| 19 |
|
| 20 |
def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|
| 21 |
-
"""Creates the different dataframes for the evaluation queues
|
| 22 |
entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
|
| 23 |
all_evals = []
|
| 24 |
|
|
@@ -28,19 +39,21 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|
|
| 28 |
with open(file_path) as fp:
|
| 29 |
data = json.load(fp)
|
| 30 |
|
| 31 |
-
data[EvalQueueColumn.model.name] =
|
| 32 |
data[EvalQueueColumn.revision.name] = data.get("revision", "main")
|
| 33 |
|
| 34 |
all_evals.append(data)
|
| 35 |
elif ".md" not in entry:
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
| 38 |
for sub_entry in sub_entries:
|
| 39 |
file_path = os.path.join(save_path, entry, sub_entry)
|
| 40 |
with open(file_path) as fp:
|
| 41 |
data = json.load(fp)
|
| 42 |
|
| 43 |
-
data[EvalQueueColumn.model.name] =
|
| 44 |
data[EvalQueueColumn.revision.name] = data.get("revision", "main")
|
| 45 |
all_evals.append(data)
|
| 46 |
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
import pandas as pd
|
| 5 |
|
| 6 |
+
from src.display.utils import AutoEvalColumn, EvalQueueColumn
|
| 7 |
+
from src.leaderboard.read_evals import get_raw_eval_results
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def has_no_nan_values(df, cols):
|
| 11 |
+
if not cols:
|
| 12 |
+
return [True] * len(df)
|
| 13 |
+
return df[cols].notna().all(axis=1)
|
| 14 |
+
|
| 15 |
|
| 16 |
def get_leaderboard_df(results_path: str, requests_path: str = None, cols: list = None, benchmark_cols: list = None) -> pd.DataFrame:
|
| 17 |
"""Creates a dataframe from all the individual experiment results"""
|
|
|
|
| 19 |
all_data_json = [v.to_dict() for v in raw_data]
|
| 20 |
|
| 21 |
df = pd.DataFrame.from_records(all_data_json)
|
| 22 |
+
df = df.sort_values(by=[AutoEvalColumn.technique.name], ascending=True)
|
| 23 |
+
if cols:
|
| 24 |
+
df = df[[c for c in cols if c in df.columns]].round(decimals=2)
|
| 25 |
|
| 26 |
+
if benchmark_cols:
|
| 27 |
+
df = df[has_no_nan_values(df, benchmark_cols)]
|
| 28 |
return df
|
| 29 |
|
| 30 |
|
| 31 |
def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|
| 32 |
+
"""Creates the different dataframes for the evaluation queues"""
|
| 33 |
entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
|
| 34 |
all_evals = []
|
| 35 |
|
|
|
|
| 39 |
with open(file_path) as fp:
|
| 40 |
data = json.load(fp)
|
| 41 |
|
| 42 |
+
data[EvalQueueColumn.model.name] = data["model"]
|
| 43 |
data[EvalQueueColumn.revision.name] = data.get("revision", "main")
|
| 44 |
|
| 45 |
all_evals.append(data)
|
| 46 |
elif ".md" not in entry:
|
| 47 |
+
sub_entries = [
|
| 48 |
+
e for e in os.listdir(f"{save_path}/{entry}")
|
| 49 |
+
if os.path.isfile(os.path.join(save_path, entry, e)) and not e.startswith(".")
|
| 50 |
+
]
|
| 51 |
for sub_entry in sub_entries:
|
| 52 |
file_path = os.path.join(save_path, entry, sub_entry)
|
| 53 |
with open(file_path) as fp:
|
| 54 |
data = json.load(fp)
|
| 55 |
|
| 56 |
+
data[EvalQueueColumn.model.name] = data["model"]
|
| 57 |
data[EvalQueueColumn.revision.name] = data.get("revision", "main")
|
| 58 |
all_evals.append(data)
|
| 59 |
|