Spaces: Runtime error

Commit ed90aae
Parent(s): 04dab39

Refactor AutoEvalColumn usage for consistency and clarity; add debug prints for NaN checks in formatting functions

Files changed:
- app.py (+9 -9)
- src/display/formatting.py (+2 -0)
- src/display/utils.py (+1 -1)
- src/leaderboard/read_evals.py (+18 -26)
- src/populate.py (+2 -7)
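The change that repeats across these files is fields(AutoEvalColumn) → fields(AutoEvalColumn()), i.e. passing an instance instead of the class. A plausible reason, sketched below assuming the standard leaderboard-template definitions (a custom fields() helper that reads __dict__, and an AutoEvalColumn built with make_dataclass using default_factory fields — both assumptions, not confirmed by this diff): with default_factory, dataclasses removes the class attribute, so the column descriptors only exist on an instance.

```python
# Sketch under assumed template definitions; the repo's actual
# ColumnContent / fields() / AutoEvalColumn may differ in detail.
from dataclasses import dataclass, field, make_dataclass

@dataclass
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

def fields(raw_class):
    # Template-style helper: collect attribute values from __dict__,
    # skipping dunders (this is NOT dataclasses.fields).
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]

AutoEvalColumn = make_dataclass(
    "AutoEvalColumn",
    [("model", ColumnContent,
      field(default_factory=lambda: ColumnContent("Model", "markdown", True, never_hidden=True)))],
    frozen=True,
)

# default_factory values never land on the class, only on instances:
print(fields(AutoEvalColumn))    # []
print(fields(AutoEvalColumn()))  # [ColumnContent(name='Model', ...)]
```

The same reasoning covers the attribute accesses: AutoEvalColumn().model.name resolves on an instance, while AutoEvalColumn.model would raise AttributeError under this setup.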
app.py CHANGED
@@ -62,26 +62,26 @@ def init_leaderboard(dataframe):
         raise ValueError("Leaderboard DataFrame is empty or None.")
     return Leaderboard(
         value=dataframe,
-        datatype=[c.type for c in fields(AutoEvalColumn)],
+        datatype=[c.type for c in fields(AutoEvalColumn())],
         select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
+            default_selection=[c.name for c in fields(AutoEvalColumn()) if c.displayed_by_default],
+            cant_deselect=[c.name for c in fields(AutoEvalColumn()) if c.never_hidden],
             label="Select Columns to Display:",
         ),
-        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
+        search_columns=[AutoEvalColumn().model.name, AutoEvalColumn().license.name],
+        hide_columns=[c.name for c in fields(AutoEvalColumn()) if c.hidden],
         filter_columns=[
-            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
+            ColumnFilter(AutoEvalColumn().model_type.name, type="checkboxgroup", label="Model types"),
+            ColumnFilter(AutoEvalColumn().precision.name, type="checkboxgroup", label="Precision"),
             ColumnFilter(
-                AutoEvalColumn.params.name,
+                AutoEvalColumn().params.name,
                 type="slider",
                 min=0.01,
                 max=150,
                 label="Select the number of parameters (B)",
             ),
             ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
+                AutoEvalColumn().still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
             ),
         ],
         bool_checkboxgroup_label="Hide models",
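As a quick sanity check on the guard at the top of init_leaderboard (the surrounding if sits outside this hunk, but presumably tests for a None or empty DataFrame), a hypothetical snippet:

```python
# Hypothetical smoke test; assumes app.py is importable as `app`.
import pandas as pd
from app import init_leaderboard

try:
    init_leaderboard(pd.DataFrame())  # empty DataFrame should be rejected
except ValueError as err:
    print(err)  # -> Leaderboard DataFrame is empty or None.
```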
src/display/formatting.py CHANGED
@@ -20,6 +20,8 @@ def styled_message(message):
 
 
 def has_no_nan_values(df, columns):
+    print(df.columns)
+    print(columns)
     return df[columns].notna().all(axis=1)
 
 
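The helper returns a row-wise boolean mask that is True only where none of the listed columns is NaN; the new prints just expose which columns are being checked. A tiny illustration with invented data:

```python
# Invented data; shows what has_no_nan_values computes per row.
import numpy as np
import pandas as pd

df = pd.DataFrame({"Average": [71.2, np.nan], "ARC": [64.0, 60.1]})
mask = df[["Average", "ARC"]].notna().all(axis=1)  # [True, False]
print(df[mask])  # keeps only the row with no missing scores
```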
src/display/utils.py CHANGED
@@ -99,7 +99,7 @@ class Precision(Enum):
         return Precision.Unknown
 
 # Column selection
-COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
+COLS = [c.name for c in fields(AutoEvalColumn()) if not c.hidden]
 
 EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
 EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
src/leaderboard/read_evals.py CHANGED
@@ -110,20 +110,21 @@ class EvalResult:
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
         average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
+        AutoEvalColumnInstance = AutoEvalColumn()
         data_dict = {
             "eval_name": self.eval_name,  # not a column, just a save name,
-            AutoEvalColumn.precision.name: self.precision.value.name,
-            AutoEvalColumn.model_type.name: self.model_type.value.name,
-            AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
-            AutoEvalColumn.weight_type.name: self.weight_type.value.name,
-            AutoEvalColumn.architecture.name: self.architecture,
-            AutoEvalColumn.model.name: make_clickable_model(self.full_model),
-            AutoEvalColumn.revision.name: self.revision,
-            AutoEvalColumn.average.name: average,
-            AutoEvalColumn.license.name: self.license,
-            AutoEvalColumn.likes.name: self.likes,
-            AutoEvalColumn.params.name: self.num_params,
-            AutoEvalColumn.still_on_hub.name: self.still_on_hub,
+            AutoEvalColumnInstance.precision.name: self.precision.value.name,
+            AutoEvalColumnInstance.model_type.name: self.model_type.value.name,
+            AutoEvalColumnInstance.model_type_symbol.name: self.model_type.value.symbol,
+            AutoEvalColumnInstance.weight_type.name: self.weight_type.value.name,
+            AutoEvalColumnInstance.architecture.name: self.architecture,
+            AutoEvalColumnInstance.model.name: make_clickable_model(self.full_model),
+            AutoEvalColumnInstance.revision.name: self.revision,
+            AutoEvalColumnInstance.average.name: average,
+            AutoEvalColumnInstance.license.name: self.license,
+            AutoEvalColumnInstance.likes.name: self.likes,
+            AutoEvalColumnInstance.params.name: self.num_params,
+            AutoEvalColumnInstance.still_on_hub.name: self.still_on_hub,
         }
 
         for task in Tasks:
@@ -157,19 +158,10 @@ def get_request_file_for_model(requests_path, model_name, precision):
 def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     """From the path of the results folder root, extract all needed info for results"""
     model_result_filepaths = []
-
     for root, _, files in os.walk(results_path):
-        # We should only have json files in model results
-        if len(files) == 0 or any([not f.endswith(".json") for f in files]):
-            continue
-
-        # Sort the files by date
-        try:
-            files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
-        except dateutil.parser._parser.ParserError:
-            files = [files[-1]]
-
-        for file in files:
+        # Only process .json files
+        json_files = [f for f in files if f.endswith(".json")]
+        for file in json_files:
             model_result_filepaths.append(os.path.join(root, file))
 
     eval_results = {}
@@ -190,7 +182,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
         try:
             v.to_dict()  # we test if the dict version is complete
             results.append(v)
-        except KeyError:  # not all eval values present
+        except KeyError as e:  # not all eval values present
+            print(e)
             continue
-
     return results
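One detail of to_dict() worth keeping in mind: None scores are dropped from the numerator while the denominator stays len(Tasks), so a missing task effectively scores zero rather than being skipped. Illustration with made-up numbers:

```python
# Made-up numbers mirroring the average computation in to_dict().
results = {"task1": 80.0, "task2": None}
num_tasks = 2  # stands in for len(Tasks)
average = sum(v for v in results.values() if v is not None) / num_tasks
print(average)  # 40.0 -- the missing task drags the average down
```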
src/populate.py CHANGED
@@ -12,13 +12,9 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
     raw_data = get_raw_eval_results(results_path, requests_path)
     all_data_json = [v.to_dict() for v in raw_data]
-
     df = pd.DataFrame.from_records(all_data_json)
-    print("Columns:", df.columns.tolist())
-
     df = df.sort_values(by=[AutoEvalColumn().average.name], ascending=False)
     df = df[cols].round(decimals=2)
-
     # filter out if any of the benchmarks have not been produced
     df = df[has_no_nan_values(df, benchmark_cols)]
     return df
@@ -39,14 +35,13 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
             data[EvalQueueColumn.revision.name] = data.get("revision", "main")
 
             all_evals.append(data)
-        elif ".md" not in entry:
+        elif ".md" not in entry and os.path.isdir(os.path.join(save_path, entry)):
             # this is a folder
-            sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(e) and not e.startswith(".")]
+            sub_entries = [e for e in os.listdir(os.path.join(save_path, entry)) if os.path.isfile(os.path.join(save_path, entry, e)) and not e.startswith(".")]
             for sub_entry in sub_entries:
                 file_path = os.path.join(save_path, entry, sub_entry)
                 with open(file_path) as fp:
                     data = json.load(fp)
-
                 data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
                 data[EvalQueueColumn.revision.name] = data.get("revision", "main")
                 all_evals.append(data)
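The added os.path.isdir check matters because the old elif treated any non-.md entry as a folder, so a stray regular file would reach os.listdir() and raise NotADirectoryError; the new sub_entries filter likewise builds the full path before calling os.path.isfile, where the old one tested the bare filename. A minimal sketch of the guarded traversal (the eval-queue layout is invented):

```python
# Minimal sketch; "eval-queue" and its layout are invented for illustration.
import os

save_path = "eval-queue"
for entry in os.listdir(save_path):
    full_path = os.path.join(save_path, entry)
    if ".md" not in entry and os.path.isdir(full_path):
        # only real directories are walked, and isfile() gets a full path
        sub_entries = [
            e for e in os.listdir(full_path)
            if os.path.isfile(os.path.join(full_path, e)) and not e.startswith(".")
        ]
        print(entry, "->", sub_entries)
```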
|