hamzabouajila committed on
Commit ed90aae · 1 Parent(s): 04dab39

Refactor AutoEvalColumn usage for consistency and clarity; add debug prints for NaN checks in formatting functions

app.py CHANGED
@@ -62,26 +62,26 @@ def init_leaderboard(dataframe):
         raise ValueError("Leaderboard DataFrame is empty or None.")
     return Leaderboard(
         value=dataframe,
-        datatype=[c.type for c in fields(AutoEvalColumn)],
+        datatype=[c.type for c in fields(AutoEvalColumn())],
         select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
+            default_selection=[c.name for c in fields(AutoEvalColumn()) if c.displayed_by_default],
+            cant_deselect=[c.name for c in fields(AutoEvalColumn()) if c.never_hidden],
             label="Select Columns to Display:",
         ),
-        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
+        search_columns=[AutoEvalColumn().model.name, AutoEvalColumn().license.name],
+        hide_columns=[c.name for c in fields(AutoEvalColumn()) if c.hidden],
         filter_columns=[
-            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
+            ColumnFilter(AutoEvalColumn().model_type.name, type="checkboxgroup", label="Model types"),
+            ColumnFilter(AutoEvalColumn().precision.name, type="checkboxgroup", label="Precision"),
             ColumnFilter(
-                AutoEvalColumn.params.name,
+                AutoEvalColumn().params.name,
                 type="slider",
                 min=0.01,
                 max=150,
                 label="Select the number of parameters (B)",
             ),
             ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
+                AutoEvalColumn().still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
             ),
         ],
         bool_checkboxgroup_label="Hide models",
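Note on the pattern above: every call site now instantiates AutoEvalColumn instead of passing the class. In the stock leaderboard template, fields() is a small custom helper that reads attribute values out of __dict__ rather than calling dataclasses.fields; if AutoEvalColumn was converted to a real dataclass whose columns are only assigned in __init__ (e.g. via default_factory), those values exist only on an instance, which would explain the refactor. A minimal sketch under that assumption — the helper and the column definitions here are hypothetical, not this Space's actual code:

from dataclasses import dataclass, field

def fields(raw_class):
    # the template's helper (assumed): pull non-dunder attribute values from __dict__
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]

@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool = True

@dataclass(frozen=True)
class AutoEvalColumn:  # hypothetical shape, not this Space's actual definition
    model: ColumnContent = field(default_factory=lambda: ColumnContent("Model", "markdown"))
    average: ColumnContent = field(default_factory=lambda: ColumnContent("Average", "number"))

print([c.name for c in fields(AutoEvalColumn)])    # [] -- the class __dict__ holds no column values
print([c.name for c in fields(AutoEvalColumn())])  # ['Model', 'Average'] -- the instance __dict__ does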
src/display/formatting.py CHANGED
@@ -20,6 +20,8 @@ def styled_message(message):
 
 
 def has_no_nan_values(df, columns):
+    print(df.columns)
+    print(columns)
     return df[columns].notna().all(axis=1)
 
 
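The two prints are pure debugging; the check itself is unchanged. For reference, has_no_nan_values builds a boolean mask that keeps only the rows where every listed column is present — a quick illustration with made-up data:

import pandas as pd

df = pd.DataFrame({"Model": ["a", "b"], "Average": [71.3, None]})
mask = df[["Average"]].notna().all(axis=1)  # row 0 -> True, row 1 -> False
print(df[mask])  # only rows with no NaN in the checked columns survive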
src/display/utils.py CHANGED
@@ -99,7 +99,7 @@ class Precision(Enum):
         return Precision.Unknown
 
 # Column selection
-COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
+COLS = [c.name for c in fields(AutoEvalColumn()) if not c.hidden]
 
 EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
 EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
src/leaderboard/read_evals.py CHANGED
@@ -110,20 +110,21 @@ class EvalResult:
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
         average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
+        AutoEvalColumnInstance = AutoEvalColumn()
         data_dict = {
             "eval_name": self.eval_name,  # not a column, just a save name,
-            AutoEvalColumn.precision.name: self.precision.value.name,
-            AutoEvalColumn.model_type.name: self.model_type.value.name,
-            AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
-            AutoEvalColumn.weight_type.name: self.weight_type.value.name,
-            AutoEvalColumn.architecture.name: self.architecture,
-            AutoEvalColumn.model.name: make_clickable_model(self.full_model),
-            AutoEvalColumn.revision.name: self.revision,
-            AutoEvalColumn.average.name: average,
-            AutoEvalColumn.license.name: self.license,
-            AutoEvalColumn.likes.name: self.likes,
-            AutoEvalColumn.params.name: self.num_params,
-            AutoEvalColumn.still_on_hub.name: self.still_on_hub,
+            AutoEvalColumnInstance.precision.name: self.precision.value.name,
+            AutoEvalColumnInstance.model_type.name: self.model_type.value.name,
+            AutoEvalColumnInstance.model_type_symbol.name: self.model_type.value.symbol,
+            AutoEvalColumnInstance.weight_type.name: self.weight_type.value.name,
+            AutoEvalColumnInstance.architecture.name: self.architecture,
+            AutoEvalColumnInstance.model.name: make_clickable_model(self.full_model),
+            AutoEvalColumnInstance.revision.name: self.revision,
+            AutoEvalColumnInstance.average.name: average,
+            AutoEvalColumnInstance.license.name: self.license,
+            AutoEvalColumnInstance.likes.name: self.likes,
+            AutoEvalColumnInstance.params.name: self.num_params,
+            AutoEvalColumnInstance.still_on_hub.name: self.still_on_hub,
         }
 
         for task in Tasks:
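Unchanged by this commit but worth noting while it is on screen: the average divides by len(Tasks), not by the number of non-None scores, so missing tasks effectively count as zero. A worked example with made-up numbers:

Tasks = ["t1", "t2", "t3", "t4"]  # stand-in for the Tasks enum
results = {"t1": 80.0, "t2": 60.0, "t3": None, "t4": None}

average = sum([v for v in results.values() if v is not None]) / len(Tasks)
print(average)  # 35.0, i.e. (80 + 60) / 4 -- the two missing tasks drag the average down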
@@ -157,19 +158,10 @@ def get_request_file_for_model(requests_path, model_name, precision):
 def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     """From the path of the results folder root, extract all needed info for results"""
     model_result_filepaths = []
-
     for root, _, files in os.walk(results_path):
-        # We should only have json files in model results
-        if len(files) == 0 or any([not f.endswith(".json") for f in files]):
-            continue
-
-        # Sort the files by date
-        try:
-            files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
-        except dateutil.parser._parser.ParserError:
-            files = [files[-1]]
-
-        for file in files:
+        # Only process .json files
+        json_files = [f for f in files if f.endswith(".json")]
+        for file in json_files:
             model_result_filepaths.append(os.path.join(root, file))
 
     eval_results = {}
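The rewritten loop changes behaviour in two ways: a directory is no longer skipped wholesale when it contains a stray non-JSON file, and the date sort is gone, so when several results_*.json files exist for one model all of them are now read instead of only the newest. A quick illustration of the first point, with a hypothetical directory listing:

files = ["results_2024-01-01T00-00-00.000000.json", ".lock"]  # hypothetical listing

# old guard: one stray file caused the whole directory to be skipped
print(len(files) == 0 or any([not f.endswith(".json") for f in files]))  # True -> continue

# new filter: the stray file is simply ignored
print([f for f in files if f.endswith(".json")])  # ['results_2024-01-01T00-00-00.000000.json']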
@@ -190,7 +182,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
         try:
             v.to_dict()  # we test if the dict version is complete
             results.append(v)
-        except KeyError:  # not all eval values present
+        except KeyError as e:  # not all eval values present
+            print(e)
             continue
-
     return results
src/populate.py CHANGED
@@ -12,13 +12,9 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
     raw_data = get_raw_eval_results(results_path, requests_path)
     all_data_json = [v.to_dict() for v in raw_data]
-
     df = pd.DataFrame.from_records(all_data_json)
-    print("Columns:", df.columns.tolist())
-
     df = df.sort_values(by=[AutoEvalColumn().average.name], ascending=False)
     df = df[cols].round(decimals=2)
-
     # filter out if any of the benchmarks have not been produced
     df = df[has_no_nan_values(df, benchmark_cols)]
     return df
@@ -39,14 +35,13 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
             data[EvalQueueColumn.revision.name] = data.get("revision", "main")
 
             all_evals.append(data)
-        elif ".md" not in entry:
+        elif ".md" not in entry and os.path.isdir(os.path.join(save_path, entry)):
             # this is a folder
-            sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(e) and not e.startswith(".")]
+            sub_entries = [e for e in os.listdir(os.path.join(save_path, entry)) if os.path.isfile(os.path.join(save_path, entry, e)) and not e.startswith(".")]
             for sub_entry in sub_entries:
                 file_path = os.path.join(save_path, entry, sub_entry)
                 with open(file_path) as fp:
                     data = json.load(fp)
-
                 data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
                 data[EvalQueueColumn.revision.name] = data.get("revision", "main")
                 all_evals.append(data)
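The sub_entries rewrite fixes a real bug: os.path.isfile(e) tested a bare filename against the current working directory, so pending requests stored under save_path/entry/ were silently dropped unless the process happened to run from inside that folder. Joining the full path makes the check independent of the CWD. A minimal runnable sketch with a hypothetical layout:

import os

# hypothetical layout: eval-queue/some-org/request.json
save_path, entry = "eval-queue", "some-org"
folder = os.path.join(save_path, entry)
os.makedirs(folder, exist_ok=True)
open(os.path.join(folder, "request.json"), "w").close()

# old check, resolved against the CWD -- almost always False:
print(os.path.isfile("request.json"))                        # False unless run from inside the folder
# new check, resolved against the actual location:
print(os.path.isfile(os.path.join(folder, "request.json")))  # True

sub_entries = [
    e
    for e in os.listdir(folder)
    if os.path.isfile(os.path.join(folder, e)) and not e.startswith(".")
]
print(sub_entries)  # ['request.json']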