Victor Dieguez committed on
Commit
781d4b0
·
1 Parent(s): fd29588

Removing environment variables

Browse files
Files changed (1) hide show
  1. src/populate.py +112 -2
src/populate.py CHANGED
@@ -50,7 +50,7 @@ def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_co
50
  df = df[has_no_nan_values(df, existing_benchmarks)]
51
 
52
  return df
53
- '''
54
 
55
  def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
56
  """
@@ -93,7 +93,7 @@ def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_co
93
  df = df.dropna(subset=existing_benchmarks, how="any")
94
 
95
  return df
96
- '''
97
  def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
98
  """Creates the different dataframes for the evaluation queues requestes"""
99
  entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
@@ -129,6 +129,116 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
129
  df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
130
  return df_finished[cols], df_running[cols], df_pending[cols]
131
  '''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  def get_evaluation_queue_df(save_path: str, cols: list):
133
  """
134
  Stubbed evaluation queue.
 
50
  df = df[has_no_nan_values(df, existing_benchmarks)]
51
 
52
  return df
53
+
54
 
55
  def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
56
  """
 
93
  df = df.dropna(subset=existing_benchmarks, how="any")
94
 
95
  return df
96
+
97
  def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
98
  """Creates the different dataframes for the evaluation queues requestes"""
99
  entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
 
129
  df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
130
  return df_finished[cols], df_running[cols], df_pending[cols]
131
  '''
132
def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
    """
    Assemble the leaderboard dataframe straight from the JSON result files
    found in ``eval_results_path``.

    ``get_raw_eval_results`` is bypassed on purpose: the result files already
    follow a minimal schema::

        config.model_name
        results[benchmark_name]["acc"]   # value in [0, 1]

    One row is produced per ``*.json`` file. Each row starts with every
    AutoEvalColumn field set to None, then the known fields are filled in:

    * model -> clickable HF link
    * Average ⬆️ -> mean of the collected metrics (as a percentage)
    * each column in ``benchmark_cols`` -> metric * 100

    Parameters
    ----------
    eval_results_path : str
        Directory holding the per-model ``*.json`` result files.
    eval_requests_path : str
        Unused here; kept so the signature matches existing callers.
    cols : list
        Column names (and ordering) the returned dataframe should expose.
    benchmark_cols : list
        Benchmark column names to look up inside each file's ``results``.

    Returns
    -------
    pd.DataFrame
        One row per usable result file; empty (with ``cols`` columns) when
        nothing usable is found.
    """
    # Bail out early when the results directory is missing entirely.
    if not os.path.isdir(eval_results_path):
        print(f"Results path '{eval_results_path}' does not exist.")
        return pd.DataFrame(columns=cols)

    # Visible *.json entries only (hidden dot-files are ignored).
    candidates = [
        entry
        for entry in os.listdir(eval_results_path)
        if entry.endswith(".json") and not entry.startswith(".")
    ]
    if not candidates:
        print(f"No JSON result files found in '{eval_results_path}'.")
        return pd.DataFrame(columns=cols)

    records = []

    for file_name in candidates:
        file_path = os.path.join(eval_results_path, file_name)
        try:
            with open(file_path, "r", encoding="utf-8") as handle:
                payload = json.load(handle)
        except Exception as exc:
            # Unreadable or malformed file: report and move on.
            print(f"Failed to read '{file_path}': {exc}")
            continue

        # Seed the record with None for every AutoEvalColumn field so the
        # resulting frame always matches the expected schema.
        record = {field.name: None for field in fields(AutoEvalColumn)}

        # ---- model column ----
        config = payload.get("config", {})
        model_id = (
            config.get("model_name")
            or config.get("model_id")
            or config.get("model")  # last-resort key
        )
        if model_id is None:
            # Files without any model identifier are unusable.
            print(f"Skipping '{file_name}' – no model_name in config.")
            continue

        # Render the model as a clickable markdown link.
        record[AutoEvalColumn.model.name] = make_clickable_model(model_id)

        # ---- metrics ----
        metric_map = payload.get("results", {})
        collected = []

        for bench in benchmark_cols:
            bench_entry = metric_map.get(bench, None)
            if isinstance(bench_entry, dict):
                # The agreed-upon metric key in these JSONs is "acc".
                acc = bench_entry.get("acc", None)
                if acc is not None:
                    # Scale to a percentage (e.g. 0.747 -> 74.7).
                    pct = float(acc) * 100.0
                    record[bench] = pct
                    collected.append(pct)

        # ---- Average ⬆️ ----
        avg_name = AutoEvalColumn.average.name
        record[avg_name] = sum(collected) / len(collected) if collected else None

        records.append(record)

    if not records:
        print("No valid evaluation rows constructed – returning empty leaderboard.")
        return pd.DataFrame(columns=cols)

    df = pd.DataFrame(records)

    # Preserve the caller-requested column ordering (COLS).
    df = df[[c for c in cols if c in df.columns]]

    # Two-decimal rounding for every numeric column.
    numeric_cols = df.select_dtypes(include="number").columns
    if len(numeric_cols) > 0:
        df[numeric_cols] = df[numeric_cols].round(2)

    # Drop rows missing any of the benchmark scores that actually exist.
    present_benchmarks = [c for c in benchmark_cols if c in df.columns]
    if present_benchmarks:
        df = df.dropna(subset=present_benchmarks, how="any")

    return df
241
+ '''
242
  def get_evaluation_queue_df(save_path: str, cols: list):
243
  """
244
  Stubbed evaluation queue.