Victor Dieguez committed on
Commit
654c990
·
1 Parent(s): 0235b45

Removing env variables

Browse files
Files changed (1) hide show
  1. src/populate.py +5 -125
src/populate.py CHANGED
@@ -1,134 +1,13 @@
1
  import json
2
  import os
 
3
 
4
  import pandas as pd
5
 
6
- from src.display.formatting import has_no_nan_values, make_clickable_model
7
- from src.display.utils import AutoEvalColumn, EvalQueueColumn
8
- from src.leaderboard.read_evals import get_raw_eval_results
9
- #from src.display.utils import has_no_nan_values
10
-
11
- '''
12
- def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
13
- """Creates a dataframe from all the individual experiment results"""
14
- raw_data = get_raw_eval_results(results_path, requests_path)
15
- all_data_json = [v.to_dict() for v in raw_data]
16
-
17
- df = pd.DataFrame.from_records(all_data_json)
18
- df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
19
- df = df[cols].round(decimals=2)
20
-
21
- # filter out if any of the benchmarks have not been produced
22
- df = df[has_no_nan_values(df, benchmark_cols)]
23
- return df
24
-
25
- def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
26
- eval_results = get_raw_eval_results(eval_results_path, eval_requests_path)
27
- rows = [e.to_dict() for e in eval_results]
28
-
29
- # If nothing loaded, just return an empty DF (prevents KeyError)
30
- if not rows:
31
- print("No eval results found – returning empty leaderboard.")
32
- return pd.DataFrame()
33
-
34
- df = pd.DataFrame(rows)
35
-
36
- # 1) Sort only if the Average column exists
37
- avg_col = AutoEvalColumn.average.name # usually "Average ⬆️"
38
- if avg_col in df.columns:
39
- df = df.sort_values(by=[avg_col], ascending=False)
40
- else:
41
- print("Average column not found in dataframe. Columns:", df.columns)
42
-
43
- # 2) Only keep columns that actually exist in the DF
44
- existing_cols = [c for c in cols if c in df.columns]
45
- df = df[existing_cols].round(decimals=2)
46
-
47
- # 3) Filter models that miss some benchmarks, but only for existing benchmarks
48
- existing_benchmarks = [c for c in benchmark_cols if c in df.columns]
49
- if existing_benchmarks:
50
- df = df[has_no_nan_values(df, existing_benchmarks)]
51
-
52
- return df
53
 
54
 
55
- def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
56
- """
57
- Build the leaderboard dataframe from the JSON eval results.
58
-
59
- - Does NOT assume that an 'Average ⬆️' column already exists.
60
- - Does NOT depend on has_no_nan_values.
61
- - Is tolerant to missing columns.
62
- """
63
- # 1) Load EvalResult objects from results (and requests, if your get_raw_eval_results uses it)
64
- eval_results = get_raw_eval_results(eval_results_path, eval_requests_path)
65
- rows = [e.to_dict() for e in eval_results]
66
-
67
- # If nothing loaded, return an empty DF with the expected column order
68
- if not rows:
69
- print("No eval results found – returning empty leaderboard.")
70
- return pd.DataFrame(columns=cols)
71
-
72
- df = pd.DataFrame(rows)
73
-
74
- # 2) Sort by Average if that column exists
75
- avg_col = AutoEvalColumn.average.name # usually "Average ⬆️"
76
- if avg_col in df.columns:
77
- df = df.sort_values(by=[avg_col], ascending=False)
78
- else:
79
- print(f"Average column '{avg_col}' not found. Available columns:", list(df.columns))
80
-
81
- # 3) Keep only the columns that actually exist in the dataframe
82
- existing_cols = [c for c in cols if c in df.columns]
83
- df = df[existing_cols]
84
-
85
- # 4) Round numeric columns to 2 decimals
86
- num_cols = df.select_dtypes(include="number").columns
87
- if len(num_cols) > 0:
88
- df[num_cols] = df[num_cols].round(2)
89
-
90
- # 5) If you *want* to drop rows with NaNs in benchmark columns, do it directly with dropna
91
- existing_benchmarks = [c for c in benchmark_cols if c in df.columns]
92
- if existing_benchmarks:
93
- df = df.dropna(subset=existing_benchmarks, how="any")
94
-
95
- return df
96
-
97
- def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
98
- """Creates the different dataframes for the evaluation queues requestes"""
99
- entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
100
- all_evals = []
101
-
102
- for entry in entries:
103
- if ".json" in entry:
104
- file_path = os.path.join(save_path, entry)
105
- with open(file_path) as fp:
106
- data = json.load(fp)
107
-
108
- data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
109
- data[EvalQueueColumn.revision.name] = data.get("revision", "main")
110
-
111
- all_evals.append(data)
112
- elif ".md" not in entry:
113
- # this is a folder
114
- sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(e) and not e.startswith(".")]
115
- for sub_entry in sub_entries:
116
- file_path = os.path.join(save_path, entry, sub_entry)
117
- with open(file_path) as fp:
118
- data = json.load(fp)
119
-
120
- data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
121
- data[EvalQueueColumn.revision.name] = data.get("revision", "main")
122
- all_evals.append(data)
123
-
124
- pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
125
- running_list = [e for e in all_evals if e["status"] == "RUNNING"]
126
- finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
127
- df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
128
- df_running = pd.DataFrame.from_records(running_list, columns=cols)
129
- df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
130
- return df_finished[cols], df_running[cols], df_pending[cols]
131
- '''
132
  def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
133
  """
134
  Build the leaderboard dataframe directly from JSON files in eval_results_path.
@@ -239,6 +118,7 @@ def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_co
239
 
240
  return df
241
 
 
242
  def get_evaluation_queue_df(save_path: str, cols: list):
243
  """
244
  Stubbed evaluation queue.
@@ -248,7 +128,6 @@ def get_evaluation_queue_df(save_path: str, cols: list):
248
  - return three empty dataframes (finished, running, pending)
249
  with the expected columns.
250
  """
251
- # Make sure the folder exists so nothing crashes on missing dir
252
  os.makedirs(save_path, exist_ok=True)
253
 
254
  empty_df = pd.DataFrame(columns=cols)
@@ -256,3 +135,4 @@ def get_evaluation_queue_df(save_path: str, cols: list):
256
  # The order here must match how app.py unpacks the result:
257
  # finished_df, running_df, pending_df = get_evaluation_queue_df(...)
258
  return empty_df, empty_df.copy(), empty_df.copy()
 
 
1
  import json
2
  import os
3
+ from dataclasses import fields
4
 
5
  import pandas as pd
6
 
7
+ from src.display.formatting import make_clickable_model
8
+ from src.display.utils import AutoEvalColumn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
12
  """
13
  Build the leaderboard dataframe directly from JSON files in eval_results_path.
 
118
 
119
  return df
120
 
121
+
122
  def get_evaluation_queue_df(save_path: str, cols: list):
123
  """
124
  Stubbed evaluation queue.
 
128
  - return three empty dataframes (finished, running, pending)
129
  with the expected columns.
130
  """
 
131
  os.makedirs(save_path, exist_ok=True)
132
 
133
  empty_df = pd.DataFrame(columns=cols)
 
135
  # The order here must match how app.py unpacks the result:
136
  # finished_df, running_df, pending_df = get_evaluation_queue_df(...)
137
  return empty_df, empty_df.copy(), empty_df.copy()
138
+