Victor Dieguez committed on
Commit 2e39b31 · 1 Parent(s): 9edb513

Removing env variables

Files changed (1)
  1. src/populate.py +73 -1
src/populate.py CHANGED

@@ -6,8 +6,9 @@ import pandas as pd
 from src.display.formatting import has_no_nan_values, make_clickable_model
 from src.display.utils import AutoEvalColumn, EvalQueueColumn
 from src.leaderboard.read_evals import get_raw_eval_results
+#from src.display.utils import has_no_nan_values
 
-
+'''
 def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
     raw_data = get_raw_eval_results(results_path, requests_path)
@@ -21,6 +22,77 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
     df = df[has_no_nan_values(df, benchmark_cols)]
     return df
 
+def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
+    eval_results = get_raw_eval_results(eval_results_path, eval_requests_path)
+    rows = [e.to_dict() for e in eval_results]
+
+    # If nothing loaded, just return an empty DF (prevents KeyError)
+    if not rows:
+        print("No eval results found – returning empty leaderboard.")
+        return pd.DataFrame()
+
+    df = pd.DataFrame(rows)
+
+    # 1) Sort only if the Average column exists
+    avg_col = AutoEvalColumn.average.name  # usually "Average ⬆️"
+    if avg_col in df.columns:
+        df = df.sort_values(by=[avg_col], ascending=False)
+    else:
+        print("Average column not found in dataframe. Columns:", df.columns)
+
+    # 2) Only keep columns that actually exist in the DF
+    existing_cols = [c for c in cols if c in df.columns]
+    df = df[existing_cols].round(decimals=2)
+
+    # 3) Filter models that miss some benchmarks, but only for existing benchmarks
+    existing_benchmarks = [c for c in benchmark_cols if c in df.columns]
+    if existing_benchmarks:
+        df = df[has_no_nan_values(df, existing_benchmarks)]
+
+    return df
+'''
+
+def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
+    """
+    Build the leaderboard dataframe from the JSON eval results.
+
+    - Does NOT assume that an 'Average ⬆️' column already exists.
+    - Does NOT depend on has_no_nan_values.
+    - Is tolerant to missing columns.
+    """
+    # 1) Load EvalResult objects from results (and requests, if your get_raw_eval_results uses it)
+    eval_results = get_raw_eval_results(eval_results_path, eval_requests_path)
+    rows = [e.to_dict() for e in eval_results]
+
+    # If nothing loaded, return an empty DF with the expected column order
+    if not rows:
+        print("No eval results found – returning empty leaderboard.")
+        return pd.DataFrame(columns=cols)
+
+    df = pd.DataFrame(rows)
+
+    # 2) Sort by Average if that column exists
+    avg_col = AutoEvalColumn.average.name  # usually "Average ⬆️"
+    if avg_col in df.columns:
+        df = df.sort_values(by=[avg_col], ascending=False)
+    else:
+        print(f"Average column '{avg_col}' not found. Available columns:", list(df.columns))
+
+    # 3) Keep only the columns that actually exist in the dataframe
+    existing_cols = [c for c in cols if c in df.columns]
+    df = df[existing_cols]
+
+    # 4) Round numeric columns to 2 decimals
+    num_cols = df.select_dtypes(include="number").columns
+    if len(num_cols) > 0:
+        df[num_cols] = df[num_cols].round(2)
+
+    # 5) If you *want* to drop rows with NaNs in benchmark columns, do it directly with dropna
+    existing_benchmarks = [c for c in benchmark_cols if c in df.columns]
+    if existing_benchmarks:
+        df = df.dropna(subset=existing_benchmarks, how="any")
+
+    return df
 
 def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
     """Creates the different dataframes for the evaluation queue requests"""