Nikhil Raghavan committed on
Commit
304343a
·
1 Parent(s): d400470
Files changed (3) hide show
  1. src/display/utils.py +2 -1
  2. src/envs.py +2 -0
  3. src/populate.py +24 -11
src/display/utils.py CHANGED
@@ -28,7 +28,7 @@ for task in Tasks:
28
  auto_eval_column_dict.append([task.name, ColumnContent, field(default_factory=lambda t=_task: ColumnContent(t.value.col_name, "number", True))])
29
 
30
  # We use make dataclass to dynamically fill the scores from Tasks
31
- AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
32
 
33
  ## For the queue columns in the submission tab
34
  @dataclass(frozen=True)
@@ -80,6 +80,7 @@ class Precision(Enum):
80
  bfloat16 = ModelDetails("bfloat16")
81
  Unknown = ModelDetails("?")
82
 
 
83
  def from_str(precision):
84
  if precision in ["torch.float16", "float16"]:
85
  return Precision.float16
 
28
  auto_eval_column_dict.append([task.name, ColumnContent, field(default_factory=lambda t=_task: ColumnContent(t.value.col_name, "number", True))])
29
 
30
  # We use make dataclass to dynamically fill the scores from Tasks
31
+ AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)()
32
 
33
  ## For the queue columns in the submission tab
34
  @dataclass(frozen=True)
 
80
  bfloat16 = ModelDetails("bfloat16")
81
  Unknown = ModelDetails("?")
82
 
83
+ @staticmethod
84
  def from_str(precision):
85
  if precision in ["torch.float16", "float16"]:
86
  return Precision.float16
src/envs.py CHANGED
@@ -17,5 +17,7 @@ CACHE_PATH=os.getenv("HF_HOME", ".")
17
 
18
  # Local caches
19
  EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
 
 
20
 
21
  API = HfApi(token=TOKEN)
 
17
 
18
  # Local caches
19
  EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
20
+ EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
21
+ QUEUE_REPO = f"{OWNER}/requests"
22
 
23
  API = HfApi(token=TOKEN)
src/populate.py CHANGED
@@ -1,7 +1,17 @@
1
- from src.leaderboard.read_evals import get_raw_eval_results
 
 
2
  import pandas as pd
3
 
4
- """ calls get_raw_eval_results function from our read_evals.py file to get the DataFrame"""
 
 
 
 
 
 
 
 
5
 
6
  def get_leaderboard_df(results_path: str, requests_path: str = None, cols: list = None, benchmark_cols: list = None) -> pd.DataFrame:
7
  """Creates a dataframe from all the individual experiment results"""
@@ -9,16 +19,17 @@ def get_leaderboard_df(results_path: str, requests_path: str = None, cols: list
9
  all_data_json = [v.to_dict() for v in raw_data]
10
 
11
  df = pd.DataFrame.from_records(all_data_json)
12
- df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
13
- df = df[cols].round(decimals=2)
 
14
 
15
- # filter out if any of the benchmarks have not been produced
16
- df = df[has_no_nan_values(df, benchmark_cols)]
17
  return df
18
 
19
 
20
  def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
21
- """Creates the different dataframes for the evaluation queues requestes"""
22
  entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
23
  all_evals = []
24
 
@@ -28,19 +39,21 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
28
  with open(file_path) as fp:
29
  data = json.load(fp)
30
 
31
- data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
32
  data[EvalQueueColumn.revision.name] = data.get("revision", "main")
33
 
34
  all_evals.append(data)
35
  elif ".md" not in entry:
36
- # this is a folder
37
- sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(os.path.join(save_path, entry, e)) and not e.startswith(".")]
 
 
38
  for sub_entry in sub_entries:
39
  file_path = os.path.join(save_path, entry, sub_entry)
40
  with open(file_path) as fp:
41
  data = json.load(fp)
42
 
43
- data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
44
  data[EvalQueueColumn.revision.name] = data.get("revision", "main")
45
  all_evals.append(data)
46
 
 
1
+ import json
2
+ import os
3
+
4
  import pandas as pd
5
 
6
+ from src.display.utils import AutoEvalColumn, EvalQueueColumn
7
+ from src.leaderboard.read_evals import get_raw_eval_results
8
+
9
+
10
+ def has_no_nan_values(df, cols):
11
+ if not cols:
12
+ return [True] * len(df)
13
+ return df[cols].notna().all(axis=1)
14
+
15
 
16
  def get_leaderboard_df(results_path: str, requests_path: str = None, cols: list = None, benchmark_cols: list = None) -> pd.DataFrame:
17
  """Creates a dataframe from all the individual experiment results"""
 
19
  all_data_json = [v.to_dict() for v in raw_data]
20
 
21
  df = pd.DataFrame.from_records(all_data_json)
22
+ df = df.sort_values(by=[AutoEvalColumn.technique.name], ascending=True)
23
+ if cols:
24
+ df = df[[c for c in cols if c in df.columns]].round(decimals=2)
25
 
26
+ if benchmark_cols:
27
+ df = df[has_no_nan_values(df, benchmark_cols)]
28
  return df
29
 
30
 
31
  def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
32
+ """Creates the different dataframes for the evaluation queues"""
33
  entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
34
  all_evals = []
35
 
 
39
  with open(file_path) as fp:
40
  data = json.load(fp)
41
 
42
+ data[EvalQueueColumn.model.name] = data["model"]
43
  data[EvalQueueColumn.revision.name] = data.get("revision", "main")
44
 
45
  all_evals.append(data)
46
  elif ".md" not in entry:
47
+ sub_entries = [
48
+ e for e in os.listdir(f"{save_path}/{entry}")
49
+ if os.path.isfile(os.path.join(save_path, entry, e)) and not e.startswith(".")
50
+ ]
51
  for sub_entry in sub_entries:
52
  file_path = os.path.join(save_path, entry, sub_entry)
53
  with open(file_path) as fp:
54
  data = json.load(fp)
55
 
56
+ data[EvalQueueColumn.model.name] = data["model"]
57
  data[EvalQueueColumn.revision.name] = data.get("revision", "main")
58
  all_evals.append(data)
59