dgx-019 committed on
Commit
b090476
·
1 Parent(s): a14f2ee
Files changed (2) hide show
  1. app.py +12 -1
  2. src/display/utils.py +66 -2
app.py CHANGED
@@ -23,7 +23,8 @@ from src.display.utils import (
23
  ModelType,
24
  fields,
25
  WeightType,
26
- Precision
 
27
  )
28
  from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
29
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
@@ -89,6 +90,11 @@ def init_leaderboard(dataframe):
89
  interactive=False,
90
  )
91
 
 
 
 
 
 
92
 
93
  demo = gr.Blocks(css=custom_css)
94
  with demo:
@@ -98,6 +104,11 @@ with demo:
98
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
99
  with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
100
  leaderboard = init_leaderboard(LEADERBOARD_DF)
 
 
 
 
 
101
 
102
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
103
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
23
  ModelType,
24
  fields,
25
  WeightType,
26
+ Precision,
27
+ get_category_columns
28
  )
29
  from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
30
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
 
90
  interactive=False,
91
  )
92
 
93
def update_visible_columns(selected_categories):
    """Return the columns to show for the selected categories.

    The result starts with the three fixed entries ('model_type_symbol',
    'model', 'average') and is followed by whatever
    ``get_category_columns(selected_categories)`` returns.
    """
    selected = get_category_columns(selected_categories)
    return ['model_type_symbol', 'model', 'average'] + selected
98
 
99
  demo = gr.Blocks(css=custom_css)
100
  with demo:
 
104
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
105
  with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
106
  leaderboard = init_leaderboard(LEADERBOARD_DF)
107
+ leaderboard.filter_columns[0].change(
108
+ fn=update_visible_columns,
109
+ inputs=[leaderboard.filter_columns[0]],
110
+ outputs=[leaderboard.select_columns]
111
+ )
112
 
113
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
114
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
src/display/utils.py CHANGED
@@ -19,6 +19,7 @@ class ColumnContent:
19
  displayed_by_default: bool
20
  hidden: bool = False
21
  never_hidden: bool = False
 
22
 
23
  ## Leaderboard columns
24
  auto_eval_column_dict = []
@@ -32,12 +33,16 @@ auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average
32
  for category in TaskCategory:
33
  category_tasks = [task for task in Tasks if task.value.category == category]
34
  if category_tasks:
35
- # 添加该类别下的所有任务
36
  for task in category_tasks:
37
  auto_eval_column_dict.append([
38
  task.name, # 使用原始任务名作为列名
39
  ColumnContent,
40
- ColumnContent(task.value.col_name, "number", False) # 使用原始显示名
 
 
 
 
 
41
  ])
42
 
43
  # Model information
@@ -134,3 +139,62 @@ CATEGORY_GROUPS = {
134
  for category in TaskCategory
135
  }
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  displayed_by_default: bool
20
  hidden: bool = False
21
  never_hidden: bool = False
22
+ category: str = None # 新增类别字段
23
 
24
  ## Leaderboard columns
25
  auto_eval_column_dict = []
 
33
  for category in TaskCategory:
34
  category_tasks = [task for task in Tasks if task.value.category == category]
35
  if category_tasks:
 
36
  for task in category_tasks:
37
  auto_eval_column_dict.append([
38
  task.name, # 使用原始任务名作为列名
39
  ColumnContent,
40
+ ColumnContent(
41
+ task.value.col_name, # 使用原始显示名
42
+ "number",
43
+ False,
44
+ category=category.value # 添加类别信息
45
+ )
46
  ])
47
 
48
  # Model information
 
139
  for category in TaskCategory
140
  }
141
 
142
# Category filters used for grouped display: one entry per TaskCategory
# member, pairing the category's value with the names of the Tasks members
# whose ``value.category`` equals that category.
CATEGORY_FILTERS = [
    {'name': cat.value, 'columns': [t.name for t in Tasks if t.value.category == cat]}
    for cat in TaskCategory
]
150
+
151
def get_category_columns(categories):
    """Return the column names that belong to the given categories.

    Args:
        categories: Category names to look up. May be an iterable of
            strings, a single string, or ``None``.

    Returns:
        A list containing the ``'columns'`` entries of every
        ``CATEGORY_FILTERS`` item whose ``'name'`` is among ``categories``,
        in ``CATEGORY_FILTERS`` order. Empty when nothing is selected.
    """
    if not categories:
        # Nothing selected (None or empty); `x in None` would raise TypeError.
        return []
    if isinstance(categories, str):
        # A bare string would otherwise be substring-matched by `in`.
        categories = [categories]
    return [
        column
        for filter_item in CATEGORY_FILTERS
        if filter_item['name'] in categories
        for column in filter_item['columns']
    ]
158
+
159
# Leaderboard initialisation, extended with a category filter.
def init_leaderboard(dataframe):
    """Build the ``Leaderboard`` component for ``dataframe``.

    Args:
        dataframe: The leaderboard data; must expose ``.empty`` (a pandas
            DataFrame is assumed — confirm against the caller).

    Returns:
        The ``Leaderboard`` instance configured with column selection,
        search, hidden columns and filters derived from ``AutoEvalColumn``.

    Raises:
        ValueError: If ``dataframe`` is ``None`` or empty.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    # Category filter: a checkbox group with one choice per TaskCategory value.
    # The keyword is `choices` (assuming ColumnFilter is gradio_leaderboard's
    # dataclass, which defines no `options` field — passing `options=` would
    # raise TypeError when the filter is constructed).
    # NOTE(review): this filter targets a column named "category"; confirm
    # that the dataframe actually has such a column.
    category_filter = ColumnFilter(
        "category",
        type="checkboxgroup",
        label="Categories",
        choices=[cat.value for cat in TaskCategory],
    )

    return Leaderboard(
        value=dataframe,
        datatype=[c.type for c in fields(AutoEvalColumn)],
        select_columns=SelectColumns(
            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
            label="Select Columns to Display:",
        ),
        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
        filter_columns=[
            category_filter,  # category filter goes first
            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
            ColumnFilter(
                AutoEvalColumn.params.name,
                type="slider",
                min=0.01,
                max=150,
                label="Select the number of parameters (B)",
            ),
            ColumnFilter(
                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
            ),
        ],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )
200
+