dgx-019 committed on
Commit ddd6462 · 1 Parent(s): b090476
Files changed (2)
  1. app.py +2 -14
  2. src/display/utils.py +5 -95
app.py CHANGED
@@ -4,8 +4,7 @@ import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
 
-
-from src.about import (
+from chuxue_leaderboard.Legal_Leaderboard.src.about_ori import (
     CITATION_BUTTON_LABEL,
     CITATION_BUTTON_TEXT,
     EVALUATION_QUEUE_TEXT,
@@ -23,8 +22,7 @@ from src.display.utils import (
     ModelType,
     fields,
     WeightType,
-    Precision,
-    get_category_columns
+    Precision
 )
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
@@ -90,11 +88,6 @@ def init_leaderboard(dataframe):
         interactive=False,
     )
 
-def update_visible_columns(selected_categories):
-    """Update the visible columns based on the selected categories"""
-    always_visible = ['model_type_symbol', 'model', 'average']
-    category_columns = get_category_columns(selected_categories)
-    return always_visible + category_columns
 
 demo = gr.Blocks(css=custom_css)
 with demo:
@@ -104,11 +97,6 @@ with demo:
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
             leaderboard = init_leaderboard(LEADERBOARD_DF)
-            leaderboard.filter_columns[0].change(
-                fn=update_visible_columns,
-                inputs=[leaderboard.filter_columns[0]],
-                outputs=[leaderboard.select_columns]
-            )
 
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
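
Both files iterate a `Tasks` registry imported from `src.about` (using `task.name` for the data column and `task.value.col_name` for the display header). Below is a minimal sketch of the shape that registry is assumed to take, following the stock Hugging Face leaderboard template; the concrete entries are hypothetical, not the repo's actual `src/about.py`.

```python
from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str  # key of the benchmark in the results files
    metric: str     # metric to read for that benchmark
    col_name: str   # column header shown on the leaderboard


class Tasks(Enum):
    # Hypothetical entries; the real ones live in src/about.py.
    task0 = Task("benchmark_a", "acc", "Benchmark A")
    task1 = Task("benchmark_b", "f1", "Benchmark B")


# src/display/utils.py builds one numeric leaderboard column per member:
for task in Tasks:
    print(task.name, "->", task.value.col_name)
```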
src/display/utils.py CHANGED
@@ -3,7 +3,7 @@ from enum import Enum
 
 import pandas as pd
 
-from src.about import Tasks, TaskCategory
+from src.about import Tasks
 
 def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
@@ -19,7 +19,6 @@ class ColumnContent:
     displayed_by_default: bool
     hidden: bool = False
     never_hidden: bool = False
-    category: str = None  # newly added category field
 
 ## Leaderboard columns
 auto_eval_column_dict = []
@@ -28,23 +27,8 @@ auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent(
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
 #Scores
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
-
-# Add task columns grouped by category
-for category in TaskCategory:
-    category_tasks = [task for task in Tasks if task.value.category == category]
-    if category_tasks:
-        for task in category_tasks:
-            auto_eval_column_dict.append([
-                task.name,  # use the original task name as the column name
-                ColumnContent,
-                ColumnContent(
-                    task.value.col_name,  # use the original display name
-                    "number",
-                    False,
-                    category=category.value  # attach the category info
-                )
-            ])
-
+for task in Tasks:
+    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 # Model information
 auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
 auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
@@ -52,6 +36,7 @@ auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weigh
 auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
 auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
 auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
+#auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
 auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
 auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 
@@ -121,80 +106,5 @@ COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
 EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
 
-# Redefine BENCHMARK_COLS to include only the actual task columns
-BENCHMARK_COLS = [t.name for t in Tasks if t.value.category is not None]
-
-# Category information used for grouped display on the frontend
-CATEGORY_GROUPS = {
-    category.value: {
-        'display_name': category.value,
-        'tasks': [
-            {
-                'col_name': task.name,  # data column name
-                'display_name': task.value.col_name  # display name
-            }
-            for task in Tasks if task.value.category == category
-        ]
-    }
-    for category in TaskCategory
-}
-
-# Category filters used for grouped display
-CATEGORY_FILTERS = [
-    {
-        'name': category.value,
-        'columns': [task.name for task in Tasks if task.value.category == category]
-    }
-    for category in TaskCategory
-]
-
-def get_category_columns(categories):
-    """Return all column names belonging to the given categories"""
-    columns = []
-    for filter_item in CATEGORY_FILTERS:
-        if filter_item['name'] in categories:
-            columns.extend(filter_item['columns'])
-    return columns
-
-# Modified Leaderboard initialization function
-def init_leaderboard(dataframe):
-    if dataframe is None or dataframe.empty:
-        raise ValueError("Leaderboard DataFrame is empty or None.")
-
-    # Add a category filter
-    category_filter = ColumnFilter(
-        "category",
-        type="checkboxgroup",
-        label="Categories",
-        options=[cat.value for cat in TaskCategory]
-    )
-
-    return Leaderboard(
-        value=dataframe,
-        datatype=[c.type for c in fields(AutoEvalColumn)],
-        select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
-            label="Select Columns to Display:",
-        ),
-        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-        filter_columns=[
-            category_filter,  # category filter
-            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
-            ColumnFilter(
-                AutoEvalColumn.params.name,
-                type="slider",
-                min=0.01,
-                max=150,
-                label="Select the number of parameters (B)",
-            ),
-            ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-            ),
-        ],
-        bool_checkboxgroup_label="Hide models",
-        interactive=False,
-    )
+BENCHMARK_COLS = [t.value.col_name for t in Tasks]
 
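
The hunk header above references `COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]`. For context, here is a self-contained sketch of how the `auto_eval_column_dict` registry is assumed to turn into that `AutoEvalColumn` class, following the stock Hugging Face leaderboard template; the `make_dataclass` step and the sample entries sit outside the hunks shown and are assumptions, not the repo's exact code.

```python
from dataclasses import dataclass, make_dataclass


@dataclass
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False


def fields(raw_class):
    # Same helper as in src/display/utils.py: walk non-dunder class attributes.
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]


# Sample registry entries mirroring the appends in the diff above (hypothetical).
auto_eval_column_dict = [
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)],
    ["task0", ColumnContent, ColumnContent("Benchmark A", "number", True)],  # one entry per Tasks member
]

# The template materialises the registry into a frozen dataclass whose class
# attributes are the ColumnContent defaults, which is exactly what fields() walks.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
print(COLS)  # ['Model', 'Average ⬆️', 'Benchmark A']
```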