mangopy committed on
Commit
5fec7f9
·
verified ·
1 Parent(s): dffc8ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -21
app.py CHANGED
@@ -6,26 +6,21 @@ from yaml import safe_load
6
  import pandas as pd
7
  import gradio as gr
8
 
9
- # 加载配置文件
10
  CONFIG = safe_load(open("config.yaml"))
11
  label_map = {'Avg':"All", "API":"Web API", "Code": "Code Function", "Customized": "Customized App"}
12
- # 读取数据并进行初步处理
13
  data = defaultdict(dict)
14
  for setting in CONFIG['settings']:
15
  for data_type in CONFIG['types']:
16
  file_path = os.path.join("data", f"{CONFIG['settings_mapping'][setting]}-{data_type}.xlsx")
17
  df = pd.read_excel(file_path)
18
 
19
- # 添加平均分列,计算除第一列和倒数两列之外的均值
20
  df["Average"] = df.iloc[:, 1:-2].mean(axis=1)
21
 
22
- # 添加 Rank 列,根据 Average 降序排名
23
  df["Rank"] = df["Average"].rank(ascending=False, method='min').astype(int)
24
 
25
- # 按 Rank 排序(Rank 值越小越靠前)
26
  df = df.sort_values("Rank", ascending=True)
27
 
28
- # 将列重新排序:第一列为 Rank,第二列为 Model,第三列为 Average,其余列保持原有顺序
29
  cols = df.columns.tolist()
30
  first_cols = []
31
  if "Rank" in cols:
@@ -37,7 +32,6 @@ for setting in CONFIG['settings']:
37
  remaining_cols = [col for col in cols if col not in first_cols]
38
  df = df[first_cols + remaining_cols]
39
 
40
- # 数值格式化:对于数值列(除 Rank 列),如果最大值 <= 1 则认为是比例数据(乘以 100 后保留两位小数),否则直接保留两位小数
41
  numeric_cols = df.select_dtypes(include=['float', 'int']).columns
42
  for col in numeric_cols:
43
  if col != "Rank":
@@ -48,9 +42,7 @@ for setting in CONFIG['settings']:
48
 
49
  data[setting][data_type] = df
50
 
51
- # 自定义 CSS 样式,包括表格样式及标签页的边框美化
52
  css = """
53
- /* 表格样式 */
54
  table thead th, table thead td {
55
  text-align: center !important;
56
  }
@@ -74,7 +66,6 @@ table > tbody > tr > td:not(:nth-child(2)) {
74
  vertical-align: middle;
75
  }
76
 
77
- /* 外层标签页增加边框、内边距和圆角 */
78
  .outer-tabs {
79
  border: 2px solid #ccc;
80
  border-radius: 8px;
@@ -94,7 +85,6 @@ table > tbody > tr > td:not(:nth-child(2)) {
94
  border-bottom: 2px solid #0078d7;
95
  }
96
 
97
- /* 内层标签页增加边框、内边距和圆角 */
98
  .inner-tabs {
99
  border: 2px solid #aaa;
100
  border-radius: 8px;
@@ -115,7 +105,6 @@ table > tbody > tr > td:not(:nth-child(2)) {
115
  }
116
  """
117
 
118
- # 模型类型和模型大小(数值区间)设置
119
  MODEL_TYPES = [
120
  "sparse retrieval",
121
  "dense retrieval",
@@ -131,24 +120,21 @@ NUMERIC_INTERVALS = {
131
  ">1B": pd.Interval(1000, 1_000_000, closed='right'),
132
  }
133
 
134
- # 定义过滤函数,实现搜索、模型类型及模型大小过滤功能,并重新计算局部 Rank
135
  def filter_data(search_query, model_types, model_sizes):
136
  outputs = []
137
  for setting in CONFIG['settings']:
138
  for data_type in CONFIG['types']:
139
  df = data[setting][data_type].copy()
140
 
141
- # 搜索过滤:在 "Model" 列中查找包含任一搜索关键字的记录
142
  if search_query:
143
  queries = [q.strip().lower() for q in search_query.split(";") if q.strip()]
144
  mask_search = df["Model"].str.lower().apply(lambda x: any(q in x for q in queries))
145
  df = df[mask_search]
146
 
147
- # 模型类型过滤:假设 Excel 中存在 "Model Type" 列
148
  if model_types and set(model_types) != set(MODEL_TYPES):
149
  df = df[df["Model Type"].isin(model_types)]
150
 
151
- # 模型大小过滤:将 "Number of Parameters" 转换为数值,并利用选定的区间进行过滤
152
  def parse_params(val):
153
  try:
154
  if isinstance(val, str):
@@ -176,11 +162,9 @@ def filter_data(search_query, model_types, model_sizes):
176
  if "params_numeric" in df.columns:
177
  df = df.drop(columns=["params_numeric"])
178
 
179
- # 重新计算 Rank,根据当前过滤后的 Average 进行排序(局部 Rank)
180
  df["Rank"] = df["Average"].rank(ascending=False, method='min').astype(int)
181
  df = df.sort_values("Rank", ascending=True)
182
 
183
- # 重新排列列顺序:Rank, Model, Average, 其他
184
  cols = df.columns.tolist()
185
  first_cols = []
186
  if "Rank" in cols:
@@ -195,7 +179,7 @@ def filter_data(search_query, model_types, model_sizes):
195
  outputs.append(df)
196
  return outputs
197
 
198
- # 创建 Gradio 界面
199
  with gr.Blocks(css=css) as demo:
200
  gr.Markdown("""
201
  ## Tool-Retrieval benchmark leaderboard
@@ -233,7 +217,6 @@ with gr.Blocks(css=css) as demo:
233
 
234
  submit_button = gr.Button("Filter Data")
235
 
236
- # 创建嵌套标签页,外层标签页使用 outer-tabs 类,内层标签页使用 inner-tabs 类
237
  output_dfs = []
238
  with gr.Tabs(elem_classes="outer-tabs") as result_tabs:
239
  for setting in CONFIG['settings']:
@@ -244,7 +227,6 @@ with gr.Blocks(css=css) as demo:
244
  df_component = gr.DataFrame(value=data[setting][data_type], type="pandas")
245
  output_dfs.append(df_component)
246
 
247
- # 将过滤函数与按钮绑定,点击后更新所有 DataFrame 组件
248
  submit_button.click(
249
  fn=filter_data,
250
  inputs=[search_box, model_type_checkbox_group, model_size_checkbox_group],
 
6
  import pandas as pd
7
  import gradio as gr
8
 
 
9
  CONFIG = safe_load(open("config.yaml"))
10
  label_map = {'Avg':"All", "API":"Web API", "Code": "Code Function", "Customized": "Customized App"}
11
+
12
  data = defaultdict(dict)
13
  for setting in CONFIG['settings']:
14
  for data_type in CONFIG['types']:
15
  file_path = os.path.join("data", f"{CONFIG['settings_mapping'][setting]}-{data_type}.xlsx")
16
  df = pd.read_excel(file_path)
17
 
 
18
  df["Average"] = df.iloc[:, 1:-2].mean(axis=1)
19
 
 
20
  df["Rank"] = df["Average"].rank(ascending=False, method='min').astype(int)
21
 
 
22
  df = df.sort_values("Rank", ascending=True)
23
 
 
24
  cols = df.columns.tolist()
25
  first_cols = []
26
  if "Rank" in cols:
 
32
  remaining_cols = [col for col in cols if col not in first_cols]
33
  df = df[first_cols + remaining_cols]
34
 
 
35
  numeric_cols = df.select_dtypes(include=['float', 'int']).columns
36
  for col in numeric_cols:
37
  if col != "Rank":
 
42
 
43
  data[setting][data_type] = df
44
 
 
45
  css = """
 
46
  table thead th, table thead td {
47
  text-align: center !important;
48
  }
 
66
  vertical-align: middle;
67
  }
68
 
 
69
  .outer-tabs {
70
  border: 2px solid #ccc;
71
  border-radius: 8px;
 
85
  border-bottom: 2px solid #0078d7;
86
  }
87
 
 
88
  .inner-tabs {
89
  border: 2px solid #aaa;
90
  border-radius: 8px;
 
105
  }
106
  """
107
 
 
108
  MODEL_TYPES = [
109
  "sparse retrieval",
110
  "dense retrieval",
 
120
  ">1B": pd.Interval(1000, 1_000_000, closed='right'),
121
  }
122
 
123
+
124
  def filter_data(search_query, model_types, model_sizes):
125
  outputs = []
126
  for setting in CONFIG['settings']:
127
  for data_type in CONFIG['types']:
128
  df = data[setting][data_type].copy()
129
 
 
130
  if search_query:
131
  queries = [q.strip().lower() for q in search_query.split(";") if q.strip()]
132
  mask_search = df["Model"].str.lower().apply(lambda x: any(q in x for q in queries))
133
  df = df[mask_search]
134
 
 
135
  if model_types and set(model_types) != set(MODEL_TYPES):
136
  df = df[df["Model Type"].isin(model_types)]
137
 
 
138
  def parse_params(val):
139
  try:
140
  if isinstance(val, str):
 
162
  if "params_numeric" in df.columns:
163
  df = df.drop(columns=["params_numeric"])
164
 
 
165
  df["Rank"] = df["Average"].rank(ascending=False, method='min').astype(int)
166
  df = df.sort_values("Rank", ascending=True)
167
 
 
168
  cols = df.columns.tolist()
169
  first_cols = []
170
  if "Rank" in cols:
 
179
  outputs.append(df)
180
  return outputs
181
 
182
+
183
  with gr.Blocks(css=css) as demo:
184
  gr.Markdown("""
185
  ## Tool-Retrieval benchmark leaderboard
 
217
 
218
  submit_button = gr.Button("Filter Data")
219
 
 
220
  output_dfs = []
221
  with gr.Tabs(elem_classes="outer-tabs") as result_tabs:
222
  for setting in CONFIG['settings']:
 
227
  df_component = gr.DataFrame(value=data[setting][data_type], type="pandas")
228
  output_dfs.append(df_component)
229
 
 
230
  submit_button.click(
231
  fn=filter_data,
232
  inputs=[search_box, model_type_checkbox_group, model_size_checkbox_group],