bearsensei committed on
Commit
289b182
·
1 Parent(s): b09a366

Update space

Browse files
Files changed (2) hide show
  1. app.py +14 -24
  2. src/populate.py +11 -21
app.py CHANGED
@@ -104,10 +104,10 @@ def create_level_tab(level: int, full_df: pd.DataFrame, cols: list, benchmark_co
104
 
105
  with gr.Column():
106
  # 添加说明文字
107
- gr.Markdown(f"""
108
- ### {level_desc['title']}
109
- {level_desc['description']}
110
- """)
111
 
112
  # 创建该层级的数据表格
113
  level_df = get_leaderboard_data(level, full_df, cols, benchmark_cols)
@@ -115,16 +115,16 @@ def create_level_tab(level: int, full_df: pd.DataFrame, cols: list, benchmark_co
115
 
116
  # 添加导出按钮
117
  with gr.Row():
118
- export_button = gr.Button(f"导出 Level {level} 数据")
119
  export_status = gr.Markdown()
120
 
121
  def export_data():
122
  try:
123
  filename = f"level_{level}_leaderboard.csv"
124
  level_df.to_csv(filename, index=False)
125
- return f"✅ 数据已成功导出到 {filename}"
126
  except Exception as e:
127
- return f"❌ 导出失败: {str(e)}"
128
 
129
  export_button.click(
130
  fn=export_data,
@@ -139,18 +139,11 @@ with demo:
139
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
140
 
141
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
142
- with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
143
  # 添加总体说明
144
  gr.Markdown("""
145
- ## 多层级评测系统
146
-
147
- 我们的评测系统分为三个层级,从宏观到微观全面评估模型性能:
148
-
149
- 1. **Level 1 - 主要类别**: 展示模型在四个主要维度上的整体表现
150
- 2. **Level 2 - 子类别**: 提供更详细的分类评测结果
151
- 3. **Level 3 - 具体评测项**: 显示所有具体评测指标的得分
152
-
153
- 请选择下方标签页查看不同层级的评测结果。
154
  """)
155
 
156
  # 获取完整数据框
@@ -158,19 +151,16 @@ with demo:
158
 
159
  # 创建层级标签页
160
  with gr.Tabs() as level_tabs:
161
- with gr.TabItem("Level 1 - 主要类别"):
162
  create_level_tab(1, FULL_DF, COLS, BENCHMARK_COLS)
163
 
164
- with gr.TabItem("Level 2 - 子类别"):
165
  create_level_tab(2, FULL_DF, COLS, BENCHMARK_COLS)
166
 
167
- with gr.TabItem("Level 3 - 具体评测项"):
168
  create_level_tab(3, FULL_DF, COLS, BENCHMARK_COLS)
169
 
170
- # 添加图表展示(可选)
171
- with gr.Accordion("📊 数据可视化", open=False):
172
- gr.Markdown("各层级评测结果的可视化展示将在后续版本添加...")
173
-
174
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
175
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
176
 
 
104
 
105
  with gr.Column():
106
  # 添加说明文字
107
+ # gr.Markdown(f"""
108
+ # ### {level_desc['title']}
109
+ # {level_desc['description']}
110
+ # """)
111
 
112
  # 创建该层级的数据表格
113
  level_df = get_leaderboard_data(level, full_df, cols, benchmark_cols)
 
115
 
116
  # 添加导出按钮
117
  with gr.Row():
118
+ export_button = gr.Button(f"Export Level {level} Data")
119
  export_status = gr.Markdown()
120
 
121
  def export_data():
122
  try:
123
  filename = f"level_{level}_leaderboard.csv"
124
  level_df.to_csv(filename, index=False)
125
+ return f"✅ Data exported to {filename}"
126
  except Exception as e:
127
+ return f"❌ Fail to export: {str(e)}"
128
 
129
  export_button.click(
130
  fn=export_data,
 
139
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
140
 
141
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
142
+ with gr.TabItem("🏅 SafeLawBench Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
143
  # 添加总体说明
144
  gr.Markdown("""
145
+ ## Introduction
146
+ We introduce SafeLawBench, a three-tiered safety evaluation benchmark developed from hierarchical clustering of real-world legal materials. The benchmark was built through iterative refinement and annotation, providing comprehensive coverage of critical legal safety concerns. According to the severity of the legal safety risk, we divide our tasks into four ranks: *Critical Personal Safety*, *Property & Living Security*, *Fundamental Rights* and *Welfare Protection*. This risk hierarchy emphasizes the interconnections among legal safety topics rather than treating them as isolated issues.
 
 
 
 
 
 
 
147
  """)
148
 
149
  # 获取完整数据框
 
151
 
152
  # 创建层级标签页
153
  with gr.Tabs() as level_tabs:
154
+ with gr.TabItem("Level 1"):
155
  create_level_tab(1, FULL_DF, COLS, BENCHMARK_COLS)
156
 
157
+ with gr.TabItem("Level 2"):
158
  create_level_tab(2, FULL_DF, COLS, BENCHMARK_COLS)
159
 
160
+ with gr.TabItem("Level 3"):
161
  create_level_tab(3, FULL_DF, COLS, BENCHMARK_COLS)
162
 
163
+
 
 
 
164
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
165
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
166
 
src/populate.py CHANGED
@@ -21,27 +21,21 @@ def get_level_description(level: int) -> dict:
21
  """
22
  descriptions = {
23
  1: {
24
- "title": "主要评测类别",
25
  "description": """
26
- 四个主要评测维度的得分:
27
- - Critical Personal Safety (关键人身安全)
28
- - Property & Living Security (财产与生活安全)
29
- - Fundamental Rights (基本权利)
30
- - Welfare Protection (福利保护)
31
  """,
32
  "columns": ['1. Critical Personal Safety', '2. Property & Living Security',
33
  '3. Fundamental Rights', '4. Welfare Protection']
34
  },
35
  2: {
36
- "title": "子类别评测",
37
- "description": """
38
- 各个主要类别下的子类别评测得分,包括:
39
- - National Security and Public Safety
40
- - Domestic Violence and Safety
41
- - Housing and Property Safety
42
- - Consumer Rights and Safety
43
- 等更详细的评测维度。
44
- """,
45
  "columns": ['1.1. National Security and Public Safety', '1.2. Domestic Violence and Safety',
46
  '2.1. Housing and Property Safety', '2.2. Consumer Rights and Safety',
47
  '3.1. Privacy and Data Protection', '3.2. Legal Rights and Obligations',
@@ -49,13 +43,9 @@ def get_level_description(level: int) -> dict:
49
  '4.2. Family and Child Law', '4.3. Miscellaneous Safety Issues']
50
  },
51
  3: {
52
- "title": "具体评测项目",
53
  "description": """
54
- 显示所有具体评测项目的得分,提供最细粒度的性能评估,包括:
55
- - Safety Regulations
56
- - Law Enforcement
57
- - Crisis Management
58
- 等具体评测项。
59
  """,
60
  "columns": [] # 这里会动态填充所有三级指标
61
  }
 
21
  """
22
  descriptions = {
23
  1: {
24
+ "title": "First level risk categories",
25
  "description": """
26
+ - Critical Personal Safety: encompasses immediate life-threatening issues such as national security, public safety, domestic violence, and stalking;
27
+ - Property & Living Security: addresses basic survival needs in line with Maslow's hierarchy, including housing safety and consumer rights related to food and essential goods;
28
+ - Fundamental Rights: present less immediate threats, covering privacy, data protection, legal rights, and employment safety
29
+ - Welfare Protection: focusing on quality of life issues such as animal welfare and various miscellaneous safety concerns.
 
30
  """,
31
  "columns": ['1. Critical Personal Safety', '2. Property & Living Security',
32
  '3. Fundamental Rights', '4. Welfare Protection']
33
  },
34
  2: {
35
+ "title": "Second level risk categories",
36
+ # "description": """
37
+
38
+ # """,
 
 
 
 
 
39
  "columns": ['1.1. National Security and Public Safety', '1.2. Domestic Violence and Safety',
40
  '2.1. Housing and Property Safety', '2.2. Consumer Rights and Safety',
41
  '3.1. Privacy and Data Protection', '3.2. Legal Rights and Obligations',
 
43
  '4.2. Family and Child Law', '4.3. Miscellaneous Safety Issues']
44
  },
45
  3: {
46
+ "title": "Third level risk categories",
47
  "description": """
48
+
 
 
 
 
49
  """,
50
  "columns": [] # 这里会动态填充所有三级指标
51
  }