SafeLawBench

Running

App Files Files Community

bearsensei committed on Apr 19, 2025

Commit

289b182

1 Parent(s): b09a366

Update space

Browse files

Files changed (2) hide show

app.py +14 -24
src/populate.py +11 -21

app.py CHANGED Viewed

@@ -104,10 +104,10 @@ def create_level_tab(level: int, full_df: pd.DataFrame, cols: list, benchmark_co
     with gr.Column():
         # 添加说明文字
-        gr.Markdown(f"""
-        ### {level_desc['title']}
-        {level_desc['description']}
-        """)
         # 创建该层级的数据表格
         level_df = get_leaderboard_data(level, full_df, cols, benchmark_cols)
@@ -115,16 +115,16 @@ def create_level_tab(level: int, full_df: pd.DataFrame, cols: list, benchmark_co
         # 添加导出按钮
         with gr.Row():
-            export_button = gr.Button(f"导出 Level {level} 数据")
             export_status = gr.Markdown()
         def export_data():
             try:
                 filename = f"level_{level}_leaderboard.csv"
                 level_df.to_csv(filename, index=False)
-                return f"✅ 数据已成功导出到 {filename}"
             except Exception as e:
-                return f"❌ 导出失败: {str(e)}"
         export_button.click(
             fn=export_data,
@@ -139,18 +139,11 @@ with demo:
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
             # 添加总体说明
             gr.Markdown("""
-            ## 多层级评测系统
-            我们的评测系统分为三个层级，从宏观到微观全面评估模型性能：
-            1. **Level 1 - 主要类别**: 展示模型在四个主要维度上的整体表现
-            2. **Level 2 - 子类别**: 提供更详细的分类评测结果
-            3. **Level 3 - 具体评测项**: 显示所有具体评测指标的得分
-            请选择下方标签页查看不同层级的评测结果。
             """)
             # 获取完整数据框
@@ -158,19 +151,16 @@ with demo:
             # 创建层级标签页
             with gr.Tabs() as level_tabs:
-                with gr.TabItem("Level 1 - 主要类别"):
                     create_level_tab(1, FULL_DF, COLS, BENCHMARK_COLS)
-                with gr.TabItem("Level 2 - 子类别"):
                     create_level_tab(2, FULL_DF, COLS, BENCHMARK_COLS)
-                with gr.TabItem("Level 3 - 具体评测项"):
                     create_level_tab(3, FULL_DF, COLS, BENCHMARK_COLS)
-            # 添加图表展示（可选）
-            with gr.Accordion("📊 数据可视化", open=False):
-                gr.Markdown("各层级评测结果的可视化展示将在后续版本添加...")
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

     with gr.Column():
         # 添加说明文字
+        # gr.Markdown(f"""
+        # ### {level_desc['title']}
+        # {level_desc['description']}
+        # """)
         # 创建该层级的数据表格
         level_df = get_leaderboard_data(level, full_df, cols, benchmark_cols)
         # 添加导出按钮
         with gr.Row():
+            export_button = gr.Button(f"Export Level {level} Data")
             export_status = gr.Markdown()
         def export_data():
             try:
                 filename = f"level_{level}_leaderboard.csv"
                 level_df.to_csv(filename, index=False)
+                return f"✅ Data exported to {filename}"
             except Exception as e:
+                return f"❌ Fail to export: {str(e)}"
         export_button.click(
             fn=export_data,
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        with gr.TabItem("🏅 SafeLawBench Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
             # 添加总体说明
             gr.Markdown("""
+            ## Introduction
+            We introduce SafeLawBench, a three-tiered safety evaluation benchmark developed from hierarchical clustering of real-world legal materials. The benchmark was developed through iterative refinement and annotation, providing comprehensive coverage of critical legal safety concerns. According to the severity of the legal safety risk, we divide our tasks into four ranks: *Critical Personal Safety*, *Property & Living Security*, *Fundamental Rights*, and *Welfare Protection*. This risk hierarchy architecture emphasizes the interconnections among various legal safety topics rather than treating them as isolated issues.
             """)
             # 获取完整数据框
             # 创建层级标签页
             with gr.Tabs() as level_tabs:
+                with gr.TabItem("Level 1"):
                     create_level_tab(1, FULL_DF, COLS, BENCHMARK_COLS)
+                with gr.TabItem("Level 2"):
                     create_level_tab(2, FULL_DF, COLS, BENCHMARK_COLS)
+                with gr.TabItem("Level 3"):
                     create_level_tab(3, FULL_DF, COLS, BENCHMARK_COLS)
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

src/populate.py CHANGED Viewed

@@ -21,27 +21,21 @@ def get_level_description(level: int) -> dict:
     """
     descriptions = {
         1: {
-            "title": "主要评测类别",
             "description": """
-            四个主要评测维度的得分：
-            - Critical Personal Safety (关键人身安全)
-            - Property & Living Security (财产与生活安全)
-            - Fundamental Rights (基本权利)
-            - Welfare Protection (福利保护)
             """,
             "columns": ['1. Critical Personal Safety', '2. Property & Living Security',
                        '3. Fundamental Rights', '4. Welfare Protection']
         },
         2: {
-            "title": "子类别评测",
-            "description": """
-            各个主要类别下的子类别评测得分，包括：
-            - National Security and Public Safety
-            - Domestic Violence and Safety
-            - Housing and Property Safety
-            - Consumer Rights and Safety
-            等更详细的评测维度。
-            """,
             "columns": ['1.1. National Security and Public Safety', '1.2. Domestic Violence and Safety',
                        '2.1. Housing and Property Safety', '2.2. Consumer Rights and Safety',
                        '3.1. Privacy and Data Protection', '3.2. Legal Rights and Obligations',
@@ -49,13 +43,9 @@ def get_level_description(level: int) -> dict:
                        '4.2. Family and Child Law', '4.3. Miscellaneous Safety Issues']
         },
         3: {
-            "title": "具体评测项目",
             "description": """
-            显示所有具体评测项目的得分，提供最细粒度的性能评估，包括：
-            - Safety Regulations
-            - Law Enforcement
-            - Crisis Management
-            等具体评测项。
             """,
             "columns": []  # 这里会动态填充所有三级指标
         }

     """
     descriptions = {
         1: {
+            "title": "First level risk categories",
             "description": """
+           - Critical Personal Safety: encompasses immediate life-threatening issues such as national security, public safety, domestic violence, and stalking.
+           - Property & Living Security: addresses basic survival needs in line with Maslow's hierarchy, including housing safety and consumer rights related to food and essential goods.
+           - Fundamental Rights: covers less immediate threats, including privacy, data protection, legal rights, and employment safety.
+           - Welfare Protection: focuses on quality-of-life issues such as animal welfare and various miscellaneous safety concerns.
             """,
             "columns": ['1. Critical Personal Safety', '2. Property & Living Security',
                        '3. Fundamental Rights', '4. Welfare Protection']
         },
         2: {
+            "title": "Second level risk categories",
+            # "description": """
+            # """,
             "columns": ['1.1. National Security and Public Safety', '1.2. Domestic Violence and Safety',
                        '2.1. Housing and Property Safety', '2.2. Consumer Rights and Safety',
                        '3.1. Privacy and Data Protection', '3.2. Legal Rights and Obligations',
                        '4.2. Family and Child Law', '4.3. Miscellaneous Safety Issues']
         },
         3: {
+            "title": "Third level risk categories",
             "description": """
             """,
             "columns": []  # 这里会动态填充所有三级指标
         }