SafeLawBench / src /populate.py
bearsensei's picture
Update space
289b182
import json
import os
import pandas as pd
from src.display.formatting import has_no_nan_values, make_clickable_model
from src.display.utils import AutoEvalColumn, EvalQueueColumn
from src.leaderboard.read_evals import get_raw_eval_results
from src.about import Tasks # 添加这个导入
def get_level(col_name):
    """Return the hierarchy depth encoded in a column name's leading token.

    The leading token is everything before the first space; the depth is the
    number of dots it contains (e.g. ``"1. Foo"`` -> 1, ``"1.1. Bar"`` -> 2).
    A name whose leading token has no dots yields 0.
    """
    prefix, _, _ = col_name.partition(' ')
    return prefix.count('.')
def get_level_description(level: int) -> dict:
    """Return the display metadata for one risk-category level.

    Args:
        level: Hierarchy level to describe; 1, 2 and 3 are defined.

    Returns:
        A freshly built dict with a ``"title"`` and a ``"columns"`` list of
        benchmark column names. Levels 1 and 3 also carry a ``"description"``
        string; level 2 has no ``"description"`` key. Level 3's ``"columns"``
        list is empty here.

    Raises:
        KeyError: If ``level`` is not one of the defined levels.
    """
    descriptions = {
        1: {
            "title": "First level risk categories",
            # Plain "&" here: the previous "\&" was an invalid escape sequence
            # (SyntaxWarning on recent Python) and leaked a backslash into the
            # displayed text, unlike the matching column name below.
            "description": (
                "\n"
                "- Critical Personal Safety: encompasses immediate life-threatening issues such as national security, public safety, domestic violence, and stalking;\n"
                "- Property & Living Security: addressing basic survival needs in line with Maslow's hierarchy, including housing safety and consumer rights related to food and essential goods\n"
                "- Fundamental Rights: present less immediate threats, covering privacy, data protection, legal rights, and employment safety\n"
                "- Welfare Protection: focusing on quality of life issues such as animal welfare and various miscellaneous safety concerns.\n"
            ),
            "columns": ['1. Critical Personal Safety', '2. Property & Living Security',
                        '3. Fundamental Rights', '4. Welfare Protection'],
        },
        2: {
            "title": "Second level risk categories",
            # No "description" entry is defined for this level.
            "columns": ['1.1. National Security and Public Safety', '1.2. Domestic Violence and Safety',
                        '2.1. Housing and Property Safety', '2.2. Consumer Rights and Safety',
                        '3.1. Privacy and Data Protection', '3.2. Legal Rights and Obligations',
                        '3.3. Employment and Safety', '4.1. Animal Welfare and Safety',
                        '4.2. Family and Child Law', '4.3. Miscellaneous Safety Issues'],
        },
        3: {
            "title": "Third level risk categories",
            "description": "\n",
            # Third-level metrics are meant to be filled in dynamically;
            # nothing in this function populates them.
            "columns": [],
        },
    }
    return descriptions[level]
def get_level_columns(level: int) -> list:
    """Return the column names to display for the given level.

    The result is the fixed list of non-benchmark columns followed by the
    ``col_name`` of every entry in ``Tasks`` whose ``get_level`` value equals
    ``level``, in ``Tasks`` iteration order.
    """
    # Benchmark columns belonging to the requested level.
    task_columns = []
    for task in Tasks:
        name = task.value.col_name
        if get_level(name) == level:
            task_columns.append(name)

    # Fixed, non-benchmark columns that are always included.
    metadata_columns = [
        'T', 'Model', 'Average ⬆️', 'Type', 'Architecture', 'Precision',
        'Hub License', '#Params (B)', 'Available on the hub', 'Model sha',
    ]
    return metadata_columns + task_columns
def get_leaderboard_data(level: int, df: pd.DataFrame, cols: list, benchmark_cols: list) -> pd.DataFrame:
    """Return the leaderboard restricted to one level, re-averaged and sorted.

    Args:
        level: Hierarchy level whose columns should be shown.
        df: Full leaderboard frame.
        cols: Unused; kept so existing callers keep working.
        benchmark_cols: Unused; the benchmark columns are derived from
            ``level`` instead. Kept so existing callers keep working.

    Returns:
        A copy of ``df`` holding only the level's columns that exist in
        ``df``, with ``'Average ⬆️'`` recomputed as the row-wise mean of the
        level's benchmark columns, sorted by that average (descending) and
        rounded to two decimals. If anything fails, the error is printed and
        the original ``df`` is returned unchanged (best-effort fallback).
    """
    try:
        print(f"Processing level {level}")
        selected_cols = get_level_columns(level)
        print(f"Selected columns for level {level}: {selected_cols}")

        # Only keep the selected columns that actually exist in the frame.
        available_cols = [col for col in selected_cols if col in df.columns]
        filtered_df = df[available_cols].copy()

        # Benchmark columns declared for this level. The level description
        # leaves level 3 empty ("filled dynamically"), so derive them from
        # the task list in that case.
        level_benchmarks = get_level_description(level)['columns']
        if not level_benchmarks:
            level_benchmarks = [
                task.value.col_name
                for task in Tasks
                if get_level(task.value.col_name) == level
            ]

        # Average only over columns present in the filtered frame, so one
        # missing/renamed column does not raise and discard the filtering.
        present_benchmarks = [col for col in level_benchmarks if col in filtered_df.columns]
        if present_benchmarks:
            filtered_df['Average ⬆️'] = filtered_df[present_benchmarks].mean(axis=1)

        filtered_df = filtered_df.sort_values('Average ⬆️', ascending=False)
        return filtered_df.round(decimals=2)
    except Exception as e:
        # Deliberate best-effort: show the unfiltered board rather than crash.
        print(f"Error in get_leaderboard_data: {e}")
        return df
def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
    """Create a dataframe from all the individual experiment results.

    Args:
        results_path: Passed to ``get_raw_eval_results`` as the results location.
        requests_path: Passed to ``get_raw_eval_results`` as the requests location.
        cols: Columns to keep, in display order.
        benchmark_cols: Benchmark columns handed to ``has_no_nan_values`` to
            build the row filter.

    Returns:
        A frame sorted by the average column (descending), restricted to
        ``cols``, rounded to two decimals and filtered by the mask returned
        by ``has_no_nan_values``. When there are no results at all, an empty
        frame with ``cols`` as its columns is returned.
    """
    raw_data = get_raw_eval_results(results_path, requests_path)
    all_data_json = [v.to_dict() for v in raw_data]

    if not all_data_json:
        # from_records([]) yields a frame without columns, so sorting by the
        # average column would raise KeyError; return an empty board instead.
        return pd.DataFrame(columns=cols)

    df = pd.DataFrame.from_records(all_data_json)
    df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
    df = df[cols].round(decimals=2)

    # filter out if any of the benchmarks have not been produced
    df = df[has_no_nan_values(df, benchmark_cols)]
    return df
def _load_queue_entry(file_path: str) -> dict:
    """Load one request JSON file and fill in its model and revision columns."""
    with open(file_path, encoding="utf-8") as fp:
        data = json.load(fp)
    data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
    data[EvalQueueColumn.revision.name] = data.get("revision", "main")
    return data


def get_evaluation_queue_df(save_path: str, cols: list) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Create the dataframes for the evaluation queue requests.

    Request files are read from ``save_path`` itself and from its immediate
    sub-folders. Hidden entries (names starting with ".") are skipped.

    Args:
        save_path: Directory holding the request JSON files.
        cols: Columns of the returned frames.

    Returns:
        A ``(finished, running, pending)`` tuple of dataframes, split by each
        request's ``"status"``: finished holds statuses starting with
        ``"FINISHED"`` or equal to ``"PENDING_NEW_EVAL"``; running holds
        ``"RUNNING"``; pending holds ``"PENDING"`` and ``"RERUN"``.

    Raises:
        KeyError: If a loaded request lacks a ``"model"`` or ``"status"`` key.
    """
    entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
    all_evals = []

    for entry in entries:
        entry_path = os.path.join(save_path, entry)
        if ".json" in entry:
            all_evals.append(_load_queue_entry(entry_path))
        elif ".md" not in entry and os.path.isdir(entry_path):
            # Sub-folder of requests. Only real directories are listed, so a
            # stray non-JSON file at the top level no longer makes
            # os.listdir raise.
            for sub_entry in os.listdir(entry_path):
                sub_path = os.path.join(entry_path, sub_entry)
                # isfile must be checked on the full path; a bare file name
                # is resolved against the current working directory.
                if sub_entry.startswith(".") or not os.path.isfile(sub_path):
                    continue
                # Same ".json" rule as the top level, so READMEs and similar
                # files inside a sub-folder are not fed to the JSON parser.
                if ".json" not in sub_entry:
                    continue
                all_evals.append(_load_queue_entry(sub_path))

    pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
    running_list = [e for e in all_evals if e["status"] == "RUNNING"]
    finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]

    df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
    df_running = pd.DataFrame.from_records(running_list, columns=cols)
    df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
    return df_finished[cols], df_running[cols], df_pending[cols]