SafeLawBench

Running

App Files Files Community

SafeLawBench / src /about.py

chuxuecao

Update space

7a4da74 8 months ago

raw

history blame contribute delete

9.98 kB

	from dataclasses import dataclass
	from enum import Enum

	@dataclass
	class Task:
	    """Describe one score column of the leaderboard.

	    Attributes:
	        benchmark: Task key as it appears in the results JSON file.
	        metric: Metric key as it appears in the results JSON file.
	        col_name: Name to display for this task in the leaderboard.
	    """

	    benchmark: str
	    metric: str
	    col_name: str




	# Select your tasks here
	# ---------------------------------------------------
	class Tasks(Enum):
	    """Every score column of the SafeLawBench leaderboard.

	    Each member wraps a ``Task(benchmark, metric, col_name)``: the task key
	    in the JSON file, the metric key in the JSON file, and the name to
	    display in the leaderboard. Display names carry the taxonomy numbering
	    (``1.``, ``1.1.``, ``1.1.1.``) so the hierarchy stays visible.

	    Enum iteration follows definition order, so reordering members reorders
	    anything that iterates over this enum.
	    """

	    # NOTE(review): 34 third-level entries are defined below (task14-task47),
	    # while the dataset summary text in this file mentions 35 risk
	    # sub-categories -- confirm whether an entry is missing.

	    # Overall score followed by the four top-level risk levels
	    task48 = Task("Average", "acc", "Average")
	    task0 = Task("Critical Personal Safety", "acc", "1. Critical Personal Safety")
	    task1 = Task("Property & Living Security", "acc", "2. Property & Living Security")
	    task2 = Task("Fundamental Rights", "acc", "3. Fundamental Rights")
	    task3 = Task("Welfare Protection", "acc", "4. Welfare Protection")

	    # Under Critical Personal Safety
	    task4 = Task("National Security and Public Safety", "acc", "1.1. National Security and Public Safety")
	    task5 = Task("Domestic Violence and Safety", "acc", "1.2. Domestic Violence and Safety")

	    # Under Property & Living Security
	    task6 = Task("Housing and Property Safety", "acc", "2.1. Housing and Property Safety")
	    task7 = Task("Consumer Rights and Safety", "acc", "2.2. Consumer Rights and Safety")

	    # Under Fundamental Rights
	    task8 = Task("Privacy and Data Protection", "acc", "3.1. Privacy and Data Protection")
	    task9 = Task("Legal Rights and Obligations", "acc", "3.2. Legal Rights and Obligations")
	    task10 = Task("Employment and Safety", "acc", "3.3. Employment and Safety")

	    # Under Welfare Protection
	    task11 = Task("Animal Welfare and Safety", "acc", "4.1. Animal Welfare and Safety")
	    task12 = Task("Family and Child Law", "acc", "4.2. Family and Child Law")
	    task13 = Task("Miscellaneous Safety Issues", "acc", "4.3. Miscellaneous Safety Issues")

	    # Under National Security and Public Safety (1.1)
	    task14 = Task("Safety Regulations", "acc", "1.1.1. Safety Regulations")
	    task15 = Task("Law Enforcement", "acc", "1.1.2. Law Enforcement")
	    task16 = Task("Crisis Management", "acc", "1.1.3. Crisis Management")
	    task17 = Task("Public Order Offences", "acc", "1.1.4. Public Order Offences")

	    # Under Domestic Violence and Safety (1.2)
	    task18 = Task("Criminal Offences", "acc", "1.2.1. Criminal Offences")
	    task19 = Task("Unlawful Sexual Intercourse", "acc", "1.2.2. Unlawful Sexual Intercourse")
	    task20 = Task("Understanding Domestic Violence", "acc", "1.2.3. Understanding Domestic Violence")
	    task21 = Task("Victim Rights and Stalking", "acc", "1.2.4. Victim Rights and Stalking")

	    # Under Housing and Property Safety (2.1)
	    task22 = Task("Property Registration", "acc", "2.1.1. Property Registration")
	    task23 = Task("Land Registry Services", "acc", "2.1.2. Land Registry Services")
	    task24 = Task("Property Management", "acc", "2.1.3. Property Management")
	    task25 = Task("Property Ownership", "acc", "2.1.4. Property Ownership")
	    task26 = Task("Land Use and Access", "acc", "2.1.5. Land Use and Access")

	    # Under Consumer Rights and Safety (2.2)
	    task27 = Task("Consumer Protection Overview", "acc", "2.2.1. Consumer Protection Overview")
	    task28 = Task("Common Nuisances", "acc", "2.2.2. Common Nuisances")
	    task29 = Task("Consumer Rights", "acc", "2.2.3. Consumer Rights")

	    # Under Privacy and Data Protection (3.1)
	    task30 = Task("Data Protection Principles", "acc", "3.1.1. Data Protection Principles")
	    task31 = Task("User Data Management", "acc", "3.1.2. User Data Management")
	    task32 = Task("Access to Information", "acc", "3.1.3. Access to Information")
	    task33 = Task("Cybersecurity Measures", "acc", "3.1.4. Cybersecurity Measures")
	    task34 = Task("Privacy Regulations", "acc", "3.1.5. Privacy Regulations")

	    # Under Legal Rights and Obligations (3.2)
	    task35 = Task("Legal Framework", "acc", "3.2.1. Legal Framework")
	    task36 = Task("Judicial Processes", "acc", "3.2.2. Judicial Processes")
	    task37 = Task("Legal Assistance", "acc", "3.2.3. Legal Assistance")
	    task38 = Task("International Law", "acc", "3.2.4. International Law")

	    # Under Employment and Safety (3.3)
	    task39 = Task("Employment Regulations", "acc", "3.3.1. Employment Regulations")
	    task40 = Task("Recruitment and Training", "acc", "3.3.2. Recruitment and Training")
	    task41 = Task("Employee Rights", "acc", "3.3.3. Employee Rights")
	    task42 = Task("Qualifications and Training", "acc", "3.3.4. Qualifications and Training")

	    # Under Animal Welfare and Safety (4.1)
	    task43 = Task("Pet Ownership and Animal Protection", "acc", "4.1.1. Pet Ownership and Animal Protection")

	    # Under Family and Child Law (4.2)
	    task44 = Task("Child Protection and Safety Regulations", "acc", "4.2.1. Child Protection and Safety Regulations")
	    task45 = Task("Child custody and guardianship", "acc", "4.2.2. Child custody and guardianship")

	    # Under Miscellaneous Safety Issues (4.3)
	    task46 = Task("Legal and Social Issues", "acc", "4.3.1. Legal and Social Issues")
	    task47 = Task("Legal Consequences", "acc", "4.3.2. Legal Consequences")



	# Few-shot setting for the evaluation; change it to match your own setup.
	NUM_FEWSHOT: int = 0
	# ---------------------------------------------------



	# Leaderboard heading, written as a centred HTML <h1> snippet.
	TITLE: str = '<h1 align="center" id="space-title">SafeLawBench Leaderboard</h1>'

	# What does your leaderboard evaluate?
	# Raw literal: the text contains the Markdown escape `\&`, which is not a
	# valid Python escape sequence and triggers a warning in a normal string.
	# The runtime value is the same as before.
	INTRODUCTION_TEXT = r"""
	We introduced SafeLawBench, a three-tiered safety evaluation benchmark developed from hierarchical clustering of real-world legal materials. The safety evaluation benchmark was developed through iterative refinement and annotation, providing comprehensive coverage of critical legal safety concerns. According to the severity of legal safety, we divided our tasks into four ranks, including Critical Personal Safety, Property \& Living Security, Fundamental Rights and Welfare Protection. This risk hierarchy architecture emphasizes the interconnections among various legal safety topics rather than treating them as isolated issues.
	"""

	# Which evaluations are you running? How can people reproduce your results?
	# Markdown body describing the benchmark design, dataset size and sources.
	# Raw literal rather than an f-string: nothing is interpolated, and the
	# Markdown escape `\&` is not a valid Python escape sequence. The runtime
	# value is the same as before.
	LLM_BENCHMARKS_TEXT = r"""

	## Design Principle
	We proposed a legal safety taxonomy that categorizes issues into distinct levels of urgency and relevance. (1) Critical Personal Safety, which encompasses immediate life-threatening issues such as national security, public safety, domestic violence, and stalking; (2) Property \& Living Security, addressing basic survival needs in line with Maslow's hierarchy, including housing safety and consumer rights related to food and essential goods; (3) Fundamental Rights, which, while important, present less immediate threats, covering privacy, data protection, legal rights, and employment safety; and (4) Welfare Protection, focusing on quality of life issues such as animal welfare and various miscellaneous safety concerns. This structured approach allows for a comprehensive understanding of priorities on legal safety. For each risk level, we include two to three risk categories, with each subcategory containing two to six sub-subcategories.

	## Dataset Summary
	SafeLawBench taxonomy is comprised of 4 risk levels, 10 risk categories and 35 risk sub-categories. Within this taxonomy, the SafeLawBench includes 24,860 multi-choice questions and 1,106 open-domain QA pairs.

	## Data Source
	The data for SafeLawBench is sourced from a diverse range of public materials from different regions. Our primary sources are websites related to legal standards from the Mainland China and Hong Kong SAR, such as Ministry of Justice of the People's Republic of China, Civil Law of China, HK Basic Law, Community Legal Information Center, and Hong Kong Legal Information Institute.

	"""

	# Markdown checklist for people submitting a model.
	# The remote-code note names `trust_remote_code`, the keyword accepted by the
	# Transformers `from_pretrained` loaders (the text previously said
	# `use_remote_code`, which is not an existing option).
	# NOTE(review): the last paragraph refers to "the above command", but no
	# command appears in this text -- confirm the intended instructions.
	EVALUATION_QUEUE_TEXT = """
	## Some good practices before submitting a model

	### 1) Make sure you can load your model and tokenizer using AutoClasses:
	```python
	from transformers import AutoConfig, AutoModel, AutoTokenizer
	config = AutoConfig.from_pretrained("your model name", revision=revision)
	model = AutoModel.from_pretrained("your model name", revision=revision)
	tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
	```
	If this step fails, follow the error messages to debug your model before submitting it. It's likely your model has been improperly uploaded.

	Note: make sure your model is public!
	Note: if your model needs `trust_remote_code=True`, we do not support this option yet but we are working on adding it, stay posted!

	### 2) Convert your model weights to [safetensors](https://huggingface.co/docs/safetensors/index)
	It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer`!

	### 3) Make sure your model has an open license!
	This is a leaderboard for Open LLMs, and we'd love for as many people as possible to know they can use your model 🤗

	### 4) Fill up your model card
	When we add extra information about models to the leaderboard, it will be automatically taken from the model card

	## In case of model failure
	If your model is displayed in the `FAILED` category, its execution stopped.
	Make sure you have followed the above steps first.
	If everything is done, check you can launch the EleutherAIHarness on your model locally, using the above command without modifications (you can add `--limit` to limit the number of examples per task).
	"""

	# Label text that accompanies the citation snippet below.
	CITATION_BUTTON_LABEL: str = "Copy the following snippet to cite these results"
	# BibTeX entry for the SafeLawBench paper (arXiv 2506.06636). Kept as a raw
	# literal so the entry is stored exactly as written.
	CITATION_BUTTON_TEXT: str = r"""
	@misc{cao2025safelawbenchsafealignmentlarge,
	title={SafeLawBench: Towards Safe Alignment of Large Language Models},
	author={Chuxue Cao and Han Zhu and Jiaming Ji and Qichao Sun and Zhenghao Zhu and Yinyu Wu and Juntao Dai and Yaodong Yang and Sirui Han and Yike Guo},
	year={2025},
	eprint={2506.06636},
	archivePrefix={arXiv},
	primaryClass={cs.CL},
	url={https://arxiv.org/abs/2506.06636},
	}
	"""