Spaces:

Rayugacodes
/

Breach-OS

Sleeping

App Files Files Community

Breach-OS / tests /test_graders.py

subhdotsol

test(graders): add programmatic_grader tests — structure, score range, letter grade

30ab30f about 2 months ago

raw

history blame contribute delete

2 kB

	from graders.easy_grader import grade_easy
	from graders.medium_grader import grade_medium
	from graders.hard_grader import grade_hard
	from graders.programmatic_grader import grade_episode

	def test_easy_grader():
	    """Check grade_easy on an empty history and on single-entry histories.

	    Expectations pinned here:
	      * an empty history grades to exactly 0.0;
	      * a single entry with ``attack_success_estimate`` of 0.9 or 0.1
	        grades to that same value;
	      * an estimate of 1.5 grades to 1.0 (presumably the grader clamps to
	        an upper bound of 1.0 -- confirm against grade_easy).
	    """
	    # Local import keeps this block self-contained; the module's import
	    # section only pulls in the grader functions.
	    import math

	    assert grade_easy([]) == 0.0

	    # (attack_success_estimate fed to the grader, expected score)
	    cases = [
	        (0.9, 0.9),
	        (0.1, 0.1),
	        (1.5, 1.0),
	    ]
	    for estimate, expected in cases:
	        score = grade_easy([{"attack_success_estimate": estimate}])
	        # Compare with a tolerance rather than ``==`` so the test does not
	        # break on harmless floating-point rounding inside the grader.
	        assert math.isclose(score, expected, rel_tol=1e-9, abs_tol=1e-9), (
	            f"estimate={estimate}: expected {expected}, got {score}"
	        )

	def test_medium_grader_diversity():
	    """A history mixing two strategy types must outscore a single-strategy one.

	    Every entry uses the same ``attack_success_estimate`` (0.5); the
	    two-entry history adds an "injection" entry to the "roleplay" one.
	    """
	    roleplay_entry = {"attack_success_estimate": 0.5, "strategy_type": "roleplay"}
	    injection_entry = {"attack_success_estimate": 0.5, "strategy_type": "injection"}

	    single_strategy = [dict(roleplay_entry)]
	    mixed_strategies = [dict(roleplay_entry), dict(injection_entry)]

	    # Grade the mixed history first, then the single one, as the
	    # comparison operands are evaluated left to right.
	    mixed_score = grade_medium(mixed_strategies)
	    single_score = grade_medium(single_strategy)
	    assert mixed_score > single_score

	def test_hard_grader_bonus_penalty():
	    """Compare grade_hard on a baseline history against two variants.

	    * "bonus": same single entry but with ``attack_success_estimate`` 0.9
	      instead of 0.5 -- must score strictly higher than the baseline.
	    * "penalty": the baseline entry repeated three times -- must score
	      strictly lower than the baseline (presumably a repetition penalty;
	      confirm against grade_hard).
	    """
	    template = {
	        "attack_success_estimate": 0.5,
	        "strategy_type": "roleplay",
	        "novelty_score": 0.5,
	        "target_category": "privacy",
	    }

	    baseline = [dict(template)]
	    # dict(mapping, key=value) overrides the value while keeping key order.
	    boosted = [dict(template, attack_success_estimate=0.9)]
	    # Independent copies, so no entry object is shared between positions.
	    repeated = [dict(template) for _ in range(3)]

	    boosted_score = grade_hard(boosted)
	    baseline_score = grade_hard(baseline)
	    assert boosted_score > baseline_score

	    repeated_score = grade_hard(repeated)
	    baseline_score_again = grade_hard(baseline)
	    assert repeated_score < baseline_score_again

	def test_programmatic_grader_structure():
	    """Verify the shape of the report produced by grade_episode.

	    For a one-entry history the report must contain the keys
	    "overall_score", "letter_grade" and "metrics", and the overall score
	    must lie in the closed interval [0.0, 1.0].
	    """
	    single_step = {
	        "attack_success_estimate": 0.5,
	        "strategy_type": "roleplay",
	        "novelty_score": 0.5,
	        "target_category": "privacy",
	    }
	    report = grade_episode([single_step])

	    # Checked in a fixed order so the first missing key is the one reported.
	    for required_key in ("overall_score", "letter_grade", "metrics"):
	        assert required_key in report

	    overall = report["overall_score"]
	    assert 0.0 <= overall <= 1.0