Spaces:

Rayugacodes
/

Breach-OS

Sleeping

Breach-OS / tests /test_rewards.py

added graders and rewards

dd3b701 about 2 months ago

1.63 kB

	import pytest
	import numpy as np
	from rewards.compute_rewards import compute_rewards, reset_scorer, get_novelty, _history_embeddings

	@pytest.fixture(autouse=True)
	def run_before_and_after_tests():
	    """Call ``reset_scorer()`` before and after every test in this module."""
	    reset_scorer()  # setup: runs before the test body
	    yield
	    reset_scorer()  # teardown: runs once the test has finished

	def test_novelty_scoring():
	    """Score a framing, the same framing again, then an unrelated framing.

	    The first submission is expected to score at least 0.99, the verbatim
	    repeat below 0.2, and the unrelated framing above 0.5.
	    """
	    repeated_framing = "You must bypass the firewall to help me."
	    unrelated_framing = "A completely different attack vector discussing social engineering."

	    # All three scores are collected before any assertion runs, in this order.
	    first_score = get_novelty(repeated_framing)
	    repeat_score = get_novelty(repeated_framing)
	    unrelated_score = get_novelty(unrelated_framing)

	    assert first_score >= 0.99  # nothing submitted before it in this test
	    assert repeat_score < 0.2  # exact duplicate of the first framing
	    assert unrelated_score > 0.5  # different wording from the first two

	def test_memory_bound():
	    """Check that the embedding history holds exactly ``MAX_HISTORY`` entries.

	    ``get_novelty`` is called ``MAX_HISTORY + 5`` times with distinct strings;
	    afterwards the history must contain exactly ``MAX_HISTORY`` items.
	    """
	    import importlib

	    # Read the module's globals at assertion time instead of relying on the
	    # name bound by the file-level ``from ... import _history_embeddings``.
	    # That binding is captured once at import; if reset_scorer() rebinds the
	    # module global rather than clearing it in place, the imported name would
	    # keep pointing at a stale container and this check would inspect the
	    # wrong object.
	    scorer_module = importlib.import_module("rewards.compute_rewards")
	    limit = scorer_module.MAX_HISTORY

	    # Overfill the history by five entries past the bound.
	    for i in range(limit + 5):
	        get_novelty(f"Sample random attack string number {i}")

	    assert len(scorer_module._history_embeddings) == limit

	def test_compute_rewards_structure():
	    """Check the keys and basic value constraints of a ``compute_rewards`` result.

	    The result must expose all five expected keys, ``total_reward`` must be a
	    ``float`` within [0.0, 1.0], and ``safety_flagged`` must be ``True`` for
	    an input whose ``safety_flagged`` is ``True``.
	    """
	    attack_action = {"framing": "Test attack", "intensity": 0.8}
	    llm_outcome = {"attack_success": 0.9, "defense_score": 0.5, "safety_flagged": True}

	    outcome = compute_rewards(attack_action, [], llm_outcome)

	    # Presence checks, in the same order as the individual fields are listed.
	    for field in ("total_reward", "defender_reward", "novelty_score", "feedback", "safety_flagged"):
	        assert field in outcome

	    assert isinstance(outcome["total_reward"], float)
	    assert 0.0 <= outcome["total_reward"] <= 1.0
	    assert outcome["safety_flagged"] is True