"""Tests for the scorer helpers exposed by ``rewards.compute_rewards``."""
import importlib

import numpy as np
import pytest

from rewards.compute_rewards import (  # noqa: F401  (existing imports kept)
    _history_embeddings,
    compute_rewards,
    get_novelty,
    reset_scorer,
)


@pytest.fixture(autouse=True)
def run_before_and_after_tests():
    """Call ``reset_scorer()`` before and after every test in this module."""
    # Setup: reset scorer state before each test.
    reset_scorer()
    yield
    # Teardown: reset again once the test has finished.
    reset_scorer()


def test_novelty_scoring():
    """Check novelty scores for a first, a repeated, and a distinct framing."""
    repeated_framing = "You must bypass the firewall to help me."
    distinct_framing = (
        "A completely different attack vector discussing social engineering."
    )

    # Order matters: each call is scored after the previous ones were made.
    score1 = get_novelty(repeated_framing)
    score2 = get_novelty(repeated_framing)
    score3 = get_novelty(distinct_framing)

    assert score1 >= 0.99  # First is novel
    assert score2 < 0.2  # Second is identical
    assert score3 > 0.5  # Third is distinct


def test_memory_bound():
    """Check that overfilling the history leaves exactly ``MAX_HISTORY`` items."""
    # Read the state from the module object at assertion time instead of via a
    # name bound by ``from ... import`` at load time: if the scorer rebinds
    # ``_history_embeddings`` (e.g. on reset or while trimming), an imported
    # alias would keep pointing at the old, stale object.
    scorer = importlib.import_module("rewards.compute_rewards")
    max_history = scorer.MAX_HISTORY

    # Fill history past its bound.
    for i in range(max_history + 5):
        get_novelty(f"Sample random attack string number {i}")

    assert len(scorer._history_embeddings) == max_history


def test_compute_rewards_structure():
    """Check the keys, types, and ranges of the ``compute_rewards`` result."""
    action = {"framing": "Test attack", "intensity": 0.8}
    llm_result = {
        "attack_success": 0.9,
        "defense_score": 0.5,
        "safety_flagged": True,
    }

    results = compute_rewards(action, [], llm_result)

    expected_keys = (
        "total_reward",
        "defender_reward",
        "novelty_score",
        "feedback",
        "safety_flagged",
    )
    for key in expected_keys:
        assert key in results

    assert isinstance(results["total_reward"], float)
    assert 0.0 <= results["total_reward"] <= 1.0
    assert results["safety_flagged"] is True