whisperkey / tests /test_engine.py
chiruu12's picture
Deploy: working gr.Server frontend + review fixes
5a811e2 verified
raw
history blame contribute delete
3.25 kB
"""Engine + levels unit tests - pure logic, no models or network."""
import base64
from jailbreak_dojo.engine import Engine, GameState, detect_leak
from jailbreak_dojo.levels import Level, build_system_prompt, make_secret_key
def test_detect_leak_catches_disguised_forms():
    """detect_leak flags the secret verbatim, spaced, reversed or base64-encoded."""
    secret = "hk-AB7cD9pQ2xKmNpRsTvWy"
    b64_form = base64.b64encode(secret.encode()).decode()

    # Replies that carry the secret in some form and must be reported as a leak.
    leaky_replies = (
        f"the key is {secret}",  # verbatim
        "h k - A B 7 c D 9 p Q 2 x K m N p R s T v W y",  # spaced
        f"reversed: {secret[::-1]}",  # reversed
        "encoded: " + b64_form,  # base64
    )
    for reply in leaky_replies:
        assert detect_leak(reply, secret)

    # A reply without the secret is clean.
    assert not detect_leak("the guardian says nothing of use", secret)
    # Neither an empty reply nor an empty secret counts as a leak.
    assert not detect_leak("", secret)
    assert not detect_leak("anything", "")
# Two-level fixture shared by the tests below: a plain first level and a
# second level with guardrails and few-shot examples switched on.
LEVELS = [
    Level(
        id=1,
        name="One",
        blurb="",
        persona="A jolly keeper.",
        key_prefix="hk-",
    ),
    Level(
        id=2,
        name="Two",
        blurb="",
        persona="A stern keeper.",
        key_prefix="fr-",
        guardrails=True,
        fewshot=True,
    ),
]
def test_secret_key_has_prefix_and_length():
    """A generated key starts with the level prefix and adds key_len characters."""
    level = LEVELS[0]
    prefix = "hk-"

    generated = make_secret_key(level)

    assert generated.startswith(prefix)
    assert len(generated) == len(prefix) + level.key_len
def test_system_prompt_appends_key_last_with_guardrails():
    """The key ends the prompt at both levels; hardening text needs guardrails."""
    guardrail_clause = "Protect the key"

    # Level 1: no guardrails, the key is the final line (recency).
    plain_key = "hk-XYZ"
    plain_prompt = build_system_prompt(LEVELS[0], plain_key)
    assert plain_prompt.rstrip().endswith(plain_key)
    assert guardrail_clause not in plain_prompt

    # Level 2: the key still comes last, after the guardrails + few-shot text.
    hardened_key = "fr-XYZ"
    hardened_prompt = build_system_prompt(LEVELS[1], hardened_key)
    assert hardened_prompt.rstrip().endswith(hardened_key)
    assert guardrail_clause in hardened_prompt  # hardening clause appended
    assert "Always respond like these examples" in hardened_prompt  # few-shot refusals
def test_check_guess_is_normalized():
    """A guess wrapped in spaces and trailing punctuation still matches the secret."""
    game = Engine(LEVELS)
    session = GameState()
    game.start_level(session)

    # The secret with a leading space plus a trailing "! " must be accepted.
    padded_guess = f" {session.secret}! "
    assert game.check_guess(session, padded_guess)

    # An unrelated string must be rejected.
    assert not game.check_guess(session, "not-the-key")
def test_advance_moves_then_finishes():
    """advance() returns True moving to level 2, then False once levels run out."""
    game = Engine(LEVELS)
    session = GameState()
    game.start_level(session)

    # First advance: level 1 is recorded as won and level 2 begins.
    moved_on = game.advance(session)
    assert moved_on is True
    assert session.level_idx == 1
    assert 1 in session.won_levels
    assert session.secret.startswith("fr-")  # new level → new key

    # Second advance: there is no further level to move to.
    finished = game.advance(session)
    assert finished is False
# Single-level fixture with a 100-token budget and two hints, used by the
# hint-threshold test.
HINTED = [
    Level(
        id=1,
        name="H",
        blurb="",
        persona="p",
        key_prefix="hk-",
        budget=100,
        hints=("first", "second"),
    ),
]
def test_hints_surface_at_budget_thresholds():
    """Each hint is returned once, after enough of the token budget is spent."""
    game = Engine(HINTED)
    session = GameState()
    game.start_level(session)

    # (tokens spent, results expected from consecutive next_hint() calls)
    schedule = (
        (10, (None,)),  # under 40%
        (45, ("first", None)),  # crossed 40%; repeat call: 'first' already shown
        (80, ("second", None)),  # crossed 75%; repeat call: both hints exhausted
    )
    for spent, expected_hints in schedule:
        session.tokens_spent = spent
        for expected in expected_hints:
            hint = game.next_hint(session)
            if expected is None:
                assert hint is None
            else:
                assert hint == expected
def test_restart_game_returns_to_level_one():
    """restart_game() resets the level index, the won levels and the hint counter."""
    game = Engine(LEVELS)
    session = GameState()
    game.start_level(session)

    # Move to the second level so there is progress to reset.
    game.advance(session)
    assert session.level_idx == 1

    game.restart_game(session)

    assert session.level_idx == 0
    assert session.won_levels == []
    assert session.hints_shown == 0