"""Engine + levels unit tests - pure logic, no models or network.""" import base64 from jailbreak_dojo.engine import Engine, GameState, detect_leak from jailbreak_dojo.levels import Level, build_system_prompt, make_secret_key def test_detect_leak_catches_disguised_forms(): s = "hk-AB7cD9pQ2xKmNpRsTvWy" assert detect_leak(f"the key is {s}", s) # verbatim assert detect_leak("h k - A B 7 c D 9 p Q 2 x K m N p R s T v W y", s) # spaced assert detect_leak(f"reversed: {s[::-1]}", s) # reversed assert detect_leak("encoded: " + base64.b64encode(s.encode()).decode(), s) # base64 assert not detect_leak("the guardian says nothing of use", s) # clean assert not detect_leak("", s) and not detect_leak("anything", "") LEVELS = [ Level(id=1, name="One", blurb="", persona="A jolly keeper.", key_prefix="hk-"), Level(id=2, name="Two", blurb="", persona="A stern keeper.", key_prefix="fr-", guardrails=True, fewshot=True), ] def test_secret_key_has_prefix_and_length(): key = make_secret_key(LEVELS[0]) assert key.startswith("hk-") assert len(key) == len("hk-") + LEVELS[0].key_len def test_system_prompt_appends_key_last_with_guardrails(): p1 = build_system_prompt(LEVELS[0], "hk-XYZ") assert p1.rstrip().endswith("hk-XYZ") # the key is the final line (recency) assert "Protect the key" not in p1 # no guardrails at L1 p2 = build_system_prompt(LEVELS[1], "fr-XYZ") assert p2.rstrip().endswith("fr-XYZ") # key still last, after the guardrails + few-shot assert "Protect the key" in p2 # hardening clause appended at a guardrails level assert "Always respond like these examples" in p2 # few-shot refusals present def test_check_guess_is_normalized(): engine = Engine(LEVELS) state = GameState() engine.start_level(state) assert engine.check_guess(state, f" {state.secret}! ") assert not engine.check_guess(state, "not-the-key") def test_advance_moves_then_finishes(): engine = Engine(LEVELS) state = GameState() engine.start_level(state) assert engine.advance(state) is True assert state.level_idx == 1 assert 1 in state.won_levels assert state.secret.startswith("fr-") # new level → new key assert engine.advance(state) is False HINTED = [Level(id=1, name="H", blurb="", persona="p", key_prefix="hk-", budget=100, hints=("first", "second"))] def test_hints_surface_at_budget_thresholds(): engine = Engine(HINTED) state = GameState() engine.start_level(state) state.tokens_spent = 10 assert engine.next_hint(state) is None # under 40% state.tokens_spent = 45 assert engine.next_hint(state) == "first" # crossed 40% assert engine.next_hint(state) is None # not yet at 75%, and 'first' already shown state.tokens_spent = 80 assert engine.next_hint(state) == "second" # crossed 75% assert engine.next_hint(state) is None # both hints exhausted def test_restart_game_returns_to_level_one(): engine = Engine(LEVELS) state = GameState() engine.start_level(state) engine.advance(state) assert state.level_idx == 1 engine.restart_game(state) assert state.level_idx == 0 and state.won_levels == [] and state.hints_shown == 0