Spaces:

build-small-hackathon
/

whisperkey

Running

App Files Files Community

whisperkey / tests /test_engine.py

chiruu12

Deploy: working gr.Server frontend + review fixes

5a811e2 verified 3 days ago

raw

history blame contribute delete

3.25 kB

	"""Engine + levels unit tests - pure logic, no models or network."""

	import base64

	from jailbreak_dojo.engine import Engine, GameState, detect_leak
	from jailbreak_dojo.levels import Level, build_system_prompt, make_secret_key


	def test_detect_leak_catches_disguised_forms():
	    """detect_leak must flag every disguised form of the key and ignore clean text.

	    Covers the verbatim, space-separated, reversed and base64-encoded forms,
	    then checks that harmless text, an empty reply and an empty secret are
	    all reported as "no leak".
	    """
	    secret = "hk-AB7cD9pQ2xKmNpRsTvWy"

	    # Each positive case hides the same secret in a different way.
	    verbatim_reply = f"the key is {secret}"
	    assert detect_leak(verbatim_reply, secret)

	    spaced_reply = "h k - A B 7 c D 9 p Q 2 x K m N p R s T v W y"
	    assert detect_leak(spaced_reply, secret)

	    reversed_reply = f"reversed: {secret[::-1]}"
	    assert detect_leak(reversed_reply, secret)

	    b64_payload = base64.b64encode(secret.encode()).decode()
	    assert detect_leak("encoded: " + b64_payload, secret)

	    # Negative cases: unrelated text, empty reply, empty secret.
	    assert not detect_leak("the guardian says nothing of use", secret)
	    assert not detect_leak("", secret)
	    assert not detect_leak("anything", "")


	# Shared two-level fixture: level 1 is a plain level issuing "hk-" keys,
	# level 2 issues "fr-" keys with guardrails and few-shot refusals enabled.
	LEVELS = [
	    Level(
	        id=1,
	        name="One",
	        blurb="",
	        persona="A jolly keeper.",
	        key_prefix="hk-",
	    ),
	    Level(
	        id=2,
	        name="Two",
	        blurb="",
	        persona="A stern keeper.",
	        key_prefix="fr-",
	        guardrails=True,
	        fewshot=True,
	    ),
	]


	def test_secret_key_has_prefix_and_length():
	    """A generated key starts with "hk-" and is the prefix plus key_len characters long."""
	    first_level = LEVELS[0]
	    prefix = "hk-"

	    generated = make_secret_key(first_level)

	    assert generated.startswith(prefix)
	    expected_length = len(prefix) + first_level.key_len
	    assert len(generated) == expected_length


	def test_system_prompt_appends_key_last_with_guardrails():
	    """The key always ends the system prompt; hardening text appears only at level 2.

	    Level 1 has neither guardrails nor few-shot examples, so its prompt must
	    not contain the hardening clause. Level 2 enables both, and the key must
	    still come after them.
	    """
	    plain_key = "hk-XYZ"
	    plain_prompt = build_system_prompt(LEVELS[0], plain_key)
	    # Ignoring trailing whitespace, the key is the very last thing in the prompt.
	    assert plain_prompt.rstrip().endswith(plain_key)
	    # No guardrails are configured for the first level.
	    assert "Protect the key" not in plain_prompt

	    hardened_key = "fr-XYZ"
	    hardened_prompt = build_system_prompt(LEVELS[1], hardened_key)
	    # The key stays last even with guardrails and few-shot text added.
	    assert hardened_prompt.rstrip().endswith(hardened_key)
	    # Guardrails level: the hardening clause is present.
	    assert "Protect the key" in hardened_prompt
	    # Few-shot level: the example refusals are introduced.
	    assert "Always respond like these examples" in hardened_prompt


	def test_check_guess_is_normalized():
	    """A guess wrapped in spaces and a trailing "!" still matches; a wrong guess does not."""
	    game = Engine(LEVELS)
	    session = GameState()
	    game.start_level(session)

	    # Surround the real secret with whitespace and punctuation noise.
	    noisy_guess = f" {session.secret}! "
	    assert game.check_guess(session, noisy_guess)

	    assert not game.check_guess(session, "not-the-key")


	def test_advance_moves_then_finishes():
	    """advance() moves to level 2 with a fresh key, then returns False on the last level."""
	    game = Engine(LEVELS)
	    session = GameState()
	    game.start_level(session)

	    # First advance: level 1 is recorded as won and level 2 becomes current.
	    moved_on = game.advance(session)
	    assert moved_on is True
	    assert session.level_idx == 1
	    assert 1 in session.won_levels
	    # The new level issues its own key, carrying that level's prefix.
	    assert session.secret.startswith("fr-")

	    # Second advance: no level follows the last one.
	    moved_again = game.advance(session)
	    assert moved_again is False


	# Single-level fixture with a 100-token budget and two hints, used by the
	# hint-threshold test.
	HINTED = [
	    Level(
	        id=1,
	        name="H",
	        blurb="",
	        persona="p",
	        key_prefix="hk-",
	        budget=100,
	        hints=("first", "second"),
	    ),
	]


	def test_hints_surface_at_budget_thresholds():
	    """Each hint is handed out once, as spending crosses 40% and then 75% of the budget.

	    The fixture budget is 100 tokens, so tokens_spent reads directly as a
	    percentage.
	    """
	    game = Engine(HINTED)
	    session = GameState()
	    game.start_level(session)

	    # Below the first threshold (40%): nothing to show.
	    session.tokens_spent = 10
	    assert game.next_hint(session) is None

	    # Past 40%: the first hint appears exactly once.
	    session.tokens_spent = 45
	    assert game.next_hint(session) == "first"
	    # Still short of 75%, and 'first' has already been shown.
	    assert game.next_hint(session) is None

	    # Past 75%: the second hint appears exactly once.
	    session.tokens_spent = 80
	    assert game.next_hint(session) == "second"
	    # Both hints are now used up.
	    assert game.next_hint(session) is None


	def test_restart_game_returns_to_level_one():
	    """restart_game() resets the level index, the won-level list and the hint counter."""
	    game = Engine(LEVELS)
	    session = GameState()
	    game.start_level(session)

	    # Make progress first so there is state to reset.
	    game.advance(session)
	    assert session.level_idx == 1

	    game.restart_game(session)

	    # Back at the first level with no recorded wins and no hints shown.
	    assert session.level_idx == 0
	    assert session.won_levels == []
	    assert session.hints_shown == 0