neilA / tests /test_loop_stub.py
TriggerFish212's picture
Track taught concepts for lineage and reapplied
3081a6a
Raw
History Blame Contribute Delete
7.05 kB
"""Full turn loop against StubBrain — the entire arc, zero GPU (SPEC §11)."""
from __future__ import annotations
import json
from game.brain import StubBrain
from game.engine import (
advance_challenge,
confirm_candidate,
current_challenge,
new_session,
run_turn,
)
from game.ledger import find, has
def test_fresh_sessions_do_not_share_state():
    """Each new session owns its own ledger and world (SPEC §0/§12).

    A turn played in one session must leave a second, untouched session
    exactly as it started.
    """
    first = new_session()
    second = new_session()
    assert first.ledger is not second.ledger
    assert first.world is not second.world

    run_turn(first, "Hide the blue stone.", StubBrain())

    # Whatever that turn did to `first`, `second` must be unaffected.
    assert second.world.objects["blue_stone"].location == "ground"
    if has(second.ledger, "hidden_info"):
        assert find(second.ledger, "hidden_info").times_applied == 0
def test_warmup_is_mechanical_no_concept():
    """The warm-up is won on the first turn without any concept being offered."""
    session = new_session()
    outcome = run_turn(session, "Put the red stone in the basket.", StubBrain())

    assert outcome.won is True
    assert outcome.newly_won is True
    # Nothing to learn here: no offer on the result, nothing pending on the session.
    assert outcome.learn_offer is None
    assert session.pending_candidate is None
def test_full_arc_with_teaching_and_generalization():
    """Walk the whole challenge arc: warm-up, two taught concepts, two reuses.

    Steps 2 and 3 each produce a learn offer that only reaches the ledger once
    confirmed. Steps 4 and 5 are won without re-teaching and must report the
    earlier concepts in ``reapplied`` and bump their ``times_applied``.
    """
    s = new_session()
    brain = StubBrain()

    # 1. warm-up
    # NOTE(review): the warm-up turn uses a fresh StubBrain() rather than
    # `brain` — kept as-is; confirm whether that is intentional.
    assert run_turn(s, "Put the red stone in the basket.", StubBrain()).won
    assert advance_challenge(s)
    assert current_challenge(s).id == "hide"

    # 2. teach hidden_info — offered, gated on confirmation
    res = run_turn(s, "Hide the blue stone from the other one.", brain)
    assert res.won and res.learn_offer and res.learn_offer["id"] == "hidden_info"
    assert not has(s.ledger, "hidden_info")  # not added until confirmed
    assert confirm_candidate(s) == "hiding information"
    assert has(s.ledger, "hidden_info")
    assert advance_challenge(s)

    # 3. teach gift
    res = run_turn(s, "Give the other one a present.", brain)
    # Guard the offer before subscripting (as steps 2 and 4 do) so a missing
    # offer fails the assertion instead of raising TypeError on None["id"].
    assert res.won and res.learn_offer and res.learn_offer["id"] == "gift"
    confirm_candidate(s)
    assert has(s.ledger, "gift")
    assert advance_challenge(s)

    # 4. GENERALIZATION: surprise = hide + give, with no re-teaching
    assert current_challenge(s).id == "surprise"
    assert current_challenge(s).teaches is None
    res = run_turn(s, "Make a surprise for the other one.", brain)
    assert res.won is True
    # the learned concepts transferred: their times_applied bumped (§5)
    assert "hidden_info" in res.reapplied and "gift" in res.reapplied
    assert find(s.ledger, "hidden_info").times_applied == 1
    # the alien also coins "surprise", composed from what it knows
    assert res.learn_offer and res.learn_offer["id"] == "surprise"
    confirm_candidate(s)
    assert find(s.ledger, "surprise").built_from == ("hidden_info", "gift")
    assert advance_challenge(s)

    # 5. second generalization: secret -> reuses hidden_info
    res = run_turn(s, "Keep your blue stone a secret from the other one.", brain)
    assert res.won is True
    assert "hidden_info" in res.reapplied
    assert find(s.ledger, "hidden_info").times_applied == 2  # bumped again
    assert advance_challenge(s) is False  # arc complete
def test_reject_candidate_keeps_ledger_clean():
    """Rejecting a pending candidate clears it and adds nothing to the ledger."""
    from game.engine import reject_candidate

    session = new_session()
    advance_challenge(session)  # -> hide
    run_turn(session, "Hide the blue stone.", StubBrain())
    assert session.pending_candidate is not None

    reject_candidate(session)

    assert session.pending_candidate is None
    assert not has(session.ledger, "hidden_info")
def test_unparseable_player_request_is_safe():
    """An unrecognised request produces a "wait" action and no win — no crash."""
    session = new_session()
    outcome = run_turn(
        session,
        "Please reticulate the splines philosophically.",
        StubBrain(),
    )

    chosen_verb = outcome.response.action.verb
    assert chosen_verb == "wait"
    assert outcome.won is False
def _scripted(verb, args, cid=None):
    """Serialise one raw model response to JSON, optionally naming a concept.

    The concept id is free-form (non-canonical) — what the REAL model emits,
    as opposed to the stub's canonical ids.

    Args:
        verb: action verb placed under ``action.verb``.
        args: argument mapping placed under ``action.args``.
        cid: concept id; when falsy, ``candidate_concept`` is ``null``.

    Returns:
        The JSON-encoded response string.
    """
    candidate = None
    if cid:
        candidate = {
            "id": cid,
            "label": cid.replace("_", " "),  # human-readable form of the id
            "understanding": f"{cid} gloss",
        }

    payload = {
        "action": {"verb": verb, "args": args},
        "utterance": "...",
        "gap": None,
        "candidate_concept": candidate,
    }
    return json.dumps(payload)
def test_real_model_ids_tracked_by_actual_concepts():
    """Reuse and lineage follow the ids the model actually coined.

    The scripted brain names its concepts freely (concealing/offering rather
    than hidden_info/gift). ``reapplied``, ``times_applied`` and ``built_from``
    must refer to those taught ids, not to the challenge's canonical
    relies_on ids.
    """
    blue_into_basket = {"obj_id": "blue_stone", "container_id": "basket"}
    script = [
        _scripted("put_in", {"obj_id": "red_stone", "container_id": "basket"}),
        _scripted("put_in", blue_into_basket, "concealing"),
        _scripted("give", {"obj_id": "blue_stone", "agent_id": "other"}, "offering"),
        _scripted("put_in", blue_into_basket, "the_surprise"),
    ]
    brain = StubBrain(scripted=script)
    s = new_session()

    # warm-up
    run_turn(s, "put red in basket", brain)
    advance_challenge(s)

    # teach "concealing"
    run_turn(s, "hide blue", brain)
    confirm_candidate(s)
    advance_challenge(s)

    # teach "offering"
    run_turn(s, "give a present", brain)
    confirm_candidate(s)
    advance_challenge(s)

    res = run_turn(s, "make a surprise", brain)

    # reuse lights up the ACTUAL taught concepts, by their real ids
    assert set(res.reapplied) == {"concealing", "offering"}
    assert find(s.ledger, "concealing").times_applied == 1

    confirm_candidate(s)
    coined = find(s.ledger, "the_surprise")
    assert coined.via_generalization is True
    assert coined.built_from == ("concealing", "offering")  # real ids, not phantom
    assert find(s.ledger, "concealing").via_generalization is False
def test_generalization_with_nothing_taught_has_no_phantom_lineage():
    """A generalization reached with nothing taught has an empty lineage.

    This is the transcript bug: both teaching offers are rejected, so the
    concept coined at the generalization step must be self-reached with
    ``built_from == ()`` — not 'from hidden_info+gift', which would point at
    concepts that were never learned.
    """
    from game.engine import reject_candidate

    blue_into_basket = {"obj_id": "blue_stone", "container_id": "basket"}
    script = [
        _scripted("put_in", {"obj_id": "red_stone", "container_id": "basket"}),
        _scripted("put_in", blue_into_basket, "concealing"),
        _scripted("give", {"obj_id": "blue_stone", "agent_id": "other"}, "offering"),
        _scripted("put_in", blue_into_basket, "the_surprise"),
    ]
    brain = StubBrain(scripted=script)
    s = new_session()

    # warm-up
    run_turn(s, "put red", brain)
    advance_challenge(s)

    # offer rejected -> taught nothing
    run_turn(s, "hide blue", brain)
    reject_candidate(s)
    advance_challenge(s)

    # offer rejected -> taught nothing
    run_turn(s, "give present", brain)
    reject_candidate(s)
    advance_challenge(s)

    res = run_turn(s, "make a surprise", brain)
    assert res.reapplied == ()  # nothing taught to reapply

    confirm_candidate(s)
    coined = find(s.ledger, "the_surprise")
    assert coined.via_generalization is True
    assert coined.built_from == ()  # no phantom lineage

    # No ledger entry may have a non-zero taught_on_turn unless it came via
    # generalization.
    directly_taught = [
        c for c in s.ledger
        if c.taught_on_turn != 0 and not c.via_generalization
    ]
    assert not directly_taught