"""The turn loop (SPEC.md §1, §5). This is the game — not the model. A turn is: build_prompt -> converse (model, with §4 retry/fallback) -> apply_action (deterministic) -> check_win (mechanical) -> maybe-offer a concept to learn. Kept free of Gradio so the whole loop is testable against StubBrain at zero GPU (tests/test_loop_stub.py). app.py is thin wiring over these functions. Ledger additions are GATED on player confirmation (SPEC §5): a candidate_concept becomes ``session.pending_candidate`` and only enters the ledger when the player confirms. That keeps the "it learned!" beat deliberate and screenshot-worthy. """ from __future__ import annotations from dataclasses import dataclass from typing import Optional from . import ledger as ledger_mod from .brain import Brain from .challenges import CHALLENGES from .models import GameSession from .parsing import ParsedResponse, converse from .prompt import build_prompt from .world import apply_action, check_win, initial_world @dataclass class TurnResult: response: ParsedResponse log_line: str won: bool # the challenge win-predicate is now satisfied newly_won: bool # ...and it wasn't before this turn learn_offer: Optional[dict] # a candidate concept awaiting confirmation reapplied: tuple[str, ...] = () # concept ids whose times_applied just bumped def new_session() -> GameSession: """Fresh per-visitor state. Lives in one ``gr.State`` (SPEC §0).""" return GameSession( ledger=ledger_mod.seed_ledger(), world=initial_world(), challenge_index=0, turn=0, history=[], pending_candidate=None, won_current=False, ) def current_challenge(session: GameSession): return CHALLENGES[session.challenge_index] def run_turn(session: GameSession, utterance: str, brain: Brain) -> TurnResult: """One full turn. Mutates ``session`` in place and returns a TurnResult. The GPU (when LocalBrain is used) is held ONLY inside ``converse`` -> the decorated ``respond``. Everything here — state mutation, win check, learning — runs on CPU, outside the GPU function (SPEC §6).""" session.turn += 1 challenge = current_challenge(session) session.history.append({"who": "player", "text": utterance, "gap": None, "kind": None}) prompt = build_prompt(session.ledger, session.world, challenge, utterance) response = converse(brain, prompt, session.world) # Deterministic application — the model only ever *proposed* the action. log_line = apply_action(session.world, response.action) session.history.append( {"who": "alien", "text": response.utterance, "gap": response.gap, "action": log_line, "kind": None} ) was_won = session.won_current won = check_win(session.world, challenge) session.won_current = won newly_won = won and not was_won # --- times_applied: a learned concept transferring to a new situation (§5) -- reapplied: list[str] = [] if newly_won and challenge.teaches is None and challenge.relies_on: # Light up the concepts the player ACTUALLY taught — "it drew on what you # gave it". We can't match challenge.relies_on by id: the real model names # concepts freely (the stub used the canonical ids, which masked this). for c in session.ledger: if c.taught_on_turn != 0 and not c.via_generalization: c.times_applied += 1 reapplied.append(c.id) # --- learning gate: offer the candidate, don't auto-add (§5) --------------- learn_offer: Optional[dict] = None candidate = response.candidate_concept if candidate and candidate.get("id"): if not ledger_mod.has(session.ledger, candidate["id"]): session.pending_candidate = {**candidate, "_player_phrase": utterance} learn_offer = candidate if newly_won: session.history.append( {"who": "system", "text": f"✦ {challenge.title} — understood.", "gap": None, "kind": "win"} ) return TurnResult( response=response, log_line=log_line, won=won, newly_won=newly_won, learn_offer=learn_offer, reapplied=tuple(reapplied), ) def confirm_candidate(session: GameSession) -> Optional[str]: """Player pressed "Yes, it learned that". Move the pending candidate into the ledger. Returns the new concept's label, or None if nothing pending.""" cand = session.pending_candidate if not cand: return None phrase = cand.get("_player_phrase", "") payload = {k: v for k, v in cand.items() if not k.startswith("_")} concept = ledger_mod.add_concept(session.ledger, payload, session.turn, phrase) session.pending_candidate = None if concept is None: return None ch = current_challenge(session) concept.via_generalization = ch.teaches is None and bool(ch.relies_on) if concept.via_generalization and not concept.built_from: # Lineage = the concepts the player ACTUALLY taught before this one (their # real ids). The model names concepts freely, so the challenge's canonical # relies_on ids won't be in the ledger — using them produced phantom # "from hidden_info + gift" lineage pointing at concepts that don't exist. concept.built_from = tuple( c.id for c in session.ledger if c.taught_on_turn != 0 and not c.via_generalization and c.id != concept.id ) session.history.append( {"who": "system", "text": f"✦ learned: {concept.label}", "gap": None, "kind": "learn"} ) return concept.label def reject_candidate(session: GameSession) -> None: """Player declined the candidate. Drop it; nothing enters the ledger.""" session.pending_candidate = None def advance_challenge(session: GameSession) -> bool: """Move to the next challenge, resetting the world so its win-predicate is well-posed. Returns False if the arc is complete.""" if session.challenge_index >= len(CHALLENGES) - 1: return False session.challenge_index += 1 session.world = initial_world() session.won_current = False session.pending_candidate = None nxt = current_challenge(session) session.history.append( {"who": "system", "text": f"— {nxt.title} —", "gap": None, "kind": "challenge"} ) return True def is_arc_complete(session: GameSession) -> bool: return session.challenge_index >= len(CHALLENGES) - 1 and session.won_current