neilA / game /engine.py
TriggerFish212's picture
Track taught concepts for lineage and reapplied
3081a6a
Raw
History Blame Contribute Delete
6.62 kB
"""The turn loop (SPEC.md §1, §5).
This is the game — not the model. A turn is:
build_prompt -> converse (model, with §4 retry/fallback) -> apply_action
(deterministic) -> check_win (mechanical) -> maybe-offer a concept to learn.
Kept free of Gradio so the whole loop is testable against StubBrain at zero GPU
(tests/test_loop_stub.py). app.py is thin wiring over these functions.
Ledger additions are GATED on player confirmation (SPEC §5): a candidate_concept
becomes ``session.pending_candidate`` and only enters the ledger when the player
confirms. That keeps the "it learned!" beat deliberate and screenshot-worthy.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional
from . import ledger as ledger_mod
from .brain import Brain
from .challenges import CHALLENGES
from .models import GameSession
from .parsing import ParsedResponse, converse
from .prompt import build_prompt
from .world import apply_action, check_win, initial_world
@dataclass
class TurnResult:
    """What a single ``run_turn`` call reports back to its caller.

    The field order is the positional constructor order, so it must not be
    rearranged.
    """

    # Parsed model reply for this turn (the value ``converse`` returned).
    response: ParsedResponse
    # Text returned by ``apply_action`` when the proposed action was applied.
    log_line: str
    # The challenge win-predicate is satisfied after this turn.
    won: bool
    # ``won`` holds now and did not hold before this turn.
    newly_won: bool
    # Candidate concept awaiting player confirmation, or None if none offered.
    learn_offer: Optional[dict]
    # Ids of the concepts whose ``times_applied`` was just bumped.
    reapplied: tuple[str, ...] = ()
def new_session() -> GameSession:
    """Build the starting state for a new visitor; it lives in one ``gr.State`` (SPEC §0).

    The ledger comes from ``ledger_mod.seed_ledger()`` and the world from
    ``initial_world()``. Play begins at challenge 0, turn 0, with an empty
    history, no pending candidate and the current challenge not yet won.
    """
    starting_ledger = ledger_mod.seed_ledger()
    starting_world = initial_world()
    return GameSession(
        ledger=starting_ledger,
        world=starting_world,
        challenge_index=0,
        turn=0,
        history=[],
        pending_candidate=None,
        won_current=False,
    )
def current_challenge(session: GameSession):
    """Return the ``CHALLENGES`` entry selected by ``session.challenge_index``."""
    index = session.challenge_index
    return CHALLENGES[index]
def run_turn(session: GameSession, utterance: str, brain: Brain) -> TurnResult:
    """Play one complete turn, mutating ``session`` in place.

    Order of operations: record the player's line, build the prompt, ask the
    model via ``converse``, apply the proposed action, re-check the win
    predicate, bump ``times_applied`` on a first win of a generalization
    challenge, and finally stage (never auto-add) any new candidate concept.

    When LocalBrain is used the GPU is held ONLY inside ``converse`` -> the
    decorated ``respond``. State mutation, the win check and learning all run
    on CPU, outside the GPU function (SPEC §6).

    Returns a ``TurnResult`` describing what happened this turn.
    """
    session.turn += 1
    active = current_challenge(session)
    session.history.append(
        {"who": "player", "text": utterance, "gap": None, "kind": None}
    )

    model_prompt = build_prompt(session.ledger, session.world, active, utterance)
    reply = converse(brain, model_prompt, session.world)

    # The model only ever *proposed* the action; applying it is deterministic.
    applied_text = apply_action(session.world, reply.action)
    session.history.append(
        {
            "who": "alien",
            "text": reply.utterance,
            "gap": reply.gap,
            "action": applied_text,
            "kind": None,
        }
    )

    previously_won = session.won_current
    now_won = check_win(session.world, active)
    session.won_current = now_won
    first_win = now_won and not previously_won

    # --- times_applied: a learned concept transferring to a new situation (§5) --
    bumped: list[str] = []
    if first_win and active.teaches is None and active.relies_on:
        # Credit the concepts the player ACTUALLY taught. Matching on
        # active.relies_on ids is not possible: the real model names concepts
        # freely (the stub used the canonical ids, which masked this).
        player_taught = [
            entry
            for entry in session.ledger
            if entry.taught_on_turn != 0 and not entry.via_generalization
        ]
        for entry in player_taught:
            entry.times_applied += 1
        bumped = [entry.id for entry in player_taught]

    # --- learning gate: offer the candidate, don't auto-add (§5) ---------------
    offer: Optional[dict] = None
    proposed = reply.candidate_concept
    if (
        proposed
        and proposed.get("id")
        and not ledger_mod.has(session.ledger, proposed["id"])
    ):
        # Keep the player's phrase alongside the candidate so it is still
        # available at confirmation time.
        staged = dict(proposed)
        staged["_player_phrase"] = utterance
        session.pending_candidate = staged
        offer = proposed

    if first_win:
        session.history.append(
            {
                "who": "system",
                "text": f"✦ {active.title} — understood.",
                "gap": None,
                "kind": "win",
            }
        )

    return TurnResult(
        response=reply,
        log_line=applied_text,
        won=now_won,
        newly_won=first_win,
        learn_offer=offer,
        reapplied=tuple(bumped),
    )
def confirm_candidate(session: GameSession) -> Optional[str]:
    """Handle the player pressing "Yes, it learned that".

    Moves ``session.pending_candidate`` into the ledger via
    ``ledger_mod.add_concept`` and clears the pending slot. Returns the new
    concept's label, or None when nothing was pending or ``add_concept``
    returned None.
    """
    pending = session.pending_candidate
    if not pending:
        return None

    # Keys starting with "_" are bookkeeping attached to the candidate (the
    # player's phrase); they are stripped from the payload sent to the ledger.
    taught_phrase = pending.get("_player_phrase", "")
    public_fields = {}
    for key, value in pending.items():
        if not key.startswith("_"):
            public_fields[key] = value

    learned = ledger_mod.add_concept(
        session.ledger, public_fields, session.turn, taught_phrase
    )
    session.pending_candidate = None
    if learned is None:
        return None

    challenge = current_challenge(session)
    learned.via_generalization = challenge.teaches is None and bool(challenge.relies_on)
    if learned.via_generalization and not learned.built_from:
        # Lineage = the concepts the player ACTUALLY taught before this one
        # (their real ids). The model names concepts freely, so the challenge's
        # canonical relies_on ids won't be in the ledger — using them produced
        # phantom "from hidden_info + gift" lineage pointing at concepts that
        # don't exist.
        lineage = []
        for other in session.ledger:
            if other.taught_on_turn == 0 or other.via_generalization:
                continue
            if other.id != learned.id:
                lineage.append(other.id)
        learned.built_from = tuple(lineage)

    session.history.append(
        {
            "who": "system",
            "text": f"✦ learned: {learned.label}",
            "gap": None,
            "kind": "learn",
        }
    )
    return learned.label
def reject_candidate(session: GameSession) -> None:
    """Discard the pending candidate after the player declines it.

    Only ``session.pending_candidate`` is cleared; nothing is added to the
    ledger.
    """
    session.pending_candidate = None
    return None
def advance_challenge(session: GameSession) -> bool:
    """Step the session to the next challenge.

    The world is reset so the next win-predicate is well-posed; the won flag
    and any pending candidate are cleared, and a banner line is appended to
    the history. Returns False (changing nothing) when the session is already
    on the last challenge, True otherwise.
    """
    final_index = len(CHALLENGES) - 1
    if session.challenge_index >= final_index:
        return False

    session.challenge_index += 1
    session.world = initial_world()
    session.won_current = False
    session.pending_candidate = None

    upcoming = current_challenge(session)
    banner = {
        "who": "system",
        "text": f"— {upcoming.title} —",
        "gap": None,
        "kind": "challenge",
    }
    session.history.append(banner)
    return True
def is_arc_complete(session: GameSession) -> bool:
    """Report whether the player is on the final challenge and has won it."""
    if session.challenge_index < len(CHALLENGES) - 1:
        # Challenges remain, so the arc cannot be complete yet.
        return False
    return session.won_current