Spaces:

build-small-hackathon
/

neilA

Sleeping

App Files Files Community

neilA / game /engine.py

TriggerFish212

Track taught concepts for lineage and reapplied

3081a6a 20 days ago

Raw

History Blame Contribute Delete

6.62 kB

	"""The turn loop (SPEC.md §1, §5).

	This is the game — not the model. A turn is:

	build_prompt -> converse (model, with §4 retry/fallback) -> apply_action
	(deterministic) -> check_win (mechanical) -> maybe-offer a concept to learn.

	Kept free of Gradio so the whole loop is testable against StubBrain at zero GPU
	(tests/test_loop_stub.py). app.py is thin wiring over these functions.

	Ledger additions are GATED on player confirmation (SPEC §5): a candidate_concept
	becomes ``session.pending_candidate`` and only enters the ledger when the player
	confirms. That keeps the "it learned!" beat deliberate and screenshot-worthy.
	"""

	from __future__ import annotations

	from dataclasses import dataclass
	from typing import Optional

	from . import ledger as ledger_mod
	from .brain import Brain
	from .challenges import CHALLENGES
	from .models import GameSession
	from .parsing import ParsedResponse, converse
	from .prompt import build_prompt
	from .world import apply_action, check_win, initial_world


	@dataclass
	class TurnResult:
	    """Everything ``run_turn`` reports back about a single turn.

	    Fields:
	        response: the parsed model response produced for this turn.
	        log_line: the log line produced when the action was applied.
	        won: the challenge win-predicate is now satisfied.
	        newly_won: ...and it wasn't before this turn.
	        learn_offer: a candidate concept awaiting confirmation, or None.
	        reapplied: concept ids whose ``times_applied`` just bumped.
	    """

	    response: ParsedResponse
	    log_line: str
	    won: bool
	    newly_won: bool
	    learn_offer: Optional[dict]
	    reapplied: tuple[str, ...] = ()


	def new_session() -> GameSession:
	    """Build the starting state for one visitor.

	    The session is meant to live in a single ``gr.State`` (SPEC §0). It starts
	    on challenge 0 at turn 0 with a seeded ledger, an initial world, an empty
	    history, no pending candidate and the current challenge not yet won.
	    """
	    # Evaluated in the same order the fields are listed: ledger, then world.
	    seeded_ledger = ledger_mod.seed_ledger()
	    starting_world = initial_world()
	    return GameSession(
	        ledger=seeded_ledger,
	        world=starting_world,
	        challenge_index=0,
	        turn=0,
	        history=[],
	        pending_candidate=None,
	        won_current=False,
	    )


	def current_challenge(session: GameSession):
	    """Return the ``CHALLENGES`` entry selected by ``session.challenge_index``."""
	    index = session.challenge_index
	    return CHALLENGES[index]


	def run_turn(session: GameSession, utterance: str, brain: Brain) -> TurnResult:
	    """Play one full turn, mutating ``session`` in place.

	    In order: bump the turn counter, record the player's line, build the prompt
	    and get a response through ``converse``, apply the proposed action to the
	    world, record the alien's line, re-check the win predicate, bump
	    ``times_applied`` on a fresh win of a transfer challenge, stage any new
	    candidate concept for confirmation, and record a win line.

	    Design note (SPEC §6): when LocalBrain is used the GPU is held ONLY inside
	    ``converse`` -> the decorated ``respond``. State mutation, the win check
	    and learning all happen here, outside the GPU function.

	    Returns a ``TurnResult`` describing what happened this turn.
	    """
	    session.turn += 1
	    challenge = current_challenge(session)

	    player_entry = {"who": "player", "text": utterance, "gap": None, "kind": None}
	    session.history.append(player_entry)

	    parsed = converse(
	        brain,
	        build_prompt(session.ledger, session.world, challenge, utterance),
	        session.world,
	    )

	    # The model only ever proposed the action; applying it is deterministic.
	    action_log = apply_action(session.world, parsed.action)

	    alien_entry = {
	        "who": "alien",
	        "text": parsed.utterance,
	        "gap": parsed.gap,
	        "action": action_log,
	        "kind": None,
	    }
	    session.history.append(alien_entry)

	    # A win counts as "new" only if the flag was not already set before this turn.
	    previously_won = session.won_current
	    won = check_win(session.world, challenge)
	    session.won_current = won
	    newly_won = won and not previously_won

	    # --- times_applied: a learned concept transferring to a new situation (§5) --
	    bumped_ids: list[str] = []
	    if newly_won and challenge.teaches is None and challenge.relies_on:
	        # Credit the concepts the player ACTUALLY taught rather than matching
	        # challenge.relies_on by id: the real model names concepts freely (the
	        # stub used the canonical ids, which masked this).
	        player_taught = (
	            entry
	            for entry in session.ledger
	            if entry.taught_on_turn != 0 and not entry.via_generalization
	        )
	        for entry in player_taught:
	            entry.times_applied += 1
	            bumped_ids.append(entry.id)

	    # --- learning gate: offer the candidate, don't auto-add (§5) ---------------
	    offer: Optional[dict] = None
	    proposed = parsed.candidate_concept
	    if (
	        proposed
	        and proposed.get("id")
	        and not ledger_mod.has(session.ledger, proposed["id"])
	    ):
	        # Stage a copy tagged with the player's phrase; the offer handed back
	        # to the caller is the untagged candidate itself.
	        staged = dict(proposed)
	        staged["_player_phrase"] = utterance
	        session.pending_candidate = staged
	        offer = proposed

	    if newly_won:
	        win_entry = {
	            "who": "system",
	            "text": f"✦ {challenge.title} — understood.",
	            "gap": None,
	            "kind": "win",
	        }
	        session.history.append(win_entry)

	    return TurnResult(
	        response=parsed,
	        log_line=action_log,
	        won=won,
	        newly_won=newly_won,
	        learn_offer=offer,
	        reapplied=tuple(bumped_ids),
	    )


	def confirm_candidate(session: GameSession) -> Optional[str]:
	    """Commit the pending candidate to the ledger (player pressed "Yes, it learned that").

	    Keys starting with ``_`` are bookkeeping and are stripped before the
	    candidate is handed to ``ledger_mod.add_concept``; the ``_player_phrase``
	    entry is passed separately. The pending slot is cleared once
	    ``add_concept`` returns.

	    Returns the new concept's label, or None when nothing was pending or
	    ``add_concept`` returned None.
	    """
	    pending = session.pending_candidate
	    if not pending:
	        return None

	    player_phrase = pending.get("_player_phrase", "")
	    public_fields = {}
	    for key, value in pending.items():
	        if not key.startswith("_"):
	            public_fields[key] = value

	    concept = ledger_mod.add_concept(
	        session.ledger, public_fields, session.turn, player_phrase
	    )
	    session.pending_candidate = None
	    if concept is None:
	        return None

	    challenge = current_challenge(session)
	    concept.via_generalization = (
	        challenge.teaches is None and bool(challenge.relies_on)
	    )
	    if concept.via_generalization and not concept.built_from:
	        # Lineage = the concepts the player ACTUALLY taught before this one
	        # (their real ids). The model names concepts freely, so the challenge's
	        # canonical relies_on ids won't be in the ledger — using them produced
	        # phantom lineage pointing at concepts that don't exist.
	        lineage = []
	        for other in session.ledger:
	            if (
	                other.taught_on_turn != 0
	                and not other.via_generalization
	                and other.id != concept.id
	            ):
	                lineage.append(other.id)
	        concept.built_from = tuple(lineage)

	    learned_entry = {
	        "who": "system",
	        "text": f"✦ learned: {concept.label}",
	        "gap": None,
	        "kind": "learn",
	    }
	    session.history.append(learned_entry)
	    return concept.label


	def reject_candidate(session: GameSession) -> None:
	    """Discard the pending candidate because the player declined it.

	    Only ``session.pending_candidate`` is cleared; the ledger is not touched.
	    """
	    session.pending_candidate = None


	def advance_challenge(session: GameSession) -> bool:
	    """Step the session forward to the next challenge.

	    The world is reset to ``initial_world()`` so the next win-predicate is
	    well-posed; the won flag and any pending candidate are cleared, and a
	    title line for the new challenge is appended to the history.

	    Returns True after advancing, or False (changing nothing) when the session
	    is already on the last challenge, i.e. the arc is complete.
	    """
	    last_index = len(CHALLENGES) - 1
	    if session.challenge_index >= last_index:
	        return False

	    session.challenge_index += 1
	    session.world = initial_world()
	    session.won_current = False
	    session.pending_candidate = None

	    upcoming = current_challenge(session)
	    banner = {
	        "who": "system",
	        "text": f"— {upcoming.title} —",
	        "gap": None,
	        "kind": "challenge",
	    }
	    session.history.append(banner)
	    return True


	def is_arc_complete(session: GameSession) -> bool:
	    """Report whether the session is on the final challenge and has won it.

	    When the session is on the final challenge, the result is whatever
	    ``session.won_current`` holds; otherwise it is False.
	    """
	    on_final_challenge = session.challenge_index >= len(CHALLENGES) - 1
	    if not on_final_challenge:
	        return on_final_challenge
	    return session.won_current