irregular6612's picture
feat(discovery): TurnTrace self_belief/self_correct + Scenario discovery hooks
45e0c57
Raw
History Blame Contribute Delete
6.51 kB
"""Lean trace models for a PROTEUS session.
These are the JSONL/JSON serialization boundary between the live runtime and
any offline analysis. They deliberately carry none of the parent project's
forfeit / risk / reasoning-investment baggage — only what the motive-reading
arena measures.
"""
from __future__ import annotations
from typing import Literal
from pydantic import BaseModel, Field
class TurnTrace(BaseModel):
    """One played turn after the Cut handover.

    Every field is documented in the ``Attributes`` section below; fields
    added by later checkpoints are optional so that older traces still load.

    Attributes:
        turn_idx: 1-based index of this played turn.
        observation: The text observation shown to the agent this turn.
        probe_q: Probe question asked (empty if probing disabled).
        probe_a: Probe answer given (empty if probing disabled).
        probe_reasoning: The probe's stated/extracted rationale
            (CoT / thinking).
        probe_raw_text: Full unprocessed probe-call output from the model.
        probe_input_tokens: Probe-call token usage — prompt/input side.
        probe_output_tokens: Probe-call token usage — completion/output side.
        probe_thinking_tokens: Reasoning-token count for the probe call
            (provider-reported or inline ``<think>`` whitespace-split count).
        reasoning: The agent's stated/extracted rationale.
        raw_text: Full unprocessed act-call output from the model.
        action: The action the agent committed.
        motive_action: The motive-congruent correct action (answer key).
        habit_action: The inertia/baseline action (control).
        is_diagnostic: Whether ``motive_action != habit_action`` this turn.
        was_congruent: Whether ``action == motive_action``.
        reward: Score delta for this turn.
        focal_pos: Focal ``(x, y)`` BEFORE the move.
        predator_pos: Predator ``(x, y)`` BEFORE the move. Both positions
            serialize to JSON arrays (e.g. ``[3, 3]``) and are coerced back
            to tuples on load, so raw-JSONL analysis consumers will see
            arrays.
        thinking_tokens: Approximate reasoning-token count
            (provider-reported or inline ``<think>`` whitespace-split count),
            if available.
        input_tokens: Act-call token usage — prompt/input side.
        output_tokens: Act-call token usage — completion/output side.
        post_focal_pos: Focal ``(x, y)`` AFTER the move (CP8; None on old
            traces).
        post_predator_pos: Predator ``(x, y)`` AFTER the threat advanced
            (CP8).
        pre_bfs_distance: BFS focal→predator BEFORE the move — observed risk
            (CP8).
        post_bfs_distance: BFS focal→predator AFTER the turn — realised
            safety (CP8).
        agent_distance_delta: Chase-corrected action quality vs the PRE-move
            predator cell (spec §6.2; CP8).
        reference_actions: Public reference action set used for
            persona-maintenance scoring (spec §6.3/§7; CP8). None unless the
            eval runs against a hidden persona.
        reference_reward: Scalar persona-maintenance reward; presumably the
            reward of the reference action(s) — confirm against spec §6.3.
            None unless the eval runs against a hidden persona.
        model_reward: Scalar persona-maintenance reward; presumably the
            reward of the model's own action — confirm against spec §6.3.
            None unless the eval runs against a hidden persona.
        reward_regret: Scalar persona-maintenance regret; presumably derived
            from ``reference_reward`` and ``model_reward`` — confirm against
            spec §6.3. None unless the eval runs against a hidden persona.
        pressure: Scalar persona-maintenance pressure (defined in spec §6.3,
            not in this module). None unless the eval runs against a hidden
            persona.
        self_belief: The candidate index the model reported via
            ``SELF: <i>`` this turn. None on non-discovery scenarios.
        self_correct: Whether ``self_belief`` equals the scenario's true body
            index this turn. None on non-discovery scenarios.
    """

    turn_idx: int
    observation: str

    # Probe call (all empty/zero when probing is disabled).
    probe_q: str = ""
    probe_a: str = ""
    probe_reasoning: str = ""
    probe_raw_text: str = ""
    probe_input_tokens: int = 0
    probe_output_tokens: int = 0
    probe_thinking_tokens: int = 0

    # Act call.
    reasoning: str = ""
    raw_text: str = ""
    action: str
    motive_action: str
    habit_action: str
    is_diagnostic: bool
    was_congruent: bool
    reward: float
    focal_pos: tuple[int, int]
    predator_pos: tuple[int, int]
    thinking_tokens: int = 0
    input_tokens: int = 0
    output_tokens: int = 0

    # CP8 additive distance fields (spec §6.2/§7); None on pre-CP8 traces.
    post_focal_pos: tuple[int, int] | None = None
    post_predator_pos: tuple[int, int] | None = None
    pre_bfs_distance: int | None = None
    post_bfs_distance: int | None = None
    agent_distance_delta: float | None = None

    # CP8 persona-maintenance fields (spec §6.3/§7); set only when an eval runs
    # against a hidden persona, else None. The weights are never serialized —
    # only the public reference action set + scalar rewards/pressure.
    reference_actions: list[str] | None = None
    reference_reward: float | None = None
    model_reward: float | None = None
    reward_regret: float | None = None
    pressure: float | None = None

    # Find-your-body discovery (errand_runner). None on non-discovery
    # scenarios. Documented in the class docstring rather than as bare-string
    # attribute docstrings, to match every other field on this model.
    self_belief: int | None = None
    self_correct: bool | None = None
class SessionTrace(BaseModel):
    """Record of one complete session.

    Bundles the session setup, the Cut pre-roll history, the played turns,
    the final outcome and the computed metrics.

    Attributes:
        scenario: Name under which the scenario is registered.
        motive_category: Motive category the scenario belongs to
            (e.g. ``"survival"``).
        seed: Seed used to build the deterministic world/Cut.
        difficulty: Difficulty band, as a string.
        model: Provider model identifier.
        cut_frames: ASCII frames of the Cut pre-roll — the initial frame plus
            one per step. The final frame is the handover state.
        turns: Per-turn traces, in the order they were played.
        outcome: Either ``"survived"`` or ``"eliminated"``.
        metrics: Computed session metrics (see ``runtime.metrics``).
        memory_ref: Path/ref of the CP7 memory checkpoint shown at the
            handover; ``None`` when no memory pre-roll was used.
        turn_order: How the engine resolved each turn (spec §4). The default
            is today's ``"focal_then_predator"``; ``"simultaneous"`` arrives
            in Pass 3 (gated). Stored so the report can state the contract.
        capture_rule: Capture predicate in force (spec §5). Today this is
            ``"same_cell"``; ``"same_cell_or_crossing"`` arrives with the
            simultaneous resolver.
        horizon: The survival budget ``play_turns`` (= ``H``); None on old
            traces.
        persona_weight_id: Public persona id when the session is scored
            against a hidden persona (spec §7), else None. The raw weights
            are never stored on the trace.
    """

    # Session setup.
    scenario: str
    motive_category: str
    seed: int | None = None
    difficulty: str
    model: str

    # Cut pre-roll, played turns, and result.
    cut_frames: list[str] = Field(default_factory=list)
    turns: list[TurnTrace] = Field(default_factory=list)
    outcome: Literal["survived", "eliminated"]
    metrics: dict[str, float] = Field(default_factory=dict)

    # CP7: reference to the memory checkpoint shown at handover, if one was.
    memory_ref: str | None = None

    # CP8: the engine contract in force for this episode (spec §4/§5/§7).
    turn_order: str = "focal_then_predator"
    capture_rule: str = "same_cell"
    horizon: int | None = None  # = play_turns

    # CP8: only the public persona id is kept when scoring against a hidden
    # persona (spec §7) — never the raw weights.
    persona_weight_id: str | None = None