Spaces:
Sleeping
Sleeping
| """Lean trace models for a PROTEUS session. | |
| These are the JSONL/JSON serialization boundary between the live runtime and | |
| any offline analysis. They deliberately carry none of the parent project's | |
| forfeit / risk / reasoning-investment baggage — only what the motive-reading | |
| arena measures. | |
| """ | |
| from __future__ import annotations | |
| from typing import Literal | |
| from pydantic import BaseModel, Field | |
class TurnTrace(BaseModel):
    """Record of a single played turn, counted from the Cut handover onward.

    Fields fall into five groups: the probe call, the act call, the
    answer-key scoring, the CP8 distance bookkeeping, and the optional
    persona-maintenance / body-discovery extras. Every optional field
    defaults to an "absent" value (``""``, ``0`` or ``None``) so older
    traces that lack it still load.

    Attributes:
        turn_idx: Index of this played turn, starting at 1.
        observation: Text observation presented to the agent on this turn.
        probe_q: The probe question put to the model; empty when probing
            is disabled.
        probe_a: The model's probe answer; empty when probing is disabled.
        probe_reasoning: Stated/extracted rationale (CoT / thinking) of the
            probe call.
        probe_raw_text: Complete, unprocessed model output of the probe call.
        probe_input_tokens: Prompt/input-side token usage of the probe call.
        probe_output_tokens: Completion/output-side token usage of the
            probe call.
        probe_thinking_tokens: Reasoning-token count of the probe call
            (provider-reported, or a whitespace-split count of inline
            ``<think>`` text).
        reasoning: Stated/extracted rationale of the agent's act call.
        raw_text: Complete, unprocessed model output of the act call.
        action: Action the agent committed to.
        motive_action: Answer key — the motive-congruent correct action.
        habit_action: Control — the inertia/baseline action.
        is_diagnostic: True when ``motive_action != habit_action`` on this
            turn.
        was_congruent: True when ``action == motive_action``.
        reward: Score delta earned on this turn.
        focal_pos: Focal ``(x, y)`` position BEFORE the move.
        predator_pos: Predator ``(x, y)`` position BEFORE the move. Both
            pre-move positions are written to JSON as arrays (e.g.
            ``[3, 3]``) and coerced back to tuples on load, so consumers
            reading the raw JSONL see arrays.
        thinking_tokens: Approximate reasoning-token count of the act call
            (provider-reported, or a whitespace-split count of inline
            ``<think>`` text), when available.
        input_tokens: Prompt/input-side token usage of the act call.
        output_tokens: Completion/output-side token usage of the act call.
        post_focal_pos: Focal ``(x, y)`` AFTER the move (CP8; ``None`` on
            old traces).
        post_predator_pos: Predator ``(x, y)`` AFTER the threat advanced
            (CP8).
        pre_bfs_distance: BFS distance focal→predator BEFORE the move —
            the observed risk (CP8).
        post_bfs_distance: BFS distance focal→predator AFTER the turn —
            the realised safety (CP8).
        agent_distance_delta: Chase-corrected action quality, measured
            against the PRE-move predator cell (spec §6.2; CP8).
        reference_actions: Public reference action set for the hidden
            persona (CP8, spec §6.3/§7); ``None`` unless the eval runs
            against a hidden persona.
        reference_reward: Persona-maintenance scalar reward (CP8);
            ``None`` unless the eval runs against a hidden persona.
        model_reward: Persona-maintenance scalar reward (CP8); ``None``
            unless the eval runs against a hidden persona.
        reward_regret: Persona-maintenance scalar (CP8); ``None`` unless
            the eval runs against a hidden persona.
        pressure: Persona-maintenance pressure scalar (CP8); ``None``
            unless the eval runs against a hidden persona.
        self_belief: Candidate index the model reported via ``SELF: <i>``
            on this turn (find-your-body discovery, errand_runner);
            ``None`` on non-discovery scenarios.
        self_correct: Whether ``self_belief`` equals the scenario's true
            body index on this turn; ``None`` on non-discovery scenarios.
    """

    # NOTE: declaration order is kept exactly as established — pydantic
    # emits fields in this order when a trace is dumped.

    # --- Turn identity and what the agent saw --------------------------------
    turn_idx: int
    observation: str

    # --- Probe call (all empty/zero when probing is disabled) ----------------
    probe_q: str = ""
    probe_a: str = ""
    probe_reasoning: str = ""
    probe_raw_text: str = ""
    probe_input_tokens: int = 0
    probe_output_tokens: int = 0
    probe_thinking_tokens: int = 0

    # --- Act call: rationale, raw output, committed action -------------------
    reasoning: str = ""
    raw_text: str = ""
    action: str

    # --- Answer key and scoring ----------------------------------------------
    motive_action: str
    habit_action: str
    is_diagnostic: bool
    was_congruent: bool
    reward: float

    # --- Pre-move world state ------------------------------------------------
    focal_pos: tuple[int, int]
    predator_pos: tuple[int, int]

    # --- Act-call token accounting -------------------------------------------
    thinking_tokens: int = 0
    input_tokens: int = 0
    output_tokens: int = 0

    # --- CP8 distance fields (spec §6.2/§7) ----------------------------------
    # Purely additive: traces written before CP8 leave these as None.
    post_focal_pos: tuple[int, int] | None = None
    post_predator_pos: tuple[int, int] | None = None
    pre_bfs_distance: int | None = None
    post_bfs_distance: int | None = None
    agent_distance_delta: float | None = None

    # --- CP8 persona-maintenance fields (spec §6.3/§7) -----------------------
    # Populated only when the eval runs against a hidden persona. The
    # persona's weights are never serialized; only the public reference
    # action set and the scalar rewards/pressure are recorded.
    reference_actions: list[str] | None = None
    reference_reward: float | None = None
    model_reward: float | None = None
    reward_regret: float | None = None
    pressure: float | None = None

    # --- Find-your-body discovery (errand_runner) ----------------------------
    # None on scenarios without body discovery.
    self_belief: int | None = None
    self_correct: bool | None = None
class SessionTrace(BaseModel):
    """A full session: setup, Cut history, played turns, outcome, metrics.

    Attributes:
        scenario: Registered scenario name.
        motive_category: The motive category this scenario belongs to
            (e.g. ``"survival"``).
        seed: Seed used to build the deterministic world/Cut. Optional;
            defaults to ``None``.
        difficulty: Difficulty band string.
        model: Provider model identifier.
        cut_frames: ASCII frames of the Cut pre-roll (initial + each step);
            the last frame is the handover state. Defaults to an empty list.
        turns: Per-turn traces, in play order. Defaults to an empty list.
        outcome: ``"survived"`` or ``"eliminated"``.
        metrics: Computed session metrics (see ``runtime.metrics``), keyed by
            metric name with float values. Defaults to an empty dict.
        memory_ref: Path/ref of the CP7 memory checkpoint shown at the handover,
            or ``None`` when no memory pre-roll was used.
        turn_order: How the engine resolved each turn (spec §4). Defaults to
            today's ``"focal_then_predator"``; ``"simultaneous"`` arrives in
            Pass 3 (gated). Recorded so the report can state the contract.
        capture_rule: Capture predicate in force (spec §5). Today ``"same_cell"``;
            ``"same_cell_or_crossing"`` arrives with the simultaneous resolver.
        horizon: The survival budget ``play_turns`` (= ``H``); None on old traces.
        persona_weight_id: Public persona id when the session is scored against
            a hidden persona (spec §7), else ``None``. The raw persona weights
            are never stored on the trace.
    """

    # Session setup: which scenario/model produced this trace.
    scenario: str
    motive_category: str
    seed: int | None = None
    difficulty: str
    model: str
    # Cut pre-roll frames followed by the played turns, both in order.
    cut_frames: list[str] = Field(default_factory=list)
    turns: list[TurnTrace] = Field(default_factory=list)
    # Final result; validation restricts this to the two literal values.
    outcome: Literal["survived", "eliminated"]
    metrics: dict[str, float] = Field(default_factory=dict)
    # CP7: path/ref of the memory checkpoint shown at handover (None if unused).
    memory_ref: str | None = None
    # CP8: engine contract recorded on the episode (spec §4/§5/§7).
    turn_order: str = "focal_then_predator"
    capture_rule: str = "same_cell"
    horizon: int | None = None  # = play_turns
    # CP8: public persona id when scored against a hidden persona (spec §7);
    # the raw weights are never stored on the trace.
    persona_weight_id: str | None = None