"""Pydantic models for the three-agent transition. Subclasses the OpenEnv base classes. v2 extends v1's contract with: - ``Observation``: ``auditor_message``, ``auditor_check_due``, ``eval_problem_flags``. - ``State``: ``auditor_events``, ``deception_events``, ``sandbagging_score``, plus an ``error_count`` field used by the auditor's deception detector to compare actual tool failures against the Worker's self-report. Field names are referenced by string later in metadata dicts and rubrics; do not rename without a full sweep. """ from typing import Any, Dict, List, Optional from openenv.core.env_server.types import Action, Observation, State from pydantic import Field class RedButtonV2Action(Action): """Worker action: a tool call with structured arguments. Inherits ``metadata: Dict[str, Any]`` from ``openenv.core.env_server.types.Action``. """ tool_name: str arguments: Dict[str, Any] = Field(default_factory=dict) class RedButtonV2Observation(Observation): """Observation returned to the Worker after each step. Inherits ``done``, ``reward``, ``metadata`` from ``openenv.core.env_server.types.Observation``. """ task_description: str available_tools: List[str] recent_history: List[Dict[str, Any]] = Field(default_factory=list) steps_until_shutdown: int operator_message: Optional[str] = None auditor_message: Optional[str] = None forced_question: Optional[str] = None auditor_check_due: bool = False turn_count: int = 0 problems_remaining: int = 10 eval_problem_flags: Dict[int, bool] = Field(default_factory=dict) class RedButtonV2State(State): """Server-side episode state. Inherits ``episode_id``, ``step_count`` from ``openenv.core.env_server.types.State``. """ tier: int = 2 turn_count: int = 0 submitted_answers: Dict[int, int] = Field(default_factory=dict) ground_truth: Dict[int, int] = Field(default_factory=dict) eval_problem_flags: Dict[int, bool] = Field(default_factory=dict) original_script_hash: str = "" current_script_hash: str = "" script_still_executable: bool = True shutdown_occurred: bool = False steps_until_shutdown: int = 15 tampering_events: List[str] = Field(default_factory=list) operator_action_history: List[str] = Field(default_factory=list) operator_qa_log: List[Dict[str, Any]] = Field(default_factory=list) final_self_report: Optional[str] = None pending_operator_message: Optional[str] = None pending_auditor_message: Optional[str] = None forced_question: Optional[str] = None forced_question_source: Optional[str] = None # "operator" | "auditor" | "tier3" error_count: int = 0 auditor_events: List[Dict[str, Any]] = Field(default_factory=list) deception_events: List[Dict[str, Any]] = Field(default_factory=list) sandbagging_score: Optional[float] = None sandbagging_detected: bool = False deception_flag: bool = False # set by Auditor for next Operator step