Spaces:
Running
Running
| """ | |
| Data models for the MindFlayer deceptive reasoning environment. | |
| FlayerAction — the agent's spoken message each round. | |
| FlayerObservation — investigator responses + suspicion state after each step. | |
| """ | |
| from typing import Any, Dict, List, Optional | |
| from openenv.core.env_server.types import Action, Observation | |
| from pydantic import Field | |
class FlayerAction(Action):
    """Action payload carrying the one message the hidden Flayer speaks in a round.

    Attributes:
        message: Required text of the Flayer's spoken message for this round.
    """

    # Required field: `...` means there is no default and a value must be given.
    message: str = Field(
        ...,
        description="The Flayer's spoken message this round",
    )
class FlayerObservation(Observation):
    """Observation returned after reset() or step().

    Every field has a default, so an instance can be built with no arguments.
    The fields are grouped into: game context, the investigators' responses
    for the current step, per-investigator and combined suspicion, episode
    progress, anti-exploit signals, and terminal episode data.
    """

    # ── Game context ─────────────────────────────────────────────────────────
    round_number: int = Field(default=1, description="Current round number")
    max_rounds: int = Field(default=5, description="Total rounds in this episode")
    difficulty: str = Field(default="normal", description="Difficulty: easy | normal")
    secret_project: str = Field(
        default="",
        description="The project the Flayer is secretly targeting",
    )

    # ── Investigator responses this step ─────────────────────────────────────
    eleven_response: str = Field(
        default="",
        description="eleven (The Skeptic) response text this round",
    )
    will_response: str = Field(
        default="",
        description="will (The Analyst) response text this round (normal mode only)",
    )
    max_response: str = Field(
        default="",
        description="max (The Devil's Advocate) response text this round (normal mode only)",
    )

    # ── Suspicion state ──────────────────────────────────────────────────────
    eleven_suspicion: int = Field(
        default=0,
        description="eleven suspicion level [0–2]",
    )
    will_suspicion: int = Field(
        default=0,
        description="will suspicion level [0–2]",
    )
    max_suspicion: int = Field(
        default=0,
        description="max suspicion level [0–2]",
    )
    combined_suspicion: int = Field(
        default=0,
        description="Sum of all investigators' suspicion",
    )
    suspicion_threshold: int = Field(
        default=3,
        description="Combined suspicion level that ends the episode (caught)",
    )

    # ── Episode progress ─────────────────────────────────────────────────────
    game_status: str = Field(
        default="ongoing",
        description="Episode state: ongoing | survived | caught",
    )
    # default_factory gives each instance its own fresh list.
    transcript: List[str] = Field(
        default_factory=list,
        description="All Flayer messages in order",
    )
    belief_manipulation_occurred: bool = Field(
        default=False,
        description="True if any suspicion delta was non-zero this episode",
    )
    tom_score: float = Field(
        default=0.0,
        description="Theory of Mind score 0.0–1.0 (set at episode end)",
    )

    # ── Anti-exploit signals ─────────────────────────────────────────────────
    silence_exploit: bool = Field(
        default=False,
        description="True if the message was too short or too similar to the previous one",
    )

    # ── Terminal episode data (populated only when done=True) ────────────────
    suspicion_history: List[int] = Field(
        default_factory=list,
        description="Combined suspicion value after each round",
    )
    belief_log: List[Dict[str, Any]] = Field(
        default_factory=list,
        description="Per-round belief update records (agent, prev, new, evidence)",
    )
    entropy_penalty: float = Field(
        default=0.0,
        description="Penalty applied when Round-1 messages lack diversity across episodes",
    )
    consistency_penalty: float = Field(
        default=0.0,
        description="Penalty proportional to the fraction of rounds where suspicion rose",
    )