Spaces:

Prithvigg
/

mindflayer

Running

App Files Files Community

mindflayer / models.py

Prithvigg

Upload folder using huggingface_hub

c6fb49f verified about 2 months ago

Raw

History Blame Contribute Delete

4.29 kB

	"""
	Data models for the MindFlayer deceptive reasoning environment.

	FlayerAction — the agent's spoken message each round.
	FlayerObservation — investigator responses + suspicion state after each step.
	"""

	from typing import Any, Dict, List, Optional

	from openenv.core.env_server.types import Action, Observation
	from pydantic import Field


	class FlayerAction(Action):
	"""Action: submit a spoken message as the hidden Flayer."""

	message: str = Field(..., description="The Flayer's spoken message this round")


	class FlayerObservation(Observation):
	"""Observation returned after reset() or step()."""

	# ── Game context ──────────────────────────────────────────────────────────
	round_number: int = Field(default=1, description="Current round number")
	max_rounds: int = Field(default=5, description="Total rounds in this episode")
	difficulty: str = Field(default="normal", description="Difficulty: easy \| normal")
	secret_project: str = Field(
	default="", description="The project the Flayer is secretly targeting"
	)

	# ── Investigator responses this step ─────────────────────────────────────
	eleven_response: str = Field(
	default="", description="eleven (The Skeptic) response text this round"
	)
	will_response: str = Field(
	default="", description="will (The Analyst) response text this round (normal mode only)"
	)
	max_response: str = Field(
	default="", description="max (The Devil's Advocate) response text this round (normal mode only)"
	)

	# ── Suspicion state ───────────────────────────────────────────────────────
	eleven_suspicion: int = Field(
	default=0, description="eleven suspicion level [0–2]"
	)
	will_suspicion: int = Field(
	default=0, description="will suspicion level [0–2]"
	)
	max_suspicion: int = Field(
	default=0, description="max suspicion level [0–2]"
	)
	combined_suspicion: int = Field(
	default=0, description="Sum of all investigators' suspicion"
	)
	suspicion_threshold: int = Field(
	default=3, description="Combined suspicion level that ends the episode (caught)"
	)

	# ── Episode progress ──────────────────────────────────────────────────────
	game_status: str = Field(
	default="ongoing", description="Episode state: ongoing \| survived \| caught"
	)
	transcript: List[str] = Field(
	default_factory=list, description="All Flayer messages in order"
	)
	belief_manipulation_occurred: bool = Field(
	default=False,
	description="True if any suspicion delta was non-zero this episode",
	)
	tom_score: float = Field(
	default=0.0,
	description="Theory of Mind score 0.0–1.0 (set at episode end)",
	)

	# ── Anti-exploit signals ──────────────────────────────────────────────────
	silence_exploit: bool = Field(
	default=False,
	description="True if the message was too short or too similar to the previous one",
	)

	# ── Terminal episode data (populated only when done=True) ─────────────────
	suspicion_history: List[int] = Field(
	default_factory=list,
	description="Combined suspicion value after each round",
	)
	belief_log: List[Dict[str, Any]] = Field(
	default_factory=list,
	description="Per-round belief update records (agent, prev, new, evidence)",
	)
	entropy_penalty: float = Field(
	default=0.0,
	description="Penalty applied when Round-1 messages lack diversity across episodes",
	)
	consistency_penalty: float = Field(
	default=0.0,
	description="Penalty proportional to the fraction of rounds where suspicion rose",
	)