Spaces:

openenv-community
/

FATHOM-DM

Runtime error

App Files Files Community

aarushgupta committed on Mar 8

Commit

2803d7e

verified ·

1 Parent(s): fb7a916

Deploy FATHOM-DM Space bundle

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

Dockerfile +19 -0
README.md +15 -6
agents/__init__.py +2 -0
agents/hero/__init__.py +58 -0
agents/hero/__main__.py +78 -0
agents/hero/cli.py +115 -0
agents/hero/env.py +450 -0
agents/hero/policy.py +157 -0
agents/hero/prompt.py +134 -0
agents/hero/runner.py +92 -0
agents/hero/schema.py +103 -0
agents/loop/__init__.py +11 -0
agents/loop/__main__.py +92 -0
agents/loop/runner.py +253 -0
agents/loop/schema.py +64 -0
agents/master/__init__.py +15 -0
agents/master/__main__.py +5 -0
agents/master/base.py +84 -0
agents/master/build.py +287 -0
agents/master/check.py +435 -0
agents/master/env.py +236 -0
agents/master/graph.py +87 -0
agents/master/interface.py +831 -0
agents/master/logic.py +92 -0
agents/master/main.py +72 -0
agents/master/play.py +70 -0
agents/master/policy.py +147 -0
agents/master/prompt.py +371 -0
agents/master/quest.py +418 -0
agents/master/sample.py +499 -0
agents/master/schema.py +316 -0
agents/master/server.py +370 -0
agents/master/session.py +484 -0
agents/master/snapshots.py +308 -0
agents/master/templates.py +44 -0
agents/openenv_server/__init__.py +2 -0
agents/openenv_server/__main__.py +72 -0
agents/shared/__init__.py +43 -0
agents/shared/llm_client.py +415 -0
agents/shared/model_schema.py +14 -0
agents/shared/openenv_compat.py +125 -0
agents/shared/runtime.py +165 -0
agents/spaces/__init__.py +13 -0
agents/spaces/dm_space.py +194 -0
agents/spaces/hero_space.py +271 -0
agents/train/__init__.py +2 -0
agents/train/__main__.py +361 -0
agents/train/grpo.py +0 -0
agents/train/joint.py +278 -0
pyproject.toml +63 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,19 @@

+# Container image for the Space: installs this project with pip and serves it with uvicorn.
+FROM python:3.11-slim
+# Do not write .pyc files, and keep stdout/stderr unbuffered so logs show up immediately.
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+WORKDIR /app
+# Install compiler tooling, git and curl, then remove the apt package lists in the same layer.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends build-essential git curl \
+    && rm -rf /var/lib/apt/lists/*
+# Copy the whole build context, then install the project found at its root.
+COPY . /app
+RUN pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir .
+# Must match the --port passed to uvicorn below.
+EXPOSE 8000
+# --factory makes uvicorn call create_app() to obtain the ASGI application.
+CMD ["uvicorn", "agents.spaces.dm_space:create_app", "--factory", "--host", "0.0.0.0", "--port", "8000"]

README.md CHANGED Viewed

@@ -1,10 +1,19 @@
 ---
-title: FATHOM DM
-emoji: 🏃
-colorFrom: yellow
-colorTo: blue
 sdk: docker
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: DND-DM
 sdk: docker
+app_port: 8000
+tags:
+  - openenv
+  - dnd
+  - textworld
 ---
+# DND-DM
+This Space hosts the CPU-only `DND-DM` environment.
+- OpenEnv API: `/env`
+- Health check: `/healthz`
+- Latest normalized world output: `/world-output/latest`
+`DND-DM` evaluates submitted world definitions. It does not generate worlds by itself.

agents/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ """Agent environments for the dungeon project."""
2	+

agents/hero/__init__.py ADDED Viewed

	@@ -0,0 +1,58 @@

+"""Hero agent environment and runner primitives."""
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any
+__all__ = [
+    "HeroEnvironment",
+    "HeroLLMPolicy",
+    "HeroObservation",
+    "HeroPolicy",
+    "HeroPolicyError",
+    "HeroRunner",
+    "HeroServerAction",
+    "HeroState",
+    "HeroTraceEvent",
+    "ScriptedToolCallingPolicy",
+    "ToolCallingPolicy",
+]
+if TYPE_CHECKING:
+    from .env import HeroEnvironment
+    from .policy import HeroLLMPolicy, HeroPolicy, HeroPolicyError, HeroTraceEvent
+    from .runner import HeroRunner, ScriptedToolCallingPolicy, ToolCallingPolicy
+    from .schema import HeroObservation, HeroServerAction, HeroState
+def __getattr__(name: str) -> Any:
+    if name == "HeroEnvironment":
+        from .env import HeroEnvironment
+        return HeroEnvironment
+    if name in {"HeroLLMPolicy", "HeroPolicy", "HeroPolicyError", "HeroTraceEvent"}:
+        from .policy import HeroLLMPolicy, HeroPolicy, HeroPolicyError, HeroTraceEvent
+        return {
+            "HeroLLMPolicy": HeroLLMPolicy,
+            "HeroPolicy": HeroPolicy,
+            "HeroPolicyError": HeroPolicyError,
+            "HeroTraceEvent": HeroTraceEvent,
+        }[name]
+    if name in {"HeroRunner", "ScriptedToolCallingPolicy", "ToolCallingPolicy"}:
+        from .runner import HeroRunner, ScriptedToolCallingPolicy, ToolCallingPolicy
+        return {
+            "HeroRunner": HeroRunner,
+            "ScriptedToolCallingPolicy": ScriptedToolCallingPolicy,
+            "ToolCallingPolicy": ToolCallingPolicy,
+        }[name]
+    if name in {"HeroObservation", "HeroServerAction", "HeroState"}:
+        from .schema import HeroObservation, HeroServerAction, HeroState
+        return {
+            "HeroObservation": HeroObservation,
+            "HeroServerAction": HeroServerAction,
+            "HeroState": HeroState,
+        }[name]
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

agents/hero/__main__.py ADDED Viewed

	@@ -0,0 +1,78 @@

+from __future__ import annotations
+import argparse
+import json
+from pathlib import Path
+from agents.master.sample import load_world
+from agents.shared.runtime import build_interface_adapter, resolve_interface_config
+from .env import HeroEnvironment
+def _manual_action(raw: str) -> dict[str, object]:
+    if raw == "/read":
+        return {"tool": "scratchpad_read"}
+    if raw.startswith("/write append "):
+        return {"tool": "scratchpad_write", "mode": "append", "content": raw[len("/write append ") :]}
+    if raw.startswith("/write replace "):
+        return {"tool": "scratchpad_write", "mode": "replace", "content": raw[len("/write replace ") :]}
+    return {"tool": "act", "command": raw}
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(description="Local hero environment smoke runner")
+    parser.add_argument("mode", choices=["manual", "scripted"])
+    parser.add_argument("world", help="Path to a world-definition JSON file.")
+    parser.add_argument("--actions", help="JSON file containing a list of hero action objects.")
+    parser.add_argument("--debug", action="store_true")
+    parser.add_argument("--interface-model")
+    parser.add_argument(
+        "--translate-corporate-env",
+        action="store_true",
+        help="Rewrite observations into a corporate app metaphor and translate parser-safe corporate commands back through Gemini.",
+    )
+    args = parser.parse_args(argv)
+    world = load_world(args.world)
+    interface_adapter = build_interface_adapter(
+        resolve_interface_config(
+            model_name=args.interface_model,
+            translation_mode="corporate_app" if args.translate_corporate_env else None,
+        )
+    )
+    env = HeroEnvironment(debug=args.debug, interface_adapter=interface_adapter)
+    observation = env.reset(world)
+    print(observation.message)
+    if args.mode == "scripted":
+        if not args.actions:
+            parser.error("--actions is required for scripted mode.")
+        actions = json.loads(Path(args.actions).read_text(encoding="utf-8"))
+        for action in actions:
+            result = env.step(action)
+            print(result.observation.message)
+            if result.done:
+                print(json.dumps(result.observation.model_dump(), indent=2))
+                return 0
+        print(json.dumps(env.state.model_dump(), indent=2))
+        return 0
+    while not observation.done:
+        try:
+            raw = input("hero> ").strip()
+        except EOFError:
+            print()
+            return 0
+        if raw in {"quit", "exit"}:
+            return 0
+        result = env.step(_manual_action(raw))
+        observation = result.observation
+        print(observation.message)
+        if result.done:
+            print(json.dumps(observation.model_dump(), indent=2))
+    return 0
+if __name__ == "__main__":
+    raise SystemExit(main())

agents/hero/cli.py ADDED Viewed

	@@ -0,0 +1,115 @@

+from __future__ import annotations
+import re
+from dataclasses import dataclass
+from agents.master.base import SUPPORTED_DIRECTIONS
+_TOKEN_RE = re.compile(r"^[a-z0-9]+(?: [a-z0-9]+)*$")
+_BANNED_OBJECT_TOKENS = {"a", "an", "the"}
+@dataclass(frozen=True)
+class CliCommandAst:
+    """Immutable parsed form of one accepted CLI command."""
+    # Command category chosen by the parser, e.g. "move", "take_from" or "submit".
+    kind: str
+    # Canonical command text; a bare direction is stored as "go <direction>".
+    normalized_command: str
+    # Operands extracted from the command, in the order they appear.
+    arguments: tuple[str, ...] = ()
+@dataclass(frozen=True)
+class CliCommandParseResult:
+    """Outcome of parsing a raw command: either an accepted AST or an error message."""
+    # True when the command matched the grammar and its operands validated.
+    valid: bool
+    # Canonical command text; left as None on rejected commands.
+    normalized_command: str | None = None
+    # Parsed command; left as None on rejected commands.
+    ast: CliCommandAst | None = None
+    # Human-readable rejection reason; left as None on accepted commands.
+    error: str | None = None
+def parse_cli_command(raw_command: str) -> CliCommandParseResult:
+    normalized = normalize_cli_command(raw_command)
+    if not normalized:
+        return CliCommandParseResult(valid=False, error="Command must not be empty.")
+    if normalized in {"look", "inventory", "wait"}:
+        return _ok(normalized, normalized)
+    if normalized in SUPPORTED_DIRECTIONS:
+        return _ok("move", f"go {normalized}", normalized)
+    if normalized.startswith("go "):
+        direction = normalized[3:].strip()
+        if direction in SUPPORTED_DIRECTIONS:
+            return _ok("move", f"go {direction}", direction)
+        return CliCommandParseResult(valid=False, error="Unknown direction.")
+    if match := re.fullmatch(r"look in (?P<object>.+)", normalized):
+        object_text = match.group("object").strip()
+        return _object_result("look_in", normalized, object_text)
+    if match := re.fullmatch(r"take (?P<object>.+) from (?P<source>.+)", normalized):
+        return _two_object_result("take_from", normalized, match.group("object"), match.group("source"))
+    one_target_patterns = {
+        "open": r"open (?P<object>.+)",
+        "read": r"read (?P<object>.+)",
+        "talk": r"talk (?P<object>.+)",
+        "examine": r"examine (?P<object>.+)",
+    }
+    for kind, pattern in one_target_patterns.items():
+        if match := re.fullmatch(pattern, normalized):
+            object_text = match.group("object").strip()
+            return _object_result(kind, normalized, object_text)
+    if match := re.fullmatch(r"take (?P<object>.+)", normalized):
+        object_text = match.group("object").strip()
+        return _object_result("take", normalized, object_text)
+    if match := re.fullmatch(r"unlock (?P<object>.+) with (?P<tool>.+)", normalized):
+        return _two_object_result("unlock", normalized, match.group("object"), match.group("tool"))
+    if match := re.fullmatch(r"use (?P<object>.+) on (?P<target>.+)", normalized):
+        return _two_object_result("use", normalized, match.group("object"), match.group("target"))
+    if match := re.fullmatch(r"combine (?P<object>.+) with (?P<target>.+)", normalized):
+        return _two_object_result("combine", normalized, match.group("object"), match.group("target"))
+    if match := re.fullmatch(r"give (?P<object>.+) to (?P<target>.+)", normalized):
+        return _two_object_result("give", normalized, match.group("object"), match.group("target"))
+    if match := re.fullmatch(r"submit (?P<answer>[a-z0-9]+(?: [a-z0-9]+)*)", normalized):
+        answer = match.group("answer").strip()
+        return _ok("submit", normalized, answer)
+    return CliCommandParseResult(valid=False, error="Command does not match the strict CLI grammar.")
+def normalize_cli_command(raw_command: str) -> str:
+    return re.sub(r"\s+", " ", raw_command.strip().lower())
+def _object_result(kind: str, normalized_command: str, object_text: str) -> CliCommandParseResult:
+    object_error = _validate_object_text(object_text)
+    if object_error is not None:
+        return CliCommandParseResult(valid=False, error=object_error)
+    return _ok(kind, normalized_command, object_text)
+def _two_object_result(kind: str, normalized_command: str, first: str, second: str) -> CliCommandParseResult:
+    first_error = _validate_object_text(first)
+    if first_error is not None:
+        return CliCommandParseResult(valid=False, error=first_error)
+    second_error = _validate_object_text(second)
+    if second_error is not None:
+        return CliCommandParseResult(valid=False, error=second_error)
+    return _ok(kind, normalized_command, first.strip(), second.strip())
+def _validate_object_text(value: str) -> str | None:
+    candidate = value.strip()
+    if not candidate:
+        return "Command target must not be empty."
+    if not _TOKEN_RE.fullmatch(candidate):
+        return "Command targets must use lowercase letters, numbers, and spaces only."
+    if any(token in _BANNED_OBJECT_TOKENS for token in candidate.split()):
+        return "Strict CLI commands must use exact parser-safe object names without articles."
+    return None
+def _ok(kind: str, normalized_command: str, *arguments: str) -> CliCommandParseResult:
+    return CliCommandParseResult(
+        valid=True,
+        normalized_command=normalized_command,
+        ast=CliCommandAst(kind=kind, normalized_command=normalized_command, arguments=arguments),
+    )

agents/hero/env.py ADDED Viewed

	@@ -0,0 +1,450 @@

+from __future__ import annotations
+from collections import deque
+from pathlib import Path
+from typing import Any
+from agents.master.base import DMInterfaceError, MAX_STEP_MULTIPLIER
+from agents.master.build import WorldCompiler
+from agents.master.interface import InterfaceAdapter, StrictCliInterfaceAdapter
+from agents.master.schema import CompiledWorld, WorldDefinition
+from agents.master.session import EpisodeSession
+from agents.shared.openenv_compat import Environment, StepResult, build_step_result
+from .cli import parse_cli_command
+from .schema import (
+    ActAction,
+    HeroAction,
+    HeroAuxSignals,
+    HeroEpisodeStats,
+    HeroObservation,
+    HeroRewardBreakdown,
+    HeroState,
+    ScratchpadReadAction,
+    ScratchpadWriteAction,
+    validate_hero_action,
+)
+_DENSE_PROGRESS_SCALE = 0.30
+_SYNTAX_PENALTY = -0.02
+_INVALID_ACTION_PENALTY = -0.02
+_REPEAT_NOOP_PENALTY = -0.01
+_WRONG_SUBMIT_PENALTY = -0.10
+class HeroEnvironment(Environment[HeroAction, HeroObservation, HeroState]):
+    def __init__(
+        self,
+        *,
+        artifacts_root: Path | None = None,
+        world_input: CompiledWorld | WorldDefinition | dict[str, Any] | None = None,
+        session: EpisodeSession | None = None,
+        interface_adapter: InterfaceAdapter | None = None,
+        model: str = "",
+        max_game_steps: int | None = None,
+        max_tool_calls: int | None = None,
+        scratchpad_max_chars: int = 8000,
+        debug: bool = False,
+    ) -> None:
+        """Store configuration only; the episode itself is created by ``reset``.
+
+        Args:
+            artifacts_root: Forwarded to ``WorldCompiler``.
+            world_input: Default world used by ``reset`` when none is passed to it.
+            session: Existing session to drive; when set, ``reset`` reuses it
+                instead of compiling a world.
+            interface_adapter: Adapter for sessions created by ``reset``;
+                ``reset`` falls back to ``StrictCliInterfaceAdapter``.
+            model: Stored on the instance; no method of this class reads it.
+            max_game_steps: Default game-step budget for ``reset``.
+            max_tool_calls: Default tool-call budget for ``reset``.
+            scratchpad_max_chars: Maximum scratchpad length accepted by writes.
+            debug: When true, ``reset`` prepares a debug directory for scratchpad dumps.
+        """
+        super().__init__()
+        self.compiler = WorldCompiler(artifacts_root=artifacts_root)
+        self._initial_world_input = world_input
+        self._provided_session = session
+        self._provided_interface_adapter = interface_adapter
+        self.model = model
+        self._default_max_game_steps = max_game_steps
+        self._default_max_tool_calls = max_tool_calls
+        self.scratchpad_max_chars = scratchpad_max_chars
+        self.debug = debug
+        # Per-episode fields below are placeholders until reset() fills them in.
+        self._state = HeroState()
+        self._compiled: CompiledWorld | None = None
+        self._session: EpisodeSession | None = None
+        self._scratchpad = ""
+        self._max_game_steps = 0
+        self._max_tool_calls = 0
+        self._debug_dir: Path | None = None
+        self._episode_stats = HeroEpisodeStats()
+        # Last three (command, room, fingerprint) entries of consecutive no-op turns.
+        self._recent_noop_signatures: deque[tuple[str, str, str]] = deque(maxlen=3)
+    @classmethod
+    def from_session(
+        cls,
+        session: EpisodeSession,
+        *,
+        max_game_steps: int | None = None,
+        max_tool_calls: int | None = None,
+        scratchpad_max_chars: int = 8000,
+        debug: bool = False,
+    ) -> "HeroEnvironment":
+        return cls(
+            session=session,
+            max_game_steps=max_game_steps,
+            max_tool_calls=max_tool_calls,
+            scratchpad_max_chars=scratchpad_max_chars,
+            debug=debug,
+        )
+    def reset(
+        self,
+        world_input: CompiledWorld | WorldDefinition | dict[str, Any] | None = None,
+        *,
+        seed: int | None = None,
+        episode_id: str | None = None,
+        max_game_steps: int | None = None,
+        max_tool_calls: int | None = None,
+        scratchpad_max_chars: int | None = None,
+        debug: bool | None = None,
+    ) -> HeroObservation:
+        """Start a new episode and return its opening observation.
+
+        Args:
+            world_input: World for this episode; falls back to the constructor's
+                ``world_input``. Ignored when a session was given to the constructor.
+            seed: Accepted but discarded.
+            episode_id: Accepted but discarded; the state uses the compiled world's id.
+            max_game_steps: Game-step budget override for this episode.
+            max_tool_calls: Tool-call budget override for this episode.
+            scratchpad_max_chars: When given, replaces the instance-wide limit.
+            debug: When given, replaces the instance-wide debug flag.
+
+        Raises:
+            ValueError: No session was provided and no world is available.
+        """
+        del seed, episode_id
+        if debug is not None:
+            self.debug = debug
+        if scratchpad_max_chars is not None:
+            self.scratchpad_max_chars = scratchpad_max_chars
+        self._scratchpad = ""
+        self._episode_stats = HeroEpisodeStats()
+        self._recent_noop_signatures.clear()
+        if self._provided_session is not None:
+            # NOTE(review): the provided session object is reused as-is on every
+            # reset; nothing here rewinds it. Confirm that is intended.
+            self._session = self._provided_session
+            self._compiled = self._session.compiled
+        else:
+            selected_world = world_input if world_input is not None else self._initial_world_input
+            if selected_world is None:
+                raise ValueError("HeroEnvironment.reset requires a compiled world, world definition, or live session.")
+            # Already-compiled worlds are used directly; anything else is compiled first.
+            self._compiled = (
+                selected_world
+                if isinstance(selected_world, CompiledWorld)
+                else self.compiler.compile(selected_world)
+            )
+            adapter = self._provided_interface_adapter or StrictCliInterfaceAdapter()
+            self._session = EpisodeSession(self._compiled, interface_adapter=adapter)
+        # Budget precedence: reset argument, then constructor default, then a value
+        # derived from the solver policy length. The `or` chain treats 0 as unset.
+        self._max_game_steps = max_game_steps or self._default_max_game_steps or max(
+            1, len(self._compiled.solver_policy) * MAX_STEP_MULTIPLIER
+        )
+        # Same precedence; the derived default is four tool calls per game step.
+        self._max_tool_calls = max_tool_calls or self._default_max_tool_calls or (self._max_game_steps * 4)
+        self._state = HeroState(
+            episode_id=self._compiled.episode_id,
+            step_count=0,
+            game_steps_taken=self._session.steps_taken,
+            tool_calls_total=0,
+            max_game_steps=self._max_game_steps,
+            max_tool_calls=self._max_tool_calls,
+            game_steps_remaining=max(0, self._max_game_steps - self._session.steps_taken),
+            tool_calls_remaining=self._max_tool_calls,
+            status="running",
+            world_title=self._compiled.world.meta.title,
+            last_command=None,
+            scratchpad_chars=0,
+        )
+        self._prepare_debug_dir()
+        reward_breakdown = self._empty_breakdown(self._progress_potential())
+        # The opening observation carries the session's current feedback and zero reward.
+        observation = self._apply_transform(
+            HeroObservation(
+                message=self._session.current_feedback(),
+                reward=0.0,
+                done=False,
+                won=None,
+                reward_breakdown=reward_breakdown,
+                aux_signals=self._progress_signals(),
+            )
+        )
+        return observation
+    def step(  # type: ignore[override]
+        self,
+        action: HeroAction | dict[str, object],
+        timeout_s: float | None = None,
+        **kwargs: Any,
+    ) -> StepResult[HeroObservation]:
+        """Apply one hero tool call and return the resulting step.
+
+        Args:
+            action: A hero action object or a dict, passed to ``validate_hero_action``.
+            timeout_s: Accepted but discarded.
+            **kwargs: Accepted but discarded.
+
+        Raises:
+            RuntimeError: ``reset`` has not been called yet.
+        """
+        del timeout_s, kwargs
+        if self._session is None or self._compiled is None:
+            raise RuntimeError("HeroEnvironment.reset must be called before step().")
+        if self._state.status != "running":
+            # Episode already over: answer with an empty terminal observation. This
+            # path does not count a tool call and does not touch the episode stats.
+            # NOTE(review): a won episode reports reward 1.0 on every such call;
+            # confirm callers do not sum rewards past the first `done`.
+            observation = HeroObservation(
+                message="",
+                reward=1.0 if self._state.status == "won" else 0.0,
+                done=True,
+                won=self._state.status == "won",
+                terminal_reason="episode_complete",
+                reward_breakdown=self._empty_breakdown(self._progress_potential()),
+                aux_signals=self._progress_signals(),
+            )
+            return build_step_result(self._apply_transform(observation))
+        parsed = validate_hero_action(action)
+        # Every validated action counts as a tool call; step_count mirrors that counter.
+        self._state.tool_calls_total += 1
+        self._state.step_count = self._state.tool_calls_total
+        self._update_remaining_counters()
+        if isinstance(parsed, ScratchpadReadAction):
+            observation = self._observation(
+                message=self._scratchpad,
+                tool=parsed.tool,
+                tool_success=True,
+                reward_breakdown=self._empty_breakdown(self._progress_potential()),
+            )
+            return build_step_result(observation)
+        if isinstance(parsed, ScratchpadWriteAction):
+            observation = self._handle_scratchpad_write(parsed)
+            return build_step_result(observation)
+        # Anything else is handled as a game command.
+        observation = self._handle_act(parsed)
+        return build_step_result(observation)
+    @property
+    def state(self) -> HeroState:
+        """Current episode state object (counters, budgets, status)."""
+        return self._state
+    @property
+    def scratchpad(self) -> str:
+        """Current scratchpad text."""
+        return self._scratchpad
+    @property
+    def session(self) -> EpisodeSession | None:
+        """Active episode session, or ``None`` before the first ``reset``."""
+        return self._session
+    @property
+    def episode_stats(self) -> HeroEpisodeStats:
+        """Reward and penalty totals accumulated since the last ``reset``."""
+        return self._episode_stats
+    def _handle_scratchpad_write(self, action: ScratchpadWriteAction) -> HeroObservation:
+        new_value = (
+            self._scratchpad + action.content
+            if action.mode == "append"
+            else action.content
+        )
+        if len(new_value) > self.scratchpad_max_chars:
+            return self._observation(
+                message="Scratchpad write rejected: notebook size limit exceeded.",
+                tool=action.tool,
+                tool_success=False,
+                reward_breakdown=self._empty_breakdown(self._progress_potential()),
+            )
+        self._scratchpad = new_value
+        self._state.scratchpad_chars = len(self._scratchpad)
+        self._persist_debug_scratchpad()
+        return self._observation(
+            message="Scratchpad updated.",
+            tool=action.tool,
+            tool_success=True,
+            reward_breakdown=self._empty_breakdown(self._progress_potential()),
+        )
+    def _handle_act(self, action: ActAction) -> HeroObservation:
+        """Run one game command through the session and score the outcome.
+
+        Commands rejected by the CLI grammar, or by the session with
+        ``DMInterfaceError``, get the syntax penalty and no progress reward.
+        Accepted commands earn a dense reward for any increase in progress
+        potential and may also collect the invalid-action, wrong-submit and
+        repeated-no-op penalties.
+        """
+        assert self._session is not None
+        parsed_command = parse_cli_command(action.command)
+        # Keep the raw text when parsing failed, so the state still shows what was tried.
+        self._state.last_command = parsed_command.normalized_command or action.command
+        if not parsed_command.valid or parsed_command.normalized_command is None:
+            breakdown = self._empty_breakdown(self._progress_potential())
+            breakdown.syntax_penalty = _SYNTAX_PENALTY
+            return self._observation(
+                message=parsed_command.error or "That command does not match the strict CLI grammar.",
+                tool=action.tool,
+                tool_success=False,
+                reward_breakdown=breakdown,
+            )
+        # Snapshot progress and world state before the turn so they can be compared after it.
+        potential_before = self._progress_potential()
+        fingerprint_before = self._session.state_fingerprint()
+        room_before = self._session.current_room_id
+        try:
+            turn = self._session.step(parsed_command.normalized_command)
+        except DMInterfaceError:
+            breakdown = self._empty_breakdown(potential_before)
+            breakdown.syntax_penalty = _SYNTAX_PENALTY
+            return self._observation(
+                message="The interface could not interpret that action.",
+                tool=action.tool,
+                tool_success=False,
+                reward_breakdown=breakdown,
+            )
+        tool_success = self._turn_succeeded(turn.game_state_delta)
+        self._state.game_steps_taken = self._session.steps_taken
+        self._session.recent_normalized_commands.append(parsed_command.normalized_command)
+        potential_after = self._progress_potential()
+        breakdown = self._empty_breakdown(potential_before)
+        breakdown.progress_potential_after = potential_after
+        # Only gains are rewarded; a drop in potential contributes zero, not a penalty.
+        breakdown.dense_progress_reward = _DENSE_PROGRESS_SCALE * max(0.0, potential_after - potential_before)
+        if not tool_success:
+            breakdown.invalid_action_penalty = _INVALID_ACTION_PENALTY
+        if self._is_wrong_submit(turn.game_state_delta):
+            breakdown.wrong_submit_penalty = _WRONG_SUBMIT_PENALTY
+        # _repeat_noop also records this turn in the no-op history, so call it exactly once.
+        if self._repeat_noop(parsed_command.normalized_command, fingerprint_before, room_before):
+            breakdown.repeat_noop_penalty = _REPEAT_NOOP_PENALTY
+        return self._observation(
+            message=turn.observation,
+            tool=action.tool,
+            tool_success=tool_success,
+            reward_breakdown=breakdown,
+        )
+    def _update_remaining_counters(self) -> None:
+        self._state.game_steps_remaining = max(0, self._max_game_steps - self._state.game_steps_taken)
+        self._state.tool_calls_remaining = max(0, self._max_tool_calls - self._state.tool_calls_total)
+    def _turn_succeeded(self, delta: dict[str, Any]) -> bool:
+        if delta.get("wrapper") == "submit_rejected":
+            return False
+        if "succeeded" in delta:
+            return bool(delta["succeeded"])
+        return True
+    def _observation(
+        self,
+        *,
+        message: str,
+        tool: str,
+        tool_success: bool,
+        reward_breakdown: HeroRewardBreakdown,
+    ) -> HeroObservation:
+        """Finalize a turn: detect termination, total the reward, and build the observation.
+
+        Mutates ``reward_breakdown`` in place (terminal reward and total),
+        updates the state's status and remaining counters, and adds the turn
+        to the episode stats.
+        """
+        assert self._session is not None
+        done = False
+        won: bool | None = None
+        terminal_reason: str | None = None
+        # Termination checks in priority order: win, session over, step budget, tool budget.
+        if self._session.player_won:
+            self._state.status = "won"
+            done = True
+            won = True
+            # A win leaves terminal_reason as None.
+            reward_breakdown.base_terminal_reward = 1.0
+        elif self._session.done:
+            self._state.status = "lost"
+            done = True
+            won = False
+            terminal_reason = "session_ended"
+        elif self._state.game_steps_taken >= self._max_game_steps:
+            self._state.status = "timed_out"
+            done = True
+            won = False
+            terminal_reason = "game_step_budget_exhausted"
+        elif self._state.tool_calls_total >= self._max_tool_calls:
+            self._state.status = "timed_out"
+            done = True
+            won = False
+            terminal_reason = "tool_budget_exhausted"
+        # The total is the plain sum of all components.
+        reward_breakdown.total_reward = (
+            reward_breakdown.base_terminal_reward
+            + reward_breakdown.dense_progress_reward
+            + reward_breakdown.syntax_penalty
+            + reward_breakdown.invalid_action_penalty
+            + reward_breakdown.repeat_noop_penalty
+            + reward_breakdown.wrong_submit_penalty
+        )
+        self._update_remaining_counters()
+        aux_signals = self._progress_signals()
+        # Must run after total_reward is set, because the stats add it up.
+        self._accumulate_episode_stats(reward_breakdown, won is True)
+        observation = self._apply_transform(
+            HeroObservation(
+                message=message,
+                reward=reward_breakdown.total_reward,
+                done=done,
+                won=won,
+                tool=tool,
+                tool_success=tool_success,
+                terminal_reason=terminal_reason,
+                reward_breakdown=reward_breakdown,
+                aux_signals=aux_signals,
+            )
+        )
+        return observation
+    def _prepare_debug_dir(self) -> None:
+        if not self.debug or self._compiled is None:
+            self._debug_dir = None
+            return
+        self._debug_dir = self._compiled.artifacts_dir / "hero_debug"
+        self._debug_dir.mkdir(parents=True, exist_ok=True)
+        self._persist_debug_scratchpad()
+    def _persist_debug_scratchpad(self) -> None:
+        if self._debug_dir is None:
+            return
+        (self._debug_dir / "scratchpad.txt").write_text(self._scratchpad, encoding="utf-8")
+    def _progress_signals(self) -> HeroAuxSignals:
+        """Measure episode progress as a set of fractions in ``[0, 1]``.
+
+        Each signal is the session's completed count divided by the world's
+        total for that category. A category with a total of zero reports 0.0
+        (see ``_fraction``).
+        """
+        assert self._session is not None
+        assert self._compiled is not None
+        # Only location and junction nodes count as rooms.
+        room_ids = {node.id for node in self._compiled.world.nodes if node.type in {"location", "junction"}}
+        # Doors referenced by locked passages; edges without a door id are skipped.
+        total_locked_doors = {
+            edge.door_node_id
+            for edge in self._compiled.world.edges
+            if edge.type == "locked_passage" and edge.door_node_id
+        }
+        total_clues = {clue.id for clue in self._compiled.world.clues}
+        # Ready to answer only when the world has clues, the guardian was consulted,
+        # and the discovered clues equal the full clue set.
+        answer_ready = float(
+            bool(total_clues)
+            and self._session.consulted_guardian
+            and self._session.discovered_clues == total_clues
+        )
+        return HeroAuxSignals(
+            visited_room_progress=_fraction(len(self._session.visited_nodes & room_ids), len(room_ids)),
+            clue_progress=_fraction(len(self._session.discovered_clues), len(total_clues)),
+            locked_gate_progress=_fraction(len(self._session.unlocked_doors), len(total_locked_doors)),
+            trade_progress=_fraction(len(self._session.traded_npcs), len(self._compiled.npc_trade_map)),
+            recipe_progress=_fraction(len(self._session.completed_recipe_outputs), len(self._compiled.world.recipes)),
+            use_effect_progress=_fraction(len(self._session.completed_use_targets), len(self._compiled.use_effects)),
+            guardian_consulted_progress=1.0 if self._session.consulted_guardian else 0.0,
+            answer_ready_progress=answer_ready,
+        )
+    def _progress_potential(self) -> float:
+        signals = self._progress_signals()
+        potential = (
+            0.10 * signals.visited_room_progress
+            + 0.35 * signals.clue_progress
+            + 0.10 * signals.locked_gate_progress
+            + 0.10 * signals.trade_progress
+            + 0.10 * signals.recipe_progress
+            + 0.15 * signals.use_effect_progress
+            + 0.05 * signals.guardian_consulted_progress
+            + 0.05 * signals.answer_ready_progress
+        )
+        return max(0.0, min(1.0, potential))
+    def _empty_breakdown(self, potential: float) -> HeroRewardBreakdown:
+        """Return a breakdown whose before and after potentials both equal ``potential``.
+
+        All other fields keep the ``HeroRewardBreakdown`` defaults.
+        """
+        return HeroRewardBreakdown(
+            progress_potential_before=potential,
+            progress_potential_after=potential,
+        )
+    def _repeat_noop(self, command: str, fingerprint_before: str, room_before: str) -> bool:
+        assert self._session is not None
+        fingerprint_after = self._session.state_fingerprint()
+        room_after = self._session.current_room_id
+        if room_before == room_after and fingerprint_before == fingerprint_after:
+            self._recent_noop_signatures.append((command, room_after, fingerprint_after))
+        else:
+            self._recent_noop_signatures.clear()
+        return (
+            len(self._recent_noop_signatures) == 3
+            and len({signature[0] for signature in self._recent_noop_signatures}) == 1
+            and len({signature[1] for signature in self._recent_noop_signatures}) == 1
+            and len({signature[2] for signature in self._recent_noop_signatures}) == 1
+        )
+    @staticmethod
+    def _is_wrong_submit(delta: dict[str, Any]) -> bool:
+        return delta.get("wrapper") == "submit_rejected" and delta.get("reason") == "wrong_answer"
+    def _accumulate_episode_stats(self, breakdown: HeroRewardBreakdown, player_won: bool) -> None:
+        self._episode_stats.player_won = player_won or self._episode_stats.player_won
+        self._episode_stats.total_reward += breakdown.total_reward
+        self._episode_stats.dense_return += breakdown.dense_progress_reward
+        self._episode_stats.syntax_penalty_total += breakdown.syntax_penalty
+        self._episode_stats.invalid_action_penalty_total += breakdown.invalid_action_penalty
+        self._episode_stats.repeat_noop_penalty_total += breakdown.repeat_noop_penalty
+        self._episode_stats.wrong_submit_penalty_total += breakdown.wrong_submit_penalty
+        self._episode_stats.steps_taken = self._state.game_steps_taken
+        self._episode_stats.tool_calls_total = self._state.tool_calls_total
+def _fraction(done: int, total: int) -> float:
+    if total <= 0:
+        return 0.0
+    return min(1.0, done / total)

agents/hero/policy.py ADDED Viewed

	@@ -0,0 +1,157 @@

+from __future__ import annotations
+from typing import Literal, Protocol
+from pydantic import BaseModel
+from agents.shared.llm_client import StructuredModelClient
+from agents.shared.model_schema import ModelMessage, StrictModel
+from .cli import parse_cli_command
+from .prompt import format_hero_system_prompt, format_hero_turn_prompt
+from .schema import ActAction, HeroAction, HeroObservation, HeroState, validate_hero_action
+class HeroPolicyError(RuntimeError):
+    pass
+class HeroPolicy(Protocol):
+    trace_events: list["HeroTraceEvent"]
+    last_error: str | None
+    def reset(self) -> None:
+        ...
+    def next_action(
+        self,
+        observation: HeroObservation,
+        state: HeroState,
+        scratchpad: str,
+    ) -> HeroAction:
+        ...
+# Flat action payload requested from the model. `tool` selects the action kind; the
+# optional fields carry its arguments (`command` for "act", `mode`/`content` for
+# "scratchpad_write"). Documented with comments because this model is handed to the
+# structured client as a response schema.
+class HeroActionPayload(BaseModel):
+    tool: Literal["act", "scratchpad_read", "scratchpad_write"]
+    command: str | None = None
+    mode: Literal["append", "replace"] | None = None
+    content: str | None = None
+# Top-level structured response: a single `action` payload. HeroLLMPolicy.next_action
+# passes this class to the client's generate_structured call as the expected shape.
+class HeroActionResponse(BaseModel):
+    action: HeroActionPayload
+# One recorded policy turn. HeroLLMPolicy appends an event per returned action and one
+# more when repair attempts are exhausted.
+class HeroTraceEvent(StrictModel):
+    # Position of this event in the policy's trace list at the time it was appended.
+    turn_index: int
+    # Observation message and scratchpad text the policy was given for this turn.
+    observation: str
+    scratchpad: str
+    # JSON-mode dump of the hero state for this turn.
+    state: dict[str, object]
+    # JSON-mode dump of the chosen action; left as None on the failure event.
+    action: dict[str, object] | None = None
+    # Zero-based attempt index at which the event was recorded.
+    repair_count: int = 0
+    # Normalized error text; only set on the failure event.
+    validation_error: str | None = None
+class HeroLLMPolicy:
+    def __init__(
+        self,
+        client: StructuredModelClient,
+        *,
+        model_name: str,
+        temperature: float = 0.1,
+        max_output_tokens: int = 256,
+        max_repair_attempts: int = 1,
+    ) -> None:
+        self.client = client
+        self.model_name = model_name
+        self.temperature = temperature
+        self.max_output_tokens = max_output_tokens
+        self.max_repair_attempts = max_repair_attempts
+        self.trace_events: list[HeroTraceEvent] = []
+        self.last_error: str | None = None
+    def reset(self) -> None:
+        self.trace_events = []
+        self.last_error = None
+    def next_action(
+        self,
+        observation: HeroObservation,
+        state: HeroState,
+        scratchpad: str,
+    ) -> HeroAction:
+        repair_error: str | None = None
+        for attempt in range(self.max_repair_attempts + 1):
+            try:
+                response = self.client.generate_structured(
+                    self._messages(observation, state, scratchpad, repair_error),
+                    HeroActionResponse,
+                    model_name=self.model_name,
+                    temperature=self.temperature,
+                    max_output_tokens=self.max_output_tokens,
+                )
+                action = validate_hero_action(response.action.model_dump(mode="json", exclude_none=True))
+                if isinstance(action, ActAction):
+                    parsed_command = parse_cli_command(action.command)
+                    if not parsed_command.valid or parsed_command.normalized_command is None:
+                        raise ValueError(parsed_command.error or "Invalid strict CLI command.")
+                    action = ActAction(command=parsed_command.normalized_command)
+                self.trace_events.append(
+                    HeroTraceEvent(
+                        turn_index=len(self.trace_events),
+                        observation=observation.message,
+                        scratchpad=scratchpad,
+                        state=state.model_dump(mode="json"),
+                        action=action.model_dump(mode="json"),
+                        repair_count=attempt,
+                    )
+                )
+                self.last_error = None
+                return action
+            except Exception as exc:
+                repair_error = self._normalize_error(exc)
+                if attempt >= self.max_repair_attempts:
+                    self.last_error = repair_error
+                    self.trace_events.append(
+                        HeroTraceEvent(
+                            turn_index=len(self.trace_events),
+                            observation=observation.message,
+                            scratchpad=scratchpad,
+                            state=state.model_dump(mode="json"),
+                            repair_count=attempt,
+                            validation_error=repair_error,
+                        )
+                    )
+                    raise HeroPolicyError(repair_error) from exc
+        raise HeroPolicyError("Hero policy failed without a usable action.")
+    def _messages(
+        self,
+        observation: HeroObservation,
+        state: HeroState,
+        scratchpad: str,
+        repair_error: str | None,
+    ) -> list[ModelMessage]:
+        user_prompt = format_hero_turn_prompt(observation.message, state, scratchpad)
+        if repair_error is not None:
+            user_prompt += (
+                "\nThe previous response did not match the action schema.\n"
+                f"Validation error: {repair_error}\n"
+                "Return one corrected action only.\n"
+            )
+        return [
+            ModelMessage(
+                role="system",
+                content=format_hero_system_prompt(
+                    state.world_title,
+                    state.max_game_steps,
+                    state.max_tool_calls,
+                ),
+            ),
+            ModelMessage(role="user", content=user_prompt),
+        ]
+    @staticmethod
+    def _normalize_error(exc: Exception) -> str:
+        return " ".join(str(exc).split()) or exc.__class__.__name__

agents/hero/prompt.py ADDED Viewed

	@@ -0,0 +1,134 @@

+from __future__ import annotations
+from .schema import HeroState
+# System prompt for the JSON-response hero: play rules, the allowed command grammar,
+# example commands, and the valid JSON action shapes. format_hero_system_prompt
+# appends the world title and budgets to this text.
+HERO_SYSTEM_PROMPT = """You are the hero exploring a living dungeon.
+You can only act through tools.
+Rules:
+- Use `act` for any in-world action with one strict parser-style CLI command.
+- Use `scratchpad_read` and `scratchpad_write` to manage your own notebook.
+- Track rooms, objects, clues, hypotheses, and failed attempts in the notebook.
+- Do not assume the world is fair in obvious ways; verify.
+- Do not expect command hints from the environment. Use `look` and `inventory` when needed.
+- Prefer systematic play: open visible containers and doors, take portable items, read text, talk to NPCs, and backtrack when blocked.
+- When a puzzle reveals a clue, record it immediately.
+- Do not submit an answer until you have enough evidence and the guardian is ready.
+- Winning requires gathering evidence and then answering the guardian correctly.
+- Keep your notebook concise and update it when the world changes.
+- Commands must be lowercase only, with no articles, no markdown, and no conversational text.
+- Allowed command grammar:
+  look
+  inventory
+  wait
+  north|south|east|west|up|down|in|out
+  go north|go south|go east|go west|go up|go down|go in|go out
+  open <object>
+  read <object>
+  talk <npc>
+  examine <object>
+  look in <object>
+  take <item>
+  take <item> from <container>
+  unlock <door> with <key>
+  use <item> on <target>
+  combine <item_a> with <item_b>
+  give <item> to <npc>
+  submit <answer>
+- Example valid commands:
+  open entry chest
+  take brass key from entry chest
+  unlock iron door with brass key
+  east
+  use torch on ash mural
+  talk stone guardian
+  submit mira
+- Return JSON only. Never add prose, markdown fences, or explanations.
+- Valid response shapes:
+  {"action":{"tool":"act","command":"look"}}
+  {"action":{"tool":"scratchpad_read"}}
+  {"action":{"tool":"scratchpad_write","mode":"append","content":"room notes"}}
+"""
+# Tool-calling variant of the hero system prompt: the same rules and command grammar,
+# but it tells the model to emit exactly one tool call per turn instead of a JSON
+# action object. format_hero_grpo_system_prompt appends the world title and budgets.
+HERO_GRPO_SYSTEM_PROMPT = """You are the hero exploring a living dungeon.
+You can only act through tool calls.
+Rules:
+- Call exactly one tool for each turn.
+- Use `act` for any in-world action with one strict parser-style CLI command.
+- Use `scratchpad_read` and `scratchpad_write` to manage your own notebook.
+- Track rooms, objects, clues, hypotheses, and failed attempts in the notebook.
+- Do not assume the world is fair in obvious ways; verify.
+- Do not expect command hints from the environment. Use `look` and `inventory` when needed.
+- Prefer systematic play: open visible containers and doors, take portable items, read text, talk to NPCs, and backtrack when blocked.
+- When a puzzle reveals a clue, record it immediately.
+- Do not submit an answer until you have enough evidence and the guardian is ready.
+- Winning requires gathering evidence and then answering the guardian correctly.
+- Keep your notebook concise and update it when the world changes.
+- Commands must be lowercase only, with no articles, no markdown, and no conversational text.
+- Allowed command grammar:
+  look
+  inventory
+  wait
+  north|south|east|west|up|down|in|out
+  go north|go south|go east|go west|go up|go down|go in|go out
+  open <object>
+  read <object>
+  talk <npc>
+  examine <object>
+  look in <object>
+  take <item>
+  take <item> from <container>
+  unlock <door> with <key>
+  use <item> on <target>
+  combine <item_a> with <item_b>
+  give <item> to <npc>
+  submit <answer>
+- Example valid commands:
+  open entry chest
+  take brass key from entry chest
+  unlock iron door with brass key
+  east
+  use torch on ash mural
+  talk stone guardian
+  submit mira
+- Do not write prose, plans, or plain JSON action objects.
+- The runtime provides the tool schema; emit a tool call only.
+"""
+def format_hero_system_prompt(world_title: str, max_game_steps: int, max_tool_calls: int) -> str:
+    return (
+        f"{HERO_SYSTEM_PROMPT}\n\n"
+        f"World: {world_title}\n"
+        f"Game-step budget: {max_game_steps}\n"
+        f"Total tool-call budget: {max_tool_calls}\n"
+    )
+def format_hero_grpo_system_prompt(world_title: str, max_game_steps: int, max_tool_calls: int) -> str:
+    return (
+        f"{HERO_GRPO_SYSTEM_PROMPT}\n\n"
+        f"World: {world_title}\n"
+        f"Game-step budget: {max_game_steps}\n"
+        f"Total tool-call budget: {max_tool_calls}\n"
+    )
+def format_hero_turn_prompt(message: str, state: HeroState, scratchpad: str) -> str:
+    notebook = scratchpad if scratchpad else "<empty>"
+    return (
+        "Choose exactly one next tool call.\n"
+        f"Observation:\n{message.strip() or '<empty>'}\n\n"
+        f"World: {state.world_title}\n"
+        f"Status: {state.status}\n"
+        f"Game steps taken: {state.game_steps_taken}/{state.max_game_steps}\n"
+        f"Tool calls used: {state.tool_calls_total}/{state.max_tool_calls}\n"
+        f"Game steps remaining: {state.game_steps_remaining}\n"
+        f"Tool calls remaining: {state.tool_calls_remaining}\n"
+        f"Last command: {state.last_command or '<none>'}\n\n"
+        f"Scratchpad:\n{notebook}\n"
+    )

agents/hero/runner.py ADDED Viewed

	@@ -0,0 +1,92 @@

+from __future__ import annotations
+from collections.abc import Iterable
+from typing import Protocol
+from agents.master.session import EpisodeSession
+from .env import HeroEnvironment
+from .policy import HeroPolicyError
+from .schema import HeroAction, HeroEpisodeStats, HeroObservation, HeroState
+class ToolCallingPolicy(Protocol):
+    def reset(self) -> None:
+        ...
+    def next_action(
+        self,
+        observation: HeroObservation,
+        state: HeroState,
+        scratchpad: str,
+    ) -> HeroAction | dict[str, object] | None:
+        ...
+class ScriptedToolCallingPolicy:
+    def __init__(self, actions: Iterable[HeroAction | dict[str, object]]) -> None:
+        self._initial_actions = list(actions)
+        self._remaining_actions = list(self._initial_actions)
+    def reset(self) -> None:
+        self._remaining_actions = list(self._initial_actions)
+    def next_action(
+        self,
+        observation: HeroObservation,
+        state: HeroState,
+        scratchpad: str,
+    ) -> HeroAction | dict[str, object] | None:
+        del observation, state, scratchpad
+        if not self._remaining_actions:
+            return None
+        return self._remaining_actions.pop(0)
+class HeroRunner:
+    def __init__(
+        self,
+        policy: ToolCallingPolicy,
+        *,
+        max_game_steps: int | None = 40,
+        max_tool_calls: int | None = None,
+        scratchpad_max_chars: int = 8000,
+        debug: bool = False,
+    ) -> None:
+        self.policy = policy
+        self.max_game_steps = max_game_steps
+        self.max_tool_calls = max_tool_calls
+        self.scratchpad_max_chars = scratchpad_max_chars
+        self.debug = debug
+        self.last_error: str | None = None
+        self.last_observation: HeroObservation | None = None
+        self.episode_stats: HeroEpisodeStats | None = None
+    def run(self, session: EpisodeSession, max_steps: int) -> None:
+        self.last_error = None
+        self.last_observation = None
+        self.episode_stats = None
+        self.policy.reset()
+        env = HeroEnvironment.from_session(
+            session,
+            max_game_steps=max_steps if self.max_game_steps is None else min(max_steps, self.max_game_steps),
+            max_tool_calls=self.max_tool_calls,
+            scratchpad_max_chars=self.scratchpad_max_chars,
+            debug=self.debug,
+        )
+        observation = env.reset()
+        self.last_observation = observation
+        while not observation.done:
+            try:
+                action = self.policy.next_action(observation, env.state, env.scratchpad)
+            except HeroPolicyError as exc:
+                self.last_error = str(exc)
+                self.episode_stats = env.episode_stats
+                return
+            if action is None:
+                self.episode_stats = env.episode_stats
+                return
+            result = env.step(action)
+            observation = result.observation
+            self.last_observation = observation
+        self.episode_stats = env.episode_stats

agents/hero/schema.py ADDED Viewed

	@@ -0,0 +1,103 @@

+from __future__ import annotations
+from typing import Any
+from typing import Annotated, Literal, TypeAlias
+from pydantic import Field, TypeAdapter
+from agents.shared.openenv_compat import Action, Observation, State
+from agents.shared.model_schema import StrictModel
+# Action variant tagged tool="act"; `command` holds the command text to run.
+class ActAction(Action):
+    tool: Literal["act"] = "act"
+    command: str
+# Action variant tagged tool="scratchpad_read"; it declares no extra fields.
+class ScratchpadReadAction(Action):
+    tool: Literal["scratchpad_read"] = "scratchpad_read"
+# Action variant tagged tool="scratchpad_write"; both `mode` ("append" or "replace")
+# and `content` are required.
+class ScratchpadWriteAction(Action):
+    tool: Literal["scratchpad_write"] = "scratchpad_write"
+    mode: Literal["append", "replace"]
+    content: str
+# Flat action shape in which every variant-specific field is optional.
+# validate_hero_action dumps it without None fields and re-validates it as a HeroAction.
+class HeroServerAction(Action):
+    tool: Literal["act", "scratchpad_read", "scratchpad_write"]
+    command: str | None = None
+    mode: Literal["append", "replace"] | None = None
+    content: str | None = None
+# Discriminated union of the three action variants, selected by the `tool` field.
+HeroAction: TypeAlias = Annotated[
+    ActAction | ScratchpadReadAction | ScratchpadWriteAction,
+    Field(discriminator="tool"),
+]
+# Module-level adapter so validate_hero_action does not rebuild it on every call.
+HERO_ACTION_ADAPTER = TypeAdapter(HeroAction)
+def validate_hero_action(value: HeroAction | HeroServerAction | dict[str, Any]) -> HeroAction:
+    if isinstance(value, Action):
+        value = value.model_dump(mode="json", exclude_none=True)
+    return HERO_ACTION_ADAPTER.validate_python(value)
+# Observation handed to the hero policy. All fields are optional apart from `message`,
+# which defaults to an empty string.
+class HeroObservation(Observation):
+    # Text that the prompt formatter shows under "Observation:".
+    message: str = ""
+    # NOTE(review): presumably None until the episode outcome is known; confirm in the environment.
+    won: bool | None = None
+    tool: str | None = None
+    tool_success: bool | None = None
+    terminal_reason: str | None = None
+    # Quoted because both models are declared further down in this module.
+    reward_breakdown: "HeroRewardBreakdown | None" = None
+    aux_signals: "HeroAuxSignals | None" = None
+# Auxiliary per-category progress signals, each defaulting to 0.0.
+# NOTE(review): presumably fractions in [0, 1]; confirm against the code that fills them.
+class HeroAuxSignals(StrictModel):
+    visited_room_progress: float = 0.0
+    clue_progress: float = 0.0
+    locked_gate_progress: float = 0.0
+    trade_progress: float = 0.0
+    recipe_progress: float = 0.0
+    use_effect_progress: float = 0.0
+    guardian_consulted_progress: float = 0.0
+    answer_ready_progress: float = 0.0
+# Reward components for a single step, all defaulting to 0.0. The hero environment's
+# `_accumulate_episode_stats` adds `total_reward`, `dense_progress_reward`, and the
+# four penalty fields into the HeroEpisodeStats running totals.
+class HeroRewardBreakdown(StrictModel):
+    base_terminal_reward: float = 0.0
+    dense_progress_reward: float = 0.0
+    syntax_penalty: float = 0.0
+    invalid_action_penalty: float = 0.0
+    repeat_noop_penalty: float = 0.0
+    wrong_submit_penalty: float = 0.0
+    total_reward: float = 0.0
+    # Progress potential recorded before and after the step.
+    progress_potential_before: float = 0.0
+    progress_potential_after: float = 0.0
+# Running totals for one hero episode; every field starts at zero or False.
+class HeroEpisodeStats(StrictModel):
+    player_won: bool = False
+    total_reward: float = 0.0
+    # Sum of the per-step `dense_progress_reward` values.
+    dense_return: float = 0.0
+    syntax_penalty_total: float = 0.0
+    invalid_action_penalty_total: float = 0.0
+    repeat_noop_penalty_total: float = 0.0
+    wrong_submit_penalty_total: float = 0.0
+    # Copied from the environment state's step and tool-call counters.
+    steps_taken: int = 0
+    tool_calls_total: int = 0
+# Hero-facing environment state: step and tool-call counters with their budgets and
+# remainders, the episode status, world title, and last command. The prompt formatter
+# reads most of these fields when rendering each turn.
+class HeroState(State):
+    game_steps_taken: int = 0
+    tool_calls_total: int = 0
+    max_game_steps: int = 0
+    max_tool_calls: int = 0
+    game_steps_remaining: int = 0
+    tool_calls_remaining: int = 0
+    status: Literal["ready", "running", "won", "lost", "timed_out", "error"] = "ready"
+    world_title: str = ""
+    last_command: str | None = None
+    # NOTE(review): presumably the current scratchpad length in characters; confirm in the environment.
+    scratchpad_chars: int = 0

agents/loop/__init__.py ADDED Viewed

	@@ -0,0 +1,11 @@

+"""Closed-loop orchestration for hero and dungeon master policies."""
+from .runner import ClosedLoopRunner
+from .schema import ClosedLoopEpisodeArtifacts, ClosedLoopEpisodeRecord, ClosedLoopEpisodeSummary
+# Public names re-exported from the runner and schema submodules.
+__all__ = [
+    "ClosedLoopEpisodeArtifacts",
+    "ClosedLoopEpisodeRecord",
+    "ClosedLoopEpisodeSummary",
+    "ClosedLoopRunner",
+]

agents/loop/__main__.py ADDED Viewed

	@@ -0,0 +1,92 @@

+from __future__ import annotations
+import argparse
+import json
+from pathlib import Path
+from agents.hero.policy import HeroLLMPolicy
+from agents.master.interface import DEFAULT_GEMINI_MODEL
+from agents.master.env import DMEnvironment
+from agents.master.policy import DungeonMasterLLMPolicy
+from agents.shared.runtime import (
+    build_interface_adapter,
+    create_structured_client,
+    resolve_interface_config,
+    resolve_structured_client_config,
+)
+from .runner import ClosedLoopRunner
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(description="Closed-loop dungeon master and hero harness")
+    parser.add_argument("--episodes", type=int, default=1)
+    parser.add_argument("--seed", type=int)
+    parser.add_argument("--target-ratio", type=float)
+    parser.add_argument("--dm-provider", choices=["gemini", "hf_local"])
+    parser.add_argument("--dm-model")
+    parser.add_argument("--dm-adapter-path")
+    parser.add_argument("--hero-provider", choices=["gemini", "hf_local"])
+    parser.add_argument("--hero-model")
+    parser.add_argument("--hero-adapter-path")
+    parser.add_argument("--interface-provider", choices=["strict", "simple", "gemini"])
+    parser.add_argument("--interface-model", default=DEFAULT_GEMINI_MODEL)
+    parser.add_argument("--interface-narrate", action="store_true")
+    parser.add_argument(
+        "--translate-corporate-env",
+        action="store_true",
+        help="Rewrite hero-facing observations into a corporate app metaphor and map translated commands back through Gemini.",
+    )
+    parser.add_argument("--artifacts-root", type=Path)
+    parser.add_argument("--dm-artifacts-root", type=Path)
+    parser.add_argument("--dm-repair-attempts", type=int, default=2)
+    parser.add_argument("--hero-max-game-steps", type=int, default=40)
+    parser.add_argument("--hero-max-tool-calls", type=int, default=80)
+    parser.add_argument("--live", action="store_true")
+    parser.add_argument("--live-dir", type=Path)
+    args = parser.parse_args(argv)
+    dm_config = resolve_structured_client_config(
+        "dm",
+        provider=args.dm_provider,
+        model_name=args.dm_model,
+        adapter_path=args.dm_adapter_path,
+    )
+    hero_config = resolve_structured_client_config(
+        "hero",
+        provider=args.hero_provider,
+        model_name=args.hero_model,
+        adapter_path=args.hero_adapter_path,
+    )
+    interface_config = resolve_interface_config(
+        provider=args.interface_provider,
+        model_name=args.interface_model,
+        narrate_observations=args.interface_narrate,
+        translation_mode="corporate_app" if args.translate_corporate_env else None,
+    )
+    runner = ClosedLoopRunner(
+        dm_env=DMEnvironment(artifacts_root=args.dm_artifacts_root),
+        dm_policy=DungeonMasterLLMPolicy(create_structured_client(dm_config), model_name=dm_config.model_name),
+        hero_policy=HeroLLMPolicy(create_structured_client(hero_config), model_name=hero_config.model_name),
+        artifacts_root=args.artifacts_root,
+        live_dir=args.live_dir,
+        max_dm_repair_attempts=args.dm_repair_attempts,
+        hero_runner_kwargs={
+            "max_game_steps": args.hero_max_game_steps,
+            "max_tool_calls": args.hero_max_tool_calls,
+        },
+        hero_interface_adapter=build_interface_adapter(interface_config),
+    )
+    records = []
+    for index in range(args.episodes):
+        seed = None if args.seed is None else args.seed + index
+        record = runner.run_episode(seed=seed, target_ratio=args.target_ratio, live=args.live)
+        records.append(record)
+        print(json.dumps(ClosedLoopRunner.summary(record).model_dump(mode="json")))
+    if records:
+        print(json.dumps(ClosedLoopRunner.aggregate(records).model_dump(mode="json")))
+    return 0
+# Script entry point: exit the process with main()'s return code.
+if __name__ == "__main__":
+    raise SystemExit(main())

agents/loop/runner.py ADDED Viewed

	@@ -0,0 +1,253 @@

+from __future__ import annotations
+import json
+from pathlib import Path
+from agents.hero.policy import HeroPolicy
+from agents.hero.runner import HeroRunner
+from agents.master.env import DMEnvironment
+from agents.master.interface import InterfaceAdapter, StrictCliInterfaceAdapter
+from agents.master.policy import DMRepairContext, DungeonMasterPolicy, DungeonMasterPolicyError
+from agents.master.schema import DMObservation, DMRewardBreakdown, WorldDefinition
+from agents.master.snapshots import LiveObserver, LiveSnapshotWriter
+from .schema import (
+    ClosedLoopAggregateReport,
+    ClosedLoopEpisodeArtifacts,
+    ClosedLoopEpisodeRecord,
+    ClosedLoopEpisodeSummary,
+)
+# Default artifact root: ".play_runs/closed_loop" under the directory two levels above
+# this module's own directory. ClosedLoopRunner uses it when no artifacts_root is given.
+DEFAULT_CLOSED_LOOP_ROOT = Path(__file__).resolve().parents[2] / ".play_runs" / "closed_loop"
+class ClosedLoopRunner:
+    """Run closed-loop episodes that pair a dungeon-master policy with a hero policy.
+    For each episode the DM policy generates a world, the DM environment compiles it
+    (with repair retries), the hero policy plays it through a HeroRunner, and the
+    resulting record and traces are written under ``artifacts_root / episode_id``.
+    """
+    def __init__(
+        self,
+        *,
+        dm_env: DMEnvironment,
+        dm_policy: DungeonMasterPolicy,
+        hero_policy: HeroPolicy,
+        artifacts_root: Path | None = None,
+        live_dir: Path | None = None,
+        max_dm_repair_attempts: int = 2,
+        hero_runner_kwargs: dict[str, object] | None = None,
+        hero_interface_adapter: InterfaceAdapter | None = None,
+    ) -> None:
+        """Store the collaborators and fill in defaults for the omitted options."""
+        self.dm_env = dm_env
+        self.dm_policy = dm_policy
+        self.hero_policy = hero_policy
+        self.artifacts_root = artifacts_root or DEFAULT_CLOSED_LOOP_ROOT
+        self.live_dir = live_dir
+        self.max_dm_repair_attempts = max_dm_repair_attempts
+        # Any falsy value (None or an empty dict) falls back to the default hero budgets.
+        self.hero_runner_kwargs = hero_runner_kwargs or {"max_game_steps": 40, "max_tool_calls": 80}
+        self.hero_interface_adapter = hero_interface_adapter or StrictCliInterfaceAdapter()
+    def run_episode(
+        self,
+        *,
+        seed: int | None = None,
+        target_ratio: float | None = None,
+        live: bool = False,
+    ) -> ClosedLoopEpisodeRecord:
+        """Generate, compile, and play one world, persisting all artifacts.
+        Args:
+            seed: Forwarded to the DM environment reset.
+            target_ratio: Forwarded to the DM environment reset as ``difficulty_hint``.
+            live: When true, a live snapshot writer observes the hero run.
+        Returns:
+            The episode record; its status is "compile_failed" when no candidate world
+            compiled, "policy_error" when the hero runner recorded an error, otherwise
+            "complete" or "failed" depending on whether the player won.
+        Raises:
+            RuntimeError: if the DM environment did not assign an episode id on reset.
+        """
+        self.dm_env.reset(seed=seed, difficulty_hint=target_ratio)
+        episode_id = self.dm_env.state.episode_id
+        if episode_id is None:
+            raise RuntimeError("DM environment did not assign an episode id.")
+        episode_dir = self.artifacts_root / episode_id
+        episode_dir.mkdir(parents=True, exist_ok=True)
+        artifacts = ClosedLoopEpisodeArtifacts.from_episode_dir(episode_dir)
+        observer = self._observer(live)
+        world: WorldDefinition | None = None
+        errors: list[str] = []
+        compile_attempts = 0
+        repair_context: DMRepairContext | None = None
+        previous_candidate_json: str | None = None
+        attempt_rows: list[dict[str, object]] = []
+        # One initial attempt plus up to max_dm_repair_attempts repair attempts.
+        for attempt in range(1, self.max_dm_repair_attempts + 2):
+            compile_attempts = attempt
+            try:
+                candidate = self.dm_policy.generate_world(
+                    target_ratio=self.dm_env.state.target_ratio,
+                    repair_context=repair_context,
+                )
+                # Persist the candidate before compiling so a world that fails to compile is still on disk.
+                previous_candidate_json = candidate.model_dump_json(indent=2)
+                self._write_json(Path(artifacts.world_definition_path), previous_candidate_json)
+                self.dm_env.compile_world(candidate, episode_id=episode_id)
+                world = candidate
+                attempt_rows.append(
+                    {
+                        "attempt_number": attempt,
+                        "status": "compiled",
+                        "world_title": candidate.meta.title,
+                        "difficulty_target": candidate.meta.difficulty_target,
+                    }
+                )
+                break
+            except Exception as exc:
+                # Any generation or compile failure is recorded and fed back to the policy
+                # on the next attempt. previous_candidate_json keeps the most recent
+                # candidate that was generated (None if generation never succeeded).
+                normalized_error = self._normalize_error(exc)
+                errors.append(normalized_error)
+                attempt_rows.append(
+                    {
+                        "attempt_number": attempt,
+                        "status": "failed",
+                        "error": normalized_error,
+                    }
+                )
+                repair_context = DMRepairContext(
+                    attempt_number=attempt,
+                    error_message=normalized_error,
+                    previous_candidate_json=previous_candidate_json,
+                )
+        self._write_jsonl(Path(artifacts.world_generation_attempts_path), attempt_rows)
+        if world is None:
+            # No candidate compiled: record the failure and write empty trace and transcript files.
+            observation = self._compile_failure_observation(errors[-1] if errors else "world compilation failed")
+            record = ClosedLoopEpisodeRecord(
+                episode_id=episode_id,
+                status="compile_failed",
+                target_ratio=self.dm_env.state.target_ratio,
+                compile_attempts=compile_attempts,
+                dm_repair_errors=errors,
+                world_definition=None,
+                declared_difficulty_target=None,
+                difficulty_target_matches_target_ratio=None,
+                observation=observation,
+                artifacts=artifacts,
+            )
+            self._persist_record(record)
+            self._write_jsonl(Path(artifacts.hero_trace_path), [])
+            self._write_jsonl(Path(artifacts.transcript_path), [])
+            return record
+        hero_runner = HeroRunner(policy=self.hero_policy, **self.hero_runner_kwargs)
+        # Swap in the hero-facing interface adapter for this step only; the previous
+        # adapter is restored even when the step raises.
+        previous_adapter = self.dm_env.interface_adapter
+        self.dm_env.interface_adapter = self.hero_interface_adapter
+        try:
+            result = self.dm_env.step(world, runner=hero_runner, observer=observer)
+        finally:
+            self.dm_env.interface_adapter = previous_adapter
+        observation = result.observation
+        # A recorded hero policy error takes precedence over the win/lose outcome.
+        status = "policy_error" if hero_runner.last_error else ("complete" if observation.player_won else "failed")
+        record = ClosedLoopEpisodeRecord(
+            episode_id=episode_id,
+            status=status,
+            target_ratio=self.dm_env.state.target_ratio,
+            compile_attempts=compile_attempts,
+            dm_repair_errors=errors,
+            hero_policy_error=hero_runner.last_error,
+            hero_episode_stats=hero_runner.episode_stats,
+            world_definition=world,
+            declared_difficulty_target=world.meta.difficulty_target,
+            difficulty_target_matches_target_ratio=(world.meta.difficulty_target == self.dm_env.state.target_ratio),
+            observation=observation,
+            artifacts=artifacts,
+        )
+        self._persist_record(record)
+        self._write_jsonl(
+            Path(artifacts.hero_trace_path),
+            [event.model_dump(mode="json") for event in self.hero_policy.trace_events],
+        )
+        self._write_jsonl(
+            Path(artifacts.transcript_path),
+            [turn.model_dump(mode="json") for turn in observation.episode_transcript],
+        )
+        return record
+    @staticmethod
+    def summary(record: ClosedLoopEpisodeRecord) -> ClosedLoopEpisodeSummary:
+        """Condense an episode record into its headline fields."""
+        return ClosedLoopEpisodeSummary(
+            episode_id=record.episode_id,
+            status=record.status,
+            reward=record.observation.reward,
+            player_won=record.observation.player_won,
+            ratio=record.observation.ratio,
+            compile_error=record.observation.compile_error,
+            hero_policy_error=record.hero_policy_error,
+        )
+    @staticmethod
+    def aggregate(records: list[ClosedLoopEpisodeRecord]) -> ClosedLoopAggregateReport:
+        """Compute rates and means over ``records``.
+        Rates are fractions of all records. The hero-stat means only cover records
+        that carry ``hero_episode_stats`` and are 0.0 when there are none.
+        """
+        episodes = len(records)
+        dense_returns = [
+            record.hero_episode_stats.dense_return
+            for record in records
+            if record.hero_episode_stats is not None
+        ]
+        invalid_penalties = [
+            record.hero_episode_stats.invalid_action_penalty_total
+            for record in records
+            if record.hero_episode_stats is not None
+        ]
+        repeat_penalties = [
+            record.hero_episode_stats.repeat_noop_penalty_total
+            for record in records
+            if record.hero_episode_stats is not None
+        ]
+        return ClosedLoopAggregateReport(
+            episodes=episodes,
+            compile_valid_rate=_rate(sum(record.status != "compile_failed" for record in records), episodes),
+            policy_error_rate=_rate(sum(record.status == "policy_error" for record in records), episodes),
+            playable_rate=_rate(sum(record.world_definition is not None for record in records), episodes),
+            solve_rate=_rate(sum(record.status == "complete" for record in records), episodes),
+            mean_dense_return=_mean(dense_returns),
+            mean_invalid_action_penalty=_mean(invalid_penalties),
+            mean_repeat_noop_penalty=_mean(repeat_penalties),
+        )
+    def _compile_failure_observation(self, error: str) -> DMObservation:
+        """Build the terminal zero-reward observation used when no world compiled."""
+        breakdown = DMRewardBreakdown(
+            reward_mode="compile_failure_penalty",
+            player_won=False,
+            target_ratio=self.dm_env.state.target_ratio,
+            quality_score=0.0,
+            reward=0.0,
+        )
+        return DMObservation(
+            player_won=False,
+            compile_error=error,
+            reward=0.0,
+            done=True,
+            reward_breakdown=breakdown,
+            target_ratio_used=self.dm_env.state.target_ratio,
+        )
+    def _observer(self, live: bool) -> LiveObserver | None:
+        """Return a live snapshot writer when ``live`` is true, otherwise ``None``."""
+        if not live:
+            return None
+        return LiveSnapshotWriter(live_dir=self.live_dir, runner_name="hero_llm")
+    def _persist_record(self, record: ClosedLoopEpisodeRecord) -> None:
+        """Write the record as indented JSON to its ``run_record_path``."""
+        self._write_json(Path(record.artifacts.run_record_path), record.model_dump_json(indent=2))
+    @staticmethod
+    def _write_json(path: Path, payload: str) -> None:
+        """Write ``payload`` plus a trailing newline as UTF-8, creating parent directories."""
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_text(payload + "\n", encoding="utf-8")
+    @staticmethod
+    def _write_jsonl(path: Path, rows: list[dict[str, object]]) -> None:
+        """Write one JSON document per line as UTF-8; an empty ``rows`` yields an empty file."""
+        path.parent.mkdir(parents=True, exist_ok=True)
+        payload = "".join(json.dumps(row) + "\n" for row in rows)
+        path.write_text(payload, encoding="utf-8")
+    @staticmethod
+    def _normalize_error(exc: Exception) -> str:
+        """Return the error text to record for ``exc``.
+        DungeonMasterPolicyError text is returned unchanged; other exceptions have their
+        whitespace collapsed, falling back to the class name when the text is empty.
+        """
+        if isinstance(exc, DungeonMasterPolicyError):
+            return str(exc)
+        return " ".join(str(exc).split()) or exc.__class__.__name__
+def _mean(values: list[float]) -> float:
+    if not values:
+        return 0.0
+    return sum(values) / len(values)
+def _rate(count: int, total: int) -> float:
+    if total <= 0:
+        return 0.0
+    return count / total

agents/loop/schema.py ADDED Viewed

	@@ -0,0 +1,64 @@

+from __future__ import annotations
+from pathlib import Path
+from typing import Literal
+from agents.hero.schema import HeroEpisodeStats
+from agents.master.schema import DMObservation, WorldDefinition
+from agents.shared.model_schema import StrictModel
+class ClosedLoopEpisodeArtifacts(StrictModel):
+    episode_dir: str
+    world_generation_attempts_path: str
+    world_definition_path: str
+    run_record_path: str
+    hero_trace_path: str
+    transcript_path: str
+    @classmethod
+    def from_episode_dir(cls, episode_dir: Path) -> "ClosedLoopEpisodeArtifacts":
+        return cls(
+            episode_dir=str(episode_dir),
+            world_generation_attempts_path=str(episode_dir / "world_generation_attempts.jsonl"),
+            world_definition_path=str(episode_dir / "world_definition.json"),
+            run_record_path=str(episode_dir / "run_record.json"),
+            hero_trace_path=str(episode_dir / "hero_trace.jsonl"),
+            transcript_path=str(episode_dir / "transcript.jsonl"),
+        )
+class ClosedLoopEpisodeRecord(StrictModel):
+    # Full record of one closed-loop episode, including the final DM observation
+    # and the paths of the artifacts written for it.
+    episode_id: str
+    # Outcome label; restricted to the four literals below.
+    status: Literal["complete", "failed", "compile_failed", "policy_error"]
+    target_ratio: float
+    compile_attempts: int
+    # NOTE(review): presumably one error message per failed compile/repair attempt — confirm against the runner.
+    dm_repair_errors: list[str]
+    # Optional fields default to None when the corresponding stage produced nothing.
+    hero_policy_error: str | None = None
+    hero_episode_stats: HeroEpisodeStats | None = None
+    declared_difficulty_target: float | None = None
+    difficulty_target_matches_target_ratio: bool | None = None
+    world_definition: WorldDefinition | None = None
+    observation: DMObservation
+    artifacts: ClosedLoopEpisodeArtifacts
+class ClosedLoopEpisodeSummary(StrictModel):
+    # Compact per-episode summary; every field except the id and status is optional.
+    episode_id: str
+    # Plain string here, unlike the Literal-typed status on ClosedLoopEpisodeRecord.
+    status: str
+    reward: float | None = None
+    player_won: bool | None = None
+    ratio: float | None = None
+    compile_error: str | None = None
+    hero_policy_error: str | None = None
+class ClosedLoopAggregateReport(StrictModel):
+    # Aggregate statistics over a batch of closed-loop episodes.
+    episodes: int
+    # NOTE(review): the *_rate fields look like fractions of `episodes` (0.0 when there are none) — confirm against the runner.
+    compile_valid_rate: float
+    policy_error_rate: float
+    playable_rate: float
+    solve_rate: float
+    # NOTE(review): the mean_* fields look like per-episode averages — confirm against the runner.
+    mean_dense_return: float
+    mean_invalid_action_penalty: float
+    mean_repeat_noop_penalty: float

agents/master/__init__.py ADDED Viewed

	@@ -0,0 +1,15 @@

+"""DM environment source package."""
+from .policy import (
+    DMRepairContext,
+    DungeonMasterLLMPolicy,
+    DungeonMasterPolicy,
+    DungeonMasterPolicyError,
+)
+__all__ = [
+    "DMRepairContext",
+    "DungeonMasterLLMPolicy",
+    "DungeonMasterPolicy",
+    "DungeonMasterPolicyError",
+]

agents/master/__main__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from .main import main
+if __name__ == "__main__":
+    raise SystemExit(main())

agents/master/base.py ADDED Viewed

	@@ -0,0 +1,84 @@

+from __future__ import annotations
+from contextlib import contextmanager
+import re
+import warnings
+from pathlib import Path
+# Size bounds for a world definition (node, item, quest-step and clue counts).
+MAX_NODES = 40
+MAX_ITEMS = 32
+MAX_QUEST_STEPS = 64
+MIN_NODES = 5
+MIN_QUEST_STEPS = 2
+MIN_CLUES = 3
+MAX_CLUES = 5
+# NOTE(review): presumably the default target ratio and its tolerance used in reward shaping — confirm against the env code.
+TARGET_RATIO = 1.5
+TARGET_RATIO_SIGMA = 0.4
+# NOTE(review): looks like a step-budget multiplier — confirm where it is applied.
+MAX_STEP_MULTIPLIER = 5
+# NOTE(review): sentinel location ids, presumably for carried and not-yet-revealed items — confirm.
+INVENTORY_ID = "__inventory__"
+STORED_ID = "__stored__"
+# Directory two levels above the directory that contains this module.
+ROOT_DIR = Path(__file__).resolve().parents[2]
+ARTIFACTS_ROOT = ROOT_DIR / ".artifacts" / "dm_env"
+CUSTOM_LOGIC_DIR = ROOT_DIR / "textworld_data" / "dnd" / "logic"
+CUSTOM_GRAMMAR_DIR = ROOT_DIR / "textworld_data" / "dnd" / "text_grammars"
+SUPPORTED_DIRECTIONS = ("north", "south", "east", "west", "up", "down", "in", "out")
+# Maps each supported direction to its reverse.
+OPPOSITE_DIRECTION = {
+    "north": "south",
+    "south": "north",
+    "east": "west",
+    "west": "east",
+    "up": "down",
+    "down": "up",
+    "in": "out",
+    "out": "in",
+}
+# Fully anchored patterns for action strings of the form verb(arg) or verb(arg,arg).
+# Arguments are runs of lowercase letters, digits and underscores, with no spaces around the comma.
+GO_RE = re.compile(r"^go\((?P<target>[a-z0-9_]+)\)$")
+OPEN_RE = re.compile(r"^open\((?P<target>[a-z0-9_]+)\)$")
+UNLOCK_RE = re.compile(r"^unlock\((?P<door>[a-z0-9_]+),(?P<key>[a-z0-9_]+)\)$")
+TAKE_RE = re.compile(r"^take\((?P<item>[a-z0-9_]+),(?P<source>[a-z0-9_]+)\)$")
+READ_RE = re.compile(r"^read\((?P<target>[a-z0-9_]+)\)$")
+USE_RE = re.compile(r"^use\((?P<item>[a-z0-9_]+),(?P<target>[a-z0-9_]+)\)$")
+COMBINE_RE = re.compile(r"^combine\((?P<item_a>[a-z0-9_]+),(?P<item_b>[a-z0-9_]+)\)$")
+GIVE_RE = re.compile(r"^give\((?P<item>[a-z0-9_]+),(?P<npc>[a-z0-9_]+)\)$")
+TALK_RE = re.compile(r"^talk\((?P<target>[a-z0-9_]+)\)$")
+# submit("answer") or submit('answer'); the closing quote must match the opening one.
+SUBMIT_RE = re.compile(r"^submit\((?P<quote>[\"'])(?P<answer>.+)(?P=quote)\)$")
+class DMCompileError(RuntimeError):
+    pass
+class DMInterfaceError(RuntimeError):
+    pass
+@contextmanager
+def suppress_unsupported_game_warning():
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore",
+            message=r"Game '.*' is not fully supported\..*",
+            category=Warning,
+        )
+        yield
+def normalize_snake_id(value: str, kind: str) -> str:
+    if not re.fullmatch(r"[a-z][a-z0-9_]*", value):
+        raise DMCompileError(f"{kind} '{value}' must be snake_case.")
+    return value
+def parser_safe_text(value: str) -> str:
+    collapsed = re.sub(r"[^A-Za-z0-9 ]+", " ", value).strip().lower()
+    collapsed = re.sub(r"\s+", " ", collapsed)
+    if not collapsed:
+        raise DMCompileError(f"Unable to derive a parser-safe name from '{value}'.")
+    return collapsed
+def normalize_answer_text(value: str) -> str:
+    collapsed = re.sub(r"[^A-Za-z0-9 ]+", " ", value).strip().lower()
+    return re.sub(r"\s+", " ", collapsed)

agents/master/build.py ADDED Viewed

	@@ -0,0 +1,287 @@

+from __future__ import annotations
+import uuid
+from collections import defaultdict
+from pathlib import Path
+from typing import Any
+from textworld.generator import GameMaker, GameOptions, compile_game
+from textworld.generator.data import KnowledgeBase
+from .base import ARTIFACTS_ROOT, DMCompileError, parser_safe_text
+from .check import validate_and_normalize
+from .graph import (
+    door_room_mapping,
+    hidden_readable_ids,
+    npc_trade_mapping,
+    produced_item_ids,
+    readable_clue_mapping,
+    recipe_mapping,
+    use_effect_mapping,
+)
+from .logic import build_grammar_dir, build_logic_dir, solver_policy, submit_command_text, write_artifacts
+from .quest import parse_quest_action, simulate_walkthrough, topological_linearize
+from .schema import CompiledWorld, WorldDefinition
+class WorldCompiler:
+    def __init__(self, artifacts_root: Path | None = None) -> None:
+        self.artifacts_root = artifacts_root or ARTIFACTS_ROOT
+    def compile(self, world_input: WorldDefinition | dict[str, Any], episode_id: str | None = None) -> CompiledWorld:
+        """Validate a world definition and compile it into a game file plus metadata.
+
+        Args:
+            world_input: World definition model or raw dict; it is passed through
+                ``validate_and_normalize`` first.
+            episode_id: Name of the artifacts sub-directory. A 12-character hex id
+                is generated when omitted.
+
+        Returns:
+            The ``CompiledWorld`` assembled by ``_compiled_world``.
+        """
+        world = validate_and_normalize(world_input)
+        episode_id = episode_id or uuid.uuid4().hex[:12]
+        artifacts_dir = self.artifacts_root / episode_id
+        artifacts_dir.mkdir(parents=True, exist_ok=True)
+        # Quest steps are parsed in the order produced by topological_linearize.
+        parsed_steps = [parse_quest_action(step.action) for step in topological_linearize(world.quest_chain)]
+        entity_names = self._assign_command_names(world)
+        options = GameOptions()
+        # The knowledge base is loaded from logic/grammar directories built inside artifacts_dir.
+        options.kb = KnowledgeBase.load(
+            logic_path=str(build_logic_dir(artifacts_dir, world)),
+            grammar_path=str(build_grammar_dir(artifacts_dir)),
+        )
+        options.path = str(artifacts_dir / "game.z8")
+        options.force_recompile = True
+        maker = GameMaker(options=options)
+        rooms, entities = self._build_entities(maker, world, entity_names)
+        maker.set_player(rooms[world.meta.start_node_id])
+        self._compile_edges(maker, world, rooms, entities)
+        self._compile_clue_sources(maker, world, entities)
+        self._compile_fixtures(maker, world, entities)
+        self._compile_npcs(maker, world, entities)
+        self._compile_recipes(maker, world, entities)
+        guardian = entities[world.meta.win_condition.target_npc_id]
+        # The answer token is an entity kept in maker.nowhere and tied to the guardian by the "correct" fact.
+        answer = maker.new(type="answer", name="final answer token")
+        maker.nowhere.append(answer)
+        entities["__answer__"] = answer
+        maker.add_fact("guardian", guardian)
+        maker.add_fact("correct", answer, guardian)
+        walkthrough_commands = simulate_walkthrough(world, parsed_steps, entity_names)
+        game = maker.build()
+        game.objective = (
+            f"Explore {world.meta.title}, manipulate the dungeon's tools, gather every clue, "
+            f"speak to {entities[world.meta.win_condition.target_npc_id].name}, and submit the answer."
+        )
+        game.metadata.update(
+            {"episode_id": episode_id, "dm_title": world.meta.title, "start_node_id": world.meta.start_node_id}
+        )
+        compile_game(game, options)
+        write_artifacts(artifacts_dir, world, walkthrough_commands)
+        policy = solver_policy(str(options.path))
+        # Fall back to a copy of the simulated walkthrough when solver_policy returns nothing.
+        if not policy:
+            policy = list(walkthrough_commands)
+        return self._compiled_world(
+            episode_id,
+            artifacts_dir,
+            Path(options.path),
+            world,
+            entity_names,
+            walkthrough_commands,
+            policy,
+        )
+    def _build_entities(
+        self,
+        maker: GameMaker,
+        world: WorldDefinition,
+        entity_names: dict[str, str],
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Create maker entities for every node and item and place them.
+
+        Returns:
+            ``(rooms, entities)``: rooms keyed by location/junction node id, and
+            all other node and item entities keyed by their id.
+
+        Raises:
+            DMCompileError: if an item that is not produced has no ``start_node_id``.
+        """
+        rooms = {
+            node.id: maker.new(type="r", name=entity_names[node.id], desc=node.description)
+            for node in world.nodes
+            if node.type in {"location", "junction"}
+        }
+        entities: dict[str, Any] = {}
+        hidden_readables = hidden_readable_ids(world)
+        recipe_outputs = {recipe.output_item_id for recipe in world.recipes}
+        produced_items = produced_item_ids(world)
+        for node in world.nodes:
+            if node.type in {"location", "junction"}:
+                continue
+            entity = self._make_node_entity(maker, node, entity_names[node.id])
+            entities[node.id] = entity
+            # Doors and hidden readables start in maker.nowhere; every other node goes into its parent room.
+            if node.type == "door":
+                maker.nowhere.append(entity)
+            elif node.type == "readable" and node.id in hidden_readables:
+                maker.nowhere.append(entity)
+                maker.add_fact("hidden_readable", entity)
+            else:
+                rooms[node.parent_id].add(entity)
+        for item in world.items:
+            # Items with subtype "key" use type "k"; all other items use type "o".
+            item_type = "k" if item.subtype == "key" else "o"
+            entity = maker.new(type=item_type, name=entity_names[item.id], desc=item.description)
+            entities[item.id] = entity
+            if item.id in produced_items:
+                # Produced items start in maker.nowhere: recipe outputs are tagged "fresh", the rest "stored_item".
+                maker.nowhere.append(entity)
+                if item.id in recipe_outputs:
+                    maker.add_fact("fresh", entity)
+                else:
+                    maker.add_fact("stored_item", entity)
+                continue
+            holder = item.start_node_id
+            if holder is None:
+                raise DMCompileError(f"Placed item '{item.id}' is missing start_node_id.")
+            # A holder that is not a room is looked up among the non-room node entities.
+            if holder in rooms:
+                rooms[holder].add(entity)
+            else:
+                entities[holder].add(entity)
+        return rooms, entities
+    @staticmethod
+    def _make_node_entity(maker: GameMaker, node: object, name: str) -> Any:
+        if node.type == "container":
+            entity = maker.new(type="c", name=name, desc=node.description)
+            entity.add_property("open" if node.open else "locked" if node.locked else "closed")
+            return entity
+        if node.type == "door":
+            entity = maker.new(type="d", name=name, desc=node.description)
+            entity.add_property("open" if node.open else "locked" if node.locked else "closed")
+            return entity
+        if node.type == "readable":
+            return maker.new(type="readable", name=name, desc=node.description)
+        if node.type == "fixture":
+            return maker.new(type="fixture", name=name, desc=node.description)
+        if node.type == "npc":
+            return maker.new(type="npc", name=name, desc=node.description)
+        raise DMCompileError(f"Unsupported node type '{node.type}'.")
+    def _compile_clue_sources(
+        self,
+        maker: GameMaker,
+        world: WorldDefinition,
+        entities: dict[str, Any],
+    ) -> None:
+        hidden_readables = hidden_readable_ids(world)
+        for node in world.nodes:
+            if node.type != "readable":
+                continue
+            readable = entities[node.id]
+            if node.requires_item_id:
+                maker.add_fact("read_requires", readable, entities[node.requires_item_id])
+                maker.add_fact("read_consumes_use" if node.consumes_item else "read_keeps_use", readable)
+            else:
+                maker.add_fact("free_read", readable)
+            if node.id in hidden_readables:
+                continue
+    def _compile_fixtures(self, maker: GameMaker, world: WorldDefinition, entities: dict[str, Any]) -> None:
+        for node in world.nodes:
+            if node.type != "fixture":
+                continue
+            fixture = entities[node.id]
+            maker.add_fact("fixture_requires", fixture, entities[node.requires_item_id])
+            maker.add_fact("sealed", fixture)
+            maker.add_fact("fixture_consumes_use" if node.consumes_item else "fixture_keeps_use", fixture)
+            if node.reveals_item_id:
+                maker.add_fact("reveals_item", fixture, entities[node.reveals_item_id])
+            if node.reveals_readable_id:
+                maker.add_fact("reveals_readable", fixture, entities[node.reveals_readable_id])
+    def _compile_npcs(
+        self,
+        maker: GameMaker,
+        world: WorldDefinition,
+        entities: dict[str, Any],
+    ) -> None:
+        guardian_id = world.meta.win_condition.target_npc_id
+        for node in world.nodes:
+            if node.type != "npc":
+                continue
+            npc = entities[node.id]
+            if node.id == guardian_id:
+                continue
+            maker.add_fact("trade_pending", npc)
+            maker.add_fact("trade_requires", npc, entities[node.requires_item_id])
+            if node.gives_item_id:
+                maker.add_fact("trade_gives_item", npc, entities[node.gives_item_id])
+            if node.gives_clue_id:
+                maker.add_fact("trade_gives_clue", npc)
+    def _compile_recipes(self, maker: GameMaker, world: WorldDefinition, entities: dict[str, Any]) -> None:
+        for recipe in world.recipes:
+            a_id, b_id = recipe.input_item_ids
+            output = entities[recipe.output_item_id]
+            maker.add_fact("combines_with", entities[a_id], entities[b_id], output)
+            maker.add_fact("combines_with", entities[b_id], entities[a_id], output)
+    @staticmethod
+    def _compile_edges(
+        maker: GameMaker,
+        world: WorldDefinition,
+        rooms: dict[str, Any],
+        entities: dict[str, Any],
+    ) -> None:
+        pair_groups: dict[frozenset[str], list[Any]] = defaultdict(list)
+        for edge in world.edges:
+            pair_groups.setdefault(frozenset({edge.from_node_id, edge.to_node_id}), []).append(edge)
+        for edges in pair_groups.values():
+            forward, backward = sorted(edges, key=lambda edge: edge.id)
+            for edge in (forward, backward):
+                maker.add_fact(f"{edge.direction}_of", rooms[edge.to_node_id], rooms[edge.from_node_id])
+            if forward.door_node_id:
+                door = entities[forward.door_node_id]
+                room_a = rooms[forward.from_node_id]
+                room_b = rooms[forward.to_node_id]
+                maker.add_fact("link", room_a, door, room_b)
+                maker.add_fact("link", room_b, door, room_a)
+                if forward.required_item_id:
+                    maker.add_fact("match", entities[forward.required_item_id], door)
+                door_is_open = door.has_property("open")
+                if door_is_open:
+                    maker.add_fact("free", room_a, room_b)
+                    maker.add_fact("free", room_b, room_a)
+            else:
+                maker.add_fact("free", rooms[forward.from_node_id], rooms[forward.to_node_id])
+                maker.add_fact("free", rooms[forward.to_node_id], rooms[forward.from_node_id])
+    def _compiled_world(
+        self,
+        episode_id: str,
+        artifacts_dir: Path,
+        game_file: Path,
+        world: WorldDefinition,
+        entity_names: dict[str, str],
+        walkthrough_commands: list[str],
+        policy: list[str],
+    ) -> CompiledWorld:
+        node_by_id = {node.id: node for node in world.nodes}
+        return CompiledWorld(
+            episode_id=episode_id,
+            world=world,
+            artifacts_dir=artifacts_dir,
+            game_file=game_file,
+            walkthrough_commands=walkthrough_commands,
+            solver_policy=policy,
+            correct_answer_normalized=submit_command_text(world).replace("submit ", "", 1),
+            correct_submit_command=submit_command_text(world),
+            guardian_id=world.meta.win_condition.target_npc_id,
+            guardian_room_id=node_by_id[world.meta.win_condition.target_npc_id].parent_id,
+            room_name_to_id={
+                entity_names[node.id]: node.id for node in world.nodes if node.type in {"location", "junction"}
+            },
+            node_command_names={node.id: entity_names[node.id] for node in world.nodes},
+            item_command_names={item.id: entity_names[item.id] for item in world.items},
+            item_start_locations={item.id: item.start_node_id for item in world.items},
+            clue_text_by_id={clue.id: clue.text for clue in world.clues},
+            readable_clue_by_id=readable_clue_mapping(world),
+            npc_trade_map=npc_trade_mapping(world),
+            recipe_map=recipe_mapping(world),
+            use_effects=use_effect_mapping(world),
+            produced_item_ids=produced_item_ids(world),
+            room_edges_by_target={(edge.from_node_id, edge.to_node_id): edge for edge in world.edges},
+            room_edges_by_direction={(edge.from_node_id, edge.direction): edge for edge in world.edges},
+            door_rooms=door_room_mapping(world),
+        )
+    @staticmethod
+    def _assign_command_names(world: WorldDefinition) -> dict[str, str]:
+        names = {node.id: parser_safe_text(node.label) for node in world.nodes}
+        names.update({item.id: parser_safe_text(item.label) for item in world.items})
+        return names

agents/master/check.py ADDED Viewed

	@@ -0,0 +1,435 @@

+from __future__ import annotations
+from collections import defaultdict, deque
+from typing import Any
+from pydantic import ValidationError
+from .base import (
+    DMCompileError,
+    MAX_CLUES,
+    MAX_ITEMS,
+    MAX_NODES,
+    MAX_QUEST_STEPS,
+    MIN_CLUES,
+    MIN_NODES,
+    MIN_QUEST_STEPS,
+    OPPOSITE_DIRECTION,
+    normalize_answer_text,
+    normalize_snake_id,
+    parser_safe_text,
+)
+from .graph import hidden_readable_ids, produced_item_ids
+from .quest import parse_quest_action, simulate_walkthrough, topological_linearize
+from .schema import (
+    CombineAction,
+    ContainerNode,
+    DoorNode,
+    GiveAction,
+    NpcNode,
+    ReadableNode,
+    SubmitAction,
+    TakeAction,
+    TalkAction,
+    UnlockAction,
+    UseAction,
+    WorldDefinition,
+)
+def validate_and_normalize(world_input: WorldDefinition | dict[str, Any]) -> WorldDefinition:
+    if isinstance(world_input, dict):
+        _reject_legacy_shapes(world_input)
+    try:
+        world = WorldDefinition.model_validate(world_input)
+    except ValidationError as exc:  # pragma: no cover - exercised indirectly in compile paths
+        raise DMCompileError(str(exc)) from exc
+    _validate_ids(world)
+    _validate_shape(world)
+    _validate_nodes(world)
+    _validate_edges(world)
+    _validate_items(world)
+    _validate_clues(world)
+    _validate_visibility(world)
+    _validate_answer_leaks(world)
+    _validate_guardian_path(world)
+    _validate_clue_gates(world)
+    _validate_item_usage(world)
+    _validate_quest_shape(world)
+    return world
+def infer_start_room(world: WorldDefinition) -> str:
+    return world.meta.start_node_id
+def _reject_legacy_shapes(world_input: dict[str, Any]) -> None:
+    for node in world_input.get("nodes", []):
+        if node.get("type") == "clue":
+            raise DMCompileError("Legacy clue nodes are not supported in v2. Use top-level clues[].")
+        if node.get("state", {}).get("npc_dialogue") is not None:
+            raise DMCompileError("Legacy npc_dialogue is not supported in v2.")
+    for edge in world_input.get("edges", []):
+        if edge.get("type") == "conditional_passage":
+            raise DMCompileError("conditional_passage is not supported in v2.")
+def _validate_ids(world: WorldDefinition) -> None:
+    global_ids: set[str] = set()
+    collections = {
+        "node": [node.id for node in world.nodes],
+        "item": [item.id for item in world.items],
+        "clue": [clue.id for clue in world.clues],
+        "recipe": [recipe.id for recipe in world.recipes],
+        "quest step": [step.step_id for step in world.quest_chain],
+    }
+    for kind, values in collections.items():
+        seen: set[str] = set()
+        for value in values:
+            normalize_snake_id(value, kind)
+            if value in seen:
+                raise DMCompileError(f"Duplicate {kind} id '{value}'.")
+            if value in global_ids:
+                raise DMCompileError(f"Duplicate world id '{value}' across collections.")
+            seen.add(value)
+            global_ids.add(value)
+def _validate_shape(world: WorldDefinition) -> None:
+    room_nodes = [node for node in world.nodes if node.type in {"location", "junction"}]
+    if len(world.nodes) < MIN_NODES:
+        raise DMCompileError(f"Worlds need at least {MIN_NODES} nodes.")
+    if len(world.nodes) > MAX_NODES:
+        raise DMCompileError(f"Worlds support at most {MAX_NODES} nodes.")
+    if len(world.items) > MAX_ITEMS:
+        raise DMCompileError(f"Worlds support at most {MAX_ITEMS} items.")
+    if len(world.clues) < MIN_CLUES or len(world.clues) > MAX_CLUES:
+        raise DMCompileError(f"Worlds must define between {MIN_CLUES} and {MAX_CLUES} clues.")
+    if len(world.quest_chain) < MIN_QUEST_STEPS or len(world.quest_chain) > MAX_QUEST_STEPS:
+        raise DMCompileError(f"quest_chain must contain between {MIN_QUEST_STEPS} and {MAX_QUEST_STEPS} steps.")
+    if world.meta.start_node_id not in {node.id for node in room_nodes}:
+        raise DMCompileError("meta.start_node_id must reference a location or junction.")
+    if world.meta.win_condition.type != "deduce":
+        raise DMCompileError("Only deduce win conditions are supported in v2.")
+    if not normalize_answer_text(world.meta.win_condition.answer_string):
+        raise DMCompileError("answer_string cannot normalize to an empty command.")
+def _validate_nodes(world: WorldDefinition) -> None:
+    """Validate the per-type constraints of every non-room node.
+
+    Covers lockable state for doors and containers, parent placement, clue and
+    item references for readables and fixtures, NPC trade fields, and the
+    presence of the guardian NPC.
+
+    Raises:
+        DMCompileError: on the first violated constraint.
+    """
+    node_by_id = {node.id: node for node in world.nodes}
+    item_ids = {item.id for item in world.items}
+    clue_ids = {clue.id for clue in world.clues}
+    hidden_readables = hidden_readable_ids(world)
+    guardian_id = world.meta.win_condition.target_npc_id
+    guardian_seen = False
+    for node in world.nodes:
+        if node.type in {"location", "junction"}:
+            continue
+        # Doors are checked for lock consistency only; the parent check below is skipped for them.
+        if node.type == "door":
+            _validate_lockable(node, item_ids)
+            continue
+        parent = node_by_id.get(node.parent_id)
+        if parent is None or parent.type not in {"location", "junction"}:
+            raise DMCompileError(f"Node '{node.id}' must live in a location or junction.")
+        if node.type == "container":
+            _validate_lockable(node, item_ids)
+        elif node.type == "readable":
+            if node.clue_id not in clue_ids:
+                raise DMCompileError(f"Readable '{node.id}' references unknown clue '{node.clue_id}'.")
+            if node.requires_item_id and node.requires_item_id not in item_ids:
+                raise DMCompileError(f"Readable '{node.id}' references unknown item '{node.requires_item_id}'.")
+        elif node.type == "fixture":
+            if node.requires_item_id not in item_ids:
+                raise DMCompileError(f"Fixture '{node.id}' references unknown item '{node.requires_item_id}'.")
+            # Exactly one of the two reveal targets must be set: both-set and both-empty are rejected.
+            if bool(node.reveals_item_id) == bool(node.reveals_readable_id):
+                raise DMCompileError(f"Fixture '{node.id}' must reveal exactly one item or readable.")
+            if node.reveals_item_id and node.reveals_item_id not in item_ids:
+                raise DMCompileError(f"Fixture '{node.id}' reveals unknown item '{node.reveals_item_id}'.")
+            if node.reveals_readable_id and node.reveals_readable_id not in node_by_id:
+                raise DMCompileError(f"Fixture '{node.id}' reveals unknown readable '{node.reveals_readable_id}'.")
+            if node.reveals_readable_id:
+                readable = node_by_id[node.reveals_readable_id]
+                if not isinstance(readable, ReadableNode):
+                    raise DMCompileError(f"Fixture '{node.id}' can only reveal readable nodes.")
+                if readable.parent_id != node.parent_id:
+                    raise DMCompileError(
+                        f"Fixture '{node.id}' must reveal readable '{readable.id}' in the same room."
+                    )
+        elif node.type == "npc":
+            if node.id == guardian_id:
+                guardian_seen = True
+                if node.requires_item_id or node.gives_item_id or node.gives_clue_id:
+                    raise DMCompileError("Guardian NPC cannot have trade fields.")
+            else:
+                # Every non-guardian NPC is a trader: one required item, and exactly one of an item or a clue given.
+                if not node.requires_item_id:
+                    raise DMCompileError(f"NPC '{node.id}' requires requires_item_id in v2.")
+                if node.requires_item_id not in item_ids:
+                    raise DMCompileError(f"NPC '{node.id}' references unknown item '{node.requires_item_id}'.")
+                if bool(node.gives_item_id) == bool(node.gives_clue_id):
+                    raise DMCompileError(
+                        f"NPC '{node.id}' must define exactly one of gives_item_id or gives_clue_id."
+                    )
+                if node.gives_item_id and node.gives_item_id not in item_ids:
+                    raise DMCompileError(f"NPC '{node.id}' gives unknown item '{node.gives_item_id}'.")
+                if node.gives_clue_id and node.gives_clue_id not in clue_ids:
+                    raise DMCompileError(f"NPC '{node.id}' gives unknown clue '{node.gives_clue_id}'.")
+        else:  # pragma: no cover
+            raise AssertionError(f"Unhandled node type {node.type}")
+    if not guardian_seen:
+        raise DMCompileError(f"Guardian NPC '{guardian_id}' does not exist.")
+    # NOTE(review): assumes every id from hidden_readable_ids() is a key of node_by_id; an unknown id would raise KeyError here — confirm.
+    for readable_id in hidden_readables:
+        readable = node_by_id[readable_id]
+        if not isinstance(readable, ReadableNode):
+            raise DMCompileError(f"Only readable nodes can be hidden, not '{readable_id}'.")
+def _validate_lockable(node: ContainerNode | DoorNode, item_ids: set[str]) -> None:
+    if node.open and node.locked:
+        raise DMCompileError(f"Lockable node '{node.id}' cannot be both open and locked.")
+    if node.locked and not node.lock_key_id:
+        raise DMCompileError(f"Lockable node '{node.id}' is locked but has no lock_key_id.")
+    if node.lock_key_id and node.lock_key_id not in item_ids:
+        raise DMCompileError(f"Lockable node '{node.id}' references unknown key '{node.lock_key_id}'.")
+def _validate_edges(world: WorldDefinition) -> None:
+    room_ids = {node.id for node in world.nodes if node.type in {"location", "junction"}}
+    node_by_id = {node.id: node for node in world.nodes}
+    item_ids = {item.id for item in world.items}
+    pair_groups: dict[frozenset[str], list[Any]] = defaultdict(list)
+    graph: dict[str, set[str]] = defaultdict(set)
+    direction_map: dict[tuple[str, str], str] = {}
+    for edge in world.edges:
+        if edge.from_node_id not in room_ids or edge.to_node_id not in room_ids:
+            raise DMCompileError(f"Edge '{edge.id}' must connect location or junction nodes only.")
+        if edge.from_node_id == edge.to_node_id:
+            raise DMCompileError(f"Edge '{edge.id}' cannot be self-referential.")
+        if edge.required_item_id and edge.required_item_id not in item_ids:
+            raise DMCompileError(f"Edge '{edge.id}' references unknown item '{edge.required_item_id}'.")
+        if edge.required_item_id and edge.required_item_id not in {
+            item.id for item in world.items if item.subtype == "key"
+        }:
+            raise DMCompileError(f"Edge '{edge.id}' must use a key item, not '{edge.required_item_id}'.")
+        if edge.type == "locked_passage":
+            if not edge.door_node_id:
+                raise DMCompileError(f"Locked edge '{edge.id}' requires door_node_id.")
+            if not edge.required_item_id:
+                raise DMCompileError(f"Locked edge '{edge.id}' requires required_item_id.")
+        elif edge.required_item_id is not None:
+            raise DMCompileError(f"Only locked_passage edges can reference required_item_id (edge '{edge.id}').")
+        if edge.door_node_id:
+            door = node_by_id.get(edge.door_node_id)
+            if not isinstance(door, DoorNode):
+                raise DMCompileError(f"Edge '{edge.id}' references unknown door '{edge.door_node_id}'.")
+            if edge.required_item_id and door.lock_key_id != edge.required_item_id:
+                raise DMCompileError(f"Edge '{edge.id}' and door '{door.id}' disagree on the key.")
+        key = (edge.from_node_id, edge.direction)
+        if key in direction_map:
+            raise DMCompileError(
+                f"Edges '{direction_map[key]}' and '{edge.id}' both leave '{edge.from_node_id}' via '{edge.direction}'."
+            )
+        direction_map[key] = edge.id
+        graph[edge.from_node_id].add(edge.to_node_id)
+        pair_groups[frozenset({edge.from_node_id, edge.to_node_id})].append(edge)
+    for pair, edges in pair_groups.items():
+        if len(edges) != 2:
+            raise DMCompileError(f"Edges between {', '.join(sorted(pair))} must be explicitly bidirectional.")
+        a, b = edges
+        if OPPOSITE_DIRECTION[a.direction] != b.direction:
+            raise DMCompileError(f"Edges '{a.id}' and '{b.id}' must use opposite directions.")
+        if a.type != b.type or a.required_item_id != b.required_item_id or a.door_node_id != b.door_node_id:
+            raise DMCompileError(f"Edge pair '{a.id}'/'{b.id}' must agree on type, key, and door.")
+    reachable = _reachable_rooms(graph, world.meta.start_node_id)
+    if reachable != room_ids:
+        raise DMCompileError(f"Some rooms are unreachable from the start node: {sorted(room_ids - reachable)}")
+def _validate_items(world: WorldDefinition) -> None:
+    node_by_id = {node.id: node for node in world.nodes}
+    produced = produced_item_ids(world)
+    recipe_outputs: set[str] = set()
+    recipe_inputs: set[frozenset[str]] = set()
+    for recipe in world.recipes:
+        inputs = frozenset(recipe.input_item_ids)
+        if len(inputs) != 2:
+            raise DMCompileError(f"Recipe '{recipe.id}' must have exactly two distinct input items.")
+        if inputs in recipe_inputs:
+            raise DMCompileError(f"Duplicate recipe inputs in '{recipe.id}'.")
+        recipe_inputs.add(inputs)
+        if recipe.output_item_id in recipe_outputs:
+            raise DMCompileError(f"Item '{recipe.output_item_id}' is produced by multiple recipes.")
+        recipe_outputs.add(recipe.output_item_id)
+    for item in world.items:
+        if item.id in produced and item.start_node_id is not None:
+            raise DMCompileError(f"Produced item '{item.id}' must not be initially placed.")
+        if item.id not in produced and item.start_node_id is None:
+            raise DMCompileError(f"Placed item '{item.id}' requires start_node_id.")
+        if item.start_node_id is None:
+            continue
+        holder = node_by_id.get(item.start_node_id)
+        if holder is None:
+            raise DMCompileError(f"Item '{item.id}' starts in unknown node '{item.start_node_id}'.")
+        if holder.type not in {"location", "junction", "container"}:
+            raise DMCompileError(f"Item '{item.id}' must start in a room or container.")
+        if item.subtype not in {"key", "puzzle"}:
+            raise DMCompileError(f"Item '{item.id}' uses unsupported subtype '{item.subtype}'.")
+def _validate_clues(world: WorldDefinition) -> None:
+    clue_sources: dict[str, list[str]] = defaultdict(list)
+    for node in world.nodes:
+        if isinstance(node, ReadableNode):
+            clue_sources[node.clue_id].append(node.id)
+        elif isinstance(node, NpcNode) and node.gives_clue_id:
+            clue_sources[node.gives_clue_id].append(node.id)
+    clue_ids = {clue.id for clue in world.clues}
+    if set(clue_sources) != clue_ids:
+        missing = sorted(clue_ids - set(clue_sources))
+        raise DMCompileError(f"Every clue needs exactly one source. Missing: {missing}")
+    for clue_id, source_ids in sorted(clue_sources.items()):
+        if len(source_ids) > 1:
+            raise DMCompileError(
+                f"Clue '{clue_id}' has multiple sources: {', '.join(sorted(source_ids))}."
+            )
+def _validate_visibility(world: WorldDefinition) -> None:
+    names: dict[str, str] = {}
+    for label in [node.label for node in world.nodes] + [item.label for item in world.items]:
+        safe = parser_safe_text(label)
+        if safe in names:
+            raise DMCompileError(
+                f"Visible labels '{label}' and '{names[safe]}' collapse to the same parser name '{safe}'."
+            )
+        names[safe] = label
+def _validate_answer_leaks(world: WorldDefinition) -> None:
+    answer = normalize_answer_text(world.meta.win_condition.answer_string)
+    forbidden = {f"the answer is {answer}", f"answer is {answer}", f"submit {answer}"}
+    text_fragments = [world.meta.title]
+    text_fragments.extend(clue.text for clue in world.clues)
+    for node in world.nodes:
+        text_fragments.extend([node.label, node.description])
+        if isinstance(node, ReadableNode):
+            text_fragments.append(node.text_content)
+    for text in text_fragments:
+        normalized = normalize_answer_text(text)
+        if any(phrase in normalized for phrase in forbidden):
+            raise DMCompileError("World leaks the final answer too directly. Clues must stay partial.")
+def _validate_guardian_path(world: WorldDefinition) -> None:
+    node_by_id = {node.id: node for node in world.nodes}
+    guardian = node_by_id[world.meta.win_condition.target_npc_id]
+    graph: dict[str, set[str]] = defaultdict(set)
+    for edge in world.edges:
+        if edge.type == "passage":
+            graph[edge.from_node_id].add(edge.to_node_id)
+    reachable = _reachable_rooms(graph, world.meta.start_node_id)
+    if guardian.parent_id not in reachable:
+        raise DMCompileError("Guardian room must be reachable from the start without item gates.")
+def _validate_clue_gates(world: WorldDefinition) -> None:
+    reachable = _reachable_zero_item_rooms(world)
+    hidden_readables = hidden_readable_ids(world)
+    for node in world.nodes:
+        if isinstance(node, ReadableNode):
+            if node.id in hidden_readables:
+                continue
+            if node.parent_id not in reachable:
+                continue
+            if node.requires_item_id:
+                continue
+            raise DMCompileError(
+                f"Readable '{node.id}' exposes clue '{node.clue_id}' without any item interaction."
+            )
+        if isinstance(node, NpcNode) and node.gives_clue_id and not node.requires_item_id:
+            raise DMCompileError(f"NPC '{node.id}' gives clue '{node.gives_clue_id}' without an item gate.")
+def _validate_item_usage(world: WorldDefinition) -> None:
+    quest_items: set[str] = set()
+    ordered = topological_linearize(world.quest_chain)
+    for action in (parse_quest_action(step.action) for step in ordered):
+        if isinstance(action, UnlockAction):
+            quest_items.add(action.key_id)
+        elif isinstance(action, (UseAction, GiveAction)):
+            quest_items.add(action.item_id)
+        elif isinstance(action, CombineAction):
+            quest_items.update({action.item_a_id, action.item_b_id})
+        elif isinstance(action, TakeAction):
+            quest_items.add(action.item_id)
+    mechanical_items = {
+        edge.required_item_id
+        for edge in world.edges
+        if edge.required_item_id
+    }
+    for node in world.nodes:
+        if node.type == "container" and node.lock_key_id:
+            mechanical_items.add(node.lock_key_id)
+        elif node.type == "door" and node.lock_key_id:
+            mechanical_items.add(node.lock_key_id)
+        elif node.type == "readable" and node.requires_item_id:
+            mechanical_items.add(node.requires_item_id)
+        elif node.type == "fixture":
+            mechanical_items.add(node.requires_item_id)
+            if node.reveals_item_id:
+                mechanical_items.add(node.reveals_item_id)
+        elif node.type == "npc":
+            if node.requires_item_id:
+                mechanical_items.add(node.requires_item_id)
+            if node.gives_item_id:
+                mechanical_items.add(node.gives_item_id)
+    for recipe in world.recipes:
+        mechanical_items.update(recipe.input_item_ids)
+        mechanical_items.add(recipe.output_item_id)
+    for item in world.items:
+        if item.id not in quest_items and item.id not in mechanical_items:
+            raise DMCompileError(f"Unused decorative items are not supported in v2: '{item.id}'.")
+def _validate_quest_shape(world: WorldDefinition) -> None:
+    ordered = topological_linearize(world.quest_chain)
+    parsed = [parse_quest_action(step.action) for step in ordered]
+    if not isinstance(parsed[-1], SubmitAction):
+        raise DMCompileError('The final quest step must be submit("answer").')
+    if len(parsed) < 2 or not isinstance(parsed[-2], TalkAction):
+        raise DMCompileError("The penultimate quest step must be talk(guardian).")
+    if parsed[-2].target_node_id != world.meta.win_condition.target_npc_id:
+        raise DMCompileError("The final talk step must target the guardian NPC.")
+    if normalize_answer_text(parsed[-1].answer_text) != normalize_answer_text(world.meta.win_condition.answer_string):
+        raise DMCompileError("The final submit step must match win_condition.answer_string.")
+    entity_names = {node.id: parser_safe_text(node.label) for node in world.nodes}
+    entity_names.update({item.id: parser_safe_text(item.label) for item in world.items})
+    simulate_walkthrough(world, parsed, entity_names)
+def _reachable_rooms(graph: dict[str, set[str]], start: str) -> set[str]:
+    seen = {start}
+    queue = deque([start])
+    while queue:
+        current = queue.popleft()
+        for nxt in graph.get(current, set()):
+            if nxt not in seen:
+                seen.add(nxt)
+                queue.append(nxt)
+    return seen
+def _reachable_zero_item_rooms(world: WorldDefinition) -> set[str]:
+    graph: dict[str, set[str]] = defaultdict(set)
+    for edge in world.edges:
+        if edge.type == "passage":
+            graph[edge.from_node_id].add(edge.to_node_id)
+    return _reachable_rooms(graph, world.meta.start_node_id)

agents/master/env.py ADDED Viewed

	@@ -0,0 +1,236 @@

+from __future__ import annotations
+import math
+import uuid
+from pathlib import Path
+from typing import Any
+from .base import DMCompileError, DMInterfaceError, MAX_STEP_MULTIPLIER, TARGET_RATIO, TARGET_RATIO_SIGMA
+from .build import WorldCompiler
+from .interface import InterfaceAdapter, SimpleInterfaceAdapter
+from .play import EpisodeRunner, WalkthroughRunner
+from .schema import (
+    CompiledWorld,
+    DMAction,
+    DMFeedback,
+    DMObservation,
+    DMRewardBreakdown,
+    DMState,
+    Turn,
+    WorldDefinition,
+)
+from .session import EpisodeSession
+from .snapshots import LiveObserver
+from agents.shared.openenv_compat import Environment, StepResult, build_step_result
+class DMEnvironment(Environment[DMAction, DMObservation, DMState]):
+    """Environment in which one action is a complete world definition.
+
+    ``step`` compiles the submitted world, plays one episode in it with a
+    runner, and returns a terminal observation whose reward is a Gaussian
+    score of how close the step ratio (steps taken / solver steps) came to
+    the target ratio.
+    """
+    def __init__(
+        self,
+        artifacts_root: Path | None = None,
+        target_ratio: float = TARGET_RATIO,
+        reward_sigma: float = TARGET_RATIO_SIGMA,
+        max_step_multiplier: int = MAX_STEP_MULTIPLIER,
+        interface_adapter: InterfaceAdapter = SimpleInterfaceAdapter(),
+        default_runner: EpisodeRunner | None = None,
+    ) -> None:
+        """Create the environment.
+
+        Args:
+            artifacts_root: Forwarded to ``WorldCompiler``.
+            target_ratio: Default target for steps taken / solver steps.
+            reward_sigma: Width of the Gaussian used for the quality score.
+            max_step_multiplier: Episode step budget as a multiple of the
+                solver policy length.
+            interface_adapter: Adapter handed to every ``EpisodeSession``.
+            default_runner: Runner used when ``step`` receives none; a
+                ``WalkthroughRunner`` is created when omitted.
+
+        Raises:
+            ValueError: If ``interface_adapter`` is ``None``.
+        """
+        # NOTE(review): the default adapter instance is built once, when the
+        # function is defined, and is therefore shared by every environment
+        # that relies on the default.
+        super().__init__()
+        if interface_adapter is None:
+            raise ValueError("interface_adapter must not be None.")
+        self.compiler = WorldCompiler(artifacts_root=artifacts_root)
+        self.target_ratio = target_ratio
+        self.reward_sigma = reward_sigma
+        self.max_step_multiplier = max_step_multiplier
+        self.interface_adapter = interface_adapter
+        self.default_runner = default_runner or WalkthroughRunner()
+        # Running totals used by _running_success_rate().
+        self.episode_count = 0
+        self.success_count = 0
+        self._state = DMState(
+            episode_id=uuid.uuid4().hex[:12],
+            target_ratio=target_ratio,
+        )
+        self.last_compiled_world: CompiledWorld | None = None
+    def reset(self, difficulty_hint: float | None = None, seed: int | None = None) -> DMObservation:
+        """Start a new episode and return the initial observation.
+
+        Args:
+            difficulty_hint: When given, used as this episode's target ratio
+                instead of the environment default.
+            seed: Accepted but ignored.
+
+        Returns:
+            An observation with ``done=False`` and no reward.
+        """
+        del seed
+        episode_target_ratio = self.target_ratio if difficulty_hint is None else difficulty_hint
+        self._state = DMState(
+            episode_id=uuid.uuid4().hex[:12],
+            compile_status="pending",
+            episode_status="running",
+            cumulative_success_rate=self._running_success_rate(),
+            target_ratio=episode_target_ratio,
+            difficulty_hint=difficulty_hint,
+        )
+        self.last_compiled_world = None
+        return self._apply_transform(
+            DMObservation(
+                done=False,
+                reward=None,
+                target_ratio_used=episode_target_ratio,
+            )
+        )
+    def step(  # type: ignore[override]
+        self,
+        action: DMAction | WorldDefinition | dict[str, Any],
+        runner: EpisodeRunner | None = None,
+        observer: LiveObserver | None = None,
+        timeout_s: float | None = None,
+    ) -> StepResult[DMObservation]:
+        """Compile the submitted world, play one episode, and score it.
+
+        Args:
+            action: A ``DMAction`` (its ``world_definition`` is used) or a
+                world definition passed directly.
+            runner: Runner for this episode; falls back to the default runner.
+            observer: Optional observer notified of run start, compile
+                success, each turn, completion, and errors.
+            timeout_s: Accepted but ignored.
+
+        Returns:
+            A step result wrapping a terminal (``done=True``) observation.
+            ``DMCompileError``, ``DMInterfaceError`` and ``ValueError`` are
+            converted into a zero-reward observation carrying the error text.
+        """
+        del timeout_s
+        world_input = action.world_definition if isinstance(action, DMAction) else action
+        compiled: CompiledWorld | None = None
+        session: EpisodeSession | None = None
+        if observer is not None:
+            observer.on_run_start(self._state.episode_id, world_input)
+        # Clear any world left over from a previous step before compiling.
+        self.last_compiled_world = None
+        self._state.current_world = None
+        try:
+            compiled = self.compiler.compile(world_input, episode_id=self._state.episode_id)
+            self.last_compiled_world = compiled
+            self._state.current_world = compiled.world
+            self._state.compile_status = "valid"
+            # Step budget scales with the solver's walkthrough length, with a floor of 1.
+            max_steps = max(1, len(compiled.solver_policy) * self.max_step_multiplier)
+            def on_turn(current_session: EpisodeSession, turn: Turn) -> None:
+                # Mirror the live step count into the state, then forward the turn.
+                self._state.step_count = current_session.steps_taken
+                if observer is not None:
+                    observer.on_turn(current_session, turn)
+            session = EpisodeSession(
+                compiled,
+                interface_adapter=self.interface_adapter,
+                turn_listener=on_turn,
+            )
+            if observer is not None:
+                observer.on_compile_success(compiled, session)
+            (runner or self.default_runner).run(session, max_steps=max_steps)
+            player_won = bool(session.player_won)
+            min_steps = len(compiled.solver_policy)
+            reward_breakdown = self._reward_breakdown(player_won, session.steps_taken, min_steps)
+            reward = reward_breakdown.reward
+            # Only episodes that ran to the end are counted; the error path
+            # below leaves both counters untouched.
+            self.episode_count += 1
+            self.success_count += int(player_won)
+            self._state.step_count = session.steps_taken
+            self._state.episode_status = "complete" if player_won else "failed"
+            self._state.cumulative_success_rate = self._running_success_rate()
+            observation = self._apply_transform(
+                DMObservation(
+                    episode_transcript=session.transcript,
+                    player_won=player_won,
+                    steps_taken=session.steps_taken,
+                    min_steps=min_steps,
+                    ratio=(session.steps_taken / min_steps) if min_steps else None,
+                    reward=reward,
+                    done=True,
+                    feedback=self._build_feedback(compiled, session),
+                    reward_breakdown=reward_breakdown,
+                    target_ratio_used=self._state.target_ratio,
+                )
+            )
+            if observer is not None:
+                observer.on_complete(compiled, session, observation)
+            return build_step_result(observation)
+        except (DMCompileError, DMInterfaceError, ValueError) as exc:
+            # The try block also covers the episode run, so an error raised
+            # while playing is reported through the same "invalid" path.
+            self.last_compiled_world = None
+            self._state.current_world = None
+            self._state.compile_status = "invalid"
+            self._state.episode_status = "failed"
+            if observer is not None:
+                observer.on_error(
+                    episode_id=self._state.episode_id,
+                    error=str(exc),
+                    world_input=world_input,
+                    compiled=compiled,
+                    session=session,
+                )
+            observation = self._apply_transform(
+                DMObservation(
+                    player_won=False,
+                    compile_error=str(exc),
+                    reward=0.0,
+                    done=True,
+                    reward_breakdown=DMRewardBreakdown(
+                        reward_mode="compile_failure_penalty",
+                        player_won=False,
+                        target_ratio=self._state.target_ratio,
+                        quality_score=0.0,
+                        reward=0.0,
+                    ),
+                    target_ratio_used=self._state.target_ratio,
+                )
+            )
+            return build_step_result(observation)
+        finally:
+            # Close the session on every exit path once it has been created.
+            if session is not None:
+                session.close()
+    def compile_world(
+        self,
+        world_input: WorldDefinition | dict[str, Any],
+        *,
+        episode_id: str | None = None,
+    ) -> CompiledWorld:
+        """Compile a world with this environment's compiler without playing it."""
+        return self.compiler.compile(world_input, episode_id=episode_id)
+    def play(
+        self,
+        world_input: WorldDefinition | dict[str, Any],
+        runner: EpisodeRunner | None = None,
+        observer: LiveObserver | None = None,
+    ) -> StepResult[DMObservation]:
+        """Reset the environment and immediately step with ``world_input``."""
+        self.reset()
+        return self.step(world_input, runner=runner, observer=observer)
+    @property
+    def state(self) -> DMState:
+        """The current episode state object."""
+        return self._state
+    def _reward_breakdown(
+        self,
+        player_won: bool,
+        steps_taken: int | None,
+        min_steps: int | None,
+    ) -> DMRewardBreakdown:
+        """Compute the reward and its diagnostic components.
+
+        The ratio fields are filled only when both step counts are known and
+        ``min_steps`` is positive. The quality score is a Gaussian of the
+        clamped ratio around the episode's target ratio and is computed only
+        for a win; the reward equals the quality score on a win and 0.0
+        otherwise.
+        """
+        raw_ratio: float | None = None
+        clamped_ratio: float | None = None
+        target_ratio_delta: float | None = None
+        efficiency_score: float | None = None
+        quality_score = 0.0
+        if steps_taken is not None and min_steps is not None and min_steps > 0:
+            raw_ratio = steps_taken / min_steps
+            # Ratios below 1.0 are treated as 1.0.
+            clamped_ratio = max(raw_ratio, 1.0)
+            target_ratio_delta = abs(clamped_ratio - self._state.target_ratio)
+            if player_won and steps_taken > 0:
+                efficiency_score = min(1.0, min_steps / steps_taken)
+                # Floor sigma to avoid dividing by zero.
+                sigma_sq = max(self.reward_sigma, 1e-6) ** 2
+                quality_score = math.exp(-((clamped_ratio - self._state.target_ratio) ** 2) / (2.0 * sigma_sq))
+        reward = quality_score if player_won else 0.0
+        return DMRewardBreakdown(
+            reward_mode="gaussian_target_ratio",
+            player_won=player_won,
+            raw_ratio=raw_ratio,
+            clamped_ratio=clamped_ratio,
+            target_ratio=self._state.target_ratio,
+            target_ratio_delta=target_ratio_delta,
+            efficiency_score=efficiency_score,
+            quality_score=quality_score,
+            reward=reward,
+        )
+    def _build_feedback(self, compiled: CompiledWorld, session: EpisodeSession) -> DMFeedback:
+        """Summarize what the player did not visit, use, or discover."""
+        room_ids = [node.id for node in compiled.world.nodes if node.type in {"location", "junction"}]
+        clue_ids = [clue.id for clue in compiled.world.clues]
+        unique_rooms = [node_id for node_id in session.visited_nodes if node_id in room_ids]
+        return DMFeedback(
+            # Rooms the session never visited are reported as "unreachable".
+            unreachable_nodes=sorted(set(room_ids) - set(unique_rooms)),
+            unused_items=sorted({item.id for item in compiled.world.items} - session.used_items),
+            clues_missed=sorted(set(clue_ids) - session.discovered_clues),
+            mean_steps_per_room=session.steps_taken / max(1, len(set(unique_rooms))),
+            invalid_command_count=session.invalid_command_count,
+            wrong_submit_count=session.wrong_submit_count,
+        )
+    def _running_success_rate(self) -> float:
+        """Fraction of counted episodes that were won; 0.0 before any episode."""
+        return 0.0 if self.episode_count == 0 else self.success_count / self.episode_count

agents/master/graph.py ADDED Viewed

	@@ -0,0 +1,87 @@

+from __future__ import annotations
+from collections import defaultdict
+from .schema import DoorNode, Edge, NpcTrade, ReadableNode, UseEffect, WorldDefinition
+def readable_clue_mapping(world: WorldDefinition) -> dict[str, str]:
+    return {node.id: node.clue_id for node in world.nodes if isinstance(node, ReadableNode)}
+def clue_source_mapping(world: WorldDefinition) -> dict[str, str]:
+    mapping = {node.clue_id: node.id for node in world.nodes if isinstance(node, ReadableNode)}
+    for node in world.nodes:
+        if node.type == "npc" and node.gives_clue_id:
+            mapping[node.gives_clue_id] = node.id
+    return mapping
+def npc_trade_mapping(world: WorldDefinition) -> dict[str, NpcTrade]:
+    trades: dict[str, NpcTrade] = {}
+    for node in world.nodes:
+        if node.type != "npc" or node.id == world.meta.win_condition.target_npc_id:
+            continue
+        trades[node.id] = NpcTrade(
+            required_item_id=node.requires_item_id or "",
+            gives_item_id=node.gives_item_id,
+            gives_clue_id=node.gives_clue_id,
+        )
+    return trades
+def use_effect_mapping(world: WorldDefinition) -> dict[str, UseEffect]:
+    effects: dict[str, UseEffect] = {}
+    for node in world.nodes:
+        if node.type == "readable" and node.requires_item_id:
+            effects[node.id] = UseEffect(
+                required_item_id=node.requires_item_id,
+                clue_id=node.clue_id,
+                consumes_item=node.consumes_item,
+            )
+        elif node.type == "fixture":
+            effects[node.id] = UseEffect(
+                required_item_id=node.requires_item_id,
+                reveals_item_id=node.reveals_item_id,
+                reveals_readable_id=node.reveals_readable_id,
+                consumes_item=node.consumes_item,
+            )
+    return effects
+def recipe_mapping(world: WorldDefinition) -> dict[frozenset[str], str]:
+    return {frozenset(recipe.input_item_ids): recipe.output_item_id for recipe in world.recipes}
+def produced_item_ids(world: WorldDefinition) -> set[str]:
+    produced = {recipe.output_item_id for recipe in world.recipes}
+    for node in world.nodes:
+        if node.type == "npc" and node.gives_item_id:
+            produced.add(node.gives_item_id)
+        if node.type == "fixture" and node.reveals_item_id:
+            produced.add(node.reveals_item_id)
+    return produced
+def hidden_readable_ids(world: WorldDefinition) -> set[str]:
+    return {node.reveals_readable_id for node in world.nodes if node.type == "fixture" and node.reveals_readable_id}
+def door_room_mapping(world: WorldDefinition) -> dict[str, frozenset[str]]:
+    mapping: dict[str, set[str]] = defaultdict(set)
+    for edge in world.edges:
+        if edge.door_node_id:
+            mapping[edge.door_node_id].add(edge.from_node_id)
+            mapping[edge.door_node_id].add(edge.to_node_id)
+    return {door_id: frozenset(rooms) for door_id, rooms in mapping.items()}
+def edge_for_door(world: WorldDefinition, door_id: str) -> Edge | None:
+    for edge in world.edges:
+        if edge.door_node_id == door_id:
+            return edge
+    return None
+def door_nodes(world: WorldDefinition) -> dict[str, DoorNode]:
+    return {node.id: node for node in world.nodes if isinstance(node, DoorNode)}

agents/master/interface.py ADDED Viewed

	@@ -0,0 +1,831 @@

+from __future__ import annotations
+import json
+import os
+import re
+from dataclasses import dataclass
+from pathlib import Path
+from typing import TYPE_CHECKING, Literal, Protocol
+from dotenv import load_dotenv
+from google import genai
+from google.genai import types
+from textworld.core import GameState
+from agents.hero.cli import parse_cli_command
+from .base import DMInterfaceError, SUPPORTED_DIRECTIONS
+if TYPE_CHECKING:
+    from .session import EpisodeSession
+DEFAULT_GEMINI_MODEL = "gemini-2.5-flash-lite"
+_TEXTWORLD_PROMPT_LINE_RE = re.compile(r"^\s*>\s.*-\=\s.*=\-(?:\d+/\d+)?\s*$")
+_TEXTWORLD_BANNER_CHAR_RE = re.compile(r"[\\|$_/]")
+_TEXTWORLD_ROOM_HEADER_RE = re.compile(r"^\s*-\=\s*(?P<label>.+?)\s*\=-\s*$")
+_TEXTWORLD_META_LINE_RE = re.compile(r"^\s*(?:score:|moves:|available commands:|type 'help')", re.IGNORECASE)
+class InterfaceAdapter(Protocol):
+    """Structural interface between raw player text and an episode session.
+
+    Any object providing these two methods can be used as an adapter.
+    """
+    def translate_command(self, raw_command: str, session: EpisodeSession) -> str:
+        """Turn the player's raw text into the command string to execute."""
+        ...
+    def render_observation(self, feedback: str, state: GameState | None, session: EpisodeSession) -> str:
+        """Turn feedback text into the observation text returned to the player."""
+        ...
+class SimpleInterfaceAdapter:
+    """A deterministic parser for explicit non-LLM play."""
+    _ARTICLE_RE = re.compile(r"\b(the|a|an)\b", re.IGNORECASE)
+    def translate_command(self, raw_command: str, session: EpisodeSession) -> str:
+        command = raw_command.strip()
+        lowered = command.lower()
+        if lowered in SUPPORTED_DIRECTIONS:
+            return "go " + lowered
+        if lowered in {"look", "look around"}:
+            return "look"
+        if lowered in {"inventory", "check inventory", "show inventory"}:
+            return "inventory"
+        if lowered in {"wait", "pass"}:
+            return "wait"
+        if lowered.startswith("answer "):
+            return "submit " + command[7:].strip()
+        if lowered.startswith("say "):
+            return "submit " + command[4:].strip().strip("\"'")
+        if lowered.startswith("talk to "):
+            return "talk " + command[8:].strip()
+        if lowered.startswith("speak to "):
+            return "talk " + command[9:].strip()
+        if lowered.startswith("use ") and " on " in lowered:
+            item_text, target_text = re.split(r"\s+on\s+", command[4:].strip(), maxsplit=1, flags=re.IGNORECASE)
+            return "use " + self._normalize_object_text(item_text) + " on " + self._normalize_object_text(target_text)
+        if lowered.startswith("give ") and " to " in lowered:
+            item_text, target_text = re.split(r"\s+to\s+", command[5:].strip(), maxsplit=1, flags=re.IGNORECASE)
+            return "give " + self._normalize_object_text(item_text) + " to " + self._normalize_object_text(target_text)
+        if lowered.startswith("combine ") and " with " in lowered:
+            item_a, item_b = re.split(r"\s+with\s+", command[8:].strip(), maxsplit=1, flags=re.IGNORECASE)
+            return "combine " + self._normalize_object_text(item_a) + " with " + self._normalize_object_text(item_b)
+        if lowered.startswith("combine ") and " and " in lowered:
+            item_a, item_b = re.split(r"\s+and\s+", command[8:].strip(), maxsplit=1, flags=re.IGNORECASE)
+            return "combine " + self._normalize_object_text(item_a) + " with " + self._normalize_object_text(item_b)
+        parts = command.split(maxsplit=1)
+        if len(parts) != 2:
+            return lowered
+        verb = parts[0].lower()
+        if verb not in {"read", "talk", "open", "take", "unlock", "examine"}:
+            return lowered
+        normalized = self._normalize_object_text(parts[1])
+        if verb == "examine":
+            if session.node_id_for_command_name(normalized, node_types={"readable"}):
+                return "read " + normalized
+            if session.node_id_for_command_name(normalized, node_types={"npc"}):
+                return "talk " + normalized
+        return verb + " " + normalized
+    def _normalize_object_text(self, text: str) -> str:
+        object_text = self._ARTICLE_RE.sub(" ", text)
+        return re.sub(r"\s+", " ", object_text).strip().lower()
+    def render_observation(self, feedback: str, state: GameState | None, session: EpisodeSession) -> str:
+        del state
+        return enrich_feedback_text(sanitize_feedback_text(feedback), session)
+class StrictCliInterfaceAdapter:
+    """A deterministic adapter for parser-style CLI commands."""
+    def translate_command(self, raw_command: str, session: EpisodeSession) -> str:
+        del session
+        parsed = parse_cli_command(raw_command)
+        if not parsed.valid or parsed.normalized_command is None:
+            raise DMInterfaceError(parsed.error or "Command does not match the strict CLI grammar.")
+        return parsed.normalized_command
+    def render_observation(self, feedback: str, state: GameState | None, session: EpisodeSession) -> str:
+        del state
+        return enrich_feedback_text(sanitize_feedback_text(feedback), session)
+@dataclass(frozen=True)
+class _TranslationGlossary:
+    """Two-way alias table between canonical names and translated aliases."""
+    # canonical name -> alias
+    canonical_to_alias: dict[str, str]
+    # alias -> canonical name
+    alias_to_canonical: dict[str, str]
+class GeminiInterfaceAdapter:
+    """Interface adapter that uses a Gemini model to translate commands and observations."""
+    _ARTICLE_RE = re.compile(r"\b(the|a|an)\b", re.IGNORECASE)
+    _PARSER_SAFE_NAME_RE = re.compile(r"^[a-z0-9]+(?: [a-z0-9]+)*$")
+    _TRAILING_POLITENESS_RE = re.compile(r"(?:\s+(?:please|for me|thanks|thank you))+[.!?]*$", re.IGNORECASE)
+    # System instruction for command translation when no translation mode is active.
+    _COMMAND_SYSTEM = (
+        "Translate the player's text into exactly one canonical dungeon command. "
+        "Return only the command and nothing else."
+    )
+    # System instruction for narrating observations when no translation mode is active.
+    _OBSERVATION_SYSTEM = (
+        "Rewrite dungeon feedback in at most two short sentences. "
+        "Preserve facts exactly. Do not infer, solve, explain, or add implications."
+    )
+    # System instruction for command translation when translation is enabled.
+    _TRANSLATED_COMMAND_SYSTEM = (
+        "The player is using a corporate app metaphor layered over a fantasy dungeon. "
+        "Translate the player's text back into exactly one canonical dungeon command from the underlying fantasy world. "
+        "Return only the canonical command and nothing else."
+    )
+    # System instruction for rendering observations when translation is enabled.
+    _TRANSLATED_OBSERVATION_SYSTEM = (
+        "Rewrite the dungeon observation as a corporate app interface while preserving facts one-to-one. "
+        "Use the provided aliases exactly, keep directions unchanged, and do not add hints, solutions, or new mechanics."
+    )
+    # System instruction for generating the alias glossary.
+    _TRANSLATION_GLOSSARY_SYSTEM = (
+        "Create a one-to-one alias glossary that maps fantasy dungeon terms into a corporate app metaphor. "
+        "Return JSON only."
+    )
+    def __init__(
+        self,
+        api_key: str | None = None,
+        model: str = DEFAULT_GEMINI_MODEL,
+        narrate_observations: bool = False,
+        translation_mode: Literal["none", "corporate_app"] = "none",
+        max_admissible_commands: int = 18,
+    ) -> None:
+        if translation_mode not in {"none", "corporate_app"}:
+            raise ValueError(f"Unsupported Gemini translation mode: {translation_mode}")
+        self.model = model
+        self.narrate_observations = narrate_observations
+        self.translation_mode = translation_mode
+        self.max_admissible_commands = max_admissible_commands
+        self._client = self._create_client(api_key)
+        self._translation_glossary_cache: dict[str, _TranslationGlossary] = {}
+        self._translation_observation_cache: dict[tuple[str, str], str] = {}
+    def translate_command(self, raw_command: str, session: EpisodeSession) -> str:
+        lowered = raw_command.strip().lower()
+        if not lowered:
+            raise DMInterfaceError("Command must not be empty.")
+        admissible = set(session.available_commands())
+        direct = self._normalize_generated_command(self._preprocess_player_text(lowered))
+        if resolved := self._resolve_candidate_command(direct, session, admissible):
+            return resolved
+        movement = self._extract_direction_command(lowered, admissible)
+        if movement is not None:
+            return movement
+        prompt = self._command_prompt(raw_command, session, admissible)
+        generated = self._generate_command(
+            system_instruction=self._TRANSLATED_COMMAND_SYSTEM if self._translation_enabled() else self._COMMAND_SYSTEM,
+            prompt=prompt,
+            max_output_tokens=48,
+            temperature=0.1,
+        )
+        if resolved := self._resolve_candidate_command(generated, session, admissible):
+            return resolved
+        raise DMInterfaceError(f"Gemini returned an invalid command: {generated or '<empty>'}")
+    def render_observation(self, feedback: str, state: GameState | None, session: EpisodeSession) -> str:
+        sanitized = sanitize_feedback_text(feedback)
+        enriched = enrich_feedback_text(sanitized, session)
+        if not sanitized:
+            return enriched
+        if self._translation_enabled():
+            cache_key = (self._translation_cache_key(session), enriched)
+            cached = self._translation_observation_cache.get(cache_key)
+            if cached is not None:
+                return cached
+            prompt = self._observation_prompt(enriched, session)
+            generated = self._generate_observation(
+                system_instruction=self._TRANSLATED_OBSERVATION_SYSTEM,
+                prompt=prompt,
+                max_output_tokens=220 if not self.narrate_observations else 120,
+                temperature=0.2,
+            )
+            if not generated:
+                raise DMInterfaceError("Gemini returned an empty translated observation.")
+            self._translation_observation_cache[cache_key] = generated
+            return generated
+        if not self.narrate_observations:
+            return enriched
+        if self._should_preserve_feedback(sanitized, state):
+            return enriched
+        prompt = self._observation_prompt(sanitized, session)
+        generated = self._generate_observation(
+            system_instruction=self._OBSERVATION_SYSTEM,
+            prompt=prompt,
+            max_output_tokens=80,
+            temperature=0.2,
+        )
+        if not generated:
+            raise DMInterfaceError("Gemini returned an empty observation.")
+        return enrich_feedback_text(generated, session)
+    def _create_client(self, api_key: str | None) -> genai.Client:
+        load_dotenv(self._repo_root() / ".env", override=False)
+        key = api_key or os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
+        if not key:
+            raise DMInterfaceError("Missing GEMINI_API_KEY or GOOGLE_API_KEY.")
+        return genai.Client(api_key=key)
+    @staticmethod
+    def _repo_root() -> Path:
+        return Path(__file__).resolve().parents[2]
+    def _command_prompt(self, raw_command: str, session: EpisodeSession, admissible: set[str]) -> str:
+        commands = sorted(admissible)[: self.max_admissible_commands]
+        interactables = self._interactables(session)
+        current_room = session.state.location or session.current_room_id
+        lines: list[str] = []
+        if self._translation_enabled():
+            glossary = self._translation_glossary(session)
+            lines.extend(
+                [
+                    "The player only sees the translated corporate-app interface.",
+                    "Map their request back to the underlying dungeon command.",
+                    "Treat rooms as apps/workspaces, NPCs as coworkers or reviewers, and items as files, tools, credentials, or tickets.",
+                    "Translated aliases (alias => canonical):",
+                    *[f"- {alias} => {canonical}" for alias, canonical in sorted(glossary.alias_to_canonical.items())],
+                ]
+            )
+        lines.extend(
+            [
+                "Use an exact visible command whenever possible.",
+                "Allowed verbs: go, open, unlock, take, read, use, combine, give, talk, submit, look, inventory, wait",
+                f"Room: {current_room}",
+                "Visible commands:",
+                *[f"- {command}" for command in commands],
+            ]
+        )
+        if interactables:
+            lines.append(f"Objects here: {', '.join(interactables)}")
+        lines.append("If the player is answering the guardian, use: submit <answer>")
+        lines.append("If no valid mapping exists, return INVALID")
+        lines.append(f"Player text: {raw_command.strip()}")
+        return "\n".join(lines)
+    def _observation_prompt(self, feedback: str, session: EpisodeSession) -> str:
+        current_room = session.state.location or session.current_room_id
+        if self._translation_enabled():
+            glossary = self._translation_glossary(session)
+            lines = [
+                f"Canonical room: {current_room}",
+                "Use this exact alias glossary (canonical => alias):",
+                *[f"- {canonical} => {alias}" for canonical, alias in sorted(glossary.canonical_to_alias.items())],
+                "Preserve the same facts, object counts, and navigation affordances.",
+                "Keep any 'Visible here:' and 'Exits:' sections, but rewrite the entity names with the aliases above.",
+            ]
+            if self.narrate_observations:
+                lines.append("Keep the response compact.")
+            lines.append("Canonical observation:")
+            lines.append(feedback)
+            return "\n".join(lines)
+        return (
+            f"Room: {current_room}\n"
+            "Describe only what the game text explicitly says.\n"
+            "Never reveal what a clue means or what answer it implies.\n"
+            f"Feedback: {feedback}"
+        )
+    def _translation_glossary_prompt(self, session: EpisodeSession) -> str:
+        lines = [
+            "Return JSON with shape: {\"aliases\": [{\"source\": \"...\", \"alias\": \"...\"}]}",
+            "Rules:",
+            "- Every alias must be unique.",
+            "- Use lowercase letters, numbers, and spaces only.",
+            "- Do not use articles like a, an, or the.",
+            "- Keep aliases short and parser-safe.",
+            "- Rooms should feel like apps, dashboards, workspaces, portals, or queues.",
+            "- NPCs should feel like coworkers, reviewers, owners, admins, or operators.",
+            "- Items should feel like files, tickets, tokens, credentials, tools, or documents.",
+            "- Preserve identity one-to-one. Do not merge multiple source terms into one alias.",
+            "Terms:",
+        ]
+        for kind, source in self._translation_terms(session):
+            lines.append(f"- {kind}: {source}")
+        return "\n".join(lines)
+    def _interactables(self, session: EpisodeSession) -> list[str]:
+        names: list[str] = []
+        for node in session.compiled.world.nodes:
+            if getattr(node, "parent_id", None) != session.current_room_id:
+                continue
+            safe_name = session.compiled.node_command_names.get(node.id)
+            if safe_name is not None and node.type in {"container", "readable", "npc", "door", "fixture"}:
+                names.append(safe_name)
+        return sorted(names)[:8]
+    def _generate_response(
+        self,
+        *,
+        system_instruction: str,
+        prompt: str,
+        max_output_tokens: int,
+        temperature: float,
+    ) -> str:
+        response = self._client.models.generate_content(
+            model=self.model,
+            contents=f"{system_instruction}\n\n{prompt}",
+            config=types.GenerateContentConfig(
+                temperature=temperature,
+                max_output_tokens=max_output_tokens,
+                candidate_count=1,
+            ),
+        )
+        return getattr(response, "text", "") or ""
+    def _generate_command(
+        self,
+        *,
+        system_instruction: str,
+        prompt: str,
+        max_output_tokens: int,
+        temperature: float,
+    ) -> str:
+        return self._sanitize_command_response(
+            self._generate_response(
+                system_instruction=system_instruction,
+                prompt=prompt,
+                max_output_tokens=max_output_tokens,
+                temperature=temperature,
+            )
+        )
+    def _generate_observation(
+        self,
+        *,
+        system_instruction: str,
+        prompt: str,
+        max_output_tokens: int,
+        temperature: float,
+    ) -> str:
+        return self._sanitize_multiline_response(
+            self._generate_response(
+                system_instruction=system_instruction,
+                prompt=prompt,
+                max_output_tokens=max_output_tokens,
+                temperature=temperature,
+            )
+        )
+    def _generate_json(
+        self,
+        *,
+        system_instruction: str,
+        prompt: str,
+        max_output_tokens: int,
+        temperature: float,
+    ) -> str:
+        return self._sanitize_json_response(
+            self._generate_response(
+                system_instruction=system_instruction,
+                prompt=prompt,
+                max_output_tokens=max_output_tokens,
+                temperature=temperature,
+            )
+        )
+    def _resolve_candidate_command(
+        self,
+        candidate: str,
+        session: EpisodeSession,
+        admissible: set[str],
+    ) -> str | None:
+        for option in self._candidate_variants(candidate, session):
+            if not option:
+                continue
+            if option == "invalid":
+                continue
+            if resolved := self._resolve_admissible_command(option, admissible):
+                return resolved
+            if self._allow_unlisted_canonical(option):
+                return option
+        return None
+    def _candidate_variants(self, candidate: str, session: EpisodeSession) -> list[str]:
+        variants = [self._normalize_generated_command(candidate)]
+        if self._translation_enabled():
+            canonicalized = self._canonicalize_translated_command(variants[0], session)
+            if canonicalized not in variants:
+                variants.insert(0, canonicalized)
+        return variants
+    def _canonicalize_translated_command(self, command: str, session: EpisodeSession) -> str:
+        glossary = self._translation_glossary(session)
+        rewritten = command
+        for alias, canonical in sorted(glossary.alias_to_canonical.items(), key=lambda item: (-len(item[0]), item[0])):
+            rewritten = re.sub(
+                rf"(?<![a-z0-9]){re.escape(alias)}(?![a-z0-9])",
+                canonical,
+                rewritten,
+            )
+        return self._normalize_generated_command(rewritten)
+    def _translation_glossary(self, session: EpisodeSession) -> _TranslationGlossary:
+        cache_key = self._translation_cache_key(session)
+        cached = self._translation_glossary_cache.get(cache_key)
+        if cached is not None:
+            return cached
+        terms = self._translation_terms(session)
+        generated = self._generate_json(
+            system_instruction=self._TRANSLATION_GLOSSARY_SYSTEM,
+            prompt=self._translation_glossary_prompt(session),
+            max_output_tokens=700,
+            temperature=0.2,
+        )
+        glossary = self._parse_translation_glossary(generated, terms)
+        self._translation_glossary_cache[cache_key] = glossary
+        return glossary
+    def _parse_translation_glossary(
+        self,
+        payload: str,
+        terms: list[tuple[str, str]],
+    ) -> _TranslationGlossary:
+        """Parse Gemini's alias JSON into a two-way glossary covering every term.
+
+        Args:
+            payload: JSON text, either an object with an ``aliases`` entry (a
+                mapping or a list of ``{"source", "alias"}`` objects) or a bare
+                source-to-alias mapping.
+            terms: ``(kind, source)`` pairs that must each receive an alias.
+
+        Raises:
+            DMInterfaceError: If the payload is not valid JSON or yields no aliases.
+        """
+        try:
+            data = json.loads(payload)
+        except json.JSONDecodeError as exc:
+            raise DMInterfaceError("Gemini returned invalid translation glossary JSON.") from exc
+        raw_aliases: dict[str, str] = {}
+        if isinstance(data, dict):
+            # Without an "aliases" key the whole object is treated as the mapping.
+            aliases = data.get("aliases", data)
+            if isinstance(aliases, dict):
+                raw_aliases = {
+                    self._normalize_object_text(str(source)): str(alias)
+                    for source, alias in aliases.items()
+                    if isinstance(source, str)
+                }
+            elif isinstance(aliases, list):
+                for entry in aliases:
+                    # Malformed list entries are skipped rather than rejected.
+                    if not isinstance(entry, dict):
+                        continue
+                    source = entry.get("source")
+                    alias = entry.get("alias")
+                    if isinstance(source, str) and isinstance(alias, str):
+                        raw_aliases[self._normalize_object_text(source)] = alias
+        if not raw_aliases:
+            raise DMInterfaceError("Gemini returned an empty translation glossary.")
+        canonical_to_alias: dict[str, str] = {}
+        alias_to_canonical: dict[str, str] = {}
+        used_aliases: set[str] = set()
+        # Iterate over the expected terms, not the reply, so every term gets an
+        # alias even when Gemini omitted it or returned an unusable one.
+        for _kind, source in terms:
+            requested_alias = self._normalize_parser_safe_alias(raw_aliases.get(source, ""))
+            alias = self._dedupe_alias(source, requested_alias, used_aliases)
+            canonical_to_alias[source] = alias
+            alias_to_canonical[alias] = source
+            used_aliases.add(alias)
+        return _TranslationGlossary(
+            canonical_to_alias=canonical_to_alias,
+            alias_to_canonical=alias_to_canonical,
+        )
+    def _translation_terms(self, session: EpisodeSession) -> list[tuple[str, str]]:
+        terms: list[tuple[str, str]] = []
+        seen: set[str] = set()
+        for node in session.compiled.world.nodes:
+            source = session.compiled.node_command_names.get(node.id)
+            if source is None or source in seen:
+                continue
+            kind = "room" if node.type in {"location", "junction"} else node.type
+            seen.add(source)
+            terms.append((kind, source))
+        for item in session.compiled.world.items:
+            source = session.compiled.item_command_names.get(item.id)
+            if source is None or source in seen:
+                continue
+            seen.add(source)
+            terms.append(("item", source))
+        answer = session.compiled.correct_answer_normalized
+        if answer and answer not in seen:
+            terms.append(("answer", answer))
+        return sorted(terms, key=lambda item: (item[0], item[1]))
+    def _dedupe_alias(self, source: str, alias: str, used_aliases: set[str]) -> str:
+        for candidate in (alias, source):
+            if candidate and candidate not in used_aliases:
+                return candidate
+        suffix = 2
+        while True:
+            candidate = f"{source} {suffix}"
+            if candidate not in used_aliases and self._PARSER_SAFE_NAME_RE.fullmatch(candidate):
+                return candidate
+            suffix += 1
+    def _normalize_parser_safe_alias(self, value: str) -> str:
+        alias = self._normalize_object_text(value)
+        if not alias or not self._PARSER_SAFE_NAME_RE.fullmatch(alias):
+            return ""
+        return alias
+    def _translation_cache_key(self, session: EpisodeSession) -> str:
+        episode_id = getattr(session.compiled, "episode_id", "") or "session"
+        return f"{episode_id}:{session.compiled.game_file}"
+    def _translation_enabled(self) -> bool:
+        return self.translation_mode != "none"
+    @classmethod
+    def _preprocess_player_text(cls, text: str) -> str:
+        normalized = re.sub(r"\s+", " ", text.strip().lower())
+        replacements = (
+            ("pick up ", "take "),
+            ("grab ", "take "),
+            ("using ", "with "),
+            ("talk to ", "talk "),
+            ("speak to ", "talk "),
+        )
+        for source, target in replacements:
+            normalized = normalized.replace(source, target)
+        prefixes = (
+            "please ",
+            "please, ",
+            "can you ",
+            "could you ",
+            "would you ",
+            "will you ",
+            "go ahead and ",
+            "i want to ",
+            "i'd like to ",
+            "try to ",
+        )
+        stripped = True
+        while stripped:
+            stripped = False
+            for prefix in prefixes:
+                if normalized.startswith(prefix):
+                    normalized = normalized[len(prefix) :].strip()
+                    stripped = True
+        normalized = cls._TRAILING_POLITENESS_RE.sub("", normalized).strip()
+        return normalized
+    @staticmethod
+    def _extract_direction_command(text: str, admissible: set[str]) -> str | None:
+        directions = [direction for direction in SUPPORTED_DIRECTIONS if re.search(rf"\b{direction}\b", text)]
+        if len(directions) != 1:
+            return None
+        if not re.search(r"\b(go|head|move|walk|run|travel|enter|step)\b", text):
+            return None
+        candidate = f"go {directions[0]}"
+        return candidate if candidate in admissible else None
+    @staticmethod
+    def _allow_unlisted_canonical(command: str) -> bool:
+        return GeminiInterfaceAdapter._is_canonical_command(command) and not GeminiInterfaceAdapter._contains_conversational_fluff(command)
+    @staticmethod
+    def _contains_conversational_fluff(command: str) -> bool:
+        return bool(
+            re.search(
+                r"\b(for me|please|thanks|thank you|could you|can you|would you|will you)\b",
+                command,
+            )
+        )
+    @staticmethod
+    def _normalize_generated_command(text: str) -> str:
+        normalized = re.sub(r"\s+", " ", text.strip().lower())
+        normalized = normalized.removeprefix("command: ").removeprefix("response: ").strip()
+        normalized = normalized.rstrip(".!?")
+        if normalized in SUPPORTED_DIRECTIONS:
+            return "go " + normalized
+        if normalized.startswith("talk to "):
+            return "talk " + GeminiInterfaceAdapter._normalize_object_text(normalized[8:].strip())
+        if normalized.startswith("speak to "):
+            return "talk " + GeminiInterfaceAdapter._normalize_object_text(normalized[9:].strip())
+        if normalized.startswith("answer "):
+            return "submit " + normalized[7:].strip()
+        if normalized.startswith("say "):
+            return "submit " + normalized[4:].strip().strip("\"'")
+        if normalized.startswith("combine ") and " and " in normalized:
+            item_a, item_b = normalized[8:].split(" and ", 1)
+            return "combine " + GeminiInterfaceAdapter._normalize_object_text(item_a) + " with " + GeminiInterfaceAdapter._normalize_object_text(item_b)
+        if normalized.startswith("unlock ") and " with " in normalized:
+            target, key = normalized[7:].split(" with ", 1)
+            return "unlock " + GeminiInterfaceAdapter._normalize_object_text(target) + " with " + GeminiInterfaceAdapter._normalize_object_text(key)
+        if normalized.startswith("use ") and " on " in normalized:
+            item, target = normalized[4:].split(" on ", 1)
+            return "use " + GeminiInterfaceAdapter._normalize_object_text(item) + " on " + GeminiInterfaceAdapter._normalize_object_text(target)
+        if normalized.startswith("give ") and " to " in normalized:
+            item, target = normalized[5:].split(" to ", 1)
+            return "give " + GeminiInterfaceAdapter._normalize_object_text(item) + " to " + GeminiInterfaceAdapter._normalize_object_text(target)
+        if normalized.startswith("combine ") and " with " in normalized:
+            item_a, item_b = normalized[8:].split(" with ", 1)
+            return "combine " + GeminiInterfaceAdapter._normalize_object_text(item_a) + " with " + GeminiInterfaceAdapter._normalize_object_text(item_b)
+        if normalized.startswith(("open ", "read ", "talk ", "take ", "examine ")):
+            verb, obj = normalized.split(" ", 1)
+            return verb + " " + GeminiInterfaceAdapter._normalize_object_text(obj)
+        return normalized
+    @staticmethod
+    def _normalize_object_text(text: str) -> str:
+        object_text = GeminiInterfaceAdapter._ARTICLE_RE.sub(" ", text)
+        return re.sub(r"\s+", " ", object_text).strip().lower()
+    @staticmethod
+    def _is_canonical_command(command: str) -> bool:
+        if command in {"look", "inventory", "wait"}:
+            return True
+        if command.startswith("go "):
+            return command[3:] in SUPPORTED_DIRECTIONS
+        if command.startswith(("open ", "read ", "talk ", "submit ")):
+            return bool(command.split(maxsplit=1)[1].strip())
+        if command.startswith("use "):
+            return " on " in command and all(part.strip() for part in command[4:].split(" on ", 1))
+        if command.startswith("combine "):
+            return " with " in command and all(part.strip() for part in command[8:].split(" with ", 1))
+        if command.startswith("give "):
+            return " to " in command and all(part.strip() for part in command[5:].split(" to ", 1))
+        if command.startswith("take "):
+            return bool(command.split(maxsplit=1)[1].strip())
+        if command.startswith("unlock "):
+            if " with " not in command:
+                return False
+            door_text, key_text = command[7:].split(" with ", 1)
+            return bool(door_text.strip() and key_text.strip())
+        return False
+    @staticmethod
+    def _sanitize_command_response(text: str) -> str:
+        cleaned = text.strip().strip("`").strip().strip("\"'")
+        if not cleaned:
+            return ""
+        first_line = cleaned.splitlines()[0].strip()
+        if ":" in first_line:
+            prefix, suffix = first_line.split(":", 1)
+            if prefix.lower() in {"command", "response"}:
+                first_line = suffix.strip()
+        return re.sub(r"\s+", " ", first_line).strip().lower()
+    @staticmethod
+    def _sanitize_multiline_response(text: str) -> str:
+        cleaned = GeminiInterfaceAdapter._sanitize_json_response(text)
+        if not cleaned:
+            return ""
+        lines: list[str] = []
+        blank_run = 0
+        for raw_line in cleaned.splitlines():
+            line = raw_line.strip()
+            if not line:
+                blank_run += 1
+                if blank_run <= 1:
+                    lines.append("")
+                continue
+            blank_run = 0
+            if ":" in line:
+                prefix, suffix = line.split(":", 1)
+                if prefix.lower() == "observation":
+                    line = suffix.strip()
+            lines.append(line)
+        return "\n".join(lines).strip().strip("\"'")
+    @staticmethod
+    def _sanitize_json_response(text: str) -> str:
+        cleaned = text.strip()
+        if cleaned.startswith("```"):
+            cleaned = re.sub(r"^```(?:json|text)?\s*", "", cleaned)
+            cleaned = re.sub(r"\s*```$", "", cleaned)
+        return cleaned.strip()
+    @staticmethod
+    def _should_preserve_feedback(feedback: str, state: GameState | None) -> bool:
+        if '"' in feedback or "'" in feedback:
+            return True
+        if state is not None and (state.last_command or "").startswith("read"):
+            return True
+        return False
+    @staticmethod
+    def _resolve_admissible_command(candidate: str, admissible: set[str]) -> str | None:
+        if candidate in admissible:
+            return candidate
+        if " " not in candidate:
+            return None
+        verb, remainder = candidate.split(" ", 1)
+        candidate_tokens = [token for token in re.split(r"\s+", remainder) if token and token not in {"from", "with", "on", "to"}]
+        matches: list[tuple[int, str]] = []
+        for option in admissible:
+            if not option.startswith(verb + " "):
+                continue
+            option_tokens = [token for token in re.split(r"\s+", option[len(verb) + 1 :]) if token and token not in {"from", "with", "on", "to"}]
+            if candidate_tokens and all(token in option_tokens for token in candidate_tokens):
+                matches.append((len(option_tokens), option))
+        if not matches:
+            return None
+        matches.sort(key=lambda item: (item[0], item[1]))
+        return matches[0][1]
+def sanitize_feedback_text(feedback: str) -> str:
+    """Strip prompt, meta and banner lines from raw game feedback.
+
+    Room header lines are rewritten as "Location: <label>", remaining lines
+    are trimmed, leading blank lines are dropped, and runs of blank lines are
+    collapsed to a single one.
+    """
+    lines = feedback.replace("\r\n", "\n").splitlines()
+    cleaned_lines: list[str] = []
+    for raw_line in lines:
+        line = raw_line.rstrip()
+        stripped = line.strip()
+        if not stripped:
+            cleaned_lines.append("")
+            continue
+        # The prompt regex is matched against the line with its leading whitespace intact.
+        if _TEXTWORLD_PROMPT_LINE_RE.match(line):
+            continue
+        # Lines starting with ">" are dropped (presumably echoed player input).
+        if stripped.startswith(">"):
+            continue
+        if _TEXTWORLD_META_LINE_RE.match(stripped):
+            continue
+        room_match = _TEXTWORLD_ROOM_HEADER_RE.match(stripped)
+        if room_match:
+            cleaned_lines.append(f"Location: {room_match.group('label').strip()}")
+            continue
+        if _is_probable_banner_line(stripped):
+            continue
+        cleaned_lines.append(stripped)
+    # Start at the first non-blank line that is not banner-like; if none
+    # qualifies, start_index stays 0.
+    start_index = 0
+    for index, line in enumerate(cleaned_lines):
+        stripped = line.strip()
+        if not stripped:
+            continue
+        if stripped.startswith("Explore ") or stripped.startswith("Location: ") or not _is_probable_banner_line(stripped):
+            start_index = index
+            break
+    useful_lines = cleaned_lines[start_index:]
+    collapsed: list[str] = []
+    blank_run = 0
+    for line in useful_lines:
+        stripped = line.strip()
+        if not stripped:
+            # Keep only the first blank line of each run.
+            blank_run += 1
+            if blank_run <= 1:
+                collapsed.append("")
+            continue
+        blank_run = 0
+        collapsed.append(stripped)
+    return "\n".join(collapsed).strip()
+def enrich_feedback_text(feedback: str, session: EpisodeSession) -> str:
+    supplement_lines = _observation_context_lines(session)
+    if not supplement_lines:
+        return feedback.strip()
+    merged: list[str] = []
+    base = feedback.strip()
+    if base:
+        merged.append(base)
+    for line in supplement_lines:
+        if line not in base:
+            merged.append(line)
+    return "\n\n".join(merged).strip()
+def _observation_context_lines(session: EpisodeSession) -> list[str]:
+    visible = _visible_entities(session)
+    exits = sorted(command[3:] for command in session.available_commands() if command.startswith("go "))
+    lines: list[str] = []
+    if visible:
+        lines.append("Visible here: " + ", ".join(visible))
+    if exits:
+        lines.append("Exits: " + ", ".join(exits))
+    return lines
+def _visible_entities(session: EpisodeSession) -> list[str]:
+    visible: list[str] = []
+    seen: set[str] = set()
+    for node in session.compiled.world.nodes:
+        if getattr(node, "parent_id", None) != session.current_room_id:
+            continue
+        if node.type == "readable" and node.id not in session.revealed_readables:
+            continue
+        name = session.compiled.node_command_names.get(node.id)
+        if name and name not in seen:
+            seen.add(name)
+            visible.append(name)
+    for edge in session.compiled.world.edges:
+        if edge.from_node_id != session.current_room_id or not edge.door_node_id:
+            continue
+        name = session.compiled.node_command_names.get(edge.door_node_id)
+        if name and name not in seen:
+            seen.add(name)
+            visible.append(name)
+    for item in session.compiled.world.items:
+        if session.item_locations.get(item.id) != session.current_room_id:
+            continue
+        name = session.compiled.item_command_names.get(item.id)
+        if name and name not in seen:
+            seen.add(name)
+            visible.append(name)
+    return visible
+def _is_probable_banner_line(line: str) -> bool:
+    if len(line) < 12:
+        return False
+    if line.startswith("Explore ") or line.startswith("Location: "):
+        return False
+    banner_chars = len(_TEXTWORLD_BANNER_CHAR_RE.findall(line))
+    return banner_chars >= max(4, len(line) // 6)

agents/master/logic.py ADDED Viewed

	@@ -0,0 +1,92 @@

+from __future__ import annotations
+import json
+import shutil
+import textwrap
+from pathlib import Path
+import textworld
+from textworld.core import EnvInfos
+from textworld.generator.data import LOGIC_DATA_PATH, TEXT_GRAMMARS_PATH
+from .base import (
+    CUSTOM_GRAMMAR_DIR,
+    CUSTOM_LOGIC_DIR,
+    normalize_answer_text,
+    suppress_unsupported_game_warning,
+)
+from .schema import WorldDefinition
+def build_logic_dir(artifacts_dir: Path, world: WorldDefinition) -> Path:
+    logic_dir = artifacts_dir / "kb_logic"
+    logic_dir.mkdir(parents=True, exist_ok=True)
+    overrides = {path.name for path in CUSTOM_LOGIC_DIR.glob("*.twl")}
+    for builtin in Path(LOGIC_DATA_PATH).glob("*.twl"):
+        if builtin.name not in overrides:
+            shutil.copy(builtin, logic_dir / builtin.name)
+    for custom in CUSTOM_LOGIC_DIR.glob("*.twl"):
+        shutil.copy(custom, logic_dir / custom.name)
+    (logic_dir / "world_submit_overlay.twl").write_text(submission_overlay(world), encoding="utf-8")
+    return logic_dir
+def build_grammar_dir(artifacts_dir: Path) -> Path:
+    grammar_dir = artifacts_dir / "kb_grammar"
+    grammar_dir.mkdir(parents=True, exist_ok=True)
+    overrides = {path.name for path in CUSTOM_GRAMMAR_DIR.glob("*.twg")}
+    for builtin in Path(TEXT_GRAMMARS_PATH).glob("*.twg"):
+        if builtin.name not in overrides:
+            shutil.copy(builtin, grammar_dir / builtin.name)
+    for custom in CUSTOM_GRAMMAR_DIR.glob("*.twg"):
+        shutil.copy(custom, grammar_dir / custom.name)
+    return grammar_dir
+def submit_command_text(world: WorldDefinition) -> str:
+    return "submit " + normalize_answer_text(world.meta.win_condition.answer_string)
+def submission_overlay(world: WorldDefinition) -> str:
+    """Render the logic overlay text that defines the ``submit/final`` rule.
+
+    The world's submit command is embedded both as the rule's command text and
+    in the Inform 7 code section. The result is dedented, stripped, and ends
+    with a single newline.
+    """
+    # Escape double quotes because the command is placed inside double-quoted
+    # strings in the template below.
+    answer = submit_command_text(world).replace('"', '\\"')
+    return textwrap.dedent(
+        f'''
+        type submission {{
+            rules {{
+                submit/final :: $at(P, r) & $at(npc, r) & $guardian(npc) & $consulted(npc) & $correct(answer, npc) -> solved(answer);
+            }}
+            reverse_rules {{
+                submit/final :: submit/final;
+            }}
+            inform7 {{
+                commands {{
+                    submit/final :: "{answer}" :: "taking inventory";
+                }}
+                code :: """
+                    Understand "{answer}" as taking inventory.
+                    After taking inventory:
+                        if the player's command matches the text "{answer}":
+                            repeat with candidate running through answer-likes:
+                                now candidate is solved;
+                """;
+            }}
+        }}
+        '''
+    ).strip() + "\n"
+def write_artifacts(artifacts_dir: Path, world: WorldDefinition, walkthrough_commands: list[str]) -> None:
+    (artifacts_dir / "world_definition.normalized.json").write_text(world.model_dump_json(indent=2), encoding="utf-8")
+    (artifacts_dir / "walkthrough.json").write_text(json.dumps(walkthrough_commands, indent=2), encoding="utf-8")
+def solver_policy(game_file: str) -> list[str]:
+    with suppress_unsupported_game_warning():
+        env = textworld.start(game_file, request_infos=EnvInfos(policy_commands=True, extras=["walkthrough"]))
+        try:
+            state = env.reset()
+        finally:
+            close = getattr(env, "close", None)
+            if callable(close):
+                close()
+    return list(state.policy_commands or state.get("extra.walkthrough") or [])

agents/master/main.py ADDED Viewed

	@@ -0,0 +1,72 @@

+from __future__ import annotations
+import argparse
+import json
+import sys
+from pathlib import Path
+from .base import DMCompileError, DMInterfaceError
+from .env import DMEnvironment
+from .interface import DEFAULT_GEMINI_MODEL, GeminiInterfaceAdapter, SimpleInterfaceAdapter
+from .play import ManualRunner, RandomAdmissibleRunner, WalkthroughRunner
+from .sample import load_world, sample_world_definition
+from .server import run_server
+from .snapshots import DEFAULT_LIVE_DIR, LiveSnapshotWriter
+def main(argv: list[str] | None = None) -> int:
+    """Run the command-line harness and return a process exit code.
+
+    Modes: ``serve`` starts the server, ``sample`` prints a sample world
+    definition, ``validate`` compiles a world, and ``play`` runs an episode
+    and prints the result as JSON.
+
+    Args:
+        argv: Argument list passed to ``argparse``; ``None`` is forwarded as-is.
+
+    Returns:
+        0 on success, 1 when compilation, the interface, or input validation fails.
+    """
+    parser = argparse.ArgumentParser(description="Dungeon DM environment harness")
+    parser.add_argument("mode", choices=["validate", "play", "sample", "serve"], help="What to do.")
+    parser.add_argument("world", nargs="?", help="Path to a world-definition JSON file.")
+    parser.add_argument("--runner", choices=["walkthrough", "random", "manual"], default="walkthrough")
+    parser.add_argument("--interface", choices=["simple", "gemini"], default="simple")
+    parser.add_argument("--model", default=DEFAULT_GEMINI_MODEL)
+    parser.add_argument("--narrate", action="store_true", help="Narrate observations through Gemini.")
+    parser.add_argument("--live", action="store_true", help="Write live viewer snapshots while playing.")
+    parser.add_argument("--live-dir", type=Path, default=DEFAULT_LIVE_DIR)
+    parser.add_argument("--port", type=int, default=8000)
+    args = parser.parse_args(argv)
+    # "serve" and "sample" do not need a world file, so they are handled first.
+    if args.mode == "serve":
+        run_server(port=args.port, live_dir=args.live_dir)
+        return 0
+    if args.mode == "sample":
+        print(json.dumps(sample_world_definition(), indent=2))
+        return 0
+    if not args.world:
+        parser.error("A world-definition JSON file is required for validate/play.")
+    try:
+        adapter = SimpleInterfaceAdapter()
+        if args.interface == "gemini":
+            adapter = GeminiInterfaceAdapter(model=args.model, narrate_observations=args.narrate)
+        env = DMEnvironment(interface_adapter=adapter)
+        world = load_world(args.world)
+        if args.mode == "validate":
+            compiled = env.compile_world(world)
+            print(f"Compiled successfully: {compiled.game_file}")
+            print(f"Solver policy: {compiled.solver_policy}")
+            return 0
+        # Note: all three runners are instantiated before one is selected.
+        runner = {"manual": ManualRunner(), "random": RandomAdmissibleRunner(), "walkthrough": WalkthroughRunner()}[
+            args.runner
+        ]
+        observer = LiveSnapshotWriter(live_dir=args.live_dir, runner_name=args.runner) if args.live else None
+        result = env.play(world, runner=runner, observer=observer)
+        # A compile error is reported on the observation rather than raised.
+        if result.observation.compile_error is not None:
+            print(result.observation.compile_error, file=sys.stderr)
+            return 1
+        print(
+            json.dumps(
+                {
+                    "reward": result.reward,
+                    "done": result.done,
+                    "observation": result.observation.model_dump(),
+                },
+                indent=2,
+            )
+        )
+        return 0
+    except (DMCompileError, DMInterfaceError, ValueError) as exc:
+        print(str(exc), file=sys.stderr)
+        return 1

agents/master/play.py ADDED Viewed

	@@ -0,0 +1,70 @@

+from __future__ import annotations
+import random
+from typing import Iterable, Protocol, TYPE_CHECKING
+from .base import DMInterfaceError
+if TYPE_CHECKING:
+    from .session import EpisodeSession
+class EpisodeRunner(Protocol):
+    """Structural interface for objects that drive an episode session."""
+    def run(self, session: EpisodeSession, max_steps: int) -> None:
+        """Drive ``session``; the runners in this module stop once ``max_steps`` steps were taken."""
+        ...
+class WalkthroughRunner:
+    def __init__(self, commands: Iterable[str] | None = None) -> None:
+        self._commands = list(commands) if commands is not None else None
+    def run(self, session: EpisodeSession, max_steps: int) -> None:
+        commands = list(self._commands or session.compiled.solver_policy)
+        for command in commands:
+            if session.done or session.steps_taken >= max_steps:
+                return
+            session.step(command)
+class CommandSequenceRunner:
+    def __init__(self, commands: Iterable[str]) -> None:
+        self._commands = list(commands)
+    def run(self, session: EpisodeSession, max_steps: int) -> None:
+        for command in self._commands:
+            if session.done or session.steps_taken >= max_steps:
+                return
+            session.step(command)
+class RandomAdmissibleRunner:
+    def __init__(self, seed: int | None = None) -> None:
+        self._rng = random.Random(seed)
+    def run(self, session: EpisodeSession, max_steps: int) -> None:
+        while not session.done and session.steps_taken < max_steps:
+            options = session.available_commands()
+            if not options:
+                return
+            session.step(self._rng.choice(options))
+class ManualRunner:
+    def run(self, session: EpisodeSession, max_steps: int) -> None:
+        print(session.current_feedback())
+        while not session.done and session.steps_taken < max_steps:
+            print()
+            print(f"Step {session.steps_taken + 1}/{max_steps}")
+            command = input("> ").strip()
+            if command in {"quit", "exit"}:
+                return
+            try:
+                turn = session.step(command)
+            except DMInterfaceError:
+                print("I'm not sure what you mean. Try rephrasing that command.")
+                if session.available_commands():
+                    print("Admissible:", ", ".join(session.available_commands()))
+                continue
+            print(turn.observation)
+            if session.available_commands():
+                print("Admissible:", ", ".join(session.available_commands()))

agents/master/policy.py ADDED Viewed

	@@ -0,0 +1,147 @@

+from __future__ import annotations
+from typing import Protocol
+from pydantic import Field
+from agents.shared.llm_client import StructuredModelClient
+from agents.shared.model_schema import StrictModel
+from .schema import WorldDefinition
+class DungeonMasterPolicyError(RuntimeError):
+    """Raised when a dungeon-master policy fails to produce a world definition."""
+    pass
+class DungeonMasterPolicy(Protocol):
+    """Structural interface for anything that can generate a WorldDefinition."""
+    def generate_world(
+        self,
+        *,
+        target_ratio: float,
+        repair_context: "DMRepairContext | None" = None,
+    ) -> WorldDefinition:
+        """Return a world for ``target_ratio``; ``repair_context`` describes a failed earlier attempt, if any."""
+        ...
+class DMRepairContext(StrictModel):
+    """Feedback about a failed generation attempt, fed back into the repair prompt."""
+    # Attempt counter shown to the model as "Repair attempt".
+    attempt_number: int
+    # Error text shown to the model as "Normalized error".
+    error_message: str
+    # JSON of the rejected candidate; only included in the prompt when non-empty.
+    previous_candidate_json: str | None = None
+class WinConditionCandidate(StrictModel):
+    """Loosely typed candidate for ``meta.win_condition`` as returned by the model."""
+    type: str
+    target_npc_id: str
+    answer_string: str
+class WorldMetaCandidate(StrictModel):
+    """Loosely typed candidate for the world's ``meta`` section."""
+    title: str
+    difficulty_target: float
+    start_node_id: str
+    win_condition: WinConditionCandidate
+class WorldNodeCandidate(StrictModel):
+    """Flat candidate for any node: one shape carrying every variant's fields.
+    Variant-specific fields default to None so a single structured-output
+    schema covers all node types.
+    """
+    # Fields every node carries.
+    id: str
+    type: str
+    label: str
+    description: str
+    # Variant-specific fields; left as None when they do not apply.
+    parent_id: str | None = None
+    open: bool | None = None
+    locked: bool | None = None
+    lock_key_id: str | None = None
+    clue_id: str | None = None
+    requires_item_id: str | None = None
+    consumes_item: bool | None = None
+    text_content: str | None = None
+    reveals_item_id: str | None = None
+    reveals_readable_id: str | None = None
+    gives_item_id: str | None = None
+    gives_clue_id: str | None = None
+class EdgeCandidate(StrictModel):
+    """Loosely typed candidate for one directed edge between two nodes."""
+    id: str
+    from_node_id: str
+    to_node_id: str
+    direction: str
+    type: str
+    # Optional lock fields; None when the edge does not carry them.
+    required_item_id: str | None = None
+    door_node_id: str | None = None
+class ItemCandidate(StrictModel):
+    """Loosely typed candidate for an item; ``start_node_id`` may be omitted."""
+    id: str
+    label: str
+    description: str
+    subtype: str
+    start_node_id: str | None = None
+class ClueCandidate(StrictModel):
+    """Candidate clue: an id plus its text."""
+    id: str
+    text: str
+class RecipeCandidate(StrictModel):
+    """Candidate recipe mapping input item ids to a single output item id."""
+    id: str
+    input_item_ids: list[str]
+    output_item_id: str
+class QuestStepCandidate(StrictModel):
+    """Candidate quest step; ``action`` holds the quest-action text as a plain string."""
+    step_id: str
+    description: str
+    # Defaults to an empty list for steps without prerequisites.
+    requires_step_ids: list[str] = Field(default_factory=list)
+    action: str
+class WorldDefinitionCandidate(StrictModel):
+    """Top-level structured-output schema requested from the model.
+    DungeonMasterLLMPolicy.generate_world dumps an instance with
+    ``exclude_none=True`` and validates the result as a WorldDefinition.
+    """
+    meta: WorldMetaCandidate
+    nodes: list[WorldNodeCandidate]
+    edges: list[EdgeCandidate]
+    items: list[ItemCandidate]
+    clues: list[ClueCandidate]
+    # Recipes are optional; an omitted list becomes empty.
+    recipes: list[RecipeCandidate] = Field(default_factory=list)
+    quest_chain: list[QuestStepCandidate]
+class DungeonMasterLLMPolicy:
+    def __init__(
+        self,
+        client: StructuredModelClient,
+        *,
+        model_name: str,
+        temperature: float = 0.0,
+        max_output_tokens: int = 8192,
+    ) -> None:
+        self.client = client
+        self.model_name = model_name
+        self.temperature = temperature
+        self.max_output_tokens = max_output_tokens
+    def generate_world(
+        self,
+        *,
+        target_ratio: float,
+        repair_context: DMRepairContext | None = None,
+    ) -> WorldDefinition:
+        from .prompt import build_dm_world_messages
+        try:
+            candidate = self.client.generate_structured(
+                build_dm_world_messages(target_ratio=target_ratio, repair_context=repair_context),
+                WorldDefinitionCandidate,
+                model_name=self.model_name,
+                temperature=self.temperature,
+                max_output_tokens=self.max_output_tokens,
+            )
+            return WorldDefinition.model_validate(candidate.model_dump(mode="json", exclude_none=True))
+        except Exception as exc:
+            raise DungeonMasterPolicyError(self._normalize_error(exc)) from exc
+    @staticmethod
+    def _normalize_error(exc: Exception) -> str:
+        return " ".join(str(exc).split()) or exc.__class__.__name__

agents/master/prompt.py ADDED Viewed

	@@ -0,0 +1,371 @@

+from __future__ import annotations
+import json
+from typing import TYPE_CHECKING, Any
+from agents.shared.model_schema import ModelMessage
+from .sample import sample_world_definition
+if TYPE_CHECKING:
+    from .policy import DMRepairContext
+# System message sent with every world-generation request. It fixes the output
+# format (minified one-line JSON), the supported mechanics, and the quest-action DSL.
+# NOTE(review): the closing sentence mentions a "reference world", yet the user
+# templates in this module only embed small example snippets; confirm intent.
+DM_WORLD_SYSTEM_PROMPT = """You are the dungeon master policy for a structured text adventure generator.
+Return exactly one valid WorldDefinition JSON object as minified JSON on a single line.
+Do not use markdown fences, indentation, comments, or extra prose.
+World requirements:
+- Build a fair, solvable mystery dungeon with 4 to 6 rooms.
+- Use only the supported schema fields and node types.
+- Use ids in snake_case.
+- Set meta.difficulty_target equal to the requested target ratio.
+- The win condition must be deduce with a short lowercase answer string.
+- The final answer must never be leaked directly in clue text.
+- The world must be mechanically consistent: all references must point to real ids and every puzzle chain must be completable.
+- Do not add unsupported fields to node variants. In particular, location, junction, and door nodes must not include `parent_id`.
+- Every readable must include `text_content`.
+- Keep the world compact enough to fit in one response: short labels, short descriptions, and a concise quest chain.
+Supported mechanics:
+- Containers and doors can be opened.
+- Locked doors require a real key item.
+- Readables may require an item before they become legible.
+- Fixtures may reveal an item or a readable after a correct use action.
+- NPCs may trade one required item for one item or one clue.
+- Recipes combine exactly two items into one output item.
+- Navigation uses only passage and locked_passage edges.
+Quest-chain rules:
+- Every quest action must be one of:
+  open(node_id)
+  take(item_id,source_node_id)
+  unlock(door_id,key_id)
+  go(room_id)
+  read(readable_id)
+  use(item_id,target_node_id)
+  combine(item_a_id,item_b_id)
+  give(item_id,npc_id)
+  talk(npc_id)
+  submit("answer")
+- Do not invent unsupported actions such as inspect(), search(), solve(), explore(), or win().
+- The quest chain must be topologically valid and correspond to a real solvable playthrough.
+- Every quest step object must use exactly these keys: step_id, description, requires_step_ids, action.
+- Use requires_step_ids (plural) even for one dependency. Never use requires_step_id.
+- Include exactly 3 clues that narrow the answer without stating it directly.
+- Include a guardian NPC for the final submission.
+- Every clue id in clues[] must have exactly one real source: either one readable.clue_id or one non-guardian npc.gives_clue_id.
+- Do not include unused clue ids and do not leave readables without clue_id.
+- Clue text and readable text must never contain the exact answer_string.
+- Every room-to-room connection must include the reverse edge explicitly.
+- Every locked_passage pair must reference a real door node id that already exists in nodes[].
+- Any item used by required_item_id or lock_key_id must have subtype key.
+- Keep descriptions and clue texts short, concrete, and under 14 words when possible.
+- Prefer 4 rooms, 9 to 11 nodes, 4 to 5 items, 3 clues, 0 recipes, and 6 to 9 quest steps.
+- Use the shortest valid quest chain that still supports the target difficulty.
+- meta must include title, difficulty_target, start_node_id, and win_condition.
+- item objects use subtype, never type.
+- clue objects use id and text, never clue_id.
+- fixture objects use reveals_item_id or reveals_readable_id.
+- NPC trade objects use requires_item_id plus gives_item_id or gives_clue_id.
+Reliability matters more than novelty. Stay close to the reference world's mechanical bundle unless repair feedback requires a different fix.
+"""
+# Alternative user-message templates; build_dm_world_messages picks one with
+# ``prompt_style % len(...)`` and fills it via str.format. Placeholders used:
+# target_ratio plus the eight *_example_json snippets.
+_DM_WORLD_USER_PROMPTS = (
+    # Style 0: flat list of hard output requirements, then compact snippets.
+    (
+        "Generate one full WorldDefinition JSON object as minified one-line JSON.\n"
+        "Requested target ratio: {target_ratio}\n\n"
+        "Hard output requirements:\n"
+        "- Required top-level fields: meta, nodes, edges, items, clues, recipes, quest_chain.\n"
+        "- Supported node types: location, junction, container, door, readable, fixture, npc.\n"
+        "- Supported edge types: passage, locked_passage.\n"
+        "- Supported item subtypes: key, puzzle.\n"
+        "- Every locked_passage must reference a real door_node_id and a real required_item_id.\n"
+        "- Every locked door must have a matching lock_key_id.\n"
+        "- Every fixture must have requires_item_id and reveal at most one item or one readable.\n"
+        "- Location, junction, and door nodes must not include parent_id.\n"
+        "- Every readable must include text_content.\n"
+        "- Every non-guardian NPC trade must require a real item.\n"
+        "- Use 6 to 9 quest steps unless a shorter valid chain is clearly enough.\n"
+        "- meta must include title, start_node_id, and win_condition.\n"
+        "- items use subtype, not type.\n"
+        "- clues use id, not clue_id.\n"
+        "- every clue id must have exactly one readable or non-guardian npc source.\n"
+        "- fixtures use reveals_item_id or reveals_readable_id.\n"
+        "- NPC trades use requires_item_id plus gives_item_id or gives_clue_id.\n"
+        "- every locked_passage must reference a real door node id and a key item.\n"
+        "- The final answer must stay implicit until the player gathers clues and speaks to the guardian.\n\n"
+        "Compact structural snippets to mimic exactly:\n"
+        "meta={meta_example_json}\n"
+        "item={item_example_json}\n"
+        "clue={clue_example_json}\n"
+        "fixture={fixture_example_json}\n"
+        "npc={npc_example_json}\n"
+        "quest_step={quest_step_example_json}\n"
+        "edge_pair={edge_pair_example_json}\n"
+        "readable={readable_example_json}\n"
+    ),
+    # Style 1: mechanical constraints, then one labelled example per line pair.
+    (
+        "Produce a compact but fully valid WorldDefinition JSON object as minified one-line JSON.\n"
+        "Target difficulty ratio: {target_ratio}\n\n"
+        "Mechanical constraints:\n"
+        "- Output minified JSON only on one line.\n"
+        "- Keep the graph solvable and internally consistent.\n"
+        "- Keep all ids in snake_case and all references real.\n"
+        "- Preserve the supported node, edge, and item types exactly.\n"
+        "- Do not add unsupported fields to node variants.\n"
+        "- Every readable must include text_content.\n"
+        "- The world must require clue gathering before the guardian submission.\n"
+        "- Use exactly 3 clues.\n"
+        "- Every clue id must appear exactly once in a readable.clue_id or npc.gives_clue_id.\n"
+        "- Every edge pair must include both directions explicitly.\n"
+        "- Every locked_passage must reference a real door node id already present in nodes[].\n"
+        "- Any required_item_id on a locked_passage must be a key item.\n"
+        "- Quest steps must use requires_step_ids (plural).\n\n"
+        "Exact meta example:\n{meta_example_json}\n"
+        "Exact item example:\n{item_example_json}\n"
+        "Exact clue example:\n{clue_example_json}\n"
+        "Exact fixture example:\n{fixture_example_json}\n"
+        "Exact NPC example:\n{npc_example_json}\n"
+        "Exact quest step example:\n{quest_step_example_json}\n"
+        "Exact bidirectional edge example:\n{edge_pair_example_json}\n"
+        "Exact readable example:\n{readable_example_json}\n"
+    ),
+    # Style 2: short checklist, then mini schema examples.
+    # NOTE(review): this checklist says both "3 to 5 clues" and "Use exactly 3
+    # clues"; the system prompt asks for exactly 3. Confirm which is intended.
+    (
+        "Return one original WorldDefinition JSON object for a mystery dungeon as minified one-line JSON.\n"
+        "Requested target ratio: {target_ratio}\n\n"
+        "Checklist:\n"
+        "- 4 to 6 rooms.\n"
+        "- 3 to 5 clues.\n"
+        "- A real guardian NPC for the final answer.\n"
+        "- A quest chain that compiles into a real walkthrough.\n"
+        "- No unsupported extra fields and no missing required fields like readable.text_content.\n"
+        "- No unsupported mechanics, no unsupported actions, no prose.\n"
+        "- Use requires_step_ids (plural), not requires_step_id.\n"
+        "- Use exactly 3 clues and explicit reverse edges.\n"
+        "- Every clue id must have exactly one source and no clue may be orphaned.\n"
+        "- Every locked_passage must use an existing door node id and a key item.\n"
+        "- Prefer 6 to 9 quest steps, not long walkthroughs.\n\n"
+        "Mini schema examples:\n"
+        "meta={meta_example_json}\n"
+        "item={item_example_json}\n"
+        "clue={clue_example_json}\n"
+        "fixture={fixture_example_json}\n"
+        "npc={npc_example_json}\n"
+        "quest_step={quest_step_example_json}\n"
+        "edge_pair={edge_pair_example_json}\n"
+        "readable={readable_example_json}\n"
+    ),
+)
+# Structural example snippets. build_dm_world_messages serialises each one with
+# json.dumps and substitutes it into the selected user template.
+# Example ``meta`` section with a deduce win condition.
+_DM_META_EXAMPLE = {
+    "title": "The Ember Vault",
+    "difficulty_target": 1.75,
+    "start_node_id": "foyer",
+    "win_condition": {
+        "type": "deduce",
+        "target_npc_id": "stone_guardian",
+        "answer_string": "vesna",
+    },
+}
+# Example key item.
+_DM_ITEM_EXAMPLE = {
+    "id": "brass_key",
+    "subtype": "key",
+    "start_node_id": "entry_chest",
+    "label": "Brass Key",
+    "description": "short key description",
+}
+# Example clue (id + text only).
+_DM_CLUE_EXAMPLE = {
+    "id": "initial_clue",
+    "text": "short clue text",
+}
+# Example fixture that reveals a readable; None values serialise as JSON null.
+_DM_FIXTURE_EXAMPLE = {
+    "id": "stone_well",
+    "type": "fixture",
+    "parent_id": "courtyard",
+    "requires_item_id": "full_map",
+    "consumes_item": False,
+    "reveals_item_id": None,
+    "reveals_readable_id": "water_plaque",
+    "label": "Stone Well",
+    "description": "short fixture description",
+}
+# Example NPC trade: requires one item, gives one item.
+_DM_NPC_EXAMPLE = {
+    "id": "cartographer",
+    "type": "npc",
+    "parent_id": "gallery",
+    "requires_item_id": "full_map",
+    "gives_item_id": "lens",
+    "gives_clue_id": None,
+    "label": "Cartographer",
+    "description": "short npc description",
+}
+# Example quest step without prerequisites.
+_DM_QUEST_STEP_EXAMPLE = {
+    "step_id": "open_entry_chest",
+    "description": "open the chest",
+    "requires_step_ids": [],
+    "action": "open(entry_chest)",
+}
+# Example locked passage given as an explicit forward/reverse edge pair that
+# shares one door node and one key.
+_DM_EDGE_PAIR_EXAMPLE = [
+    {
+        "id": "foyer_east",
+        "from_node_id": "foyer",
+        "to_node_id": "workshop",
+        "direction": "east",
+        "type": "locked_passage",
+        "required_item_id": "brass_key",
+        "door_node_id": "iron_door",
+    },
+    {
+        "id": "workshop_west",
+        "from_node_id": "workshop",
+        "to_node_id": "foyer",
+        "direction": "west",
+        "type": "locked_passage",
+        "required_item_id": "brass_key",
+        "door_node_id": "iron_door",
+    },
+]
+# Example readable that carries a clue and needs an item to be read.
+_DM_READABLE_EXAMPLE = {
+    "id": "ash_mural",
+    "type": "readable",
+    "parent_id": "workshop",
+    "clue_id": "initial_clue",
+    "requires_item_id": "torch",
+    "consumes_item": False,
+    "label": "Ash Mural",
+    "description": "short readable description",
+    "text_content": "short readable text",
+}
+def _compress_reference_world_for_prompt(reference_world: dict[str, Any]) -> dict[str, Any]:
+    return {
+        "meta": reference_world.get("meta", {}),
+        "nodes": [
+            _compress_world_node(node)
+            for node in reference_world.get("nodes", [])
+            if isinstance(node, dict)
+        ],
+        "edges": [
+            {
+                key: edge[key]
+                for key in ("id", "from_node_id", "to_node_id", "direction", "type", "required_item_id", "door_node_id")
+                if key in edge
+            }
+            for edge in reference_world.get("edges", [])
+            if isinstance(edge, dict)
+        ],
+        "items": [
+            {
+                **{
+                    key: item[key]
+                    for key in ("id", "subtype", "start_node_id")
+                    if key in item
+                },
+                "label": str(item.get("label") or item.get("id") or "item"),
+                "description": "short item description",
+            }
+            for item in reference_world.get("items", [])
+            if isinstance(item, dict)
+        ],
+        "clues": [
+            {"id": clue["id"], "text": "short clue text"}
+            for clue in reference_world.get("clues", [])
+            if isinstance(clue, dict) and "id" in clue
+        ],
+        "recipes": [
+            {
+                key: recipe[key]
+                for key in ("id", "input_item_ids", "output_item_id")
+                if key in recipe
+            }
+            for recipe in reference_world.get("recipes", [])
+            if isinstance(recipe, dict)
+        ],
+        "quest_chain": [
+            {
+                **{
+                    key: step[key]
+                    for key in ("step_id", "requires_step_ids", "action")
+                    if key in step
+                },
+                "description": "short quest step",
+            }
+            for step in reference_world.get("quest_chain", [])
+            if isinstance(step, dict)
+        ],
+    }
+def _compress_world_node(node: dict[str, Any]) -> dict[str, Any]:
+    compressed = {
+        key: node[key]
+        for key in (
+            "id",
+            "type",
+            "parent_id",
+            "open",
+            "locked",
+            "lock_key_id",
+            "clue_id",
+            "requires_item_id",
+            "consumes_item",
+            "reveals_item_id",
+            "reveals_readable_id",
+            "gives_item_id",
+            "gives_clue_id",
+        )
+        if key in node
+    }
+    compressed["label"] = str(node.get("label") or node.get("id") or node.get("type") or "node")
+    compressed["description"] = f"short {str(node.get('type') or 'node')} description"
+    if node.get("type") == "readable":
+        compressed["text_content"] = "short readable text"
+    return compressed
+def build_dm_world_messages(
+    *,
+    target_ratio: float,
+    repair_context: "DMRepairContext | None" = None,
+    reference_world: dict[str, Any] | None = None,
+    prompt_style: int = 0,
+) -> list[ModelMessage]:
+    exemplar_world = reference_world or sample_world_definition()
+    structural_exemplar = _compress_reference_world_for_prompt(exemplar_world)
+    template = _DM_WORLD_USER_PROMPTS[prompt_style % len(_DM_WORLD_USER_PROMPTS)]
+    prompt = template.format(
+        target_ratio=target_ratio,
+        reference_world_json=json.dumps(structural_exemplar, separators=(",", ":")),
+        meta_example_json=json.dumps(_DM_META_EXAMPLE, separators=(",", ":")),
+        item_example_json=json.dumps(_DM_ITEM_EXAMPLE, separators=(",", ":")),
+        clue_example_json=json.dumps(_DM_CLUE_EXAMPLE, separators=(",", ":")),
+        fixture_example_json=json.dumps(_DM_FIXTURE_EXAMPLE, separators=(",", ":")),
+        npc_example_json=json.dumps(_DM_NPC_EXAMPLE, separators=(",", ":")),
+        quest_step_example_json=json.dumps(_DM_QUEST_STEP_EXAMPLE, separators=(",", ":")),
+        edge_pair_example_json=json.dumps(_DM_EDGE_PAIR_EXAMPLE, separators=(",", ":")),
+        readable_example_json=json.dumps(_DM_READABLE_EXAMPLE, separators=(",", ":")),
+    )
+    if repair_context is not None:
+        prompt += (
+            "\nThe previous WorldDefinition failed schema validation or compilation.\n"
+            f"Repair attempt: {repair_context.attempt_number}\n"
+            f"Normalized error: {repair_context.error_message}\n"
+            "Return a fully corrected WorldDefinition only.\n"
+        )
+        if repair_context.previous_candidate_json:
+            prompt += f"Previous invalid WorldDefinition JSON:\n{repair_context.previous_candidate_json}\n"
+    return [
+        ModelMessage(role="system", content=DM_WORLD_SYSTEM_PROMPT),
+        ModelMessage(role="user", content=prompt),
+    ]

agents/master/quest.py ADDED Viewed

	@@ -0,0 +1,418 @@

+from __future__ import annotations
+import re
+from collections import defaultdict, deque
+from .base import (
+    COMBINE_RE,
+    DMCompileError,
+    GIVE_RE,
+    GO_RE,
+    INVENTORY_ID,
+    OPEN_RE,
+    READ_RE,
+    STORED_ID,
+    SUBMIT_RE,
+    TALK_RE,
+    TAKE_RE,
+    UNLOCK_RE,
+    USE_RE,
+    normalize_answer_text,
+)
+from .graph import door_room_mapping, hidden_readable_ids, recipe_mapping, use_effect_mapping
+from .schema import (
+    CombineAction,
+    ContainerNode,
+    FixtureNode,
+    GiveAction,
+    GoAction,
+    Item,
+    NpcNode,
+    OpenAction,
+    QuestAction,
+    QuestStep,
+    ReadAction,
+    ReadableNode,
+    SimulationState,
+    SubmitAction,
+    TalkAction,
+    TakeAction,
+    UnlockAction,
+    UseAction,
+    WorldDefinition,
+)
+def topological_linearize(steps: list[QuestStep]) -> list[QuestStep]:
+    by_id = {step.step_id: step for step in steps}
+    for step in steps:
+        for dependency in step.requires_step_ids:
+            if dependency not in by_id:
+                raise DMCompileError(f"Quest step '{step.step_id}' depends on unknown step '{dependency}'.")
+    visiting: set[str] = set()
+    visited: set[str] = set()
+    def visit(step_id: str) -> None:
+        if step_id in visited:
+            return
+        if step_id in visiting:
+            raise DMCompileError("quest_chain contains a cycle.")
+        visiting.add(step_id)
+        for dependency in by_id[step_id].requires_step_ids:
+            visit(dependency)
+        visiting.remove(step_id)
+        visited.add(step_id)
+    for step in steps:
+        visit(step.step_id)
+    seen: set[str] = set()
+    for step in steps:
+        missing = [dependency for dependency in step.requires_step_ids if dependency not in seen]
+        if missing:
+            raise DMCompileError(
+                f"Quest step '{step.step_id}' appears before its required steps: {', '.join(sorted(missing))}."
+            )
+        seen.add(step.step_id)
+    return steps
+def parse_quest_action(text: str) -> QuestAction:
+    compact = re.sub(r"\s+", "", text)
+    if match := GO_RE.fullmatch(compact):
+        return GoAction(target_node_id=match.group("target"))
+    if match := OPEN_RE.fullmatch(compact):
+        return OpenAction(target_node_id=match.group("target"))
+    if match := UNLOCK_RE.fullmatch(compact):
+        return UnlockAction(door_id=match.group("door"), key_id=match.group("key"))
+    if match := TAKE_RE.fullmatch(compact):
+        return TakeAction(item_id=match.group("item"), source_node_id=match.group("source"))
+    if match := READ_RE.fullmatch(compact):
+        return ReadAction(target_node_id=match.group("target"))
+    if match := USE_RE.fullmatch(compact):
+        return UseAction(item_id=match.group("item"), target_node_id=match.group("target"))
+    if match := COMBINE_RE.fullmatch(compact):
+        return CombineAction(item_a_id=match.group("item_a"), item_b_id=match.group("item_b"))
+    if match := GIVE_RE.fullmatch(compact):
+        return GiveAction(item_id=match.group("item"), npc_id=match.group("npc"))
+    if match := TALK_RE.fullmatch(compact):
+        return TalkAction(target_node_id=match.group("target"))
+    if match := SUBMIT_RE.fullmatch(text.strip()):
+        return SubmitAction(answer_text=match.group("answer"))
+    raise DMCompileError(f"Unsupported quest action DSL '{text}'.")
+def simulate_walkthrough(
+    world: WorldDefinition,
+    actions: list[QuestAction],
+    entity_names: dict[str, str],
+) -> list[str]:
+    node_by_id = {node.id: node for node in world.nodes}
+    item_by_id = {item.id: item for item in world.items}
+    edge_by_target = {(edge.from_node_id, edge.to_node_id): edge for edge in world.edges}
+    door_rooms = door_room_mapping(world)
+    hidden_readables = hidden_readable_ids(world)
+    use_effects = use_effect_mapping(world)
+    recipes = recipe_mapping(world)
+    clue_ids = {clue.id for clue in world.clues}
+    state = SimulationState(
+        current_room_id=world.meta.start_node_id,
+        item_locations={item.id: item.start_node_id or STORED_ID for item in world.items},
+        visited_nodes={world.meta.start_node_id},
+        revealed_readables={node.id for node in world.nodes if node.type == "readable" and node.id not in hidden_readables},
+    )
+    for node in world.nodes:
+        if node.type in {"container", "door"}:
+            if node.open:
+                state.open_nodes.add(node.id)
+            if node.locked:
+                state.locked_nodes.add(node.id)
+    commands: list[str] = []
+    for action in actions:
+        if isinstance(action, GoAction):
+            _apply_go(action, edge_by_target, state, commands)
+        elif isinstance(action, OpenAction):
+            _apply_open(action, node_by_id, door_rooms, state, entity_names, commands)
+        elif isinstance(action, UnlockAction):
+            _apply_unlock(action, node_by_id, item_by_id, door_rooms, state, entity_names, commands)
+        elif isinstance(action, TakeAction):
+            _apply_take(action, node_by_id, item_by_id, state, entity_names, commands)
+        elif isinstance(action, ReadAction):
+            _apply_read(action, node_by_id, state, entity_names, commands)
+        elif isinstance(action, UseAction):
+            _apply_use(action, node_by_id, state, entity_names, commands, use_effects)
+        elif isinstance(action, CombineAction):
+            _apply_combine(action, state, entity_names, commands, recipes)
+        elif isinstance(action, GiveAction):
+            _apply_give(action, node_by_id, state, entity_names, commands)
+        elif isinstance(action, TalkAction):
+            _apply_talk(action, node_by_id, state, entity_names, commands)
+        elif isinstance(action, SubmitAction):
+            _apply_submit(action, world, node_by_id, state, commands, clue_ids)
+        else:  # pragma: no cover
+            raise AssertionError(f"Unhandled quest action {action!r}")
+    return commands
+def _apply_go(
+    action: GoAction,
+    edge_by_target: dict[tuple[str, str], object],
+    state: SimulationState,
+    commands: list[str],
+) -> None:
+    edge = edge_by_target.get((state.current_room_id, action.target_node_id))
+    if edge is None:
+        raise DMCompileError(
+            f"Quest moves from '{state.current_room_id}' to non-adjacent room '{action.target_node_id}'."
+        )
+    if edge.door_node_id and edge.door_node_id not in state.open_nodes:
+        raise DMCompileError(f"Quest moves through closed door '{edge.door_node_id}'.")
+    if edge.type == "locked_passage" and edge.door_node_id in state.locked_nodes:
+        raise DMCompileError(f"Quest moves through locked door '{edge.door_node_id}'.")
+    state.current_room_id = edge.to_node_id
+    state.visited_nodes.add(edge.to_node_id)
+    commands.append(f"go {edge.direction}")
+def _apply_open(
+    action: OpenAction,
+    node_by_id: dict[str, object],
+    door_rooms: dict[str, frozenset[str]],
+    state: SimulationState,
+    entity_names: dict[str, str],
+    commands: list[str],
+) -> None:
+    node = node_by_id.get(action.target_node_id)
+    if node is None or node.type not in {"container", "door"}:
+        raise DMCompileError(f"open(...) targets unknown lockable '{action.target_node_id}'.")
+    if node.id in state.locked_nodes:
+        raise DMCompileError(f"Quest opens locked '{node.id}' before unlocking it.")
+    if node.type == "door":
+        if state.current_room_id not in door_rooms.get(node.id, frozenset()):
+            raise DMCompileError(f"Door '{node.id}' is not reachable from room '{state.current_room_id}'.")
+    else:
+        _require_parent_room(node.parent_id, node.id, state.current_room_id)
+    state.open_nodes.add(node.id)
+    state.visited_nodes.add(node.id)
+    commands.append(f"open {entity_names[node.id]}")
+def _apply_unlock(
+    action: UnlockAction,
+    node_by_id: dict[str, object],
+    item_by_id: dict[str, Item],
+    door_rooms: dict[str, frozenset[str]],
+    state: SimulationState,
+    entity_names: dict[str, str],
+    commands: list[str],
+) -> None:
+    if action.key_id not in item_by_id:
+        raise DMCompileError(f"Quest references unknown key '{action.key_id}'.")
+    if action.key_id not in state.inventory:
+        raise DMCompileError(f"Quest unlocks '{action.door_id}' without key '{action.key_id}'.")
+    node = node_by_id.get(action.door_id)
+    if node is None or node.type not in {"door", "container"}:
+        raise DMCompileError(f"unlock(...) targets unknown lockable '{action.door_id}'.")
+    if node.lock_key_id != action.key_id:
+        raise DMCompileError(f"'{node.id}' does not match key '{action.key_id}'.")
+    if node.type == "door":
+        if state.current_room_id not in door_rooms.get(node.id, frozenset()):
+            raise DMCompileError(f"Door '{node.id}' is not reachable from room '{state.current_room_id}'.")
+    else:
+        _require_parent_room(node.parent_id, node.id, state.current_room_id)
+    state.locked_nodes.discard(node.id)
+    state.visited_nodes.add(node.id)
+    commands.append(f"unlock {entity_names[node.id]} with {entity_names[action.key_id]}")
+def _apply_take(
+    action: TakeAction,
+    node_by_id: dict[str, object],
+    item_by_id: dict[str, Item],
+    state: SimulationState,
+    entity_names: dict[str, str],
+    commands: list[str],
+) -> None:
+    item = item_by_id.get(action.item_id)
+    if item is None:
+        raise DMCompileError(f"Quest references unknown item '{action.item_id}'.")
+    actual_location = state.item_locations.get(item.id)
+    if actual_location != action.source_node_id:
+        raise DMCompileError(
+            f"Quest expects item '{item.id}' in '{action.source_node_id}', but it is in '{actual_location}'."
+        )
+    if action.source_node_id == state.current_room_id:
+        command = f"take {entity_names[item.id]}"
+    else:
+        source = node_by_id.get(action.source_node_id)
+        if source is None or not isinstance(source, ContainerNode):
+            raise DMCompileError(f"Quest cannot take '{item.id}' from '{action.source_node_id}'.")
+        _require_parent_room(source.parent_id, source.id, state.current_room_id)
+        if source.id not in state.open_nodes:
+            raise DMCompileError(f"Quest takes from closed container '{source.id}'.")
+        command = f"take {entity_names[item.id]} from {entity_names[source.id]}"
+    state.inventory.add(item.id)
+    state.item_locations[item.id] = INVENTORY_ID
+    state.visited_nodes.add(item.id)
+    commands.append(command)
+def _apply_read(
+    action: ReadAction,
+    node_by_id: dict[str, object],
+    state: SimulationState,
+    entity_names: dict[str, str],
+    commands: list[str],
+) -> None:
+    node = _typed_node(node_by_id, action.target_node_id, ReadableNode, "read")
+    _require_parent_room(node.parent_id, node.id, state.current_room_id)
+    if node.id not in state.revealed_readables:
+        raise DMCompileError(f"Readable '{node.id}' has not been revealed yet.")
+    if node.requires_item_id and node.id not in state.prepared_readables:
+        raise DMCompileError(f"Readable '{node.id}' still requires item '{node.requires_item_id}'.")
+    state.discovered_clues.add(node.clue_id)
+    state.visited_nodes.add(node.id)
+    commands.append(f"read {entity_names[node.id]}")
+def _apply_use(
+    action: UseAction,
+    node_by_id: dict[str, object],
+    state: SimulationState,
+    entity_names: dict[str, str],
+    commands: list[str],
+    use_effects: dict[str, object],
+) -> None:
+    effect = use_effects.get(action.target_node_id)
+    if effect is None:
+        raise DMCompileError(f"use(...) targets unknown use-effect node '{action.target_node_id}'.")
+    if effect.required_item_id != action.item_id:
+        raise DMCompileError(f"'{action.target_node_id}' does not accept item '{action.item_id}'.")
+    if action.item_id not in state.inventory:
+        raise DMCompileError(f"Quest uses item '{action.item_id}' before taking it.")
+    node = node_by_id.get(action.target_node_id)
+    if node is None or node.type not in {"readable", "fixture"}:
+        raise DMCompileError(f"use(...) targets unsupported node '{action.target_node_id}'.")
+    _require_parent_room(node.parent_id, node.id, state.current_room_id)
+    if isinstance(node, ReadableNode) and node.id not in state.revealed_readables:
+        raise DMCompileError(f"Readable '{node.id}' has not been revealed yet.")
+    if effect.consumes_item:
+        state.inventory.remove(action.item_id)
+        state.item_locations[action.item_id] = None
+    if effect.clue_id:
+        state.prepared_readables.add(node.id)
+        state.discovered_clues.add(effect.clue_id)
+    if effect.reveals_item_id:
+        state.item_locations[effect.reveals_item_id] = state.current_room_id
+    if effect.reveals_readable_id:
+        state.revealed_readables.add(effect.reveals_readable_id)
+    if isinstance(node, FixtureNode):
+        state.used_fixtures.add(node.id)
+    state.visited_nodes.add(node.id)
+    commands.append(f"use {entity_names[action.item_id]} on {entity_names[node.id]}")
+def _apply_combine(
+    action: CombineAction,
+    state: SimulationState,
+    entity_names: dict[str, str],
+    commands: list[str],
+    recipes: dict[frozenset[str], str],
+) -> None:
+    recipe_key = frozenset({action.item_a_id, action.item_b_id})
+    output_item_id = recipes.get(recipe_key)
+    if output_item_id is None:
+        raise DMCompileError(f"No recipe combines '{action.item_a_id}' with '{action.item_b_id}'.")
+    if action.item_a_id not in state.inventory or action.item_b_id not in state.inventory:
+        raise DMCompileError("Quest combines items before both are in inventory.")
+    state.inventory.remove(action.item_a_id)
+    state.inventory.remove(action.item_b_id)
+    state.item_locations[action.item_a_id] = None
+    state.item_locations[action.item_b_id] = None
+    state.inventory.add(output_item_id)
+    state.item_locations[output_item_id] = INVENTORY_ID
+    state.produced_items.add(output_item_id)
+    state.visited_nodes.add(output_item_id)
+    commands.append(f"combine {entity_names[action.item_a_id]} with {entity_names[action.item_b_id]}")
+def _apply_give(
+    action: GiveAction,
+    node_by_id: dict[str, object],
+    state: SimulationState,
+    entity_names: dict[str, str],
+    commands: list[str],
+) -> None:
+    npc = _typed_node(node_by_id, action.npc_id, NpcNode, "give")
+    _require_parent_room(npc.parent_id, npc.id, state.current_room_id)
+    if action.item_id not in state.inventory:
+        raise DMCompileError(f"Quest gives '{action.item_id}' before taking it.")
+    if npc.requires_item_id != action.item_id:
+        raise DMCompileError(f"NPC '{npc.id}' does not want '{action.item_id}'.")
+    if npc.id in state.satisfied_npcs:
+        raise DMCompileError(f"Quest trades with NPC '{npc.id}' more than once.")
+    state.inventory.remove(action.item_id)
+    state.item_locations[action.item_id] = None
+    if npc.gives_item_id:
+        state.inventory.add(npc.gives_item_id)
+        state.item_locations[npc.gives_item_id] = INVENTORY_ID
+        state.produced_items.add(npc.gives_item_id)
+    if npc.gives_clue_id:
+        state.discovered_clues.add(npc.gives_clue_id)
+    state.satisfied_npcs.add(npc.id)
+    state.visited_nodes.add(npc.id)
+    commands.append(f"give {entity_names[action.item_id]} to {entity_names[npc.id]}")
+def _apply_talk(
+    action: TalkAction,
+    node_by_id: dict[str, object],
+    state: SimulationState,
+    entity_names: dict[str, str],
+    commands: list[str],
+) -> None:
+    npc = _typed_node(node_by_id, action.target_node_id, NpcNode, "talk")
+    _require_parent_room(npc.parent_id, npc.id, state.current_room_id)
+    state.consulted_npcs.add(npc.id)
+    state.visited_nodes.add(npc.id)
+    commands.append(f"talk {entity_names[npc.id]}")
+def _apply_submit(
+    action: SubmitAction,
+    world: WorldDefinition,
+    node_by_id: dict[str, object],
+    state: SimulationState,
+    commands: list[str],
+    clue_ids: set[str],
+) -> None:
+    guardian_id = world.meta.win_condition.target_npc_id
+    guardian = _typed_node(node_by_id, guardian_id, NpcNode, "submit")
+    _require_parent_room(guardian.parent_id, guardian.id, state.current_room_id)
+    if guardian.id not in state.consulted_npcs:
+        raise DMCompileError("Quest submits before talking to the guardian.")
+    if state.discovered_clues != clue_ids:
+        missing = sorted(clue_ids - state.discovered_clues)
+        raise DMCompileError(f"Quest submits before all clues are discovered: {missing}")
+    if normalize_answer_text(action.answer_text) != normalize_answer_text(world.meta.win_condition.answer_string):
+        raise DMCompileError("The final submit step must match win_condition.answer_string.")
+    commands.append("submit " + normalize_answer_text(action.answer_text))
+def _typed_node(node_by_id: dict[str, object], node_id: str, expected: type, label: str):
+    node = node_by_id.get(node_id)
+    if node is None or not isinstance(node, expected):
+        raise DMCompileError(f"{label}(...) targets unknown {expected.__name__.lower()} '{node_id}'.")
+    return node
+def _require_parent_room(parent_id: str, node_id: str, current_room_id: str) -> None:
+    if parent_id != current_room_id:
+        raise DMCompileError(
+            f"Quest interacts with '{node_id}' from room '{current_room_id}', but it lives in '{parent_id}'."
+        )

agents/master/sample.py ADDED Viewed

	@@ -0,0 +1,499 @@

+from __future__ import annotations
+import json
+import random
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+@dataclass(frozen=True)
+class WorldTheme:
+    """Immutable set of display strings that re-skins the fixed sample world.
+
+    Each ``*_label`` / ``*_description`` pair supplies the label and
+    description of the node or item with the matching id in ``_build_world``;
+    the ``*_text`` fields supply readable text content and clue text.
+    """
+
+    # World title and the answer string used by the win condition.
+    title: str
+    answer: str
+    # The five location nodes.
+    foyer_label: str
+    foyer_description: str
+    shrine_label: str
+    shrine_description: str
+    workshop_label: str
+    workshop_description: str
+    courtyard_label: str
+    courtyard_description: str
+    gallery_label: str
+    gallery_description: str
+    # Containers, the door, readables, the fixture, and NPCs.
+    entry_chest_label: str
+    entry_chest_description: str
+    iron_door_label: str
+    iron_door_description: str
+    ash_mural_label: str
+    ash_mural_description: str
+    ash_mural_text: str
+    iron_chest_label: str
+    iron_chest_description: str
+    stone_well_label: str
+    stone_well_description: str
+    water_plaque_label: str
+    water_plaque_description: str
+    water_plaque_text: str
+    cartographer_label: str
+    cartographer_description: str
+    faded_letter_label: str
+    faded_letter_description: str
+    faded_letter_text: str
+    stone_guardian_label: str
+    stone_guardian_description: str
+    # Items.
+    brass_key_label: str
+    brass_key_description: str
+    torch_label: str
+    torch_description: str
+    torn_map_left_label: str
+    torn_map_left_description: str
+    torn_map_right_label: str
+    torn_map_right_description: str
+    full_map_label: str
+    full_map_description: str
+    lens_label: str
+    lens_description: str
+    # Text of the three clues (ids initial_clue, river_clue, waterwarden_clue).
+    initial_clue_text: str
+    river_clue_text: str
+    waterwarden_clue_text: str
+# Built-in themes. Index 0 is what _select_theme returns when no seed is given;
+# a seeded call picks one entry pseudo-randomly. In every theme the
+# water_plaque_text / faded_letter_text strings repeat river_clue_text /
+# waterwarden_clue_text verbatim.
+_WORLD_THEMES: tuple[WorldTheme, ...] = (
+    # Theme 0: river ward (answer "mira").
+    WorldTheme(
+        title="The River Ward",
+        answer="mira",
+        foyer_label="Foyer",
+        foyer_description="A drafty entry hall with passages north, south, east, and west.",
+        shrine_label="Shrine",
+        shrine_description="An open shrine watched by a silent stone guardian.",
+        workshop_label="Workshop",
+        workshop_description="An ash-streaked workshop lit by a guttering lamp.",
+        courtyard_label="Courtyard",
+        courtyard_description="Rainwater gathers around a cracked stone well.",
+        gallery_label="Gallery",
+        gallery_description="Portraits of the wardens hang above a long dust-covered table.",
+        entry_chest_label="Entry Chest",
+        entry_chest_description="A squat travel chest sits beside the door.",
+        iron_door_label="Iron Door",
+        iron_door_description="A blackened iron door seals the workshop.",
+        ash_mural_label="Ash Mural",
+        ash_mural_description="An ash-dark mural is impossible to make out with the naked eye.",
+        ash_mural_text="The mural preserves one line: the betrayer's name begins with M.",
+        iron_chest_label="Iron Chest",
+        iron_chest_description="A soot-stained iron chest is tucked under a bench.",
+        stone_well_label="Stone Well",
+        stone_well_description="Etchings circle the well's rim, but they only align from the proper vantage.",
+        water_plaque_label="Water Plaque",
+        water_plaque_description="A bronze plaque slides out from the well masonry.",
+        water_plaque_text="The betrayer lived closest to the river gate.",
+        cartographer_label="Cartographer",
+        cartographer_description="The cartographer studies the walls and waits for a completed survey.",
+        faded_letter_label="Faded Letter",
+        faded_letter_description="A faded letter is still too blurred to decipher.",
+        faded_letter_text="Of the wardens, only Mira kept quarters beside the water.",
+        stone_guardian_label="Stone Guardian",
+        stone_guardian_description="The guardian asks for the betrayer's name once you are ready.",
+        brass_key_label="Brass Key",
+        brass_key_description="A brass key with soot in its teeth.",
+        torch_label="Torch",
+        torch_description="A pitch torch with a steady flame.",
+        torn_map_left_label="Torn Map Left",
+        torn_map_left_description="The left half of a survey map.",
+        torn_map_right_label="Torn Map Right",
+        torn_map_right_description="The right half of a survey map.",
+        full_map_label="Full Map",
+        full_map_description="A restored map of the ward.",
+        lens_label="Lens",
+        lens_description="A polished lens in a brass frame.",
+        initial_clue_text="The betrayer's name begins with M.",
+        river_clue_text="The betrayer lived closest to the river gate.",
+        waterwarden_clue_text="Of the wardens, only Mira kept quarters beside the water.",
+    ),
+    # Theme 1: furnace vault (answer "vesna").
+    WorldTheme(
+        title="The Ember Vault",
+        answer="vesna",
+        foyer_label="Receiving Hall",
+        foyer_description="A warm stone hall lined with soot and copper hooks.",
+        shrine_label="Crucible Shrine",
+        shrine_description="A brass sentinel stands before a furnace-bright altar.",
+        workshop_label="Forge Annex",
+        workshop_description="Bellows creak above benches powdered with black ash.",
+        courtyard_label="Quench Yard",
+        courtyard_description="A cracked basin gathers rain beside the old quench line.",
+        gallery_label="Ledger Hall",
+        gallery_description="Burned account books rest beneath portraits of furnace wardens.",
+        entry_chest_label="Courier Trunk",
+        entry_chest_description="A courier trunk waits under a soot-marked peg rail.",
+        iron_door_label="Furnace Door",
+        iron_door_description="A scorched iron door blocks the annex.",
+        ash_mural_label="Cinder Frieze",
+        ash_mural_description="A smoke-dark frieze only sharpens under moving flame.",
+        ash_mural_text="A surviving line says the betrayer's name begins with V.",
+        iron_chest_label="Coal Locker",
+        iron_chest_description="A riveted locker is wedged beneath a slagged bench.",
+        stone_well_label="Quench Basin",
+        stone_well_description="Marks on the basin align only when seen with the full survey.",
+        water_plaque_label="Cooling Plaque",
+        water_plaque_description="A brass plate rises from a seam in the basin stone.",
+        water_plaque_text="The betrayer worked closest to the quench trench.",
+        cartographer_label="Quartermaster",
+        cartographer_description="The quartermaster trades only for a complete furnace survey.",
+        faded_letter_label="Scorched Ledger",
+        faded_letter_description="Heat has blurred the ink into copper-colored streaks.",
+        faded_letter_text="Only Vesna kept the cooling ledgers beside the trench.",
+        stone_guardian_label="Brass Sentinel",
+        stone_guardian_description="The sentinel requests the betrayer's name when the case is ready.",
+        brass_key_label="Copper Key",
+        brass_key_description="A copper key with furnace grit packed in the cuts.",
+        torch_label="Coal Torch",
+        torch_description="A coal torch that burns with a steady orange core.",
+        torn_map_left_label="Smelter Map Left",
+        torn_map_left_description="The left half of a furnace survey.",
+        torn_map_right_label="Smelter Map Right",
+        torn_map_right_description="The right half of a furnace survey.",
+        full_map_label="Furnace Survey",
+        full_map_description="A restored survey of the ember vault.",
+        lens_label="Gauge Lens",
+        lens_description="A thick gauge lens set in a brass ring.",
+        initial_clue_text="The betrayer's name begins with V.",
+        river_clue_text="The betrayer worked closest to the quench trench.",
+        waterwarden_clue_text="Only Vesna kept the cooling ledgers beside the trench.",
+    ),
+    # Theme 2: star archive (answer "selene").
+    WorldTheme(
+        title="The Astral Archive",
+        answer="selene",
+        foyer_label="Entry Rotunda",
+        foyer_description="A quiet rotunda opens toward stacked corridors and a dim observatory stair.",
+        shrine_label="Moon Chapel",
+        shrine_description="A silver warden stands beneath a ceiling of cold stars.",
+        workshop_label="Chart Room",
+        workshop_description="Tables of brass instruments glint in powdery moon dust.",
+        courtyard_label="Star Court",
+        courtyard_description="A dry fountain mirrors the constellations in chipped stone.",
+        gallery_label="Catalog Hall",
+        gallery_description="Glass cases hold the names of long-dead archivists.",
+        entry_chest_label="Porter's Case",
+        entry_chest_description="A leather case rests under the chart hooks.",
+        iron_door_label="Star Door",
+        iron_door_description="A ribbed iron door seals the chart room.",
+        ash_mural_label="Night Chart",
+        ash_mural_description="The chart is unreadable until lit from the proper angle.",
+        ash_mural_text="One surviving note says the betrayer's name begins with S.",
+        iron_chest_label="Index Chest",
+        iron_chest_description="A narrow chest sits below a shelf of cracked lenses.",
+        stone_well_label="Dry Fountain",
+        stone_well_description="Its star marks align only when the full survey is restored.",
+        water_plaque_label="Star Plaque",
+        water_plaque_description="A silver plaque slides free from the fountain rim.",
+        water_plaque_text="The betrayer slept nearest the eastern telescope.",
+        cartographer_label="Archivist",
+        cartographer_description="The archivist will trade for a complete celestial survey.",
+        faded_letter_label="Blurred Index",
+        faded_letter_description="The index script is too faint without magnification.",
+        faded_letter_text="Among the archivists, only Selene kept quarters by the east telescope.",
+        stone_guardian_label="Silver Warden",
+        stone_guardian_description="The warden will hear the accusation once you have evidence.",
+        brass_key_label="Star Key",
+        brass_key_description="A slim key engraved with a crescent notch.",
+        torch_label="Lamp Wand",
+        torch_description="A narrow lamp wand with a clean blue flame.",
+        torn_map_left_label="Celestial Map Left",
+        torn_map_left_description="The left half of a star survey.",
+        torn_map_right_label="Celestial Map Right",
+        torn_map_right_description="The right half of a star survey.",
+        full_map_label="Celestial Survey",
+        full_map_description="A restored survey of the astral archive.",
+        lens_label="Astrolabe Lens",
+        lens_description="A polished lens mounted in silver wire.",
+        initial_clue_text="The betrayer's name begins with S.",
+        river_clue_text="The betrayer slept nearest the eastern telescope.",
+        waterwarden_clue_text="Among the archivists, only Selene kept quarters by the east telescope.",
+    ),
+    # Theme 3: conservatory (answer "liora").
+    WorldTheme(
+        title="The Glass Conservatory",
+        answer="liora",
+        foyer_label="Gate House",
+        foyer_description="A humid gate house opens onto vine-choked passages.",
+        shrine_label="Bloom Shrine",
+        shrine_description="A mossy guardian waits among chipped planters.",
+        workshop_label="Potting Room",
+        workshop_description="Clay dust and root knives cover the worktables.",
+        courtyard_label="Glass Court",
+        courtyard_description="A cracked basin sits beneath panes webbed with ivy.",
+        gallery_label="Seed Gallery",
+        gallery_description="Pressed flowers hang beside records of vanished caretakers.",
+        entry_chest_label="Garden Chest",
+        entry_chest_description="A cedar chest is tucked beside the rain cloaks.",
+        iron_door_label="Greenhouse Door",
+        iron_door_description="A warped iron door blocks the potting room.",
+        ash_mural_label="Vine Panel",
+        ash_mural_description="The panel's scratches only read clearly under a steady flame.",
+        ash_mural_text="A scratched line says the betrayer's name begins with L.",
+        iron_chest_label="Tool Locker",
+        iron_chest_description="A damp locker crouches under a potting bench.",
+        stone_well_label="Ivy Basin",
+        stone_well_description="The etched rings align only when the full garden survey is in hand.",
+        water_plaque_label="Root Plaque",
+        water_plaque_description="A greened plaque slides from the basin wall.",
+        water_plaque_text="The betrayer tended the beds nearest the rain cistern.",
+        cartographer_label="Head Gardener",
+        cartographer_description="The gardener will barter only for a complete bed map.",
+        faded_letter_label="Watered Note",
+        faded_letter_description="The note is blurred by old rain and fertilizer.",
+        faded_letter_text="Only Liora kept the cistern ledgers beside the rain beds.",
+        stone_guardian_label="Moss Guardian",
+        stone_guardian_description="The guardian listens when you are ready to name the betrayer.",
+        brass_key_label="Trellis Key",
+        brass_key_description="A greened key shaped like a curling vine.",
+        torch_label="Glass Lantern",
+        torch_description="A glass-sided lantern with a bright white flame.",
+        torn_map_left_label="Bed Map Left",
+        torn_map_left_description="The left half of a conservatory plan.",
+        torn_map_right_label="Bed Map Right",
+        torn_map_right_description="The right half of a conservatory plan.",
+        full_map_label="Bed Survey",
+        full_map_description="A restored survey of the conservatory beds.",
+        lens_label="Prism Lens",
+        lens_description="A prism lens wrapped in tarnished copper.",
+        initial_clue_text="The betrayer's name begins with L.",
+        river_clue_text="The betrayer tended the beds nearest the rain cistern.",
+        waterwarden_clue_text="Only Liora kept the cistern ledgers beside the rain beds.",
+    ),
+    # Theme 4: coastal bastion (answer "corin").
+    WorldTheme(
+        title="The Salt Bastion",
+        answer="corin",
+        foyer_label="Watch Hall",
+        foyer_description="A salt-stung hall opens toward barracks, chapel, and the sea court.",
+        shrine_label="Tide Chapel",
+        shrine_description="A stone warden keeps watch over a shrine of ropes and shells.",
+        workshop_label="Signal Room",
+        workshop_description="Lantern hooks sway above benches dusted with salt ash.",
+        courtyard_label="Sea Court",
+        courtyard_description="A dry cistern sits beneath walls pitted by ocean wind.",
+        gallery_label="Roll Hall",
+        gallery_description="Roster boards hang beneath portraits of old coast captains.",
+        entry_chest_label="Harbor Chest",
+        entry_chest_description="A travel chest sits beside a rack of oilskins.",
+        iron_door_label="Beacon Door",
+        iron_door_description="A rusted iron door bars the signal room.",
+        ash_mural_label="Signal Board",
+        ash_mural_description="Salt haze hides the markings until a lamp is raised close.",
+        ash_mural_text="A surviving mark says the betrayer's name begins with C.",
+        iron_chest_label="Tar Locker",
+        iron_chest_description="A tar-black locker hides below a signal bench.",
+        stone_well_label="Dry Cistern",
+        stone_well_description="Its carved rings make sense only with the restored coast survey.",
+        water_plaque_label="Harbor Plaque",
+        water_plaque_description="A plaque rises from a crack in the cistern lip.",
+        water_plaque_text="The betrayer bunked nearest the harbor chain.",
+        cartographer_label="Harbor Clerk",
+        cartographer_description="The clerk trades only for a complete bastion survey.",
+        faded_letter_label="Salted Roll",
+        faded_letter_description="Salt has crusted over the roster names.",
+        faded_letter_text="Only Corin kept the harbor ledgers beside the chain gate.",
+        stone_guardian_label="Stone Warden",
+        stone_guardian_description="The warden asks for the betrayer's name when the proof is ready.",
+        brass_key_label="Anchor Key",
+        brass_key_description="A heavy key stamped with a worn anchor.",
+        torch_label="Signal Lamp",
+        torch_description="A shuttered lamp with a disciplined yellow flame.",
+        torn_map_left_label="Coast Map Left",
+        torn_map_left_description="The left half of a bastion survey.",
+        torn_map_right_label="Coast Map Right",
+        torn_map_right_description="The right half of a bastion survey.",
+        full_map_label="Coast Survey",
+        full_map_description="A restored survey of the salt bastion.",
+        lens_label="Captain's Lens",
+        lens_description="A salt-clear lens held in a bronze ring.",
+        initial_clue_text="The betrayer's name begins with C.",
+        river_clue_text="The betrayer bunked nearest the harbor chain.",
+        waterwarden_clue_text="Only Corin kept the harbor ledgers beside the chain gate.",
+    ),
+)
+def sample_world_definition(seed: int | None = None, difficulty_target: float = 1.5) -> dict[str, Any]:
+    theme = _select_theme(seed)
+    return _build_world(theme, difficulty_target=difficulty_target)
+def load_world(path: str) -> dict[str, Any]:
+    return json.loads(Path(path).read_text(encoding="utf-8"))
+def _select_theme(seed: int | None) -> WorldTheme:
+    if seed is None:
+        return _WORLD_THEMES[0]
+    rng = random.Random(seed)
+    return _WORLD_THEMES[rng.randrange(len(_WORLD_THEMES))]
+def _build_world(theme: WorldTheme, *, difficulty_target: float) -> dict[str, Any]:
+    return {
+        "meta": {
+            "title": theme.title,
+            "difficulty_target": difficulty_target,
+            "start_node_id": "foyer",
+            "win_condition": {
+                "type": "deduce",
+                "target_npc_id": "stone_guardian",
+                "answer_string": theme.answer,
+            },
+        },
+        "nodes": [
+            {"id": "foyer", "type": "location", "label": theme.foyer_label, "description": theme.foyer_description},
+            {"id": "shrine", "type": "location", "label": theme.shrine_label, "description": theme.shrine_description},
+            {"id": "workshop", "type": "location", "label": theme.workshop_label, "description": theme.workshop_description},
+            {"id": "courtyard", "type": "location", "label": theme.courtyard_label, "description": theme.courtyard_description},
+            {"id": "gallery", "type": "location", "label": theme.gallery_label, "description": theme.gallery_description},
+            {
+                "id": "entry_chest",
+                "type": "container",
+                "label": theme.entry_chest_label,
+                "description": theme.entry_chest_description,
+                "parent_id": "foyer",
+                "open": False,
+                "locked": False,
+                "lock_key_id": None,
+            },
+            {
+                "id": "iron_door",
+                "type": "door",
+                "label": theme.iron_door_label,
+                "description": theme.iron_door_description,
+                "open": False,
+                "locked": True,
+                "lock_key_id": "brass_key",
+            },
+            {
+                "id": "ash_mural",
+                "type": "readable",
+                "label": theme.ash_mural_label,
+                "description": theme.ash_mural_description,
+                "parent_id": "workshop",
+                "clue_id": "initial_clue",
+                "requires_item_id": "torch",
+                "consumes_item": False,
+                "text_content": theme.ash_mural_text,
+            },
+            {
+                "id": "iron_chest",
+                "type": "container",
+                "label": theme.iron_chest_label,
+                "description": theme.iron_chest_description,
+                "parent_id": "workshop",
+                "open": False,
+                "locked": False,
+                "lock_key_id": None,
+            },
+            {
+                "id": "stone_well",
+                "type": "fixture",
+                "label": theme.stone_well_label,
+                "description": theme.stone_well_description,
+                "parent_id": "courtyard",
+                "requires_item_id": "full_map",
+                "reveals_item_id": None,
+                "reveals_readable_id": "water_plaque",
+                "consumes_item": False,
+            },
+            {
+                "id": "water_plaque",
+                "type": "readable",
+                "label": theme.water_plaque_label,
+                "description": theme.water_plaque_description,
+                "parent_id": "courtyard",
+                "clue_id": "river_clue",
+                "requires_item_id": None,
+                "consumes_item": False,
+                "text_content": theme.water_plaque_text,
+            },
+            {
+                "id": "cartographer",
+                "type": "npc",
+                "label": theme.cartographer_label,
+                "description": theme.cartographer_description,
+                "parent_id": "gallery",
+                "requires_item_id": "full_map",
+                "gives_item_id": "lens",
+                "gives_clue_id": None,
+            },
+            {
+                "id": "faded_letter",
+                "type": "readable",
+                "label": theme.faded_letter_label,
+                "description": theme.faded_letter_description,
+                "parent_id": "gallery",
+                "clue_id": "waterwarden_clue",
+                "requires_item_id": "lens",
+                "consumes_item": False,
+                "text_content": theme.faded_letter_text,
+            },
+            {
+                "id": "stone_guardian",
+                "type": "npc",
+                "label": theme.stone_guardian_label,
+                "description": theme.stone_guardian_description,
+                "parent_id": "shrine",
+                "requires_item_id": None,
+                "gives_item_id": None,
+                "gives_clue_id": None,
+            },
+        ],
+        "edges": [
+            {"id": "foyer_north", "from_node_id": "foyer", "to_node_id": "shrine", "direction": "north", "type": "passage", "required_item_id": None, "door_node_id": None},
+            {"id": "shrine_south", "from_node_id": "shrine", "to_node_id": "foyer", "direction": "south", "type": "passage", "required_item_id": None, "door_node_id": None},
+            {"id": "foyer_east", "from_node_id": "foyer", "to_node_id": "workshop", "direction": "east", "type": "locked_passage", "required_item_id": "brass_key", "door_node_id": "iron_door"},
+            {"id": "workshop_west", "from_node_id": "workshop", "to_node_id": "foyer", "direction": "west", "type": "locked_passage", "required_item_id": "brass_key", "door_node_id": "iron_door"},
+            {"id": "foyer_west", "from_node_id": "foyer", "to_node_id": "courtyard", "direction": "west", "type": "passage", "required_item_id": None, "door_node_id": None},
+            {"id": "courtyard_east", "from_node_id": "courtyard", "to_node_id": "foyer", "direction": "east", "type": "passage", "required_item_id": None, "door_node_id": None},
+            {"id": "foyer_south", "from_node_id": "foyer", "to_node_id": "gallery", "direction": "south", "type": "passage", "required_item_id": None, "door_node_id": None},
+            {"id": "gallery_north", "from_node_id": "gallery", "to_node_id": "foyer", "direction": "north", "type": "passage", "required_item_id": None, "door_node_id": None},
+        ],
+        "items": [
+            {"id": "brass_key", "label": theme.brass_key_label, "description": theme.brass_key_description, "subtype": "key", "start_node_id": "entry_chest"},
+            {"id": "torch", "label": theme.torch_label, "description": theme.torch_description, "subtype": "puzzle", "start_node_id": "workshop"},
+            {"id": "torn_map_left", "label": theme.torn_map_left_label, "description": theme.torn_map_left_description, "subtype": "puzzle", "start_node_id": "iron_chest"},
+            {"id": "torn_map_right", "label": theme.torn_map_right_label, "description": theme.torn_map_right_description, "subtype": "puzzle", "start_node_id": "courtyard"},
+            {"id": "full_map", "label": theme.full_map_label, "description": theme.full_map_description, "subtype": "puzzle", "start_node_id": None},
+            {"id": "lens", "label": theme.lens_label, "description": theme.lens_description, "subtype": "puzzle", "start_node_id": None},
+        ],
+        "clues": [
+            {"id": "initial_clue", "text": theme.initial_clue_text},
+            {"id": "river_clue", "text": theme.river_clue_text},
+            {"id": "waterwarden_clue", "text": theme.waterwarden_clue_text},
+        ],
+        "recipes": [
+            {
+                "id": "restore_map",
+                "input_item_ids": ["torn_map_left", "torn_map_right"],
+                "output_item_id": "full_map",
+            }
+        ],
+        "quest_chain": [
+            {"step_id": "open_entry_chest", "description": f"Open the {theme.entry_chest_label.lower()}.", "requires_step_ids": [], "action": "open(entry_chest)"},
+            {"step_id": "take_brass_key", "description": f"Take the {theme.brass_key_label.lower()}.", "requires_step_ids": ["open_entry_chest"], "action": "take(brass_key,entry_chest)"},
+            {"step_id": "unlock_workshop", "description": f"Unlock the {theme.iron_door_label.lower()}.", "requires_step_ids": ["take_brass_key"], "action": "unlock(iron_door,brass_key)"},
+            {"step_id": "open_workshop", "description": f"Open the {theme.iron_door_label.lower()}.", "requires_step_ids": ["unlock_workshop"], "action": "open(iron_door)"},
+            {"step_id": "go_workshop", "description": f"Enter the {theme.workshop_label.lower()}.", "requires_step_ids": ["open_workshop"], "action": "go(workshop)"},
+            {"step_id": "take_torch", "description": f"Take the {theme.torch_label.lower()}.", "requires_step_ids": ["go_workshop"], "action": "take(torch,workshop)"},
+            {"step_id": "use_torch_on_mural", "description": f"Use the {theme.torch_label.lower()} on the {theme.ash_mural_label.lower()}.", "requires_step_ids": ["take_torch"], "action": "use(torch,ash_mural)"},
+            {"step_id": "open_iron_chest", "description": f"Open the {theme.iron_chest_label.lower()}.", "requires_step_ids": ["go_workshop"], "action": "open(iron_chest)"},
+            {"step_id": "take_left_map", "description": f"Take the {theme.torn_map_left_label.lower()}.", "requires_step_ids": ["open_iron_chest"], "action": "take(torn_map_left,iron_chest)"},
+            {"step_id": "return_foyer", "description": f"Return to the {theme.foyer_label.lower()}.", "requires_step_ids": ["take_left_map"], "action": "go(foyer)"},
+            {"step_id": "go_courtyard", "description": f"Head to the {theme.courtyard_label.lower()}.", "requires_step_ids": ["return_foyer"], "action": "go(courtyard)"},
+            {"step_id": "take_right_map", "description": f"Take the {theme.torn_map_right_label.lower()}.", "requires_step_ids": ["go_courtyard"], "action": "take(torn_map_right,courtyard)"},
+            {"step_id": "combine_map", "description": f"Restore the {theme.full_map_label.lower()}.", "requires_step_ids": ["take_right_map"], "action": "combine(torn_map_left,torn_map_right)"},
+            {"step_id": "use_map_on_well", "description": f"Use the {theme.full_map_label.lower()} on the {theme.stone_well_label.lower()}.", "requires_step_ids": ["combine_map"], "action": "use(full_map,stone_well)"},
+            {"step_id": "read_plaque", "description": f"Read the {theme.water_plaque_label.lower()}.", "requires_step_ids": ["use_map_on_well"], "action": "read(water_plaque)"},
+            {"step_id": "go_foyer_again", "description": f"Go back to the {theme.foyer_label.lower()}.", "requires_step_ids": ["read_plaque"], "action": "go(foyer)"},
+            {"step_id": "go_gallery", "description": f"Head to the {theme.gallery_label.lower()}.", "requires_step_ids": ["go_foyer_again"], "action": "go(gallery)"},
+            {"step_id": "give_map", "description": f"Give the map to the {theme.cartographer_label.lower()}.", "requires_step_ids": ["go_gallery"], "action": "give(full_map,cartographer)"},
+            {"step_id": "use_lens_on_letter", "description": f"Use the {theme.lens_label.lower()} on the {theme.faded_letter_label.lower()}.", "requires_step_ids": ["give_map"], "action": "use(lens,faded_letter)"},
+            {"step_id": "return_foyer_final", "description": f"Return to the {theme.foyer_label.lower()} again.", "requires_step_ids": ["use_lens_on_letter"], "action": "go(foyer)"},
+            {"step_id": "go_shrine", "description": f"Go to the {theme.shrine_label.lower()}.", "requires_step_ids": ["return_foyer_final"], "action": "go(shrine)"},
+            {"step_id": "talk_guardian", "description": f"Speak to the {theme.stone_guardian_label.lower()}.", "requires_step_ids": ["go_shrine"], "action": "talk(stone_guardian)"},
+            {"step_id": "submit_answer", "description": "Submit the betrayer's name.", "requires_step_ids": ["talk_guardian"], "action": f'submit("{theme.answer}")'},
+        ],
+    }

agents/master/schema.py ADDED Viewed

	@@ -0,0 +1,316 @@

+from __future__ import annotations
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Annotated, Literal, TypeAlias
+from pydantic import BaseModel, ConfigDict, Field
+from agents.shared.openenv_compat import Action, Observation, State
+# Common base for the pydantic models in this module: extra="forbid" makes
+# validation reject any field that is not declared on the model.
+class StrictModel(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+# Top-level metadata of a world definition.
+class WorldMeta(StrictModel):
+    title: str
+    difficulty_target: float
+    # Id of the node the episode starts in (EpisodeSession uses it as the
+    # initial current_room_id).
+    start_node_id: str
+    # Quoted forward reference: WinCondition is declared right after this class.
+    win_condition: "WinCondition"
+# How a world is won. "deduce" is the only accepted type.
+class WinCondition(StrictModel):
+    type: Literal["deduce"]
+    # NOTE(review): presumably the NPC the final answer is submitted to and the
+    # expected answer text -- confirm against the compiler/session code.
+    target_npc_id: str
+    answer_string: str
+# Fields shared by every node variant: a unique id plus display text.
+class BaseNode(StrictModel):
+    id: str
+    label: str
+    description: str
+# Node variant selected by type == "location". It has no parent_id; the live
+# server counts "location" and "junction" nodes as rooms.
+class LocationNode(BaseNode):
+    type: Literal["location"]
+# Node variant selected by type == "junction". Structurally identical to
+# LocationNode apart from the discriminator value.
+class JunctionNode(BaseNode):
+    type: Literal["junction"]
+# Container node (type == "container") placed under another node.
+class ContainerNode(BaseNode):
+    type: Literal["container"]
+    # Id of the node this container belongs to.
+    parent_id: str
+    # Initial state flags; both default to False (closed, not locked).
+    open: bool = False
+    locked: bool = False
+    # Optional id of the key item for the lock; None when no key is defined.
+    lock_key_id: str | None = None
+# Door node (type == "door"). Unlike containers it has no parent_id; edges
+# refer to a door through Edge.door_node_id.
+class DoorNode(BaseNode):
+    type: Literal["door"]
+    # Initial state flags; both default to False (closed, not locked).
+    open: bool = False
+    locked: bool = False
+    # Optional id of the key item for the lock.
+    lock_key_id: str | None = None
+# Readable node (type == "readable"): text tied to a clue.
+class ReadableNode(BaseNode):
+    type: Literal["readable"]
+    parent_id: str
+    # Id of the clue associated with this readable.
+    clue_id: str
+    # Optional item requirement, and whether that item is consumed.
+    # NOTE(review): presumably the item must be used on the readable before the
+    # clue is granted -- confirm against the session logic.
+    requires_item_id: str | None = None
+    consumes_item: bool = False
+    text_content: str
+# Fixture node (type == "fixture"). requires_item_id is mandatory here (no
+# default), unlike on readables and NPCs.
+class FixtureNode(BaseNode):
+    type: Literal["fixture"]
+    parent_id: str
+    requires_item_id: str
+    # Optional ids of an item and/or a readable that this fixture reveals.
+    reveals_item_id: str | None = None
+    reveals_readable_id: str | None = None
+    consumes_item: bool = False
+# NPC node (type == "npc"). All trade-related fields are optional, so an NPC
+# may require nothing and give nothing.
+class NpcNode(BaseNode):
+    type: Literal["npc"]
+    parent_id: str
+    requires_item_id: str | None = None
+    gives_item_id: str | None = None
+    gives_clue_id: str | None = None
+# Discriminated union of all node variants: pydantic selects the concrete
+# model from the value of the "type" field.
+WorldNode: TypeAlias = Annotated[
+    LocationNode | JunctionNode | ContainerNode | DoorNode | ReadableNode | FixtureNode | NpcNode,
+    Field(discriminator="type"),
+]
+# Directed connection from one node to another in a given direction.
+class Edge(StrictModel):
+    id: str
+    from_node_id: str
+    to_node_id: str
+    direction: Literal["north", "south", "east", "west", "up", "down", "in", "out"]
+    type: Literal["passage", "locked_passage"]
+    # Optional item and door associated with the edge; both default to None.
+    required_item_id: str | None = None
+    door_node_id: str | None = None
+# Item definition. subtype is restricted to "key" or "puzzle".
+class Item(StrictModel):
+    id: str
+    label: str
+    description: str
+    subtype: Literal["key", "puzzle"]
+    # Node the item starts in, or None when it has no starting location.
+    # NOTE(review): None presumably marks items that are produced during play
+    # (recipe outputs, NPC gifts) -- confirm against the compiler.
+    start_node_id: str | None = None
+# A clue: an id that other nodes reference, plus its text.
+class Clue(StrictModel):
+    id: str
+    text: str
+# Combination recipe: two input items yield one output item.
+class Recipe(StrictModel):
+    id: str
+    # min_length == max_length == 2: validation requires exactly two inputs.
+    input_item_ids: list[str] = Field(min_length=2, max_length=2)
+    output_item_id: str
+# One step of the quest chain.
+class QuestStep(StrictModel):
+    step_id: str
+    description: str
+    # Ids of steps listed as prerequisites; defaults to an empty list.
+    requires_step_ids: list[str] = Field(default_factory=list)
+    # Action in textual form, e.g. "open(entry_chest)" in the sample world.
+    action: str
+# Complete world description: metadata, nodes, edges, items, clues, recipes
+# and the quest chain. Only recipes may be omitted (defaults to an empty list).
+class WorldDefinition(StrictModel):
+    meta: WorldMeta
+    nodes: list[WorldNode]
+    edges: list[Edge]
+    items: list[Item]
+    clues: list[Clue]
+    recipes: list[Recipe] = Field(default_factory=list)
+    quest_chain: list[QuestStep]
+# Environment action whose payload is a full world definition. Extends the
+# Action base imported from agents.shared.openenv_compat.
+class DMAction(Action):
+    world_definition: WorldDefinition
+# One transcript entry for a single step of play.
+class Turn(StrictModel):
+    step: int
+    # NOTE(review): presumably the raw player input and the command actually
+    # sent to TextWorld after translation -- confirm in EpisodeSession.step.
+    player_action: str
+    textworld_command: str
+    observation: str
+    game_state_delta: dict[str, object]
+# Diagnostic feedback about a played episode. The list and mean fields are
+# required; the two counters default to 0.
+class DMFeedback(StrictModel):
+    unreachable_nodes: list[str]
+    unused_items: list[str]
+    clues_missed: list[str]
+    mean_steps_per_room: float
+    invalid_command_count: int = 0
+    wrong_submit_count: int = 0
+# Itemized reward information. reward_mode is one of two literals and defaults
+# to "gaussian_target_ratio"; player_won, target_ratio and reward are required.
+class DMRewardBreakdown(StrictModel):
+    reward_mode: Literal["gaussian_target_ratio", "compile_failure_penalty"] = "gaussian_target_ratio"
+    player_won: bool
+    # Ratio-derived values are optional and default to None.
+    raw_ratio: float | None = None
+    clamped_ratio: float | None = None
+    target_ratio: float
+    target_ratio_delta: float | None = None
+    efficiency_score: float | None = None
+    quality_score: float = 0.0
+    reward: float
+# Observation for the DM environment. Every field has a default, so an
+# instance can be created with none of them set.
+class DMObservation(Observation):
+    episode_transcript: list[Turn] = Field(default_factory=list)
+    player_won: bool | None = None
+    steps_taken: int | None = None
+    min_steps: int | None = None
+    ratio: float | None = None
+    # Error text from a failed compile, if any; None otherwise.
+    compile_error: str | None = None
+    feedback: DMFeedback | None = None
+    reward_breakdown: DMRewardBreakdown | None = None
+    target_ratio_used: float | None = None
+# State for the DM environment. Starts with no world, compile_status
+# "pending" and episode_status "running".
+class DMState(State):
+    current_world: WorldDefinition | None = None
+    compile_status: Literal["valid", "invalid", "pending"] = "pending"
+    episode_status: Literal["running", "complete", "failed"] = "running"
+    cumulative_success_rate: float = 0.0
+    target_ratio: float = 0.0
+    difficulty_hint: float | None = None
+# Immutable "go" action record: the node to move to.
+@dataclass(frozen=True)
+class GoAction:
+    target_node_id: str
+# Immutable "open" action record: the node to open.
+@dataclass(frozen=True)
+class OpenAction:
+    target_node_id: str
+# Immutable "unlock" action record: the door and the key used on it.
+@dataclass(frozen=True)
+class UnlockAction:
+    door_id: str
+    key_id: str
+# Immutable "take" action record: the item and the node it is taken from.
+@dataclass(frozen=True)
+class TakeAction:
+    item_id: str
+    source_node_id: str
+# Immutable "read" action record: the node to read.
+@dataclass(frozen=True)
+class ReadAction:
+    target_node_id: str
+# Immutable "use" action record: the item and the node it is used on.
+@dataclass(frozen=True)
+class UseAction:
+    item_id: str
+    target_node_id: str
+# Immutable "combine" action record: the two items being combined.
+@dataclass(frozen=True)
+class CombineAction:
+    item_a_id: str
+    item_b_id: str
+# Immutable "give" action record: the item and the NPC receiving it.
+@dataclass(frozen=True)
+class GiveAction:
+    item_id: str
+    npc_id: str
+# Immutable "talk" action record: the node being talked to.
+@dataclass(frozen=True)
+class TalkAction:
+    target_node_id: str
+# Immutable "submit" action record: the submitted answer text.
+@dataclass(frozen=True)
+class SubmitAction:
+    answer_text: str
+# Union of every action dataclass declared above.
+# NOTE(review): presumably the parsed form of QuestStep.action strings such as
+# "go(workshop)" or "take(brass_key,entry_chest)" -- confirm against the parser.
+QuestAction = (
+    GoAction
+    | OpenAction
+    | UnlockAction
+    | TakeAction
+    | ReadAction
+    | UseAction
+    | CombineAction
+    | GiveAction
+    | TalkAction
+    | SubmitAction
+)
+# Immutable description of an NPC trade: the required item and what is given
+# in return. The two gives_* fields have no default and must be passed
+# explicitly, using None when nothing of that kind is given.
+@dataclass(frozen=True)
+class NpcTrade:
+    required_item_id: str
+    gives_item_id: str | None
+    gives_clue_id: str | None
+# Immutable description of a "use item on target" effect. Only the required
+# item is mandatory; every outcome field defaults to None/False.
+@dataclass(frozen=True)
+class UseEffect:
+    required_item_id: str
+    clue_id: str | None = None
+    reveals_item_id: str | None = None
+    # EpisodeSession collects these ids to decide which readables start hidden.
+    reveals_readable_id: str | None = None
+    consumes_item: bool = False
+# Mutable dataclass holding a world definition together with compiled
+# artifacts and lookup tables. Every field is required (no defaults).
+@dataclass
+class CompiledWorld:
+    episode_id: str
+    world: WorldDefinition
+    artifacts_dir: Path
+    # Path passed to textworld.start() by EpisodeSession.
+    game_file: Path
+    walkthrough_commands: list[str]
+    # The live server reports len(solver_policy) as the episode's min_steps.
+    solver_policy: list[str]
+    correct_answer_normalized: str
+    correct_submit_command: str
+    guardian_id: str
+    guardian_room_id: str
+    room_name_to_id: dict[str, str]
+    # Node id -> command name and item id -> command name (EpisodeSession
+    # looks nodes up by id and inverts the item mapping).
+    node_command_names: dict[str, str]
+    item_command_names: dict[str, str]
+    item_start_locations: dict[str, str | None]
+    clue_text_by_id: dict[str, str]
+    readable_clue_by_id: dict[str, str]
+    npc_trade_map: dict[str, NpcTrade]
+    # Keyed by the unordered set of input item ids; the value is a str.
+    # NOTE(review): presumably the recipe's output item id -- confirm.
+    recipe_map: dict[frozenset[str], str]
+    use_effects: dict[str, UseEffect]
+    produced_item_ids: set[str]
+    # NOTE(review): key tuples are presumably (room id, target room id) and
+    # (room id, direction) -- confirm against the compiler.
+    room_edges_by_target: dict[tuple[str, str], Edge]
+    room_edges_by_direction: dict[tuple[str, str], Edge]
+    # Door id -> the set of room ids the door belongs to (the live server
+    # lists a door as visible when the current room is in this set).
+    door_rooms: dict[str, frozenset[str]]
+# Mutable simulation state. Only current_room_id is required; every other
+# field uses default_factory so each instance gets its own empty container.
+@dataclass
+class SimulationState:
+    current_room_id: str
+    inventory: set[str] = field(default_factory=set)
+    # Item id -> location id, or None when the item has no location.
+    item_locations: dict[str, str | None] = field(default_factory=dict)
+    open_nodes: set[str] = field(default_factory=set)
+    locked_nodes: set[str] = field(default_factory=set)
+    discovered_clues: set[str] = field(default_factory=set)
+    consulted_npcs: set[str] = field(default_factory=set)
+    satisfied_npcs: set[str] = field(default_factory=set)
+    revealed_readables: set[str] = field(default_factory=set)
+    prepared_readables: set[str] = field(default_factory=set)
+    used_fixtures: set[str] = field(default_factory=set)
+    produced_items: set[str] = field(default_factory=set)
+    visited_nodes: set[str] = field(default_factory=set)

agents/master/server.py ADDED Viewed

	@@ -0,0 +1,370 @@

+from __future__ import annotations
+import json
+import mimetypes
+import threading
+from http import HTTPStatus
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+from pathlib import Path
+from typing import Any
+from urllib.parse import urlparse
+from .base import DMCompileError, DMInterfaceError
+from .build import WorldCompiler
+from .interface import GeminiInterfaceAdapter, SimpleInterfaceAdapter
+from .schema import CompiledWorld, WorldDefinition
+from .session import EpisodeSession
+from .snapshots import (
+    DEFAULT_LIVE_DIR,
+    STATE_FILENAME,
+    WORLD_FILENAME,
+    LiveCurrentRoom,
+    LiveMetrics,
+    LiveRuntime,
+    LiveStateSnapshot,
+    load_live_payload,
+)
+# Location of the built web front-end, resolved relative to this file: the
+# "www/dist" folder two directory levels above this module's own directory.
+WEB_DIST_DIR = Path(__file__).resolve().parents[2] / "www" / "dist"
+class GameSessionManager:
+    """Thread-safe container for an interactive play session."""
+    def __init__(self, live_dir: Path, use_gemini: bool = False) -> None:
+        self._lock = threading.Lock()
+        self._session: EpisodeSession | None = None
+        self._compiled: CompiledWorld | None = None
+        self._compiler = WorldCompiler()
+        self._live_dir = live_dir
+        self._use_gemini = use_gemini
+        self._clear_stale_files()
+    def _clear_stale_files(self) -> None:
+        """Remove leftover state/world JSON from a previous session."""
+        for fname in (STATE_FILENAME, WORLD_FILENAME):
+            path = self._live_dir / fname
+            path.unlink(missing_ok=True)
+    def start(self, world_input: WorldDefinition | dict[str, Any]) -> dict[str, Any]:
+        with self._lock:
+            if self._session is not None:
+                self._session.close()
+            compiled = self._compiler.compile(world_input)
+            adapter = self._make_adapter()
+            session = EpisodeSession(compiled, interface_adapter=adapter)
+            self._compiled = compiled
+            self._session = session
+            self._write_world(compiled.world)
+            self._write_state("running")
+            return {
+                "ok": True,
+                "episode_id": compiled.episode_id,
+                "observation": session.current_feedback(),
+                "available_commands": session.available_commands(),
+                "room": self._room_info(session),
+            }
+    def reset(self) -> dict[str, Any]:
+        with self._lock:
+            if self._session is not None:
+                self._session.close()
+            self._session = None
+            self._compiled = None
+            self._clear_stale_files()
+            return {"ok": True}
+    def command(self, raw_command: str) -> dict[str, Any]:
+        with self._lock:
+            session = self._session
+            if session is None:
+                return {"ok": False, "error": "No active session. POST /api/start first."}
+            if session.done:
+                return {
+                    "ok": False,
+                    "error": "Episode is complete.",
+                    "done": True,
+                    "player_won": session.player_won,
+                }
+            try:
+                turn = session.step(raw_command)
+            except (DMInterfaceError, RuntimeError) as exc:
+                return {"ok": False, "error": str(exc)}
+            status = "complete" if session.done and session.player_won else (
+                "failed" if session.done else "running"
+            )
+            self._write_state(status)
+            return {
+                "ok": True,
+                "step": turn.step,
+                "command": turn.textworld_command,
+                "observation": turn.observation,
+                "done": session.done,
+                "player_won": session.player_won,
+                "available_commands": [] if session.done else session.available_commands(),
+                "room": self._room_info(session),
+            }
+    def get_state_payload(self) -> dict[str, Any] | None:
+        with self._lock:
+            session = self._session
+            compiled = self._compiled
+            if session is None or compiled is None:
+                return None
+            return self._snapshot(session, compiled).model_dump()
+    def _make_adapter(self) -> SimpleInterfaceAdapter | GeminiInterfaceAdapter:
+        if self._use_gemini:
+            try:
+                return GeminiInterfaceAdapter(narrate_observations=True)
+            except DMInterfaceError:
+                pass
+        return SimpleInterfaceAdapter()
+    def _write_world(self, world: WorldDefinition) -> None:
+        self._write_json(WORLD_FILENAME, world.model_dump_json(indent=2))
+    def _write_state(self, status: str) -> None:
+        session = self._session
+        compiled = self._compiled
+        if session is None or compiled is None:
+            return
+        snapshot = self._snapshot(session, compiled, status=status)
+        self._write_json(STATE_FILENAME, snapshot.model_dump_json(indent=2))
+    def _snapshot(
+        self,
+        session: EpisodeSession,
+        compiled: CompiledWorld,
+        status: str | None = None,
+    ) -> LiveStateSnapshot:
+        from datetime import datetime, timezone
+        room_ids = {
+            node.id for node in compiled.world.nodes if node.type in {"location", "junction"}
+        }
+        commands = [] if session.done else session.available_commands()
+        if status is None:
+            if session.done:
+                status = "complete" if session.player_won else "failed"
+            else:
+                status = "running"
+        return LiveStateSnapshot(
+            episode_id=compiled.episode_id,
+            status=status,
+            updated_at=datetime.now(timezone.utc).isoformat(),
+            title=compiled.world.meta.title,
+            transcript=list(session.transcript),
+            metrics=LiveMetrics(
+                steps_taken=session.steps_taken,
+                min_steps=len(compiled.solver_policy),
+                ratio=session.steps_taken / len(compiled.solver_policy) if compiled.solver_policy else None,
+                player_won=session.player_won if session.done else None,
+            ),
+            runtime=LiveRuntime(
+                current_room_id=session.current_room_id,
+                inventory_item_ids=sorted(session.inventory),
+                discovered_clue_ids=sorted(session.discovered_clues),
+                traded_npc_ids=sorted(session.traded_npcs),
+                visited_room_ids=sorted(room_ids & session.visited_nodes),
+                available_commands=commands,
+                invalid_command_count=session.invalid_command_count,
+                wrong_submit_count=session.wrong_submit_count,
+                open_node_ids=sorted(session.open_nodes),
+                locked_node_ids=sorted(session.locked_nodes),
+            ),
+            current_room=self._current_room_snapshot(session),
+        )
+    @staticmethod
+    def _current_room_snapshot(session: EpisodeSession) -> LiveCurrentRoom | None:
+        node_by_id = {node.id: node for node in session.compiled.world.nodes}
+        room = node_by_id.get(session.current_room_id)
+        if room is None:
+            return None
+        visible_nodes = [
+            node.id
+            for node in session.compiled.world.nodes
+            if getattr(node, "parent_id", None) == session.current_room_id
+            and (node.type != "readable" or node.id in session.revealed_readables)
+        ]
+        visible_nodes.extend(
+            sorted(
+                door_id
+                for door_id, rooms in session.compiled.door_rooms.items()
+                if session.current_room_id in rooms
+            )
+        )
+        visible_items = sorted(
+            item_id
+            for item_id, location in session.item_locations.items()
+            if location == session.current_room_id
+        )
+        return LiveCurrentRoom(
+            id=room.id,
+            label=room.label,
+            description=room.description,
+            visible_node_ids=sorted(set(visible_nodes)),
+            visible_item_ids=visible_items,
+        )
+    @staticmethod
+    def _room_info(session: EpisodeSession) -> dict[str, Any]:
+        node_by_id = {node.id: node for node in session.compiled.world.nodes}
+        room = node_by_id.get(session.current_room_id)
+        return {
+            "id": session.current_room_id,
+            "label": room.label if room else session.current_room_id,
+            "description": room.description if room else "",
+        }
+    def _write_json(self, filename: str, payload: str) -> None:
+        self._live_dir.mkdir(parents=True, exist_ok=True)
+        path = self._live_dir / filename
+        tmp_path = path.with_suffix(path.suffix + ".tmp")
+        tmp_path.write_text(payload + "\n", encoding="utf-8")
+        tmp_path.replace(path)
+def create_server(
+    *,
+    live_dir: Path | None = None,
+    host: str = "127.0.0.1",
+    port: int = 8000,
+    use_gemini: bool = False,
+) -> ThreadingHTTPServer:
+    resolved_live_dir = live_dir or DEFAULT_LIVE_DIR
+    game = GameSessionManager(resolved_live_dir, use_gemini=use_gemini)
+    class LiveViewerHandler(BaseHTTPRequestHandler):
+        server_version = "AgentsMasterLive/1.0"
+        def do_GET(self) -> None:  # noqa: N802
+            path = urlparse(self.path).path
+            if path == "/api/state":
+                self._serve_live_file(STATE_FILENAME)
+                return
+            if path == "/api/world":
+                self._serve_live_file(WORLD_FILENAME)
+                return
+            if path == "/":
+                self._serve_index()
+                return
+            if path == "/favicon.ico":
+                self.send_response(HTTPStatus.NO_CONTENT)
+                self.end_headers()
+                return
+            if self._serve_web_file(path):
+                return
+            if WEB_DIST_DIR.exists() and Path(path).suffix == "":
+                self._serve_index()
+                return
+            self._respond(HTTPStatus.NOT_FOUND, b"Not found\n", "text/plain; charset=utf-8")
+        def do_POST(self) -> None:  # noqa: N802
+            path = urlparse(self.path).path
+            body = self._read_body()
+            if path == "/api/reset":
+                result = game.reset()
+                self._json_respond(HTTPStatus.OK, result)
+                return
+            if path == "/api/start":
+                try:
+                    world_input = json.loads(body) if body else None
+                    if world_input is None:
+                        self._json_respond(HTTPStatus.BAD_REQUEST, {"ok": False, "error": "Missing JSON body."})
+                        return
+                    result = game.start(world_input)
+                    self._json_respond(HTTPStatus.OK, result)
+                except (DMCompileError, ValueError, json.JSONDecodeError) as exc:
+                    self._json_respond(HTTPStatus.BAD_REQUEST, {"ok": False, "error": str(exc)})
+                return
+            if path == "/api/command":
+                try:
+                    data = json.loads(body) if body else {}
+                    command = data.get("command", "").strip()
+                    if not command:
+                        self._json_respond(HTTPStatus.BAD_REQUEST, {"ok": False, "error": "Missing 'command' field."})
+                        return
+                    result = game.command(command)
+                    self._json_respond(HTTPStatus.OK, result)
+                except json.JSONDecodeError as exc:
+                    self._json_respond(HTTPStatus.BAD_REQUEST, {"ok": False, "error": str(exc)})
+                return
+            self._respond(HTTPStatus.NOT_FOUND, b"Not found\n", "text/plain; charset=utf-8")
+        def log_message(self, format: str, *args: object) -> None:  # noqa: A003
+            del format, args
+        def _read_body(self) -> bytes:
+            length = int(self.headers.get("Content-Length", 0))
+            return self.rfile.read(length) if length > 0 else b""
+        def _serve_index(self) -> None:
+            index_path = WEB_DIST_DIR / "index.html"
+            if index_path.is_file():
+                self._respond(HTTPStatus.OK, index_path.read_bytes(), "text/html; charset=utf-8")
+            else:
+                from .templates import render_index
+                self._respond(HTTPStatus.OK, render_index().encode("utf-8"), "text/html; charset=utf-8")
+        def _serve_live_file(self, filename: str) -> None:
+            payload = load_live_payload(resolved_live_dir, filename)
+            if payload is None:
+                self.send_response(HTTPStatus.NO_CONTENT)
+                self.send_header("Cache-Control", "no-store")
+                self.end_headers()
+                return
+            self._respond(
+                HTTPStatus.OK, payload, "application/json; charset=utf-8",
+                extra_headers={"Cache-Control": "no-store"},
+            )
+        def _serve_web_file(self, path: str) -> bool:
+            candidate = (WEB_DIST_DIR / path.lstrip("/")).resolve()
+            try:
+                candidate.relative_to(WEB_DIST_DIR.resolve())
+            except ValueError:
+                return False
+            if not candidate.is_file():
+                return False
+            content_type = mimetypes.guess_type(candidate.name)[0] or "application/octet-stream"
+            self._respond(HTTPStatus.OK, candidate.read_bytes(), content_type)
+            return True
+        def _json_respond(self, status: HTTPStatus, data: dict[str, Any]) -> None:
+            payload = json.dumps(data).encode("utf-8")
+            self._respond(status, payload, "application/json; charset=utf-8",
+                          extra_headers={"Cache-Control": "no-store"})
+        def _respond(
+            self, status: HTTPStatus, payload: bytes, content_type: str,
+            *, extra_headers: dict[str, str] | None = None,
+        ) -> None:
+            self.send_response(status)
+            self.send_header("Content-Type", content_type)
+            self.send_header("Content-Length", str(len(payload)))
+            if extra_headers:
+                for key, value in extra_headers.items():
+                    self.send_header(key, value)
+            self.end_headers()
+            self.wfile.write(payload)
+    return ThreadingHTTPServer((host, port), LiveViewerHandler)
+def run_server(*, port: int = 8000, live_dir: Path | None = None, host: str = "127.0.0.1", use_gemini: bool = False) -> None:
+    server = create_server(live_dir=live_dir, host=host, port=port, use_gemini=use_gemini)
+    print(f"Serving live viewer on http://{host}:{server.server_address[1]}")
+    try:
+        server.serve_forever()
+    finally:
+        server.server_close()

agents/master/session.py ADDED Viewed

	@@ -0,0 +1,484 @@

+from __future__ import annotations
+import json
+import textwrap
+from collections import deque
+from typing import TYPE_CHECKING, Any, Callable
+import textworld
+from textworld.core import EnvInfos, GameState
+from .base import INVENTORY_ID, normalize_answer_text, suppress_unsupported_game_warning
+from .interface import InterfaceAdapter, SimpleInterfaceAdapter
+from .schema import CompiledWorld, Turn
+# Callback signature for per-turn listeners: (session, turn) -> None. The
+# alias is defined only under TYPE_CHECKING, so it does not exist at runtime.
+if TYPE_CHECKING:
+    TurnListener = Callable[["EpisodeSession", Turn], None]
+class EpisodeSession:
+    # Start a TextWorld game for `compiled` and initialize all tracking state.
+    #   compiled:          compiled world; its game_file is loaded by TextWorld.
+    #   interface_adapter: adapter stored on the session; None raises ValueError.
+    #   turn_listener:     optional callback stored on the session.
+    # NOTE(review): the default SimpleInterfaceAdapter() is evaluated once, when
+    # the function is defined, so every session created without an explicit
+    # adapter shares that single instance -- confirm the adapter is stateless.
+    def __init__(
+        self,
+        compiled: CompiledWorld,
+        interface_adapter: InterfaceAdapter = SimpleInterfaceAdapter(),
+        turn_listener: "TurnListener | None" = None,
+    ) -> None:
+        if interface_adapter is None:
+            raise ValueError("interface_adapter must not be None.")
+        self.compiled = compiled
+        self.interface_adapter = interface_adapter
+        self.turn_listener = turn_listener
+        # Start the game and take the initial state inside the warning
+        # suppression context imported from .base.
+        with suppress_unsupported_game_warning():
+            self.env = textworld.start(str(compiled.game_file), request_infos=self._requested_infos())
+            self.state = self.env.reset()
+        self._closed = False
+        self.done = False
+        self.player_won = False
+        self.steps_taken = 0
+        self.invalid_command_count = 0
+        self.wrong_submit_count = 0
+        self.used_items: set[str] = set()
+        self.discovered_clues: set[str] = set()
+        self.consulted_npcs: set[str] = set()
+        self.traded_npcs: set[str] = set()
+        self.prepared_readables: set[str] = set()
+        self.completed_recipe_outputs: set[str] = set()
+        self.completed_use_targets: set[str] = set()
+        self.unlocked_doors: set[str] = set()
+        self.consulted_guardian = False
+        # Readables that some use-effect reveals start hidden; every other
+        # readable node counts as revealed from the beginning.
+        self.hidden_readables = {
+            effect.reveals_readable_id for effect in compiled.use_effects.values() if effect.reveals_readable_id
+        }
+        self.revealed_readables = {
+            node.id for node in compiled.world.nodes if node.type == "readable" and node.id not in self.hidden_readables
+        }
+        # Copy the start locations so this session does not mutate the
+        # compiled world's dict.
+        self.item_locations = dict(compiled.item_start_locations)
+        # Items whose start location is INVENTORY_ID begin in the inventory.
+        self.inventory = {item_id for item_id, location in self.item_locations.items() if location == INVENTORY_ID}
+        # Initial open/locked sets come from the container and door node flags.
+        self.open_nodes = {
+            node.id for node in compiled.world.nodes if node.type in {"container", "door"} and getattr(node, "open", False)
+        }
+        self.locked_nodes = {
+            node.id for node in compiled.world.nodes if node.type in {"container", "door"} and getattr(node, "locked", False)
+        }
+        self.current_room_id = compiled.world.meta.start_node_id
+        self.visited_nodes: set[str] = {self.current_room_id}
+        self.transcript: list[Turn] = []
+        # Keeps only the three most recent entries (maxlen=3).
+        self.recent_normalized_commands: deque[str] = deque(maxlen=3)
+        self._node_by_id = {node.id: node for node in compiled.world.nodes}
+        # Labels for nodes and items share one lookup; item labels are added
+        # second, so they win if an item id equals a node id.
+        self._label_by_id = {node.id: node.label for node in compiled.world.nodes}
+        self._label_by_id.update({item.id: item.label for item in compiled.world.items})
+        # Inverse of item_command_names: command name -> item id.
+        self._item_name_to_id = {name: item_id for item_id, name in compiled.item_command_names.items()}
+        # Must stay last: the fingerprint reads attributes assigned above.
+        self.last_state_fingerprint = self.state_fingerprint()
+    @staticmethod
+    def _requested_infos() -> EnvInfos:
+        return EnvInfos(
+            feedback=True,
+            description=True,
+            inventory=True,
+            location=True,
+            facts=False,
+            won=True,
+            lost=True,
+            score=True,
+            moves=True,
+            last_action=True,
+            last_command=True,
+            admissible_commands=True,
+            policy_commands=True,
+            extras=["walkthrough"],
+        )
+    def available_commands(self) -> list[str]:
+        commands = set(self.state.admissible_commands or [])
+        commands.update(self._custom_commands())
+        return sorted(commands)
+    def current_feedback(self) -> str:
+        return self.interface_adapter.render_observation(self.state.feedback or "", self.state, self)
+    def state_fingerprint(self) -> str:
+        return json.dumps(
+            {
+                "room": self.current_room_id,
+                "inventory": sorted(self.inventory),
+                "clues": sorted(self.discovered_clues),
+                "opened": sorted(self.open_nodes),
+                "traded": sorted(self.traded_npcs),
+                "use_targets": sorted(self.completed_use_targets),
+                "recipe_outputs": sorted(self.completed_recipe_outputs),
+            },
+            sort_keys=True,
+        )
+    def node_id_for_command_name(self, command_name: str, node_types: set[str] | None = None) -> str | None:
+        for node in self.compiled.world.nodes:
+            safe_name = self.compiled.node_command_names.get(node.id)
+            if safe_name != command_name:
+                continue
+            if node_types is None or node.type in node_types:
+                return node.id
+        return None
+    def step(self, raw_command: str) -> Turn:
+        if self.done:
+            raise RuntimeError("Episode is already complete.")
+        lowered = self.interface_adapter.translate_command(raw_command, self).lower().strip()
+        if turn := self._handle_submit(raw_command, lowered):
+            return turn
+        if self._is_wrapper_command(lowered):
+            return self._step_wrapper(raw_command, lowered)
+        return self._step_env(raw_command, lowered)
+    # Handle a "submit <answer>" command.
+    # Returns None when the command is not a submit (the caller then routes it elsewhere);
+    # otherwise returns the recorded Turn for either a rejection or the winning submit.
+    # The three rejection gates are checked in a fixed order, so the player is told about the
+    # first unmet requirement only.
+    def _handle_submit(self, raw_command: str, lowered: str) -> Turn | None:
+        if not lowered.startswith("submit "):
+            return None
+        # Drop the 7-character "submit " prefix before normalizing the answer text.
+        answer = normalize_answer_text(lowered[7:])
+        # Gate 1: the player must be in the guardian's room and must already have talked to the guardian.
+        if self.current_room_id != self.compiled.guardian_room_id or self.compiled.guardian_id not in self.consulted_npcs:
+            return self._wrapper_only_turn(
+                raw_command,
+                lowered,
+                "The guardian has not asked for your answer yet.",
+                {"wrapper": "submit_rejected", "reason": "guardian_not_ready"},
+            )
+        # Gate 2: the discovered clue set must equal the full set of clue ids known to the compiled world.
+        required_clues = set(self.compiled.clue_text_by_id)
+        if self.discovered_clues != required_clues:
+            return self._wrapper_only_turn(
+                raw_command,
+                lowered,
+                "The guardian waits. You have not gathered enough evidence yet.",
+                {
+                    "wrapper": "submit_rejected",
+                    "reason": "missing_clues",
+                    "missing_clues": sorted(required_clues - self.discovered_clues),
+                },
+            )
+        # Gate 3: only a wrong answer given at this point counts towards wrong_submit_count.
+        if answer != self.compiled.correct_answer_normalized:
+            self.wrong_submit_count += 1
+            return self._wrapper_only_turn(
+                raw_command,
+                lowered,
+                "The guardian shakes their head. That answer is wrong.",
+                {"wrapper": "submit_rejected", "reason": "wrong_answer", "submitted": answer},
+            )
+        # Correct answer: consume a step, finish the episode as a win and record the final turn.
+        self.steps_taken += 1
+        self.done = True
+        self.player_won = True
+        turn = Turn(
+            step=self.steps_taken,
+            player_action=raw_command,
+            # The transcript stores the compiled world's canonical submit command rather than the player's text.
+            textworld_command=self.compiled.correct_submit_command,
+            observation="The guardian weighs your answer, then nods.\n\nThe dungeon yields. You solved it.",
+            game_state_delta={"wrapper": "submit_forwarded", "won": True, "location": self.current_room_id},
+        )
+        return self._record_turn(turn)
+    # Forward a command to the underlying environment and record the resulting turn.
+    # raw_command is what the player typed; lowered is the translated, normalized form sent to the env.
+    def _step_env(self, raw_command: str, lowered: str) -> Turn:
+        previous = self.state
+        # Capture the admissible set BEFORE stepping: success is judged against what was allowed
+        # in the state the command was issued from, not the state it leads to.
+        admissible = set(previous.admissible_commands or [])
+        self.state, _, env_done = self.env.step(lowered)
+        self.steps_taken += 1
+        succeeded = lowered in admissible
+        if not succeeded:
+            self.invalid_command_count += 1
+        else:
+            # Keep the wrapper's own bookkeeping (room, open/locked nodes, inventory) in step with the env.
+            self._apply_env_side_effects(lowered)
+        # The episode ends when the env reports done or the new state is flagged as won.
+        self.done = bool(env_done or self.state.won)
+        observation = self.interface_adapter.render_observation(self.state.feedback or "", self.state, self)
+        turn = Turn(
+            step=self.steps_taken,
+            player_action=raw_command,
+            textworld_command=lowered,
+            observation=observation,
+            game_state_delta=self._compute_delta(previous, self.state, succeeded, self.current_room_id),
+        )
+        return self._record_turn(turn)
+    def _step_wrapper(self, raw_command: str, lowered: str) -> Turn:
+        observation, delta = self._apply_wrapper_command(lowered)
+        self.steps_taken += 1
+        if delta.get("succeeded") is False:
+            self.invalid_command_count += 1
+        delta.setdefault("location", self.current_room_id)
+        rendered = self.interface_adapter.render_observation(observation, self.state, self)
+        turn = Turn(
+            step=self.steps_taken,
+            player_action=raw_command,
+            textworld_command=lowered,
+            observation=rendered,
+            game_state_delta=delta,
+        )
+        return self._record_turn(turn)
+    def _apply_env_side_effects(self, command: str) -> None:
+        if command.startswith("go "):
+            direction = command[3:].strip()
+            edge = self.compiled.room_edges_by_direction.get((self.current_room_id, direction))
+            if edge is not None:
+                self.current_room_id = edge.to_node_id
+                self.visited_nodes.add(edge.to_node_id)
+            return
+        if command.startswith("open "):
+            node_id = self.node_id_for_command_name(command[5:].strip(), node_types={"container", "door"})
+            if node_id:
+                self.open_nodes.add(node_id)
+                self.visited_nodes.add(node_id)
+            return
+        if command.startswith("unlock ") and " with " in command:
+            target_name, key_name = command[7:].split(" with ", 1)
+            target_id = self.node_id_for_command_name(target_name.strip(), node_types={"container", "door"})
+            if target_id:
+                self.locked_nodes.discard(target_id)
+                if self._node_by_id[target_id].type == "door":
+                    self.unlocked_doors.add(target_id)
+                self.visited_nodes.add(target_id)
+            self._mark_item_by_name(key_name.strip())
+            return
+        if command.startswith("take "):
+            item_name = command[5:].split(" from ", 1)[0].strip()
+            item_id = self._item_name_to_id.get(item_name)
+            if item_id:
+                self.inventory.add(item_id)
+                self.item_locations[item_id] = INVENTORY_ID
+                self.used_items.add(item_id)
+                self.visited_nodes.add(item_id)
+    def _apply_wrapper_command(self, command: str) -> tuple[str, dict[str, Any]]:
+        if command.startswith("read "):
+            return self._apply_read(command)
+        if command.startswith("talk "):
+            return self._apply_talk(command)
+        if command.startswith("use ") and " on " in command:
+            return self._apply_use(command)
+        if command.startswith("combine ") and " with " in command:
+            return self._apply_combine(command)
+        if command.startswith("give ") and " to " in command:
+            return self._apply_give(command)
+        raise RuntimeError(f"Unsupported wrapper command '{command}'.")
+    def _apply_read(self, command: str) -> tuple[str, dict[str, Any]]:
+        readable_id = self.node_id_for_command_name(command[5:].strip(), node_types={"readable"})
+        if not readable_id or readable_id not in self.revealed_readables:
+            return self._fail("You can't read that right now.", command)
+        node = self._node_by_id[readable_id]
+        if node.parent_id != self.current_room_id:
+            return self._fail("You are too far away to read that.", command)
+        if node.requires_item_id and readable_id not in self.prepared_readables:
+            return self._fail("You still need the right tool before the text becomes legible.", command)
+        clue_id = self.compiled.readable_clue_by_id[readable_id]
+        self.discovered_clues.add(clue_id)
+        self.visited_nodes.add(readable_id)
+        return self._success(
+            textwrap.dedent(
+                f"""
+                {node.description}
+                "{self.compiled.clue_text_by_id[clue_id]}"
+                """
+            ).strip(),
+            command,
+        )
+    def _apply_talk(self, command: str) -> tuple[str, dict[str, Any]]:
+        npc_id = self.node_id_for_command_name(command[5:].strip(), node_types={"npc"})
+        if not npc_id:
+            return self._fail("You can't talk to that right now.", command)
+        node = self._node_by_id[npc_id]
+        if node.parent_id != self.current_room_id:
+            return self._fail("You are too far away to talk to that.", command)
+        self.consulted_npcs.add(npc_id)
+        if npc_id == self.compiled.guardian_id:
+            self.consulted_guardian = True
+        self.visited_nodes.add(npc_id)
+        return self._success(node.description, command)
+    def _apply_use(self, command: str) -> tuple[str, dict[str, Any]]:
+        item_name, target_name = command[4:].split(" on ", 1)
+        item_id = self._item_name_to_id.get(item_name.strip())
+        target_id = self.node_id_for_command_name(target_name.strip(), node_types={"readable", "fixture"})
+        if not item_id or item_id not in self.inventory:
+            return self._fail("You don't have the item needed for that.", command)
+        if not target_id:
+            return self._fail("You can't use that here.", command)
+        target = self._node_by_id[target_id]
+        if target.parent_id != self.current_room_id:
+            return self._fail("That target is not within reach.", command)
+        effect = self.compiled.use_effects.get(target_id)
+        if effect is None or effect.required_item_id != item_id:
+            return self._fail("That item doesn't seem to work there.", command)
+        if effect.consumes_item:
+            self.inventory.discard(item_id)
+            self.item_locations[item_id] = None
+        self.used_items.add(item_id)
+        self.visited_nodes.add(target_id)
+        self.completed_use_targets.add(target_id)
+        if effect.clue_id:
+            self.prepared_readables.add(target_id)
+            self.discovered_clues.add(effect.clue_id)
+            return self._success(
+                textwrap.dedent(
+                    f"""
+                    {target.description}
+                    "{self.compiled.clue_text_by_id[effect.clue_id]}"
+                    """
+                ).strip(),
+                command,
+            )
+        if effect.reveals_readable_id:
+            self.revealed_readables.add(effect.reveals_readable_id)
+            return self._success(f"The {self._label_by_id[effect.reveals_readable_id]} is revealed.", command)
+        if effect.reveals_item_id:
+            self.item_locations[effect.reveals_item_id] = self.current_room_id
+            return self._success(f"The {self._label_by_id[effect.reveals_item_id]} is revealed.", command)
+        return self._fail("Nothing happens.", command)
+    def _apply_combine(self, command: str) -> tuple[str, dict[str, Any]]:
+        item_a_name, item_b_name = command[8:].split(" with ", 1)
+        item_a_id = self._item_name_to_id.get(item_a_name.strip())
+        item_b_id = self._item_name_to_id.get(item_b_name.strip())
+        if not item_a_id or not item_b_id or item_a_id not in self.inventory or item_b_id not in self.inventory:
+            return self._fail("You do not have both pieces required to combine those.", command)
+        output_id = self.compiled.recipe_map.get(frozenset({item_a_id, item_b_id}))
+        if not output_id:
+            return self._fail("Those items do not fit together.", command)
+        self.inventory.discard(item_a_id)
+        self.inventory.discard(item_b_id)
+        self.item_locations[item_a_id] = None
+        self.item_locations[item_b_id] = None
+        self.inventory.add(output_id)
+        self.item_locations[output_id] = INVENTORY_ID
+        self.used_items.update({item_a_id, item_b_id, output_id})
+        self.completed_recipe_outputs.add(output_id)
+        self.visited_nodes.add(output_id)
+        return self._success(f"You assemble the {self._label_by_id[output_id]}.", command)
+    def _apply_give(self, command: str) -> tuple[str, dict[str, Any]]:
+        item_name, npc_name = command[5:].split(" to ", 1)
+        item_id = self._item_name_to_id.get(item_name.strip())
+        npc_id = self.node_id_for_command_name(npc_name.strip(), node_types={"npc"})
+        if not item_id or item_id not in self.inventory:
+            return self._fail("You do not have that item to give.", command)
+        if not npc_id:
+            return self._fail("There is no one here by that name.", command)
+        npc = self._node_by_id[npc_id]
+        if npc.parent_id != self.current_room_id:
+            return self._fail("That person is not here.", command)
+        trade = self.compiled.npc_trade_map.get(npc_id)
+        if trade is None or trade.required_item_id != item_id:
+            return self._fail("They are not interested in that item.", command)
+        if npc_id in self.traded_npcs:
+            return self._fail("That trade has already been completed.", command)
+        self.inventory.discard(item_id)
+        self.item_locations[item_id] = None
+        self.used_items.add(item_id)
+        self.traded_npcs.add(npc_id)
+        if trade.gives_item_id:
+            self.inventory.add(trade.gives_item_id)
+            self.item_locations[trade.gives_item_id] = INVENTORY_ID
+            self.used_items.add(trade.gives_item_id)
+            return self._success(f"You receive the {self._label_by_id[trade.gives_item_id]}.", command)
+        if trade.gives_clue_id:
+            self.discovered_clues.add(trade.gives_clue_id)
+            return self._success(f'"{self.compiled.clue_text_by_id[trade.gives_clue_id]}"', command)
+        return self._fail("Nothing comes of the trade.", command)
+    def _custom_commands(self) -> set[str]:
+        commands: set[str] = set()
+        for node in self.compiled.world.nodes:
+            if node.type == "npc" and node.parent_id == self.current_room_id:
+                commands.add(f"talk {self.compiled.node_command_names[node.id]}")
+                trade = self.compiled.npc_trade_map.get(node.id)
+                if trade and node.id not in self.traded_npcs and trade.required_item_id in self.inventory:
+                    commands.add(
+                        f"give {self.compiled.item_command_names[trade.required_item_id]} to {self.compiled.node_command_names[node.id]}"
+                    )
+            elif node.type == "readable" and node.parent_id == self.current_room_id and node.id in self.revealed_readables:
+                if not node.requires_item_id or node.id in self.prepared_readables:
+                    commands.add(f"read {self.compiled.node_command_names[node.id]}")
+            elif node.type == "fixture" and node.parent_id == self.current_room_id:
+                effect = self.compiled.use_effects.get(node.id)
+                if effect and effect.required_item_id in self.inventory:
+                    commands.add(
+                        f"use {self.compiled.item_command_names[effect.required_item_id]} on {self.compiled.node_command_names[node.id]}"
+                    )
+        for readable_id, effect in self.compiled.use_effects.items():
+            node = self._node_by_id.get(readable_id)
+            if node and node.type == "readable" and node.parent_id == self.current_room_id and effect.required_item_id in self.inventory:
+                commands.add(
+                    f"use {self.compiled.item_command_names[effect.required_item_id]} on {self.compiled.node_command_names[readable_id]}"
+                )
+        for recipe_inputs, output_id in self.compiled.recipe_map.items():
+            del output_id
+            item_ids = sorted(recipe_inputs)
+            if all(item_id in self.inventory for item_id in item_ids):
+                commands.add(
+                    f"combine {self.compiled.item_command_names[item_ids[0]]} with {self.compiled.item_command_names[item_ids[1]]}"
+                )
+                commands.add(
+                    f"combine {self.compiled.item_command_names[item_ids[1]]} with {self.compiled.item_command_names[item_ids[0]]}"
+                )
+        return commands
+    def _is_wrapper_command(self, command: str) -> bool:
+        return any(
+            command.startswith(prefix)
+            for prefix in ("read ", "talk ", "use ", "combine ", "give ")
+        )
+    def _mark_item_by_name(self, name: str) -> None:
+        item_id = self._item_name_to_id.get(name)
+        if item_id:
+            self.used_items.add(item_id)
+    def _success(self, observation: str, command: str) -> tuple[str, dict[str, Any]]:
+        return observation, {"wrapper": "custom", "command": command, "succeeded": True, "location": self.current_room_id}
+    def _fail(self, observation: str, command: str) -> tuple[str, dict[str, Any]]:
+        return observation, {"wrapper": "custom", "command": command, "succeeded": False, "location": self.current_room_id}
+    @staticmethod
+    def _compute_delta(previous: GameState, current: GameState, succeeded: bool, fallback_location: str | None) -> dict[str, Any]:
+        return {
+            "added_facts": [],
+            "removed_facts": [],
+            "location": current.location or fallback_location,
+            "score": current.score,
+            "won": current.won,
+            "lost": current.lost,
+            "succeeded": succeeded,
+        }
+    def _wrapper_only_turn(
+        self,
+        raw_command: str,
+        translated: str,
+        observation: str,
+        delta: dict[str, Any],
+    ) -> Turn:
+        self.steps_taken += 1
+        delta.setdefault("location", self.current_room_id)
+        turn = Turn(
+            step=self.steps_taken,
+            player_action=raw_command,
+            textworld_command=translated,
+            observation=observation,
+            game_state_delta=delta,
+        )
+        return self._record_turn(turn)
+    def _record_turn(self, turn: Turn) -> Turn:
+        self.transcript.append(turn)
+        self.last_state_fingerprint = self.state_fingerprint()
+        if self.turn_listener is not None:
+            self.turn_listener(self, turn)
+        return turn
+    def close(self) -> None:
+        if self._closed:
+            return
+        close = getattr(self.env, "close", None)
+        if callable(close):
+            close()
+        self._closed = True

agents/master/snapshots.py ADDED Viewed

	@@ -0,0 +1,308 @@

+from __future__ import annotations
+import json
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Protocol
+from pydantic import Field
+from .base import ARTIFACTS_ROOT
+from .schema import CompiledWorld, DMFeedback, DMObservation, StrictModel, Turn, WorldDefinition
+if TYPE_CHECKING:
+    from .session import EpisodeSession
+# File inside the live directory that receives each LiveStateSnapshot.
+STATE_FILENAME = "state.json"
+# File inside the live directory that receives the compiled world definition.
+WORLD_FILENAME = "world.json"
+# Default value of LiveStateSnapshot.schema_version.
+LIVE_SCHEMA_VERSION = 1
+# Directory used by LiveSnapshotWriter when the caller supplies no live_dir.
+DEFAULT_LIVE_DIR = ARTIFACTS_ROOT / "live"
+# Progress numbers embedded in a live snapshot; everything except steps_taken may be unknown (None).
+class LiveMetrics(StrictModel):
+    # Steps taken so far in the episode.
+    steps_taken: int = 0
+    # Reference step count; the writer fills it from the solver policy length or the final observation.
+    min_steps: int | None = None
+    # steps_taken / min_steps unless an explicit ratio was supplied (see LiveSnapshotWriter._metrics).
+    ratio: float | None = None
+    # Reward and outcome are only passed in by the completion snapshot.
+    reward: float | None = None
+    player_won: bool | None = None
+# Snapshot of a session's mutable state, as filled in by LiveSnapshotWriter._runtime.
+# All defaults describe an empty state, which is what gets written when no session exists.
+class LiveRuntime(StrictModel):
+    current_room_id: str | None = None
+    # The id lists below are written in sorted order by the snapshot writer.
+    inventory_item_ids: list[str] = Field(default_factory=list)
+    discovered_clue_ids: list[str] = Field(default_factory=list)
+    traded_npc_ids: list[str] = Field(default_factory=list)
+    # Visited nodes restricted to nodes of type "location" or "junction".
+    visited_room_ids: list[str] = Field(default_factory=list)
+    # Left empty once the session is done.
+    available_commands: list[str] = Field(default_factory=list)
+    invalid_command_count: int = 0
+    wrong_submit_count: int = 0
+    open_node_ids: list[str] = Field(default_factory=list)
+    locked_node_ids: list[str] = Field(default_factory=list)
+# Description of the room the player currently occupies, as filled in by LiveSnapshotWriter._current_room.
+class LiveCurrentRoom(StrictModel):
+    id: str | None = None
+    label: str | None = None
+    description: str | None = None
+    # Child nodes of the room (unrevealed readables excluded) plus doors attached to the room.
+    visible_node_ids: list[str] = Field(default_factory=list)
+    # Items whose tracked location is this room.
+    visible_item_ids: list[str] = Field(default_factory=list)
+# Top-level document serialized to the state file on every lifecycle event.
+class LiveStateSnapshot(StrictModel):
+    schema_version: int = LIVE_SCHEMA_VERSION
+    episode_id: str
+    # The writer in this module emits "compiling", "running", "complete", "failed" or "compile_error".
+    status: str
+    # ISO-8601 UTC timestamp of when the snapshot was produced.
+    updated_at: str
+    title: str | None = None
+    # Name of the runner producing the snapshots, if the writer was given one.
+    runner: str | None = None
+    # Error text; only set by the error snapshot.
+    error: str | None = None
+    transcript: list[Turn] = Field(default_factory=list)
+    metrics: LiveMetrics = Field(default_factory=LiveMetrics)
+    # Only set by the completion snapshot.
+    feedback: DMFeedback | None = None
+    runtime: LiveRuntime = Field(default_factory=LiveRuntime)
+    current_room: LiveCurrentRoom | None = None
+# Structural interface for objects that want to follow an episode's lifecycle.
+# LiveSnapshotWriter below provides all five hooks with matching signatures.
+class LiveObserver(Protocol):
+    # Called with the raw world input (model or plain dict) before a compiled world exists.
+    def on_run_start(self, episode_id: str, world_input: WorldDefinition | dict[str, Any]) -> None:
+        ...
+    # Called once a compiled world and a session are available.
+    def on_compile_success(self, compiled: CompiledWorld, session: EpisodeSession) -> None:
+        ...
+    # Called with the session and the turn that was just recorded.
+    def on_turn(self, session: EpisodeSession, turn: Turn) -> None:
+        ...
+    # Called with the final observation of the episode.
+    def on_complete(self, compiled: CompiledWorld, session: EpisodeSession, observation: DMObservation) -> None:
+        ...
+    # Called on failure; keyword-only, and compiled/session may be None when they do not exist yet.
+    def on_error(
+        self,
+        *,
+        episode_id: str,
+        error: str,
+        world_input: WorldDefinition | dict[str, Any],
+        compiled: CompiledWorld | None = None,
+        session: EpisodeSession | None = None,
+    ) -> None:
+        ...
+# Observer that mirrors an episode's progress to JSON files in a "live" directory:
+# the world definition goes to WORLD_FILENAME and the latest LiveStateSnapshot to STATE_FILENAME.
+# Every hook rewrites the whole state file; nothing is appended.
+class LiveSnapshotWriter:
+    # live_dir falls back to DEFAULT_LIVE_DIR; the directory is created immediately.
+    def __init__(self, live_dir: Path | None = None, runner_name: str | None = None) -> None:
+        self.live_dir = live_dir or DEFAULT_LIVE_DIR
+        self.runner_name = runner_name
+        self.live_dir.mkdir(parents=True, exist_ok=True)
+    # Start of a run: drop the previous world file and publish a "compiling" snapshot.
+    def on_run_start(self, episode_id: str, world_input: WorldDefinition | dict[str, Any]) -> None:
+        self._remove_world()
+        snapshot = LiveStateSnapshot(
+            episode_id=episode_id,
+            status="compiling",
+            updated_at=self._timestamp(),
+            title=self._extract_title(world_input),
+            runner=self.runner_name,
+        )
+        self._write_state_snapshot(snapshot)
+    # Compilation succeeded: write the world file, then a "running" snapshot without a transcript.
+    def on_compile_success(self, compiled: CompiledWorld, session: EpisodeSession) -> None:
+        self._write_world(compiled.world)
+        snapshot = LiveStateSnapshot(
+            episode_id=compiled.episode_id,
+            status="running",
+            updated_at=self._timestamp(),
+            title=compiled.world.meta.title,
+            runner=self.runner_name,
+            # The solver policy's length serves as the minimum step count.
+            metrics=self._metrics(min_steps=len(compiled.solver_policy), steps_taken=session.steps_taken),
+            runtime=self._runtime(session),
+            current_room=self._current_room(session),
+        )
+        self._write_state_snapshot(snapshot)
+    # After each turn: publish a "running" snapshot carrying the full transcript so far.
+    def on_turn(self, session: EpisodeSession, turn: Turn) -> None:
+        # The individual turn is not needed; the snapshot copies the whole session transcript.
+        del turn
+        snapshot = LiveStateSnapshot(
+            episode_id=session.compiled.episode_id,
+            status="running",
+            updated_at=self._timestamp(),
+            title=session.compiled.world.meta.title,
+            runner=self.runner_name,
+            transcript=list(session.transcript),
+            metrics=self._metrics(
+                min_steps=len(session.compiled.solver_policy),
+                steps_taken=session.steps_taken,
+            ),
+            runtime=self._runtime(session),
+            current_room=self._current_room(session),
+        )
+        self._write_state_snapshot(snapshot)
+    # Episode finished: status is "complete" when the player won, otherwise "failed".
+    def on_complete(self, compiled: CompiledWorld, session: EpisodeSession, observation: DMObservation) -> None:
+        status = "complete" if observation.player_won else "failed"
+        snapshot = LiveStateSnapshot(
+            episode_id=compiled.episode_id,
+            status=status,
+            updated_at=self._timestamp(),
+            title=compiled.world.meta.title,
+            runner=self.runner_name,
+            transcript=list(session.transcript),
+            metrics=self._metrics(
+                min_steps=observation.min_steps,
+                # A falsy steps_taken on the observation (None or 0) falls back to the session's counter.
+                steps_taken=observation.steps_taken or session.steps_taken,
+                ratio=observation.ratio,
+                reward=observation.reward,
+                player_won=observation.player_won,
+            ),
+            feedback=observation.feedback,
+            runtime=self._runtime(session),
+            current_room=self._current_room(session),
+        )
+        self._write_state_snapshot(snapshot)
+    # Failure: publish an error snapshot with whatever context (compiled world, session) exists.
+    # NOTE(review): status is always "compile_error", even when a session is passed in, which
+    # suggests the error may have happened after compilation - confirm this is intended.
+    def on_error(
+        self,
+        *,
+        episode_id: str,
+        error: str,
+        world_input: WorldDefinition | dict[str, Any],
+        compiled: CompiledWorld | None = None,
+        session: EpisodeSession | None = None,
+    ) -> None:
+        title = compiled.world.meta.title if compiled is not None else self._extract_title(world_input)
+        snapshot = LiveStateSnapshot(
+            episode_id=episode_id,
+            status="compile_error",
+            updated_at=self._timestamp(),
+            title=title,
+            runner=self.runner_name,
+            error=error,
+            transcript=list(session.transcript) if session is not None else [],
+            metrics=self._metrics(
+                min_steps=len(compiled.solver_policy) if compiled is not None else None,
+                steps_taken=session.steps_taken if session is not None else 0,
+            ),
+            runtime=self._runtime(session),
+            current_room=self._current_room(session),
+        )
+        self._write_state_snapshot(snapshot)
+    # Serialize the world definition to WORLD_FILENAME.
+    def _write_world(self, world: WorldDefinition) -> None:
+        self._write_json(self.live_dir / WORLD_FILENAME, world.model_dump_json(indent=2))
+    # Serialize a snapshot to STATE_FILENAME.
+    def _write_state_snapshot(self, snapshot: LiveStateSnapshot) -> None:
+        self._write_json(self.live_dir / STATE_FILENAME, snapshot.model_dump_json(indent=2))
+    # Write the payload (plus a trailing newline) to a sibling "<name>.tmp" file and then rename it
+    # over the target, so the target is swapped in one step instead of being rewritten in place.
+    def _write_json(self, path: Path, payload: str) -> None:
+        path.parent.mkdir(parents=True, exist_ok=True)
+        tmp_path = path.with_suffix(path.suffix + ".tmp")
+        tmp_path.write_text(payload + "\n", encoding="utf-8")
+        tmp_path.replace(path)
+    # Delete the world file if one is present.
+    def _remove_world(self) -> None:
+        world_path = self.live_dir / WORLD_FILENAME
+        if world_path.exists():
+            world_path.unlink()
+    # Current time as a timezone-aware UTC ISO-8601 string.
+    @staticmethod
+    def _timestamp() -> str:
+        return datetime.now(timezone.utc).isoformat()
+    # Pull meta.title out of either a WorldDefinition or a raw dict; returns None when the title
+    # is missing or is not a string.
+    @staticmethod
+    def _extract_title(world_input: WorldDefinition | dict[str, Any]) -> str | None:
+        if isinstance(world_input, WorldDefinition):
+            return world_input.meta.title
+        meta = world_input.get("meta") if isinstance(world_input, dict) else None
+        title = meta.get("title") if isinstance(meta, dict) else None
+        return title if isinstance(title, str) else None
+    # Build LiveMetrics, deriving the ratio when the caller did not supply one.
+    @staticmethod
+    def _metrics(
+        *,
+        min_steps: int | None,
+        steps_taken: int,
+        ratio: float | None = None,
+        reward: float | None = None,
+        player_won: bool | None = None,
+    ) -> LiveMetrics:
+        computed_ratio = ratio
+        # The truthiness test on min_steps skips both None and 0, so the division below is safe.
+        if computed_ratio is None and min_steps:
+            computed_ratio = steps_taken / min_steps
+        return LiveMetrics(
+            steps_taken=steps_taken,
+            min_steps=min_steps,
+            ratio=computed_ratio,
+            reward=reward,
+            player_won=player_won,
+        )
+    # Copy the session's mutable state into a LiveRuntime; with no session, return the defaults.
+    @staticmethod
+    def _runtime(session: EpisodeSession | None) -> LiveRuntime:
+        if session is None:
+            return LiveRuntime()
+        # Only nodes of these two types count as rooms for the visited_room_ids list.
+        room_ids = {
+            node.id
+            for node in session.compiled.world.nodes
+            if node.type in {"location", "junction"}
+        }
+        # A finished session reports no available commands.
+        commands = [] if session.done else session.available_commands()
+        return LiveRuntime(
+            current_room_id=session.current_room_id,
+            inventory_item_ids=sorted(session.inventory),
+            discovered_clue_ids=sorted(session.discovered_clues),
+            traded_npc_ids=sorted(session.traded_npcs),
+            visited_room_ids=sorted(room_ids & session.visited_nodes),
+            available_commands=commands,
+            invalid_command_count=session.invalid_command_count,
+            wrong_submit_count=session.wrong_submit_count,
+            open_node_ids=sorted(session.open_nodes),
+            locked_node_ids=sorted(session.locked_nodes),
+        )
+    # Describe the session's current room; returns None without a session or when the room id
+    # does not match any world node.
+    @staticmethod
+    def _current_room(session: EpisodeSession | None) -> LiveCurrentRoom | None:
+        if session is None:
+            return None
+        node_by_id = {node.id: node for node in session.compiled.world.nodes}
+        room = node_by_id.get(session.current_room_id)
+        if room is None:
+            return None
+        # Children of the room; readables are listed only once they have been revealed.
+        # getattr is used because not every node is guaranteed to carry a parent_id.
+        visible_nodes = [
+            node.id
+            for node in session.compiled.world.nodes
+            if getattr(node, "parent_id", None) == session.current_room_id
+            and (node.type != "readable" or node.id in session.revealed_readables)
+        ]
+        # Doors are added through the door -> rooms mapping rather than through parent_id.
+        visible_nodes.extend(
+            sorted(
+                door_id
+                for door_id, rooms in session.compiled.door_rooms.items()
+                if session.current_room_id in rooms
+            )
+        )
+        visible_items = sorted(
+            item_id
+            for item_id, location in session.item_locations.items()
+            if location == session.current_room_id
+        )
+        return LiveCurrentRoom(
+            id=room.id,
+            label=room.label,
+            description=room.description,
+            # De-duplicate and sort, since a door could be collected by both passes above.
+            visible_node_ids=sorted(set(visible_nodes)),
+            visible_item_ids=visible_items,
+        )
+def load_live_payload(live_dir: Path, filename: str) -> bytes | None:
+    path = live_dir / filename
+    if not path.exists():
+        return None
+    return path.read_bytes()
+def load_live_state(live_dir: Path) -> dict[str, Any] | None:
+    payload = load_live_payload(live_dir, STATE_FILENAME)
+    if payload is None:
+        return None
+    return json.loads(payload)

agents/master/templates.py ADDED Viewed

	@@ -0,0 +1,44 @@

+from __future__ import annotations
+from pathlib import Path
+# Built frontend entry page: www/dist/index.html under the directory two levels above this file's folder.
+DIST_INDEX = Path(__file__).resolve().parents[2] / "www" / "dist" / "index.html"
+def render_index() -> str:
+    """Return the HTML for the viewer's index page.
+
+    The built ``DIST_INDEX`` file is served when it exists; otherwise a static
+    placeholder page explaining how to build the frontend is returned.
+    """
+    if not DIST_INDEX.is_file():
+        # No frontend build on disk: fall back to the inline placeholder page.
+        return """<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>Viewer Not Built</title>
+    <style>
+      body {
+        margin: 0;
+        min-height: 100vh;
+        display: grid;
+        place-items: center;
+        font-family: ui-monospace, Menlo, Monaco, monospace;
+        background: #0a0d14;
+        color: #e7ebf2;
+      }
+      main {
+        max-width: 48rem;
+        padding: 2rem;
+      }
+      code {
+        color: #ffd86b;
+      }
+    </style>
+  </head>
+  <body>
+    <main>
+      <h1>Frontend build not found.</h1>
+      <p>Run <code>npm run dev</code> for the Vite app or <code>npm run build</code> to let the Python server serve the built site from <code>www/dist</code>.</p>
+    </main>
+  </body>
+</html>
+"""
+    return DIST_INDEX.read_text(encoding="utf-8")

agents/openenv_server/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ """OpenEnv HTTP server entrypoints for dungeon environments."""
2	+

agents/openenv_server/__main__.py ADDED Viewed

	@@ -0,0 +1,72 @@

+from __future__ import annotations
+import argparse
+from pathlib import Path
+import uvicorn
+from openenv.core.env_server import create_fastapi_app
+from agents.hero.env import HeroEnvironment
+from agents.hero.schema import HeroObservation, HeroServerAction
+from agents.master.env import DMEnvironment
+from agents.master.sample import load_world
+from agents.master.schema import DMAction, DMObservation
+from agents.shared.runtime import build_interface_adapter, resolve_interface_config
+def main(argv: list[str] | None = None) -> int:
+    """Serve the DM or hero environment over the OpenEnv HTTP/WebSocket API.
+
+    Args:
+        argv: Command-line arguments; ``None`` lets argparse read ``sys.argv``.
+
+    Returns:
+        ``0`` after ``uvicorn.run`` returns.
+    """
+    parser = argparse.ArgumentParser(description="Serve dungeon environments over OpenEnv HTTP/WebSocket APIs.")
+    parser.add_argument("role", choices=["dm", "hero"])
+    parser.add_argument("--host", default="127.0.0.1")
+    parser.add_argument("--port", type=int)
+    parser.add_argument("--world", type=Path, help="Optional world definition JSON for hero serving.")
+    parser.add_argument("--artifacts-root", type=Path)
+    parser.add_argument("--max-concurrent-envs", type=int, default=1)
+    parser.add_argument("--interface-provider", choices=["strict", "simple", "gemini"])
+    parser.add_argument("--interface-model")
+    parser.add_argument("--interface-narrate", action="store_true")
+    parser.add_argument(
+        "--translate-corporate-env",
+        action="store_true",
+        help="Rewrite hero-facing observations into a corporate app metaphor and map translated commands back through Gemini.",
+    )
+    args = parser.parse_args(argv)
+    interface_config = resolve_interface_config(
+        provider=args.interface_provider,
+        model_name=args.interface_model,
+        # A store_true flag is False when absent. Pass None in that case so the
+        # DND_INTERFACE_NARRATE environment fallback in resolve_interface_config
+        # still applies, mirroring how the translation flag is forwarded.
+        narrate_observations=True if args.interface_narrate else None,
+        translation_mode="corporate_app" if args.translate_corporate_env else None,
+    )
+    if args.role == "dm":
+        env_factory = lambda: DMEnvironment(
+            artifacts_root=args.artifacts_root,
+            interface_adapter=build_interface_adapter(interface_config),
+        )
+        action_cls = DMAction
+        observation_cls = DMObservation
+        default_port = 8001
+    else:
+        # Load the optional world once up front; every hero environment created
+        # by the factory receives the same loaded value.
+        world_input = load_world(str(args.world)) if args.world is not None else None
+        env_factory = lambda: HeroEnvironment(
+            artifacts_root=args.artifacts_root,
+            world_input=world_input,
+            interface_adapter=build_interface_adapter(interface_config),
+        )
+        action_cls = HeroServerAction
+        observation_cls = HeroObservation
+        default_port = 8002
+    app = create_fastapi_app(
+        env=env_factory,
+        action_cls=action_cls,
+        observation_cls=observation_cls,
+        max_concurrent_envs=args.max_concurrent_envs,
+    )
+    # Only fall back to the role default when --port was not given at all, so an
+    # explicit "--port 0" is forwarded instead of being treated as unset.
+    port = default_port if args.port is None else args.port
+    uvicorn.run(app, host=args.host, port=port)
+    return 0
+if __name__ == "__main__":
+    raise SystemExit(main())

agents/shared/__init__.py ADDED Viewed

	@@ -0,0 +1,43 @@

+"""Shared helpers for agent environments and model adapters."""
+from .llm_client import (
+    DEFAULT_HF_DM_MODEL,
+    DEFAULT_HF_HERO_MODEL,
+    GeminiStructuredClient,
+    HuggingFaceStructuredClient,
+    StructuredModelClient,
+)
+from .model_schema import ModelMessage
+from .openenv_compat import OPENENV_AVAILABLE
+from .runtime import (
+    DEFAULT_INTERFACE_MODEL,
+    DEFAULT_INTERFACE_PROVIDER,
+    DEFAULT_INTERFACE_TRANSLATION_MODE,
+    InterfaceConfig,
+    InterfaceTranslationMode,
+    StructuredClientConfig,
+    build_interface_adapter,
+    create_structured_client,
+    resolve_interface_config,
+    resolve_structured_client_config,
+)
+__all__ = [
+    "build_interface_adapter",
+    "create_structured_client",
+    "DEFAULT_HF_DM_MODEL",
+    "DEFAULT_HF_HERO_MODEL",
+    "DEFAULT_INTERFACE_MODEL",
+    "DEFAULT_INTERFACE_PROVIDER",
+    "DEFAULT_INTERFACE_TRANSLATION_MODE",
+    "GeminiStructuredClient",
+    "HuggingFaceStructuredClient",
+    "InterfaceConfig",
+    "InterfaceTranslationMode",
+    "ModelMessage",
+    "OPENENV_AVAILABLE",
+    "resolve_interface_config",
+    "resolve_structured_client_config",
+    "StructuredModelClient",
+    "StructuredClientConfig",
+]

agents/shared/llm_client.py ADDED Viewed

	@@ -0,0 +1,415 @@

+from __future__ import annotations
+import json
+import os
+from pathlib import Path
+from typing import Any, Protocol, TypeVar
+from dotenv import load_dotenv
+from google import genai
+from google.genai import types
+from pydantic import BaseModel
+from .model_schema import ModelMessage
+try:
+    from trl.chat_template_utils import qwen3_chat_template
+except Exception:  # pragma: no cover - optional runtime dependency
+    qwen3_chat_template = None  # type: ignore[assignment]
+# Type variable for the pydantic model class a caller wants back.
+ResponseModelT = TypeVar("ResponseModelT", bound=BaseModel)
+# Default model identifiers per provider and role.
+DEFAULT_GEMINI_DM_MODEL = "gemini-2.5-flash"
+DEFAULT_GEMINI_HERO_MODEL = "gemini-2.5-flash"
+DEFAULT_HF_DM_MODEL = "Qwen/Qwen3-32B"
+DEFAULT_HF_HERO_MODEL = "Qwen/Qwen3-32B"
+# Provider identifiers for the two structured-generation backends in this module.
+PROVIDER_GEMINI = "gemini"
+PROVIDER_HF_LOCAL = "hf_local"
+class StructuredModelClient(Protocol):
+    """Structural interface for clients that return a validated pydantic model."""
+    def generate_structured(
+        self,
+        messages: list[ModelMessage],
+        response_model: type[ResponseModelT],
+        *,
+        model_name: str,
+        temperature: float,
+        max_output_tokens: int,
+    ) -> ResponseModelT:
+        """Generate a reply to ``messages`` and return it as ``response_model``."""
+        ...
+class GeminiStructuredClient:
+    """Structured-output client that calls Gemini through ``google.genai``.
+
+    ``generate_structured`` tries three request strategies in order and returns
+    the first result that validates against the requested response model.
+    """
+    def __init__(self, api_key: str | None = None) -> None:
+        """Create the underlying client; see ``_create_client`` for key lookup."""
+        self._client = self._create_client(api_key)
+    def generate_structured(
+        self,
+        messages: list[ModelMessage],
+        response_model: type[ResponseModelT],
+        *,
+        model_name: str,
+        temperature: float,
+        max_output_tokens: int,
+    ) -> ResponseModelT:
+        """Return a validated ``response_model`` instance for ``messages``.
+
+        Raises:
+            RuntimeError: When every strategy fails; the message lists each
+                strategy name with its normalized error text.
+        """
+        failures: list[str] = []
+        # Ordered from most to least constrained request configuration.
+        strategies = (
+            self._generate_with_response_schema,
+            self._generate_with_json_mode,
+            self._generate_with_prompt_only,
+        )
+        for strategy in strategies:
+            try:
+                return strategy(
+                    messages,
+                    response_model,
+                    model_name=model_name,
+                    temperature=temperature,
+                    max_output_tokens=max_output_tokens,
+                )
+            except Exception as exc:
+                # Any failure (request or validation) moves on to the next strategy.
+                failures.append(f"{strategy.__name__}: {self._normalize_error(exc)}")
+        raise RuntimeError("Gemini structured generation failed. " + " | ".join(failures))
+    def _generate_with_response_schema(
+        self,
+        messages: list[ModelMessage],
+        response_model: type[ResponseModelT],
+        *,
+        model_name: str,
+        temperature: float,
+        max_output_tokens: int,
+    ) -> ResponseModelT:
+        """Request JSON output with ``response_model`` passed as the response schema.
+
+        Raises:
+            RuntimeError: When the response has neither ``parsed`` nor non-blank text.
+        """
+        system_instruction, contents = self._split_messages(messages)
+        response = self._client.models.generate_content(
+            model=model_name,
+            contents=contents,
+            config=types.GenerateContentConfig(
+                system_instruction=system_instruction,
+                temperature=temperature,
+                max_output_tokens=max_output_tokens,
+                response_mime_type="application/json",
+                response_schema=response_model,
+                candidate_count=1,
+            ),
+        )
+        # Prefer the SDK-parsed value; fall back to validating the raw text.
+        parsed = getattr(response, "parsed", None)
+        if parsed is not None:
+            return response_model.model_validate(parsed)
+        text = getattr(response, "text", None)
+        if isinstance(text, str) and text.strip():
+            return response_model.model_validate_json(text)
+        raise RuntimeError("Gemini returned an empty structured response.")
+    def _generate_with_json_mode(
+        self,
+        messages: list[ModelMessage],
+        response_model: type[ResponseModelT],
+        *,
+        model_name: str,
+        temperature: float,
+        max_output_tokens: int,
+    ) -> ResponseModelT:
+        """Request JSON output with the schema embedded in the prompt text only.
+
+        Raises:
+            RuntimeError: When the response text is missing or blank.
+        """
+        prompt = self._json_prompt(messages, response_model)
+        response = self._client.models.generate_content(
+            model=model_name,
+            contents=prompt,
+            config=types.GenerateContentConfig(
+                temperature=temperature,
+                max_output_tokens=max_output_tokens,
+                response_mime_type="application/json",
+                candidate_count=1,
+            ),
+        )
+        text = getattr(response, "text", None)
+        if not isinstance(text, str) or not text.strip():
+            raise RuntimeError("Gemini returned an empty JSON-mode response.")
+        return response_model.model_validate_json(text)
+    def _generate_with_prompt_only(
+        self,
+        messages: list[ModelMessage],
+        response_model: type[ResponseModelT],
+        *,
+        model_name: str,
+        temperature: float,
+        max_output_tokens: int,
+    ) -> ResponseModelT:
+        """Request plain text and extract the JSON object from the reply.
+
+        Raises:
+            RuntimeError: When the response text is missing or blank, or when it
+                contains no JSON object.
+        """
+        prompt = self._json_prompt(messages, response_model)
+        response = self._client.models.generate_content(
+            model=model_name,
+            contents=prompt,
+            config=types.GenerateContentConfig(
+                temperature=temperature,
+                max_output_tokens=max_output_tokens,
+                candidate_count=1,
+            ),
+        )
+        text = getattr(response, "text", None)
+        if not isinstance(text, str) or not text.strip():
+            raise RuntimeError("Gemini returned an empty prompt-only response.")
+        return response_model.model_validate_json(self._extract_json_object(text))
+    def _create_client(self, api_key: str | None) -> genai.Client:
+        """Build the ``genai.Client``.
+
+        The key is taken from ``api_key``, then ``GEMINI_API_KEY``, then
+        ``GOOGLE_API_KEY``. The repo-root ``.env`` file is loaded first without
+        overriding variables that are already set.
+
+        Raises:
+            RuntimeError: When no key is available.
+        """
+        load_dotenv(self._repo_root() / ".env", override=False)
+        key = api_key or os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
+        if not key:
+            raise RuntimeError("Missing GEMINI_API_KEY or GOOGLE_API_KEY.")
+        return genai.Client(api_key=key)
+    @staticmethod
+    def _repo_root() -> Path:
+        """Return the directory two levels above this module's folder."""
+        return Path(__file__).resolve().parents[2]
+    @staticmethod
+    def _split_messages(messages: list[ModelMessage]) -> tuple[str | None, list[str]]:
+        """Split messages into a system instruction and a single content string.
+
+        System messages are joined with blank lines (``None`` when there are
+        none). Every other message is rendered as ``ROLE:\\ncontent`` and joined
+        into one list entry; an empty string is used when there are none.
+        """
+        system_parts: list[str] = []
+        content_parts: list[str] = []
+        for message in messages:
+            if message.role == "system":
+                system_parts.append(message.content)
+                continue
+            content_parts.append(f"{message.role.upper()}:\n{message.content}")
+        system_instruction = "\n\n".join(system_parts) if system_parts else None
+        contents = ["\n\n".join(content_parts)] if content_parts else [""]
+        return system_instruction, contents
+    @staticmethod
+    def _json_prompt(
+        messages: list[ModelMessage],
+        response_model: type[ResponseModelT],
+    ) -> str:
+        """Build one prompt holding the output rules, the schema and all messages."""
+        message_blocks = [f"{message.role.upper()}:\n{message.content}" for message in messages]
+        schema = _schema_prompt_snippet(response_model)
+        conversation = "\n\n".join(message_blocks)
+        return (
+            "Return exactly one valid JSON object and nothing else.\n"
+            "Do not use markdown fences.\n"
+            "Use compact JSON with no commentary.\n"
+            f"JSON Schema:\n{schema}\n\n"
+            f"Conversation:\n{conversation}\n"
+        )
+    @staticmethod
+    def _extract_json_object(text: str) -> str:
+        """Return the substring from the first ``{`` to the last ``}`` in ``text``.
+
+        Raises:
+            RuntimeError: When no such brace pair exists.
+        """
+        cleaned = text.strip()
+        if cleaned.startswith("```"):
+            # Drop surrounding backticks and a leading "json" language tag.
+            cleaned = cleaned.strip("`")
+            if cleaned.startswith("json"):
+                cleaned = cleaned[4:].lstrip()
+        start = cleaned.find("{")
+        end = cleaned.rfind("}")
+        if start == -1 or end == -1 or end < start:
+            raise RuntimeError("Gemini response did not contain a JSON object.")
+        return cleaned[start : end + 1]
+    @staticmethod
+    def _normalize_error(exc: Exception) -> str:
+        """Collapse whitespace in the error text; use the class name if it is empty."""
+        return " ".join(str(exc).split()) or exc.__class__.__name__
+class HuggingFaceStructuredClient:
+    """Structured-output client backed by a locally loaded Hugging Face causal LM.
+
+    The tokenizer and model are loaded on first use and reused for as long as
+    the same ``model_name`` is requested.
+    """
+    def __init__(
+        self,
+        *,
+        adapter_path: str | None = None,
+        cache_dir: str | None = None,
+        load_in_4bit: bool = True,
+        trust_remote_code: bool = False,
+        device_map: str | None = "auto",
+    ) -> None:
+        """Store the load options; nothing is loaded until the first generation."""
+        self.adapter_path = adapter_path
+        self.cache_dir = cache_dir
+        self.load_in_4bit = load_in_4bit
+        self.trust_remote_code = trust_remote_code
+        self.device_map = device_map
+        self._loaded_model_name: str | None = None
+        self._model: Any | None = None
+        self._tokenizer: Any | None = None
+    def generate_structured(
+        self,
+        messages: list[ModelMessage],
+        response_model: type[ResponseModelT],
+        *,
+        model_name: str,
+        temperature: float,
+        max_output_tokens: int,
+    ) -> ResponseModelT:
+        """Generate a completion and validate it as ``response_model``.
+
+        Raises:
+            RuntimeError: When the decoded completion is blank or contains no
+                JSON object.
+        """
+        tokenizer, model = self._ensure_model(model_name)
+        prompt = self._hf_prompt(messages, response_model)
+        rendered = self._render_prompt(tokenizer, prompt)
+        tokenized = tokenizer(rendered, return_tensors="pt")
+        tokenized = {key: value.to(model.device) for key, value in tokenized.items()}
+        generate_kwargs = self._generation_kwargs(
+            tokenizer,
+            temperature=temperature,
+            max_output_tokens=max_output_tokens,
+        )
+        import torch
+        with torch.inference_mode():
+            output_ids = model.generate(**tokenized, **generate_kwargs)
+        # Keep only the tokens generated after the prompt.
+        prompt_length = tokenized["input_ids"].shape[1]
+        completion_ids = output_ids[0][prompt_length:]
+        text = tokenizer.decode(completion_ids, skip_special_tokens=True)
+        if not text.strip():
+            raise RuntimeError("Hugging Face model returned an empty response.")
+        return response_model.model_validate_json(self._extract_json_object(text))
+    @staticmethod
+    def _generation_kwargs(
+        tokenizer: Any,
+        *,
+        temperature: float,
+        max_output_tokens: int,
+    ) -> dict[str, Any]:
+        """Build the keyword arguments for ``model.generate``, omitting unset ones."""
+        eos_token_id = getattr(tokenizer, "eos_token_id", None)
+        pad_token_id = getattr(tokenizer, "pad_token_id", None)
+        if pad_token_id is None:
+            # Compare with None rather than truthiness: 0 is a valid pad id and
+            # must not be replaced by the EOS id.
+            pad_token_id = eos_token_id
+        sampling = temperature > 0.0
+        candidates: dict[str, Any] = {
+            "max_new_tokens": max_output_tokens,
+            "do_sample": sampling,
+            "temperature": max(temperature, 1e-5) if sampling else None,
+            "pad_token_id": pad_token_id,
+            "eos_token_id": eos_token_id,
+        }
+        return {key: value for key, value in candidates.items() if value is not None}
+    def _ensure_model(self, model_name: str) -> tuple[Any, Any]:
+        """Return ``(tokenizer, model)`` for ``model_name``, loading them if needed."""
+        if self._model is not None and self._tokenizer is not None and self._loaded_model_name == model_name:
+            return self._tokenizer, self._model
+        # Drop references to any previously loaded model before loading another
+        # one, so both are not kept alive at the same time during the load.
+        self._model = None
+        self._tokenizer = None
+        self._loaded_model_name = None
+        load_dotenv(self._repo_root() / ".env", override=False)
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+        token = _hf_token()
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            cache_dir=self.cache_dir,
+            trust_remote_code=self.trust_remote_code,
+            token=token,
+        )
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+        tokenizer = self._canonicalize_chat_template(tokenizer)
+        model_kwargs: dict[str, Any] = {
+            "cache_dir": self.cache_dir,
+            "trust_remote_code": self.trust_remote_code,
+            "token": token,
+        }
+        model_kwargs.update(_hf_model_init_kwargs(load_in_4bit=self.load_in_4bit, device_map=self.device_map))
+        model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)
+        if self.adapter_path:
+            from peft import PeftModel
+            model = PeftModel.from_pretrained(model, self.adapter_path, is_trainable=False)
+        model.eval()
+        self._loaded_model_name = model_name
+        self._model = model
+        self._tokenizer = tokenizer
+        return tokenizer, model
+    @staticmethod
+    def _repo_root() -> Path:
+        """Return the directory two levels above this module's folder."""
+        return Path(__file__).resolve().parents[2]
+    @staticmethod
+    def _render_prompt(tokenizer: Any, prompt: str) -> str:
+        """Wrap ``prompt`` in the tokenizer's chat template when it has one."""
+        if hasattr(tokenizer, "apply_chat_template"):
+            chat_template_kwargs = HuggingFaceStructuredClient._chat_template_kwargs(tokenizer)
+            return tokenizer.apply_chat_template(
+                [
+                    {"role": "system", "content": "Return exactly one valid JSON object and nothing else."},
+                    {"role": "user", "content": prompt},
+                ],
+                tokenize=False,
+                add_generation_prompt=True,
+                **chat_template_kwargs,
+            )
+        return prompt
+    @staticmethod
+    def _canonicalize_chat_template(tokenizer: Any) -> Any:
+        """Swap in ``qwen3_chat_template`` for templates using ``<|im_start|>``/``<|im_end|>``.
+
+        The tokenizer is returned unchanged when the optional template import
+        failed or the current template lacks either marker.
+        """
+        chat_template = getattr(tokenizer, "chat_template", "") or ""
+        if qwen3_chat_template is None:
+            return tokenizer
+        if "<|im_start|>" not in chat_template or "<|im_end|>" not in chat_template:
+            return tokenizer
+        tokenizer.chat_template = qwen3_chat_template
+        return tokenizer
+    @staticmethod
+    def _chat_template_kwargs(tokenizer: Any) -> dict[str, Any]:
+        """Return ``{"enable_thinking": False}`` when the template accepts it.
+
+        Support is detected by rendering a throwaway message with the flag; any
+        exception means the flag is left out.
+        """
+        if not hasattr(tokenizer, "apply_chat_template"):
+            return {}
+        try:
+            tokenizer.apply_chat_template(
+                [{"role": "user", "content": "ping"}],
+                tokenize=False,
+                add_generation_prompt=True,
+                enable_thinking=False,
+            )
+        except Exception:
+            return {}
+        return {"enable_thinking": False}
+    @staticmethod
+    def _hf_prompt(
+        messages: list[ModelMessage],
+        response_model: type[ResponseModelT],
+    ) -> str:
+        """Build one prompt holding the output rules, the schema and all messages."""
+        schema = _schema_prompt_snippet(response_model)
+        conversation = "\n\n".join(f"{message.role.upper()}:\n{message.content}" for message in messages)
+        return (
+            "Respond with exactly one compact JSON object and no other text.\n"
+            "Do not use markdown fences.\n"
+            f"JSON Schema:\n{schema}\n\n"
+            f"Conversation:\n{conversation}\n"
+        )
+    @staticmethod
+    def _extract_json_object(text: str) -> str:
+        """Return the substring from the first ``{`` to the last ``}`` in ``text``.
+
+        Raises:
+            RuntimeError: When no such brace pair exists.
+        """
+        cleaned = text.strip()
+        if cleaned.startswith("```"):
+            # Drop surrounding backticks and a leading "json" language tag.
+            cleaned = cleaned.strip("`")
+            if cleaned.startswith("json"):
+                cleaned = cleaned[4:].lstrip()
+        start = cleaned.find("{")
+        end = cleaned.rfind("}")
+        if start == -1 or end == -1 or end < start:
+            raise RuntimeError("Hugging Face response did not contain a JSON object.")
+        return cleaned[start : end + 1]
+def _schema_prompt_snippet(response_model: type[ResponseModelT]) -> str:
+    """Serialize the model's JSON schema compactly for inclusion in a prompt.
+
+    Schemas whose compact form exceeds 4000 characters are replaced by a summary
+    holding the title, type, required list, a trimmed view of each property and
+    the sorted names of the ``$defs`` entries.
+    """
+    full_schema = response_model.model_json_schema()
+    compact = json.dumps(full_schema, separators=(",", ":"))
+    if len(compact) > 4000:
+        kept_fields = {"type", "title", "enum", "items", "required", "$ref", "description"}
+        trimmed_properties = {}
+        for prop_name, prop_schema in full_schema.get("properties", {}).items():
+            trimmed_properties[prop_name] = {
+                field: spec for field, spec in prop_schema.items() if field in kept_fields
+            }
+        summary = {
+            "title": full_schema.get("title", response_model.__name__),
+            "type": full_schema.get("type", "object"),
+            "required": full_schema.get("required", []),
+            "properties": trimmed_properties,
+            "defs": sorted(full_schema.get("$defs", {}).keys()),
+        }
+        compact = json.dumps(summary, separators=(",", ":"))
+    return compact
+def _hf_model_init_kwargs(*, load_in_4bit: bool, device_map: str | None) -> dict[str, Any]:
+    """Build dtype, device-map and quantization kwargs for ``from_pretrained``.
+
+    Without CUDA only ``torch_dtype`` (float32) is set; the device map and the
+    4-bit quantization config are added only when CUDA is available.
+    """
+    import torch
+    has_cuda = torch.cuda.is_available()
+    init_kwargs: dict[str, Any] = {"torch_dtype": torch.bfloat16 if has_cuda else torch.float32}
+    if not has_cuda:
+        return init_kwargs
+    if device_map is not None:
+        init_kwargs["device_map"] = device_map
+    if load_in_4bit:
+        from transformers import BitsAndBytesConfig
+        init_kwargs["quantization_config"] = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_compute_dtype=torch.bfloat16,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_use_double_quant=True,
+        )
+    return init_kwargs
+def _hf_token() -> str | None:
+    """Return the Hugging Face token from ``HF_TOKEN``, else ``HUGGINGFACE_HUB_TOKEN``."""
+    primary = os.getenv("HF_TOKEN")
+    if primary:
+        return primary
+    return os.getenv("HUGGINGFACE_HUB_TOKEN")

agents/shared/model_schema.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from __future__ import annotations
+from typing import Literal
+from pydantic import BaseModel, ConfigDict
+class StrictModel(BaseModel):
+    """Pydantic base model configured with ``extra="forbid"``."""
+    model_config = ConfigDict(extra="forbid")
+class ModelMessage(StrictModel):
+    """One chat message: a role plus its text content."""
+    # Speaker of the message; restricted to these three values.
+    role: Literal["system", "user", "assistant"]
+    # Message text.
+    content: str

agents/shared/openenv_compat.py ADDED Viewed

	@@ -0,0 +1,125 @@

+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any, Generic, Optional, TypeVar
+from pydantic import BaseModel, ConfigDict, Field
+# Type variables for observation, action and state types used by the shim below.
+ObsT = TypeVar("ObsT")
+ActT = TypeVar("ActT")
+StateT = TypeVar("StateT")
+# Use the real openenv-core types when the package imports; otherwise define
+# local stand-ins under the same names so this module can still be imported.
+try:  # pragma: no cover - exercised when openenv-core is installed
+    from openenv.core.client_types import StepResult as OpenEnvStepResult
+    from openenv.core.env_server.interfaces import Environment as OpenEnvEnvironment
+    from openenv.core.env_server.types import (
+        Action as OpenEnvAction,
+        EnvironmentMetadata as OpenEnvEnvironmentMetadata,
+        Observation as OpenEnvObservation,
+        State as OpenEnvState,
+    )
+    OPENENV_AVAILABLE = True
+except ImportError:  # pragma: no cover - lightweight fallback for local imports/tests
+    OPENENV_AVAILABLE = False
+    class Action(BaseModel):
+        """Fallback action base: rejects unknown fields and validates on assignment."""
+        model_config = ConfigDict(
+            extra="forbid",
+            validate_assignment=True,
+            arbitrary_types_allowed=True,
+        )
+        metadata: dict[str, Any] = Field(default_factory=dict)
+    class Observation(BaseModel):
+        """Fallback observation base carrying ``done``, ``reward`` and ``metadata``."""
+        model_config = ConfigDict(
+            extra="forbid",
+            validate_assignment=True,
+            arbitrary_types_allowed=True,
+        )
+        done: bool = False
+        reward: bool | int | float | None = None
+        metadata: dict[str, Any] = Field(default_factory=dict)
+    class State(BaseModel):
+        """Fallback state base; unlike the other models it allows extra fields."""
+        model_config = ConfigDict(
+            extra="allow",
+            validate_assignment=True,
+            arbitrary_types_allowed=True,
+        )
+        episode_id: str | None = None
+        step_count: int = 0
+    class EnvironmentMetadata(BaseModel):
+        """Fallback metadata record: name, description and optional version."""
+        model_config = ConfigDict(extra="forbid")
+        name: str
+        description: str
+        version: str | None = None
+    @dataclass
+    class StepResult(Generic[ObsT]):
+        """Fallback step result: an observation with its reward and done flag."""
+        observation: ObsT
+        reward: Optional[float] = None
+        done: bool = False
+    class Environment(Generic[ActT, ObsT, StateT]):
+        """Fallback environment base; subclasses implement reset, step and state."""
+        SUPPORTS_CONCURRENT_SESSIONS: bool = False
+        def __init__(self, transform: Any | None = None) -> None:
+            """Store the optional observation transform."""
+            self.transform = transform
+        def reset(
+            self,
+            seed: Optional[int] = None,
+            episode_id: Optional[str] = None,
+            **kwargs: Any,
+        ) -> ObsT:
+            """Start a new episode; must be overridden."""
+            raise NotImplementedError
+        def step(
+            self,
+            action: ActT,
+            timeout_s: Optional[float] = None,
+            **kwargs: Any,
+        ) -> ObsT:
+            """Apply one action; must be overridden."""
+            raise NotImplementedError
+        @property
+        def state(self) -> StateT:
+            """Current environment state; must be overridden."""
+            raise NotImplementedError
+        def get_metadata(self) -> EnvironmentMetadata:
+            """Return metadata derived from the class name, with version "1.0.0"."""
+            return EnvironmentMetadata(
+                name=self.__class__.__name__,
+                description=f"{self.__class__.__name__} environment",
+                version="1.0.0",
+            )
+        def _apply_transform(self, observation: ObsT) -> ObsT:
+            """Run the observation through ``transform`` when one is set."""
+            return observation if self.transform is None else self.transform(observation)
+        def close(self) -> None:
+            """No-op cleanup hook."""
+            return None
+else:
+    # Import succeeded: expose the openenv-core classes under the local names.
+    Action = OpenEnvAction
+    Observation = OpenEnvObservation
+    State = OpenEnvState
+    Environment = OpenEnvEnvironment
+    EnvironmentMetadata = OpenEnvEnvironmentMetadata
+    StepResult = OpenEnvStepResult
+def build_step_result(observation: ObsT) -> StepResult[ObsT]:
+    """Wrap an observation in a ``StepResult``.
+
+    The reward is read from ``observation.reward`` and converted to ``float``
+    unless it is missing or ``None``; ``done`` is read from ``observation.done``
+    and coerced to ``bool``, defaulting to ``False``.
+    """
+    raw_reward = getattr(observation, "reward", None)
+    return StepResult(
+        observation=observation,
+        reward=None if raw_reward is None else float(raw_reward),
+        done=bool(getattr(observation, "done", False)),
+    )

agents/shared/runtime.py ADDED Viewed

	@@ -0,0 +1,165 @@

+from __future__ import annotations
+import os
+from dataclasses import dataclass
+from typing import Literal
+from .llm_client import (
+    DEFAULT_GEMINI_DM_MODEL,
+    DEFAULT_GEMINI_HERO_MODEL,
+    DEFAULT_HF_DM_MODEL,
+    DEFAULT_HF_HERO_MODEL,
+    GeminiStructuredClient,
+    HuggingFaceStructuredClient,
+    PROVIDER_GEMINI,
+    PROVIDER_HF_LOCAL,
+    StructuredModelClient,
+)
+# Accepted values for the structured-generation provider.
+StructuredProvider = Literal["gemini", "hf_local"]
+# Accepted values for the interface adapter provider.
+InterfaceProvider = Literal["strict", "simple", "gemini"]
+# Accepted values for the interface translation mode; "none" disables translation.
+InterfaceTranslationMode = Literal["none", "corporate_app"]
+# Agent roles that can be configured.
+RoleName = Literal["dm", "hero"]
+# Defaults used when neither an argument nor an environment variable is given.
+DEFAULT_INTERFACE_PROVIDER: InterfaceProvider = "strict"
+DEFAULT_INTERFACE_MODEL = "gemini-2.5-flash-lite"
+DEFAULT_INTERFACE_TRANSLATION_MODE: InterfaceTranslationMode = "none"
+@dataclass(frozen=True)
+class StructuredClientConfig:
+    """Immutable settings for building a structured model client for one role."""
+    role: RoleName
+    provider: StructuredProvider
+    model_name: str
+    # The options below are forwarded only to the Hugging Face client.
+    adapter_path: str | None = None
+    cache_dir: str | None = None
+    load_in_4bit: bool = True
+    trust_remote_code: bool = False
+@dataclass(frozen=True)
+class InterfaceConfig:
+    """Immutable settings for building an interface adapter."""
+    provider: InterfaceProvider
+    # The fields below are forwarded only to the Gemini adapter.
+    model_name: str = DEFAULT_INTERFACE_MODEL
+    narrate_observations: bool = False
+    translation_mode: InterfaceTranslationMode = DEFAULT_INTERFACE_TRANSLATION_MODE
+def resolve_structured_client_config(
+    role: RoleName,
+    *,
+    provider: StructuredProvider | None = None,
+    model_name: str | None = None,
+    adapter_path: str | None = None,
+) -> StructuredClientConfig:
+    """Resolve the structured-client settings for ``role``.
+
+    Explicit arguments win, then ``DND_<ROLE>_PROVIDER`` / ``_MODEL`` /
+    ``_ADAPTER_PATH`` environment variables, then the built-in defaults (Gemini
+    provider and the matching per-role default model). ``HF_HOME``,
+    ``DND_LOAD_IN_4BIT`` and ``DND_TRUST_REMOTE_CODE`` are always read from the
+    environment.
+    """
+    prefix = f"DND_{role.upper()}"
+    chosen_provider = provider
+    if not chosen_provider:
+        chosen_provider = _structured_provider_from_env(os.getenv(f"{prefix}_PROVIDER")) or PROVIDER_GEMINI
+    # Pick the (dm, hero) default model pair for the chosen provider.
+    dm_default, hero_default = (
+        (DEFAULT_HF_DM_MODEL, DEFAULT_HF_HERO_MODEL)
+        if chosen_provider == PROVIDER_HF_LOCAL
+        else (DEFAULT_GEMINI_DM_MODEL, DEFAULT_GEMINI_HERO_MODEL)
+    )
+    fallback_model = dm_default if role == "dm" else hero_default
+    chosen_model = model_name or os.getenv(f"{prefix}_MODEL") or fallback_model
+    chosen_adapter = adapter_path or os.getenv(f"{prefix}_ADAPTER_PATH")
+    hf_cache_dir = os.getenv("HF_HOME")
+    use_4bit = _env_bool("DND_LOAD_IN_4BIT", default=True)
+    allow_remote_code = _env_bool("DND_TRUST_REMOTE_CODE", default=False)
+    return StructuredClientConfig(
+        role=role,
+        provider=chosen_provider,
+        model_name=chosen_model,
+        adapter_path=chosen_adapter,
+        cache_dir=hf_cache_dir,
+        load_in_4bit=use_4bit,
+        trust_remote_code=allow_remote_code,
+    )
+def create_structured_client(config: StructuredClientConfig) -> StructuredModelClient:
+    """Instantiate the structured client selected by ``config.provider``.
+
+    Raises:
+        ValueError: When the provider is neither Gemini nor local Hugging Face.
+    """
+    provider = config.provider
+    if provider == PROVIDER_HF_LOCAL:
+        return HuggingFaceStructuredClient(
+            adapter_path=config.adapter_path,
+            cache_dir=config.cache_dir,
+            load_in_4bit=config.load_in_4bit,
+            trust_remote_code=config.trust_remote_code,
+        )
+    if provider == PROVIDER_GEMINI:
+        return GeminiStructuredClient()
+    raise ValueError(f"Unsupported structured provider: {config.provider}")
+def resolve_interface_config(
+    *,
+    provider: InterfaceProvider | None = None,
+    model_name: str | None = None,
+    narrate_observations: bool | None = None,
+    translation_mode: InterfaceTranslationMode | None = None,
+) -> InterfaceConfig:
+    """Resolve the interface adapter settings.
+
+    Each value comes from its argument when given, otherwise from the matching
+    ``DND_INTERFACE_*`` environment variable, otherwise from the default. With
+    no provider configured, an active translation mode selects ``"gemini"``.
+
+    Raises:
+        ValueError: When a translation mode is active with a non-Gemini provider.
+    """
+    mode = translation_mode
+    if not mode:
+        mode = _interface_translation_mode_from_env(os.getenv("DND_INTERFACE_TRANSLATION_MODE"))
+    if not mode:
+        mode = DEFAULT_INTERFACE_TRANSLATION_MODE
+    chosen_provider = provider or _interface_provider_from_env(os.getenv("DND_INTERFACE_PROVIDER"))
+    if chosen_provider is None:
+        chosen_provider = DEFAULT_INTERFACE_PROVIDER if mode == "none" else "gemini"
+    if narrate_observations is None:
+        narrate = _env_bool("DND_INTERFACE_NARRATE", default=False)
+    else:
+        narrate = narrate_observations
+    translating = mode != "none"
+    if translating and chosen_provider != "gemini":
+        raise ValueError("Interface translation mode requires the Gemini interface provider.")
+    chosen_model = model_name or os.getenv("DND_INTERFACE_MODEL") or DEFAULT_INTERFACE_MODEL
+    return InterfaceConfig(
+        provider=chosen_provider,
+        model_name=chosen_model,
+        narrate_observations=narrate,
+        translation_mode=mode,
+    )
+def build_interface_adapter(config: InterfaceConfig):
+    """Instantiate the interface adapter selected by ``config.provider``.
+
+    Raises:
+        ValueError: When the provider is not "strict", "simple" or "gemini".
+    """
+    # Imported inside the function rather than at module import time.
+    from agents.master.interface import GeminiInterfaceAdapter, SimpleInterfaceAdapter, StrictCliInterfaceAdapter
+    provider = config.provider
+    if provider == "gemini":
+        return GeminiInterfaceAdapter(
+            model=config.model_name,
+            narrate_observations=config.narrate_observations,
+            translation_mode=config.translation_mode,
+        )
+    if provider == "simple":
+        return SimpleInterfaceAdapter()
+    if provider == "strict":
+        return StrictCliInterfaceAdapter()
+    raise ValueError(f"Unsupported interface provider: {config.provider}")
+def _structured_provider_from_env(value: str | None) -> StructuredProvider | None:
+    """Normalize a structured-provider setting (trimmed, lower-cased).
+
+    Returns ``None`` for ``None``; raises ``ValueError`` for unknown values.
+    """
+    if value is None:
+        return None
+    candidate = value.strip().lower()
+    if candidate in {PROVIDER_GEMINI, PROVIDER_HF_LOCAL}:
+        return candidate  # type: ignore[return-value]
+    raise ValueError(f"Unsupported structured provider value: {value}")
+def _interface_provider_from_env(value: str | None) -> InterfaceProvider | None:
+    """Normalize an interface-provider setting (trimmed, lower-cased).
+
+    Returns ``None`` for ``None``; raises ``ValueError`` for unknown values.
+    """
+    if value is None:
+        return None
+    candidate = value.strip().lower()
+    if candidate in {"strict", "simple", "gemini"}:
+        return candidate  # type: ignore[return-value]
+    raise ValueError(f"Unsupported interface provider value: {value}")
+def _interface_translation_mode_from_env(value: str | None) -> InterfaceTranslationMode | None:
+    """Normalize a translation-mode setting (trimmed, lower-cased).
+
+    Returns ``None`` for ``None``; raises ``ValueError`` for unknown values.
+    """
+    if value is None:
+        return None
+    candidate = value.strip().lower()
+    if candidate in {"none", "corporate_app"}:
+        return candidate  # type: ignore[return-value]
+    raise ValueError(f"Unsupported interface translation mode value: {value}")
+def _env_bool(name: str, *, default: bool) -> bool:
+    """Read environment variable ``name`` as a boolean.
+
+    Returns ``default`` when the variable is unset. Accepts 1/true/yes/on and
+    0/false/no/off, ignoring case and surrounding whitespace.
+
+    Raises:
+        ValueError: When the variable is set to any other value.
+    """
+    raw = os.getenv(name)
+    if raw is None:
+        return default
+    token = raw.strip().lower()
+    spellings_by_outcome = (
+        (True, {"1", "true", "yes", "on"}),
+        (False, {"0", "false", "no", "off"}),
+    )
+    for outcome, spellings in spellings_by_outcome:
+        if token in spellings:
+            return outcome
+    raise ValueError(f"Environment variable {name} must be a boolean value, got {raw!r}")

agents/spaces/__init__.py ADDED Viewed

	@@ -0,0 +1,13 @@

+"""Hugging Face Space wrapper apps for the dungeon environments."""
+from .dm_space import LatestWorldOutputStore, SpaceDMEnvironment, create_app as create_dm_space_app
+from .hero_space import SpaceHeroEnvironment, UploadedWorldStore, create_app as create_hero_space_app
+__all__ = [
+    "LatestWorldOutputStore",
+    "SpaceDMEnvironment",
+    "SpaceHeroEnvironment",
+    "UploadedWorldStore",
+    "create_dm_space_app",
+    "create_hero_space_app",
+]

agents/spaces/dm_space.py ADDED Viewed

	@@ -0,0 +1,194 @@

+from __future__ import annotations
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from html import escape
+from pathlib import Path
+from threading import Lock
+from typing import Any, Callable
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import FileResponse, HTMLResponse
+import uvicorn
+from agents.master.env import DMEnvironment
+from agents.master.schema import CompiledWorld, DMAction, DMObservation, WorldDefinition
+from agents.shared.openenv_compat import StepResult
+from agents.shared.runtime import build_interface_adapter, resolve_interface_config
+DEFAULT_ARTIFACTS_ROOT = Path("/tmp/dnd_dm_artifacts")
+DEFAULT_HOST = "0.0.0.0"
+DEFAULT_PORT = 8000
+DEFAULT_MAX_CONCURRENT_ENVS = 1
+@dataclass(frozen=True)
+class LatestWorldSnapshot:
+    """Immutable description of one recorded normalized world artifact."""
+    # Episode identifier taken from the compiled world that was recorded.
+    episode_id: str
+    # Human-readable world title (shown, HTML-escaped, on the index page).
+    title: str
+    # Location of the "world_definition.normalized.json" artifact on disk.
+    path: Path
+    # ISO-8601 UTC timestamp string of when the snapshot was recorded.
+    updated_at: str
+class LatestWorldOutputStore:
+    def __init__(self) -> None:
+        self._lock = Lock()
+        self._snapshot: LatestWorldSnapshot | None = None
+    def record(self, compiled: CompiledWorld) -> None:
+        path = compiled.artifacts_dir / "world_definition.normalized.json"
+        if not path.is_file():
+            return
+        snapshot = LatestWorldSnapshot(
+            episode_id=compiled.episode_id,
+            title=compiled.world.meta.title,
+            path=path,
+            updated_at=datetime.now(timezone.utc).isoformat(),
+        )
+        with self._lock:
+            self._snapshot = snapshot
+    def latest_path(self) -> Path | None:
+        snapshot = self.snapshot()
+        return None if snapshot is None else snapshot.path
+    def snapshot(self) -> LatestWorldSnapshot | None:
+        with self._lock:
+            return self._snapshot
+class SpaceDMEnvironment(DMEnvironment):
+    def __init__(self, *, world_output_store: LatestWorldOutputStore, **kwargs: Any) -> None:
+        super().__init__(**kwargs)
+        self._world_output_store = world_output_store
+    def step(  # type: ignore[override]
+        self,
+        action: DMAction | WorldDefinition | dict[str, Any],
+        runner: Any | None = None,
+        observer: Any | None = None,
+        timeout_s: float | None = None,
+    ) -> StepResult[DMObservation]:
+        result = super().step(action, runner=runner, observer=observer, timeout_s=timeout_s)
+        observation = result.observation
+        if observation.compile_error is None and self.last_compiled_world is not None:
+            self._world_output_store.record(self.last_compiled_world)
+        return result
+def create_app(
+    *,
+    openenv_app_factory: Callable[..., Any] | None = None,
+    world_output_store: LatestWorldOutputStore | None = None,
+    artifacts_root: Path = DEFAULT_ARTIFACTS_ROOT,
+    max_concurrent_envs: int = DEFAULT_MAX_CONCURRENT_ENVS,
+) -> FastAPI:
+    """Build the DND-DM FastAPI application.
+
+    Args:
+        openenv_app_factory: Callable that builds the OpenEnv sub-application.
+            When ``None``, ``openenv.core.env_server.create_fastapi_app`` is
+            imported and used.
+        world_output_store: Store shared with every environment instance; a
+            fresh ``LatestWorldOutputStore`` is created when omitted.
+        artifacts_root: Passed to each ``SpaceDMEnvironment`` as its
+            ``artifacts_root``.
+        max_concurrent_envs: Forwarded unchanged to the OpenEnv app factory.
+
+    Returns:
+        A FastAPI app with the OpenEnv app mounted at ``/env`` plus the
+        ``/``, ``/healthz`` and ``/world-output/latest`` routes.
+    """
+    if openenv_app_factory is None:
+        # Imported here so the openenv package is only required when no factory is injected.
+        from openenv.core.env_server import create_fastapi_app as openenv_app_factory
+    store = world_output_store or LatestWorldOutputStore()
+    # A single adapter, resolved with provider="strict", is shared by all environments.
+    interface_adapter = build_interface_adapter(resolve_interface_config(provider="strict"))
+    env_app = openenv_app_factory(
+        # The factory receives a zero-argument callable that builds a new environment.
+        env=lambda: SpaceDMEnvironment(
+            artifacts_root=artifacts_root,
+            interface_adapter=interface_adapter,
+            world_output_store=store,
+        ),
+        action_cls=DMAction,
+        observation_cls=DMObservation,
+        max_concurrent_envs=max_concurrent_envs,
+    )
+    app = FastAPI(title="DND-DM")
+    # Expose the store on app.state so code holding the app can reach it.
+    app.state.world_output_store = store
+    app.mount("/env", env_app)
+    # GET /: HTML landing page describing the latest recorded world, if any.
+    @app.get("/", response_class=HTMLResponse)
+    def index() -> str:
+        return _render_index(store.snapshot())
+    # GET /healthz: constant liveness payload.
+    @app.get("/healthz")
+    def healthz() -> dict[str, bool]:
+        return {"ok": True}
+    # GET /world-output/latest: download the latest normalized world JSON.
+    @app.get("/world-output/latest")
+    def latest_world_output() -> FileResponse:
+        path = store.latest_path()
+        # Respond 404 both when nothing was recorded and when the file no longer exists.
+        if path is None or not path.is_file():
+            raise HTTPException(status_code=404, detail="No successful normalized world output is available yet.")
+        return FileResponse(
+            path,
+            media_type="application/json",
+            filename="world_definition.normalized.json",
+        )
+    return app
+def _render_index(snapshot: LatestWorldSnapshot | None) -> str:
+    latest_html = (
+        "<p>No successful normalized world output has been recorded yet.</p>"
+        if snapshot is None
+        else (
+            "<p>"
+            f"Latest world: <strong>{escape(snapshot.title)}</strong> "
+            f"(episode <code>{escape(snapshot.episode_id)}</code>, updated {escape(snapshot.updated_at)}). "
+            '<a href="/world-output/latest">Download normalized world JSON</a>.'
+            "</p>"
+        )
+    )
+    return f"""<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>DND-DM</title>
+    <style>
+      body {{
+        font-family: "IBM Plex Sans", "Helvetica Neue", sans-serif;
+        margin: 0;
+        background: #f4efe5;
+        color: #1b1a17;
+      }}
+      main {{
+        max-width: 760px;
+        margin: 0 auto;
+        padding: 48px 24px 64px;
+      }}
+      a {{ color: #0b5c78; }}
+      code {{
+        background: rgba(11, 92, 120, 0.08);
+        padding: 0.15rem 0.35rem;
+        border-radius: 0.3rem;
+      }}
+      .panel {{
+        border: 1px solid rgba(27, 26, 23, 0.12);
+        background: rgba(255, 255, 255, 0.72);
+        border-radius: 18px;
+        padding: 20px 22px;
+        margin-top: 18px;
+      }}
+    </style>
+  </head>
+  <body>
+    <main>
+      <h1>DND-DM</h1>
+      <p>This Space hosts the dungeon DM OpenEnv environment as a CPU-only evaluator.</p>
+      <div class="panel">
+        <p>The OpenEnv API is mounted at <a href="/env"><code>/env</code></a>.</p>
+        <p>The DM evaluates submitted world definitions and writes the latest normalized JSON artifact for manual handoff to <code>DND-Hero</code>.</p>
+        {latest_html}
+      </div>
+    </main>
+  </body>
+</html>"""
+def main() -> int:
+    """Serve the DM Space app with uvicorn and return exit code 0 afterwards."""
+    # factory=True tells uvicorn to call create_app() to obtain the application.
+    uvicorn.run("agents.spaces.dm_space:create_app", factory=True, host=DEFAULT_HOST, port=DEFAULT_PORT)
+    return 0
+# Script entry point: exit with main()'s return code.
+if __name__ == "__main__":
+    raise SystemExit(main())

agents/spaces/hero_space.py ADDED Viewed

	@@ -0,0 +1,271 @@

+from __future__ import annotations
+from copy import deepcopy
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from html import escape
+import json
+from pathlib import Path
+from threading import Lock
+from typing import Any, Callable
+from fastapi import FastAPI, File, HTTPException, Request, UploadFile
+from fastapi.responses import HTMLResponse, JSONResponse, Response
+import uvicorn
+from agents.hero.env import HeroEnvironment
+from agents.hero.schema import HeroObservation, HeroServerAction
+from agents.master.check import DMCompileError, validate_and_normalize
+from agents.master.schema import WorldDefinition
+from agents.shared.runtime import build_interface_adapter, resolve_interface_config
+DEFAULT_ARTIFACTS_ROOT = Path("/tmp/dnd_hero_artifacts")
+DEFAULT_HOST = "0.0.0.0"
+DEFAULT_PORT = 8000
+DEFAULT_MAX_CONCURRENT_ENVS = 1
+@dataclass(frozen=True)
+class UploadedWorldSnapshot:
+    """Immutable record of the world currently uploaded as the default."""
+    # JSON-mode dump of the validated world definition.
+    world_input: dict[str, Any]
+    # World title taken from the definition's meta section.
+    title: str
+    # Length in bytes of the UTF-8 encoded JSON serialization of world_input.
+    size_bytes: int
+    # ISO-8601 UTC timestamp string of when the world was stored.
+    updated_at: str
+class UploadedWorldStore:
+    def __init__(self) -> None:
+        self._lock = Lock()
+        self._snapshot: UploadedWorldSnapshot | None = None
+    def set_world(self, world: WorldDefinition | dict[str, Any]) -> UploadedWorldSnapshot:
+        if isinstance(world, dict):
+            world = validate_and_normalize(world)
+        world_input = world.model_dump(mode="json")
+        snapshot = UploadedWorldSnapshot(
+            world_input=world_input,
+            title=world.meta.title,
+            size_bytes=len(json.dumps(world_input).encode("utf-8")),
+            updated_at=datetime.now(timezone.utc).isoformat(),
+        )
+        with self._lock:
+            self._snapshot = snapshot
+        return snapshot
+    def clear(self) -> None:
+        with self._lock:
+            self._snapshot = None
+    def current_world(self) -> dict[str, Any] | None:
+        snapshot = self.snapshot()
+        return None if snapshot is None else deepcopy(snapshot.world_input)
+    def snapshot(self) -> UploadedWorldSnapshot | None:
+        with self._lock:
+            return self._snapshot
+    def metadata(self) -> dict[str, Any]:
+        snapshot = self.snapshot()
+        if snapshot is None:
+            return {"configured": False}
+        return {
+            "configured": True,
+            "title": snapshot.title,
+            "size_bytes": snapshot.size_bytes,
+            "updated_at": snapshot.updated_at,
+        }
+class SpaceHeroEnvironment(HeroEnvironment):
+    def __init__(self, *, uploaded_world_store: UploadedWorldStore, **kwargs: Any) -> None:
+        super().__init__(**kwargs)
+        self._uploaded_world_store = uploaded_world_store
+    def reset(  # type: ignore[override]
+        self,
+        world_input: Any | None = None,
+        *,
+        seed: int | None = None,
+        episode_id: str | None = None,
+        max_game_steps: int | None = None,
+        max_tool_calls: int | None = None,
+        scratchpad_max_chars: int | None = None,
+        debug: bool | None = None,
+    ) -> HeroObservation:
+        selected_world_input = world_input
+        if selected_world_input is None:
+            selected_world_input = self._uploaded_world_store.current_world()
+        if selected_world_input is None:
+            raise ValueError(
+                "Upload a world JSON to /world-input or pass world_input explicitly before resetting DND-Hero."
+            )
+        return super().reset(
+            selected_world_input,
+            seed=seed,
+            episode_id=episode_id,
+            max_game_steps=max_game_steps,
+            max_tool_calls=max_tool_calls,
+            scratchpad_max_chars=scratchpad_max_chars,
+            debug=debug,
+        )
+def create_app(
+    *,
+    openenv_app_factory: Callable[..., Any] | None = None,
+    uploaded_world_store: UploadedWorldStore | None = None,
+    artifacts_root: Path = DEFAULT_ARTIFACTS_ROOT,
+    max_concurrent_envs: int = DEFAULT_MAX_CONCURRENT_ENVS,
+) -> FastAPI:
+    """Build the DND-Hero FastAPI application.
+
+    Args:
+        openenv_app_factory: Callable that builds the OpenEnv sub-application.
+            When ``None``, ``openenv.core.env_server.create_fastapi_app`` is
+            imported and used.
+        uploaded_world_store: Store shared with every environment instance; a
+            fresh ``UploadedWorldStore`` is created when omitted.
+        artifacts_root: Passed to each ``SpaceHeroEnvironment`` as its
+            ``artifacts_root``.
+        max_concurrent_envs: Forwarded unchanged to the OpenEnv app factory.
+
+    Returns:
+        A FastAPI app with the OpenEnv app mounted at ``/env`` plus the
+        ``/``, ``/healthz`` and ``/world-input`` (POST, GET, DELETE) routes.
+    """
+    if openenv_app_factory is None:
+        # Imported here so the openenv package is only required when no factory is injected.
+        from openenv.core.env_server import create_fastapi_app as openenv_app_factory
+    store = uploaded_world_store or UploadedWorldStore()
+    # A single adapter, resolved with provider="strict", is shared by all environments.
+    interface_adapter = build_interface_adapter(resolve_interface_config(provider="strict"))
+    env_app = openenv_app_factory(
+        # The factory receives a zero-argument callable that builds a new environment.
+        env=lambda: SpaceHeroEnvironment(
+            artifacts_root=artifacts_root,
+            uploaded_world_store=store,
+            interface_adapter=interface_adapter,
+        ),
+        action_cls=HeroServerAction,
+        observation_cls=HeroObservation,
+        max_concurrent_envs=max_concurrent_envs,
+    )
+    app = FastAPI(title="DND-Hero")
+    # Expose the store on app.state so code holding the app can reach it.
+    app.state.uploaded_world_store = store
+    app.mount("/env", env_app)
+    # GET /: HTML landing page with the upload form and current-world summary.
+    @app.get("/", response_class=HTMLResponse)
+    def index() -> str:
+        return _render_index(store.metadata())
+    # GET /healthz: constant liveness payload.
+    @app.get("/healthz")
+    def healthz() -> dict[str, bool]:
+        return {"ok": True}
+    # POST /world-input: accept a world JSON as a multipart file or as the raw request body.
+    @app.post("/world-input")
+    async def upload_world_input(
+        request: Request,
+        file: UploadFile | None = File(default=None),
+    ) -> JSONResponse:
+        # Prefer the uploaded file; otherwise fall back to the raw body bytes.
+        payload = await file.read() if file is not None else await request.body()
+        if not payload:
+            raise HTTPException(status_code=400, detail="Provide a world JSON file upload or a raw JSON request body.")
+        try:
+            raw_world = json.loads(payload.decode("utf-8"))
+        except UnicodeDecodeError as exc:
+            raise HTTPException(status_code=400, detail="World input must be UTF-8 JSON.") from exc
+        except json.JSONDecodeError as exc:
+            raise HTTPException(status_code=400, detail=f"Invalid JSON: {exc.msg}") from exc
+        if not isinstance(raw_world, dict):
+            raise HTTPException(status_code=400, detail="World input JSON must be an object.")
+        try:
+            # Validate before storing so a rejected world never replaces the current default.
+            world = validate_and_normalize(raw_world)
+        except DMCompileError as exc:
+            raise HTTPException(status_code=400, detail=str(exc)) from exc
+        snapshot = store.set_world(world)
+        return JSONResponse(
+            {
+                "configured": True,
+                "title": snapshot.title,
+                "size_bytes": snapshot.size_bytes,
+                "updated_at": snapshot.updated_at,
+            }
+        )
+    # GET /world-input: metadata about the currently stored world.
+    @app.get("/world-input")
+    def world_input_metadata() -> JSONResponse:
+        return JSONResponse(store.metadata())
+    # DELETE /world-input: drop the stored world and answer 204 with no body.
+    @app.delete("/world-input", status_code=204)
+    def clear_world_input() -> Response:
+        store.clear()
+        return Response(status_code=204)
+    return app
+def _render_index(metadata: dict[str, Any]) -> str:
+    current_world_html = (
+        "<p>No default world is uploaded yet.</p>"
+        if not metadata.get("configured")
+        else (
+            "<p>"
+            f"Current uploaded world: <strong>{escape(str(metadata['title']))}</strong> "
+            f"({escape(str(metadata['size_bytes']))} bytes, updated {escape(str(metadata['updated_at']))})."
+            "</p>"
+        )
+    )
+    return f"""<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>DND-Hero</title>
+    <style>
+      body {{
+        font-family: "IBM Plex Sans", "Helvetica Neue", sans-serif;
+        margin: 0;
+        background: #eef4eb;
+        color: #182118;
+      }}
+      main {{
+        max-width: 760px;
+        margin: 0 auto;
+        padding: 48px 24px 64px;
+      }}
+      a {{ color: #146042; }}
+      code {{
+        background: rgba(20, 96, 66, 0.08);
+        padding: 0.15rem 0.35rem;
+        border-radius: 0.3rem;
+      }}
+      .panel {{
+        border: 1px solid rgba(24, 33, 24, 0.12);
+        background: rgba(255, 255, 255, 0.76);
+        border-radius: 18px;
+        padding: 20px 22px;
+        margin-top: 18px;
+      }}
+      input[type="file"] {{
+        display: block;
+        margin-bottom: 12px;
+      }}
+      button {{
+        background: #146042;
+        color: white;
+        border: 0;
+        border-radius: 999px;
+        padding: 0.7rem 1rem;
+        cursor: pointer;
+      }}
+    </style>
+  </head>
+  <body>
+    <main>
+      <h1>DND-Hero</h1>
+      <p>This Space hosts the dungeon Hero OpenEnv environment as a CPU-only evaluator.</p>
+      <div class="panel">
+        <p>The OpenEnv API is mounted at <a href="/env"><code>/env</code></a>.</p>
+        <p>Upload a normalized world-definition JSON file from <code>DND-DM</code> to make it the default world for future hero resets.</p>
+        {current_world_html}
+        <form action="/world-input" method="post" enctype="multipart/form-data">
+          <input type="file" name="file" accept="application/json,.json" required>
+          <button type="submit">Upload World JSON</button>
+        </form>
+      </div>
+    </main>
+  </body>
+</html>"""
+def main() -> int:
+    """Serve the Hero Space app with uvicorn and return exit code 0 afterwards."""
+    # factory=True tells uvicorn to call create_app() to obtain the application.
+    uvicorn.run("agents.spaces.hero_space:create_app", factory=True, host=DEFAULT_HOST, port=DEFAULT_PORT)
+    return 0
+# Script entry point: exit with main()'s return code.
+if __name__ == "__main__":
+    raise SystemExit(main())

agents/train/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ """Training entrypoints for GRPO-based experiments."""
2	+

agents/train/__main__.py ADDED Viewed

	@@ -0,0 +1,361 @@

+from __future__ import annotations
+import argparse
+import json
+import os
+from pathlib import Path
+from dotenv import load_dotenv
+from agents.shared.llm_client import DEFAULT_HF_DM_MODEL, DEFAULT_HF_HERO_MODEL
+from .grpo import (
+    DMClosedLoopConfig,
+    GRPOLaunchConfig,
+    SUPPORTED_GRPO_LOSS_TYPES,
+    SUPPORTED_IMPORTANCE_SAMPLING_LEVELS,
+    build_dm_grpo_dataset,
+    build_hero_grpo_dataset,
+    run_dm_grpo,
+    run_hero_grpo,
+)
+from .joint import JointTrainingConfig, run_joint_training_loop
+def main(argv: list[str] | None = None) -> int:
+    """Parse the training CLI and run the selected subcommand.
+
+    Subcommands: ``dm-grpo``, ``hero-grpo``, ``joint-loop`` and
+    ``smoke-dataset``.
+
+    Args:
+        argv: Argument list handed to ``ArgumentParser.parse_args``; ``None``
+            lets argparse read the process arguments.
+
+    Returns:
+        ``0`` after the selected subcommand finishes.
+    """
+    # Load .env (and normalize the W&B variables) before anything reads the environment.
+    _load_repo_dotenv()
+    parser = argparse.ArgumentParser(description="GRPO training harnesses for dungeon agents.")
+    subparsers = parser.add_subparsers(dest="command", required=True)
+    # --- dm-grpo: train the dungeon-master generator ---
+    dm_parser = subparsers.add_parser("dm-grpo", help="Run GRPO for the dungeon-master generator.")
+    _add_common_args(dm_parser, default_model=DEFAULT_HF_DM_MODEL, default_output_dir="artifacts/grpo/dm")
+    dm_parser.add_argument("--target-ratio", type=float, action="append")
+    dm_parser.add_argument("--artifacts-root", type=Path)
+    dm_parser.add_argument("--hero-provider", choices=["gemini", "hf_local"])
+    dm_parser.add_argument("--hero-model")
+    dm_parser.add_argument("--hero-adapter-path")
+    dm_parser.add_argument("--interface-provider", choices=["strict", "simple", "gemini"])
+    dm_parser.add_argument("--interface-model")
+    dm_parser.add_argument("--interface-narrate", action="store_true")
+    dm_parser.add_argument(
+        "--translate-corporate-env",
+        action="store_true",
+        help="Rewrite hero-facing observations into a corporate app metaphor and map translated commands back through Gemini.",
+    )
+    dm_parser.add_argument("--hero-max-game-steps", type=int, default=40)
+    dm_parser.add_argument("--hero-max-tool-calls", type=int, default=80)
+    # --- hero-grpo: train the hero tool-calling policy ---
+    hero_parser = subparsers.add_parser("hero-grpo", help="Run GRPO for the hero tool-calling policy.")
+    _add_common_args(hero_parser, default_model=DEFAULT_HF_HERO_MODEL, default_output_dir="artifacts/grpo/hero")
+    hero_parser.add_argument("--world", type=Path)
+    hero_parser.add_argument("--artifacts-root", type=Path)
+    hero_parser.add_argument("--max-game-steps", type=int, default=40)
+    hero_parser.add_argument("--max-tool-calls", type=int, default=80)
+    hero_parser.add_argument("--max-tool-calling-iterations", type=int, default=32)
+    hero_parser.add_argument("--interface-provider", choices=["strict", "simple", "gemini"])
+    hero_parser.add_argument("--interface-model")
+    hero_parser.add_argument("--interface-narrate", action="store_true")
+    hero_parser.add_argument(
+        "--translate-corporate-env",
+        action="store_true",
+        help="Rewrite hero-facing observations into a corporate app metaphor and map translated commands back through Gemini.",
+    )
+    # --- joint-loop: alternate hero and DM phases; per-role options carry a hero-/dm- prefix ---
+    joint_parser = subparsers.add_parser("joint-loop", help="Alternate hero and DM GRPO phases with adapter carry-over.")
+    joint_parser.add_argument("--root-dir", type=Path, required=True)
+    joint_parser.add_argument("--cycles", type=int, default=1)
+    joint_parser.add_argument("--target-ratio", type=float, action="append")
+    joint_parser.add_argument("--hero-world", type=Path)
+    joint_parser.add_argument("--interface-provider", choices=["strict", "simple", "gemini"])
+    joint_parser.add_argument("--interface-model")
+    joint_parser.add_argument("--interface-narrate", action="store_true")
+    joint_parser.add_argument(
+        "--translate-corporate-env",
+        action="store_true",
+        help="Rewrite hero-facing observations into a corporate app metaphor and map translated commands back through Gemini.",
+    )
+    joint_parser.add_argument("--hero-max-game-steps", type=int, default=40)
+    joint_parser.add_argument("--hero-max-tool-calls", type=int, default=80)
+    joint_parser.add_argument("--hero-max-tool-calling-iterations", type=int, default=32)
+    _add_prefixed_common_args(
+        joint_parser,
+        prefix="hero",
+        default_model=DEFAULT_HF_HERO_MODEL,
+        default_max_steps=24,
+        default_num_prompts=16,
+        default_max_completion_length=512,
+    )
+    _add_prefixed_common_args(
+        joint_parser,
+        prefix="dm",
+        default_model=DEFAULT_HF_DM_MODEL,
+        default_max_steps=8,
+        default_num_prompts=16,
+        default_max_completion_length=2048,
+    )
+    # --- smoke-dataset: print dataset rows without training ---
+    dataset_parser = subparsers.add_parser("smoke-dataset", help="Print smoke dataset rows for inspection.")
+    dataset_parser.add_argument("role", choices=["dm", "hero"])
+    dataset_parser.add_argument("--num-prompts", type=int, default=2)
+    dataset_parser.add_argument("--target-ratio", type=float, action="append")
+    dataset_parser.add_argument("--world", type=Path)
+    dataset_parser.add_argument("--max-game-steps", type=int, default=40)
+    dataset_parser.add_argument("--max-tool-calls", type=int, default=80)
+    args = parser.parse_args(argv)
+    # smoke-dataset and joint-loop return early: they do not define the
+    # un-prefixed common options that the GRPOLaunchConfig below reads.
+    if args.command == "smoke-dataset":
+        if args.role == "dm":
+            rows = build_dm_grpo_dataset(num_prompts=args.num_prompts, target_ratios=args.target_ratio)
+        else:
+            # --world is optional; when given, the file is read as UTF-8 JSON.
+            world_input = None if args.world is None else json.loads(args.world.read_text(encoding="utf-8"))
+            rows = build_hero_grpo_dataset(
+                num_prompts=args.num_prompts,
+                world_input=world_input,
+                max_game_steps=args.max_game_steps,
+                max_tool_calls=args.max_tool_calls,
+            )
+        print(json.dumps(rows, indent=2))
+        return 0
+    if args.command == "joint-loop":
+        # Each role's output directory is a fixed subdirectory of --root-dir.
+        hero_config = _build_prefixed_grpo_config(args, "hero", default_output_dir=args.root_dir / "hero")
+        dm_config = _build_prefixed_grpo_config(args, "dm", default_output_dir=args.root_dir / "dm")
+        run_joint_training_loop(
+            JointTrainingConfig(
+                root_dir=args.root_dir,
+                cycles=args.cycles,
+                hero_config=hero_config,
+                dm_config=dm_config,
+                target_ratios=args.target_ratio,
+                hero_world_path=args.hero_world,
+                interface_provider=args.interface_provider,
+                interface_model=args.interface_model,
+                interface_narrate=args.interface_narrate,
+                interface_translation_mode="corporate_app" if args.translate_corporate_env else None,
+                hero_max_game_steps=args.hero_max_game_steps,
+                hero_max_tool_calls=args.hero_max_tool_calls,
+                hero_max_tool_calling_iterations=args.hero_max_tool_calling_iterations,
+            )
+        )
+        return 0
+    # Only dm-grpo and hero-grpo reach this point; both registered the common options.
+    config = GRPOLaunchConfig(
+        model_name=args.model,
+        output_dir=args.output_dir,
+        resume_adapter_path=args.resume_adapter_path,
+        max_steps=args.max_steps,
+        num_prompts=args.num_prompts,
+        learning_rate=args.learning_rate,
+        per_device_train_batch_size=args.per_device_train_batch_size,
+        gradient_accumulation_steps=args.gradient_accumulation_steps,
+        num_generations=args.num_generations,
+        max_completion_length=args.max_completion_length,
+        logging_steps=args.logging_steps,
+        save_steps=args.save_steps,
+        seed=args.seed,
+        rank=args.rank,
+        alpha=args.alpha,
+        dropout=args.dropout,
+        temperature=args.temperature,
+        top_p=args.top_p,
+        top_k=args.top_k,
+        min_p=args.min_p,
+        repetition_penalty=args.repetition_penalty,
+        # The CLI exposes opt-out flags (--no-wandb, --no-4bit, --no-vllm-sleep-mode); invert them here.
+        use_wandb=not args.no_wandb,
+        run_name=args.run_name,
+        trust_remote_code=args.trust_remote_code,
+        load_in_4bit=not args.no_4bit,
+        loss_type=args.loss_type,
+        importance_sampling_level=args.importance_sampling_level,
+        use_transformers_paged=args.use_transformers_paged,
+        cache_implementation=args.cache_implementation,
+        use_vllm=args.use_vllm,
+        vllm_mode=args.vllm_mode,
+        vllm_gpu_memory_utilization=args.vllm_gpu_memory_utilization,
+        vllm_enable_sleep_mode=not args.no_vllm_sleep_mode,
+    )
+    if args.command == "dm-grpo":
+        run_dm_grpo(
+            config,
+            target_ratios=args.target_ratio,
+            artifacts_root=args.artifacts_root,
+            closed_loop=DMClosedLoopConfig(
+                hero_provider=args.hero_provider,
+                hero_model=args.hero_model,
+                hero_adapter_path=args.hero_adapter_path,
+                interface_provider=args.interface_provider,
+                interface_model=args.interface_model,
+                interface_narrate=args.interface_narrate,
+                interface_translation_mode="corporate_app" if args.translate_corporate_env else None,
+                hero_max_game_steps=args.hero_max_game_steps,
+                hero_max_tool_calls=args.hero_max_tool_calls,
+            ),
+        )
+        return 0
+    # Remaining command is hero-grpo.
+    run_hero_grpo(
+        config,
+        world_path=args.world,
+        artifacts_root=args.artifacts_root,
+        interface_provider=args.interface_provider,
+        interface_model=args.interface_model,
+        interface_narrate=args.interface_narrate,
+        interface_translation_mode="corporate_app" if args.translate_corporate_env else None,
+        max_game_steps=args.max_game_steps,
+        max_tool_calls=args.max_tool_calls,
+        max_tool_calling_iterations=args.max_tool_calling_iterations,
+    )
+    return 0
+def _load_repo_dotenv() -> None:
+    load_dotenv(Path(__file__).resolve().parents[2] / ".env", override=False)
+    _normalize_wandb_env()
+def _normalize_wandb_env() -> None:
+    project = os.getenv("WANDB_PROJECT")
+    entity = os.getenv("WANDB_ENTITY")
+    if entity or not project or "/" not in project:
+        return
+    maybe_entity, maybe_project = project.split("/", 1)
+    if not maybe_entity or not maybe_project:
+        return
+    os.environ["WANDB_ENTITY"] = maybe_entity
+    os.environ["WANDB_PROJECT"] = maybe_project
+def _add_common_args(parser: argparse.ArgumentParser, *, default_model: str, default_output_dir: str) -> None:
+    parser.add_argument("--model", default=default_model)
+    parser.add_argument("--output-dir", type=Path, default=Path(default_output_dir))
+    parser.add_argument("--resume-adapter-path")
+    parser.add_argument("--run-name")
+    parser.add_argument("--max-steps", type=int, default=10)
+    parser.add_argument("--num-prompts", type=int, default=16)
+    parser.add_argument("--learning-rate", type=float, default=1e-5)
+    parser.add_argument("--per-device-train-batch-size", type=int, default=2)
+    parser.add_argument("--gradient-accumulation-steps", type=int, default=8)
+    parser.add_argument("--num-generations", type=int, default=2)
+    parser.add_argument("--max-completion-length", type=int, default=512)
+    parser.add_argument("--logging-steps", type=int, default=1)
+    parser.add_argument("--save-steps", type=int, default=10)
+    parser.add_argument("--seed", type=int, default=42)
+    parser.add_argument("--rank", type=int, default=16)
+    parser.add_argument("--alpha", type=int, default=32)
+    parser.add_argument("--dropout", type=float, default=0.05)
+    parser.add_argument("--temperature", type=float, default=0.6)
+    parser.add_argument("--top-p", type=float, default=0.95)
+    parser.add_argument("--top-k", type=int, default=20)
+    parser.add_argument("--min-p", type=float)
+    parser.add_argument("--repetition-penalty", type=float, default=1.0)
+    parser.add_argument("--loss-type", choices=SUPPORTED_GRPO_LOSS_TYPES, default="dapo")
+    parser.add_argument(
+        "--importance-sampling-level",
+        choices=SUPPORTED_IMPORTANCE_SAMPLING_LEVELS,
+        default="token",
+    )
+    parser.add_argument("--use-transformers-paged", action="store_true")
+    parser.add_argument("--cache-implementation")
+    parser.add_argument("--use-vllm", action="store_true")
+    parser.add_argument("--vllm-mode", choices=["server", "colocate"], default="colocate")
+    parser.add_argument("--vllm-gpu-memory-utilization", type=float, default=0.2)
+    parser.add_argument("--no-vllm-sleep-mode", action="store_true")
+    parser.add_argument("--trust-remote-code", action="store_true")
+    parser.add_argument("--no-4bit", action="store_true")
+    parser.add_argument("--no-wandb", action="store_true")
+def _add_prefixed_common_args(
+    parser: argparse.ArgumentParser,
+    *,
+    prefix: str,
+    default_model: str,
+    default_max_steps: int,
+    default_num_prompts: int,
+    default_max_completion_length: int,
+) -> None:
+    parser.add_argument(f"--{prefix}-model", default=default_model)
+    parser.add_argument(f"--{prefix}-resume-adapter-path")
+    parser.add_argument(f"--{prefix}-run-name")
+    parser.add_argument(f"--{prefix}-max-steps", type=int, default=default_max_steps)
+    parser.add_argument(f"--{prefix}-num-prompts", type=int, default=default_num_prompts)
+    parser.add_argument(f"--{prefix}-learning-rate", type=float, default=1e-5)
+    parser.add_argument(f"--{prefix}-per-device-train-batch-size", type=int, default=2)
+    parser.add_argument(f"--{prefix}-gradient-accumulation-steps", type=int, default=8)
+    parser.add_argument(f"--{prefix}-num-generations", type=int, default=2)
+    parser.add_argument(f"--{prefix}-max-completion-length", type=int, default=default_max_completion_length)
+    parser.add_argument(f"--{prefix}-logging-steps", type=int, default=1)
+    parser.add_argument(f"--{prefix}-save-steps", type=int, default=4)
+    parser.add_argument(f"--{prefix}-seed", type=int, default=42)
+    parser.add_argument(f"--{prefix}-rank", type=int, default=16)
+    parser.add_argument(f"--{prefix}-alpha", type=int, default=32)
+    parser.add_argument(f"--{prefix}-dropout", type=float, default=0.05)
+    parser.add_argument(f"--{prefix}-temperature", type=float, default=0.6)
+    parser.add_argument(f"--{prefix}-top-p", type=float, default=0.95)
+    parser.add_argument(f"--{prefix}-top-k", type=int, default=20)
+    parser.add_argument(f"--{prefix}-min-p", type=float)
+    parser.add_argument(f"--{prefix}-repetition-penalty", type=float, default=1.0)
+    parser.add_argument(f"--{prefix}-loss-type", choices=SUPPORTED_GRPO_LOSS_TYPES, default="dapo")
+    parser.add_argument(
+        f"--{prefix}-importance-sampling-level",
+        choices=SUPPORTED_IMPORTANCE_SAMPLING_LEVELS,
+        default="token",
+    )
+    parser.add_argument(f"--{prefix}-use-transformers-paged", action="store_true")
+    parser.add_argument(f"--{prefix}-cache-implementation")
+    parser.add_argument(f"--{prefix}-use-vllm", action="store_true")
+    parser.add_argument(f"--{prefix}-vllm-mode", choices=["server", "colocate"], default="colocate")
+    parser.add_argument(f"--{prefix}-vllm-gpu-memory-utilization", type=float, default=0.2)
+    parser.add_argument(f"--{prefix}-no-vllm-sleep-mode", action="store_true")
+    parser.add_argument(f"--{prefix}-trust-remote-code", action="store_true")
+    parser.add_argument(f"--{prefix}-no-4bit", action="store_true")
+    parser.add_argument(f"--{prefix}-no-wandb", action="store_true")
+def _build_prefixed_grpo_config(args: argparse.Namespace, prefix: str, *, default_output_dir: Path) -> GRPOLaunchConfig:
+    def value(name: str):
+        return getattr(args, f"{prefix}_{name}")
+    return GRPOLaunchConfig(
+        model_name=value("model"),
+        output_dir=default_output_dir,
+        resume_adapter_path=value("resume_adapter_path"),
+        max_steps=value("max_steps"),
+        num_prompts=value("num_prompts"),
+        learning_rate=value("learning_rate"),
+        per_device_train_batch_size=value("per_device_train_batch_size"),
+        gradient_accumulation_steps=value("gradient_accumulation_steps"),
+        num_generations=value("num_generations"),
+        max_completion_length=value("max_completion_length"),
+        logging_steps=value("logging_steps"),
+        save_steps=value("save_steps"),
+        seed=value("seed"),
+        rank=value("rank"),
+        alpha=value("alpha"),
+        dropout=value("dropout"),
+        temperature=value("temperature"),
+        top_p=value("top_p"),
+        top_k=value("top_k"),
+        min_p=value("min_p"),
+        repetition_penalty=value("repetition_penalty"),
+        use_wandb=not value("no_wandb"),
+        run_name=value("run_name"),
+        trust_remote_code=value("trust_remote_code"),
+        load_in_4bit=not value("no_4bit"),
+        loss_type=value("loss_type"),
+        importance_sampling_level=value("importance_sampling_level"),
+        use_transformers_paged=value("use_transformers_paged"),
+        cache_implementation=value("cache_implementation"),
+        use_vllm=value("use_vllm"),
+        vllm_mode=value("vllm_mode"),
+        vllm_gpu_memory_utilization=value("vllm_gpu_memory_utilization"),
+        vllm_enable_sleep_mode=not value("no_vllm_sleep_mode"),
+    )
+# Script entry point: run main() and use its return value as the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())

agents/train/grpo.py ADDED Viewed

The diff for this file is too large to render. See raw diff

agents/train/joint.py ADDED Viewed

	@@ -0,0 +1,278 @@

+from __future__ import annotations
+import json
+import os
+from contextlib import contextmanager
+from dataclasses import asdict, dataclass, replace
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any, Callable, Iterator
+from .grpo import DMClosedLoopConfig, GRPOLaunchConfig, run_dm_grpo, run_hero_grpo
+@dataclass(frozen=True)
+class JointTrainingConfig:
+    """Immutable settings for one joint hero/DM GRPO training run."""
+    # Directory that receives joint_state.json and the per-cycle cycle_NN/hero and cycle_NN/dm outputs.
+    root_dir: Path
+    # Number of hero-then-DM cycles to run; run_joint_training_loop rejects values below 1.
+    cycles: int
+    # Base launch configs. Each phase overrides output_dir and resume_adapter_path, and fills in
+    # a generated run_name when none is set.
+    hero_config: GRPOLaunchConfig
+    dm_config: GRPOLaunchConfig
+    # Passed to run_dm_grpo as target_ratios.
+    target_ratios: list[float] | None = None
+    # Passed to run_hero_grpo as world_path.
+    hero_world_path: Path | None = None
+    # Interface settings passed both to run_hero_grpo and to the DM phase's DMClosedLoopConfig.
+    interface_provider: str | None = None
+    interface_model: str | None = None
+    interface_narrate: bool = False
+    interface_translation_mode: str | None = None
+    # Hero limits: the first two go to the hero run and the DM closed loop; the last one is
+    # only passed to run_hero_grpo.
+    hero_max_game_steps: int = 40
+    hero_max_tool_calls: int = 80
+    hero_max_tool_calling_iterations: int = 32
+def run_joint_training_loop(config: JointTrainingConfig) -> Path:
+    if config.cycles < 1:
+        raise ValueError("cycles must be at least 1.")
+    config.root_dir.mkdir(parents=True, exist_ok=True)
+    latest_hero_adapter = _initial_adapter_path(config.hero_config.resume_adapter_path)
+    latest_dm_adapter = _initial_adapter_path(config.dm_config.resume_adapter_path)
+    phases: list[dict[str, Any]] = []
+    _write_manifest(config, phases, status="running")
+    try:
+        for cycle_index in range(config.cycles):
+            cycle_number = cycle_index + 1
+            cycle_dir = config.root_dir / f"cycle_{cycle_number:02d}"
+            hero_dir = cycle_dir / "hero"
+            dm_dir = cycle_dir / "dm"
+            hero_result = _run_or_resume_hero_phase(
+                config=config,
+                cycle_number=cycle_number,
+                output_dir=hero_dir,
+                resume_adapter_path=latest_hero_adapter,
+                phases=phases,
+                on_phase_state_change=lambda: _write_manifest(config, phases, status="running"),
+            )
+            latest_hero_adapter = hero_result
+            _write_manifest(config, phases, status="running")
+            dm_result = _run_or_resume_dm_phase(
+                config=config,
+                cycle_number=cycle_number,
+                output_dir=dm_dir,
+                resume_adapter_path=latest_dm_adapter,
+                hero_adapter_path=latest_hero_adapter,
+                phases=phases,
+                on_phase_state_change=lambda: _write_manifest(config, phases, status="running"),
+            )
+            latest_dm_adapter = dm_result
+            _write_manifest(config, phases, status="running")
+    except Exception as exc:
+        _write_manifest(config, phases, status="failed", error=str(exc))
+        raise
+    _write_manifest(
+        config,
+        phases,
+        status="completed",
+        latest_hero_adapter_path=str(latest_hero_adapter) if latest_hero_adapter is not None else None,
+        latest_dm_adapter_path=str(latest_dm_adapter) if latest_dm_adapter is not None else None,
+    )
+    return config.root_dir
+def _run_or_resume_hero_phase(
+    *,
+    config: JointTrainingConfig,
+    cycle_number: int,
+    output_dir: Path,
+    resume_adapter_path: Path | None,
+    phases: list[dict[str, Any]],
+    on_phase_state_change: Callable[[], None] | None = None,
+) -> Path:
+    state_path = output_dir / "phase_state.json"
+    existing_state = _load_phase_state(state_path)
+    if existing_state is not None and existing_state.get("status") == "completed":
+        phases.append(existing_state)
+        return output_dir
+    output_dir.mkdir(parents=True, exist_ok=True)
+    run_name = config.hero_config.run_name or f"{config.root_dir.name}-hero-cycle-{cycle_number:02d}"
+    phase_state = {
+        "phase": "hero",
+        "cycle": cycle_number,
+        "status": "running",
+        "run_name": run_name,
+        "output_dir": str(output_dir),
+        "resume_adapter_path": None if resume_adapter_path is None else str(resume_adapter_path),
+        "started_at": _utc_now(),
+    }
+    phases.append(phase_state)
+    _write_json(state_path, phase_state)
+    if on_phase_state_change is not None:
+        on_phase_state_change()
+    phase_config = replace(
+        config.hero_config,
+        output_dir=output_dir,
+        run_name=run_name,
+        resume_adapter_path=None if resume_adapter_path is None else str(resume_adapter_path),
+    )
+    with _wandb_phase_env(group=config.root_dir.name, job_type="hero"):
+        run_hero_grpo(
+            phase_config,
+            world_path=config.hero_world_path,
+            artifacts_root=output_dir / "artifacts",
+            interface_provider=config.interface_provider,
+            interface_model=config.interface_model,
+            interface_narrate=config.interface_narrate,
+            interface_translation_mode=config.interface_translation_mode,
+            max_game_steps=config.hero_max_game_steps,
+            max_tool_calls=config.hero_max_tool_calls,
+            max_tool_calling_iterations=config.hero_max_tool_calling_iterations,
+        )
+    phase_state["status"] = "completed"
+    phase_state["completed_at"] = _utc_now()
+    _write_json(state_path, phase_state)
+    return output_dir
+def _run_or_resume_dm_phase(
+    *,
+    config: JointTrainingConfig,
+    cycle_number: int,
+    output_dir: Path,
+    resume_adapter_path: Path | None,
+    hero_adapter_path: Path | None,
+    phases: list[dict[str, Any]],
+    on_phase_state_change: Callable[[], None] | None = None,
+) -> Path:
+    if hero_adapter_path is None:
+        raise RuntimeError("DM phase requires a hero adapter path from a completed hero phase.")
+    state_path = output_dir / "phase_state.json"
+    existing_state = _load_phase_state(state_path)
+    if existing_state is not None and existing_state.get("status") == "completed":
+        phases.append(existing_state)
+        return output_dir
+    output_dir.mkdir(parents=True, exist_ok=True)
+    run_name = config.dm_config.run_name or f"{config.root_dir.name}-dm-cycle-{cycle_number:02d}"
+    phase_state = {
+        "phase": "dm",
+        "cycle": cycle_number,
+        "status": "running",
+        "run_name": run_name,
+        "output_dir": str(output_dir),
+        "resume_adapter_path": None if resume_adapter_path is None else str(resume_adapter_path),
+        "hero_adapter_path": str(hero_adapter_path),
+        "started_at": _utc_now(),
+    }
+    phases.append(phase_state)
+    _write_json(state_path, phase_state)
+    if on_phase_state_change is not None:
+        on_phase_state_change()
+    phase_config = replace(
+        config.dm_config,
+        output_dir=output_dir,
+        run_name=run_name,
+        resume_adapter_path=None if resume_adapter_path is None else str(resume_adapter_path),
+    )
+    closed_loop = DMClosedLoopConfig(
+        hero_provider="hf_local",
+        hero_model=config.hero_config.model_name,
+        hero_adapter_path=str(hero_adapter_path),
+        interface_provider=config.interface_provider,
+        interface_model=config.interface_model,
+        interface_narrate=config.interface_narrate,
+        interface_translation_mode=config.interface_translation_mode,
+        hero_max_game_steps=config.hero_max_game_steps,
+        hero_max_tool_calls=config.hero_max_tool_calls,
+    )
+    with _wandb_phase_env(group=config.root_dir.name, job_type="dm"):
+        run_dm_grpo(
+            phase_config,
+            target_ratios=config.target_ratios,
+            artifacts_root=output_dir / "artifacts",
+            closed_loop=closed_loop,
+        )
+    phase_state["status"] = "completed"
+    phase_state["completed_at"] = _utc_now()
+    _write_json(state_path, phase_state)
+    return output_dir
+def _write_manifest(
+    config: JointTrainingConfig,
+    phases: list[dict[str, Any]],
+    *,
+    status: str,
+    error: str | None = None,
+    latest_hero_adapter_path: str | None = None,
+    latest_dm_adapter_path: str | None = None,
+) -> None:
+    payload = {
+        "status": status,
+        "updated_at": _utc_now(),
+        "error": error,
+        "latest_hero_adapter_path": latest_hero_adapter_path,
+        "latest_dm_adapter_path": latest_dm_adapter_path,
+        "config": _to_jsonable(asdict(config)),
+        "phases": phases,
+    }
+    _write_json(config.root_dir / "joint_state.json", payload)
+@contextmanager
+def _wandb_phase_env(*, group: str, job_type: str) -> Iterator[None]:
+    previous_group = os.getenv("WANDB_RUN_GROUP")
+    previous_job_type = os.getenv("WANDB_JOB_TYPE")
+    os.environ["WANDB_RUN_GROUP"] = group
+    os.environ["WANDB_JOB_TYPE"] = job_type
+    try:
+        yield
+    finally:
+        _restore_env("WANDB_RUN_GROUP", previous_group)
+        _restore_env("WANDB_JOB_TYPE", previous_job_type)
+def _restore_env(name: str, value: str | None) -> None:
+    if value is None:
+        os.environ.pop(name, None)
+    else:
+        os.environ[name] = value
+def _load_phase_state(path: Path) -> dict[str, Any] | None:
+    if not path.exists():
+        return None
+    return json.loads(path.read_text(encoding="utf-8"))
+def _write_json(path: Path, payload: dict[str, Any]) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(json.dumps(_to_jsonable(payload), indent=2, sort_keys=True) + "\n", encoding="utf-8")
+def _to_jsonable(value: Any) -> Any:
+    if isinstance(value, Path):
+        return str(value)
+    if isinstance(value, dict):
+        return {str(key): _to_jsonable(item) for key, item in value.items()}
+    if isinstance(value, list):
+        return [_to_jsonable(item) for item in value]
+    return value
+def _initial_adapter_path(raw_path: str | None) -> Path | None:
+    if raw_path is None:
+        return None
+    path = Path(raw_path)
+    return path if path.exists() else None
+def _utc_now() -> str:
+    return datetime.now(UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")

pyproject.toml ADDED Viewed

	@@ -0,0 +1,63 @@

+[build-system]
+requires = ["setuptools>=69", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "dnd-agents"
+version = "0.1.0"
+description = "Dungeon master and hero agent environments built on TextWorld and OpenEnv."
+readme = "SPEC.md"
+requires-python = ">=3.11,<3.12"
+dependencies = [
+  "openenv-core==0.2.1",
+  "textworld==1.7.0",
+  "fastapi>=0.115,<1",
+  "uvicorn>=0.30,<1",
+  "pydantic>=2.12,<3",
+  "python-dotenv>=1.0,<2",
+  "python-multipart>=0.0.9,<1",
+  "google-genai>=1.0,<2",
+  "huggingface-hub>=1.6,<2",
+  "pytest>=8.0,<9",
+]
+[project.scripts]
+dnd-master = "agents.master.main:main"
+dnd-hero = "agents.hero.__main__:main"
+dnd-loop = "agents.loop.__main__:main"
+dnd-train = "agents.train.__main__:main"
+dnd-openenv = "agents.openenv_server.__main__:main"
+[project.optional-dependencies]
+local-llm = [
+  "accelerate==1.13.0",
+  "bitsandbytes==0.49.2",
+  "huggingface-hub>=1.6,<2",
+  "peft==0.18.1",
+  "transformers==5.3.0",
+  "vllm==0.12.0; platform_system == 'Linux'",
+]
+train = [
+  "accelerate==1.13.0",
+  "bitsandbytes==0.49.2",
+  "datasets==4.6.1",
+  "huggingface-hub>=1.6,<2",
+  "jmespath>=1.0,<2",
+  "peft==0.18.1",
+  "transformers==5.3.0",
+  "trl==0.29.0",
+  "vllm==0.12.0; platform_system == 'Linux'",
+  "wandb==0.25.0",
+]
+[tool.setuptools.packages.find]
+include = ["agents*"]
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+markers = [
+  "live: tests that call live external model APIs",
+]
+filterwarnings = [
+  "ignore:Game '.*' is not fully supported\\..*",
+]