diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..415182fca3387bce886684eeaf9c4cdd3a6ac297
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,19 @@
+FROM python:3.11-slim
+# Skip .pyc generation and keep stdout/stderr unbuffered so container logs show up immediately.
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# Relative paths in the instructions below resolve against /app.
+WORKDIR /app
+# Install build tooling, git, and curl; the apt package lists are deleted in the same layer.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends build-essential git curl \
+    && rm -rf /var/lib/apt/lists/*
+# Copy the whole build context into the image.
+COPY . /app
+# Upgrade pip, then install the project found in /app without keeping a pip cache.
+RUN pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir .
+# Port that uvicorn binds below.
+EXPOSE 8000
+# create_app is passed with --factory, so uvicorn calls it to obtain the ASGI application.
+CMD ["uvicorn", "agents.spaces.dm_space:create_app", "--factory", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/README.md b/README.md
index d8250b320d18703e310790cc04443b8193d779de..fe6b38d43150331d3af9144dd125756af50a054a 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,19 @@
 ---
-title: FATHOM DM
-emoji: 🏃
-colorFrom: yellow
-colorTo: blue
+title: DND-DM
 sdk: docker
-pinned: false
+app_port: 8000
+tags:
+  - openenv
+  - dnd
+  - textworld
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# DND-DM
+
+This Space hosts the CPU-only `DND-DM` environment.
+
+- OpenEnv API: `/env`
+- Health check: `/healthz`
+- Latest normalized world output: `/world-output/latest`
+
+`DND-DM` evaluates submitted world definitions. It does not generate worlds by itself.
diff --git a/agents/__init__.py b/agents/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..fbc2dd692e781d0a404a2cebabc4947dc3c5faac
--- /dev/null
+++ b/agents/__init__.py
@@ -0,0 +1,2 @@
+"""Agent environments for the dungeon project."""
+
diff --git a/agents/hero/__init__.py b/agents/hero/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2dc140bb1caa13bda440bb032b58bd1a560b40eb
--- /dev/null
+++ b/agents/hero/__init__.py
@@ -0,0 +1,58 @@
+"""Hero agent environment and runner primitives."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+__all__ = [
+    "HeroEnvironment",
+    "HeroLLMPolicy",
+    "HeroObservation",
+    "HeroPolicy",
+    "HeroPolicyError",
+    "HeroRunner",
+    "HeroServerAction",
+    "HeroState",
+    "HeroTraceEvent",
+    "ScriptedToolCallingPolicy",
+    "ToolCallingPolicy",
+]
+
+if TYPE_CHECKING:
+    from .env import HeroEnvironment
+    from .policy import HeroLLMPolicy, HeroPolicy, HeroPolicyError, HeroTraceEvent
+    from .runner import HeroRunner, ScriptedToolCallingPolicy, ToolCallingPolicy
+    from .schema import HeroObservation, HeroServerAction, HeroState
+
+
+def __getattr__(name: str) -> Any:
+    """Resolve a lazily exported name the first time it is requested.
+
+    Python invokes this module-level hook (PEP 562) only after normal
+    attribute lookup on the package has failed, so a submodule is imported
+    no earlier than the first request for one of its exports.
+
+    Args:
+        name: Attribute being looked up on the package.
+
+    Returns:
+        The object called ``name`` in the submodule that defines it.
+
+    Raises:
+        AttributeError: If ``name`` is not one of the lazy exports.
+    """
+    from importlib import import_module
+
+    submodule_exports = {
+        "env": ("HeroEnvironment",),
+        "policy": ("HeroLLMPolicy", "HeroPolicy", "HeroPolicyError", "HeroTraceEvent"),
+        "runner": ("HeroRunner", "ScriptedToolCallingPolicy", "ToolCallingPolicy"),
+        "schema": ("HeroObservation", "HeroServerAction", "HeroState"),
+    }
+    for submodule, exported in submodule_exports.items():
+        if name in exported:
+            # Import on demand; only the owning submodule is loaded.
+            provider = import_module(f".{submodule}", __name__)
+            return getattr(provider, name)
+    # Same wording Python uses for a missing module attribute.
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
diff --git a/agents/hero/__main__.py b/agents/hero/__main__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e0da3fbc56a682eaa7e458bf3f64af49e277729
--- /dev/null
+++ b/agents/hero/__main__.py
@@ -0,0 +1,78 @@
+from __future__ import annotations
+
+import argparse
+import json
+from pathlib import Path
+
+from agents.master.sample import load_world
+from agents.shared.runtime import build_interface_adapter, resolve_interface_config
+
+from .env import HeroEnvironment
+
+
+def _manual_action(raw: str) -> dict[str, object]:
+    """Map one typed line to a hero action dict; anything that is not a known slash command becomes ``act``."""
+    if raw == "/read":
+        return {"tool": "scratchpad_read"}
+    for mode in ("append", "replace"):
+        if raw.startswith(prefix := f"/write {mode} "):
+            return {"tool": "scratchpad_write", "mode": mode, "content": raw[len(prefix) :]}
+    return {"tool": "act", "command": raw}
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Run the hero environment in manual or scripted mode and return the process exit code."""
+    parser = argparse.ArgumentParser(description="Local hero environment smoke runner")
+    parser.add_argument("mode", choices=["manual", "scripted"])
+    parser.add_argument("world", help="Path to a world-definition JSON file.")
+    parser.add_argument("--actions", help="JSON file containing a list of hero action objects.")
+    parser.add_argument("--debug", action="store_true")
+    parser.add_argument("--interface-model")
+    parser.add_argument(
+        "--translate-corporate-env",
+        action="store_true",
+        help="Rewrite observations into a corporate app metaphor and translate parser-safe corporate commands back through Gemini.",
+    )
+    args = parser.parse_args(argv)
+    if args.mode == "scripted" and not args.actions:  # fail fast, before the world, adapter, or env is built
+        parser.error("--actions is required for scripted mode.")
+    world = load_world(args.world)
+    interface_adapter = build_interface_adapter(
+        resolve_interface_config(
+            model_name=args.interface_model,
+            translation_mode="corporate_app" if args.translate_corporate_env else None,
+        )
+    )
+    env = HeroEnvironment(debug=args.debug, interface_adapter=interface_adapter)
+    observation = env.reset(world)
+    print(observation.message)
+
+    if args.mode == "scripted":
+        actions = json.loads(Path(args.actions).read_text(encoding="utf-8"))
+        for action in actions:
+            result = env.step(action)
+            print(result.observation.message)
+            if result.done:
+                print(json.dumps(result.observation.model_dump(), indent=2))
+                return 0
+        print(json.dumps(env.state.model_dump(), indent=2))
+        return 0
+
+    while not observation.done:
+        try:
+            raw = input("hero> ").strip()
+        except EOFError:
+            print()
+            return 0
+        if raw in {"quit", "exit"}:
+            return 0
+        result = env.step(_manual_action(raw))
+        observation = result.observation
+        print(observation.message)
+        if result.done:
+            print(json.dumps(observation.model_dump(), indent=2))
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/agents/hero/cli.py b/agents/hero/cli.py
new file mode 100644
index 0000000000000000000000000000000000000000..6feb6e082284ca60fd8cf9cca82d3e84e4faad77
--- /dev/null
+++ b/agents/hero/cli.py
@@ -0,0 +1,115 @@
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass
+
+from agents.master.base import SUPPORTED_DIRECTIONS
+
+_TOKEN_RE = re.compile(r"^[a-z0-9]+(?: [a-z0-9]+)*$")
+_BANNED_OBJECT_TOKENS = {"a", "an", "the"}
+
+
+@dataclass(frozen=True)
+class CliCommandAst:  # Immutable parsed form of one accepted command.
+    kind: str  # category assigned by parse_cli_command, e.g. "move", "take_from", "submit"
+    normalized_command: str  # canonical command text stored alongside the parsed form
+    arguments: tuple[str, ...] = ()  # operands in order: a direction, object names, or the submitted answer
+
+
+@dataclass(frozen=True)
+class CliCommandParseResult:  # Outcome of parse_cli_command: either a parsed command or an error message.
+    valid: bool  # True when the command matched the strict grammar
+    normalized_command: str | None = None  # populated by _ok on success
+    ast: CliCommandAst | None = None  # populated by _ok on success
+    error: str | None = None  # reason for rejection; populated on failure
+
+
+def parse_cli_command(raw_command: str) -> CliCommandParseResult:
+    """Parse one raw hero command against the strict CLI grammar.
+
+    The text is normalized first (trimmed, lower-cased, whitespace runs
+    collapsed). Bare directions are rewritten to ``go <direction>``; all
+    other accepted commands keep their normalized text.
+
+    Returns:
+        A result with ``valid=True`` and an AST on success, otherwise
+        ``valid=False`` with an ``error`` message.
+    """
+    command = normalize_cli_command(raw_command)
+    if not command:
+        return CliCommandParseResult(valid=False, error="Command must not be empty.")
+    if command in {"look", "inventory", "wait"}:
+        return _ok(command, command)
+
+    # Movement: a bare direction or "go <direction>".
+    if command in SUPPORTED_DIRECTIONS:
+        return _ok("move", f"go {command}", command)
+    if command.startswith("go "):
+        heading = command[3:].strip()
+        if heading not in SUPPORTED_DIRECTIONS:
+            return CliCommandParseResult(valid=False, error="Unknown direction.")
+        return _ok("move", f"go {heading}", heading)
+
+    # Ordered (kind, pattern) rules; "take ... from ..." has to be tried before plain "take ...".
+    rules = (
+        ("look_in", r"look in (?P<object>.+)"),
+        ("take_from", r"take (?P<object>.+) from (?P<source>.+)"),
+        ("open", r"open (?P<object>.+)"),
+        ("read", r"read (?P<object>.+)"),
+        ("talk", r"talk (?P<object>.+)"),
+        ("examine", r"examine (?P<object>.+)"),
+        ("take", r"take (?P<object>.+)"),
+        ("unlock", r"unlock (?P<object>.+) with (?P<tool>.+)"),
+        ("use", r"use (?P<object>.+) on (?P<target>.+)"),
+        ("combine", r"combine (?P<object>.+) with (?P<target>.+)"),
+        ("give", r"give (?P<object>.+) to (?P<target>.+)"),
+    )
+    for kind, pattern in rules:
+        if found := re.fullmatch(pattern, command):
+            operands = found.groups()
+            if len(operands) == 1:
+                return _object_result(kind, command, operands[0].strip())
+            return _two_object_result(kind, command, *operands)
+    if found := re.fullmatch(r"submit (?P<answer>[a-z0-9]+(?: [a-z0-9]+)*)", command):
+        return _ok("submit", command, found.group("answer").strip())
+    return CliCommandParseResult(valid=False, error="Command does not match the strict CLI grammar.")
+
+
+def normalize_cli_command(raw_command: str) -> str:  # trim, lower-case, and collapse whitespace runs to one space
+    return " ".join(re.split(r"\s+", raw_command.strip().lower()))
+
+
+def _object_result(kind: str, normalized_command: str, object_text: str) -> CliCommandParseResult:
+    """Build the result for a one-operand command, rejecting an invalid object name."""
+    if (problem := _validate_object_text(object_text)) is not None:
+        return CliCommandParseResult(valid=False, error=problem)
+    return _ok(kind, normalized_command, object_text)
+
+
+def _two_object_result(kind: str, normalized_command: str, first: str, second: str) -> CliCommandParseResult:
+    """Build the result for a two-operand command; the first invalid operand's error is reported."""
+    for operand in (first, second):
+        problem = _validate_object_text(operand)
+        if problem is not None:
+            return CliCommandParseResult(valid=False, error=problem)
+    # Operands are stored without surrounding whitespace.
+    return _ok(kind, normalized_command, first.strip(), second.strip())
+
+
+def _validate_object_text(value: str) -> str | None:
+    """Return an error message when ``value`` is not a parser-safe object name, else ``None``."""
+    if not (text := value.strip()):
+        return "Command target must not be empty."
+    if _TOKEN_RE.fullmatch(text) is None:
+        return "Command targets must use lowercase letters, numbers, and spaces only."
+    if not _BANNED_OBJECT_TOKENS.isdisjoint(text.split()):
+        return "Strict CLI commands must use exact parser-safe object names without articles."
+    return None
+
+
+def _ok(kind: str, normalized_command: str, *arguments: str) -> CliCommandParseResult:
+    """Wrap a successfully parsed command and its operands in a valid parse result."""
+    # ``arguments`` is already a tuple because it is collected through *args.
+    tree = CliCommandAst(kind=kind, normalized_command=normalized_command, arguments=arguments)
+    outcome = CliCommandParseResult(valid=True, normalized_command=normalized_command, ast=tree)
+    return outcome
diff --git a/agents/hero/env.py b/agents/hero/env.py
new file mode 100644
index 0000000000000000000000000000000000000000..79727b4d5395e4db8de8783b6049458f4c811ca5
--- /dev/null
+++ b/agents/hero/env.py
@@ -0,0 +1,450 @@
+from __future__ import annotations
+
+from collections import deque
+from pathlib import Path
+from typing import Any
+
+from agents.master.base import DMInterfaceError, MAX_STEP_MULTIPLIER
+from agents.master.build import WorldCompiler
+from agents.master.interface import InterfaceAdapter, StrictCliInterfaceAdapter
+from agents.master.schema import CompiledWorld, WorldDefinition
+from agents.master.session import EpisodeSession
+from agents.shared.openenv_compat import Environment, StepResult, build_step_result
+
+from .cli import parse_cli_command
+from .schema import (
+    ActAction,
+    HeroAction,
+    HeroAuxSignals,
+    HeroEpisodeStats,
+    HeroObservation,
+    HeroRewardBreakdown,
+    HeroState,
+    ScratchpadReadAction,
+    ScratchpadWriteAction,
+    validate_hero_action,
+)
+
+_DENSE_PROGRESS_SCALE = 0.30
+_SYNTAX_PENALTY = -0.02
+_INVALID_ACTION_PENALTY = -0.02
+_REPEAT_NOOP_PENALTY = -0.01
+_WRONG_SUBMIT_PENALTY = -0.10
+
+
+class HeroEnvironment(Environment[HeroAction, HeroObservation, HeroState]):
+    def __init__(  # store configuration only; the world is compiled and the session created in reset()
+        self,
+        *,
+        artifacts_root: Path | None = None,  # forwarded to WorldCompiler
+        world_input: CompiledWorld | WorldDefinition | dict[str, Any] | None = None,  # used by reset() when it gets no world
+        session: EpisodeSession | None = None,  # when given, reset() reuses this session instead of creating one
+        interface_adapter: InterfaceAdapter | None = None,  # reset() falls back to StrictCliInterfaceAdapter when None
+        model: str = "",
+        max_game_steps: int | None = None,  # instance default; reset() arguments take precedence
+        max_tool_calls: int | None = None,  # instance default; reset() arguments take precedence
+        scratchpad_max_chars: int = 8000,  # scratchpad writes that would exceed this length are rejected
+        debug: bool = False,  # when true, the scratchpad is mirrored into a hero_debug directory
+    ) -> None:
+        super().__init__()
+        self.compiler = WorldCompiler(artifacts_root=artifacts_root)
+        self._initial_world_input = world_input
+        self._provided_session = session
+        self._provided_interface_adapter = interface_adapter
+        self.model = model
+        self._default_max_game_steps = max_game_steps
+        self._default_max_tool_calls = max_tool_calls
+        self.scratchpad_max_chars = scratchpad_max_chars
+        self.debug = debug
+        self._state = HeroState()
+        self._compiled: CompiledWorld | None = None  # set by reset()
+        self._session: EpisodeSession | None = None  # set by reset()
+        self._scratchpad = ""
+        self._max_game_steps = 0  # effective budgets are resolved in reset()
+        self._max_tool_calls = 0
+        self._debug_dir: Path | None = None
+        self._episode_stats = HeroEpisodeStats()
+        self._recent_noop_signatures: deque[tuple[str, str, str]] = deque(maxlen=3)  # (command, room, fingerprint) entries
+
+    @classmethod
+    def from_session(
+        cls,
+        session: EpisodeSession,
+        *,
+        max_game_steps: int | None = None,
+        max_tool_calls: int | None = None,
+        scratchpad_max_chars: int = 8000,
+        debug: bool = False,
+    ) -> "HeroEnvironment":
+        """Build an environment around an existing ``EpisodeSession``.
+
+        ``reset`` then reuses that session instead of compiling a world.
+        """
+        budgets = {"max_game_steps": max_game_steps, "max_tool_calls": max_tool_calls}
+        environment = cls(session=session, scratchpad_max_chars=scratchpad_max_chars, debug=debug, **budgets)
+        return environment
+
+    def reset(  # start a new episode and return its first observation
+        self,
+        world_input: CompiledWorld | WorldDefinition | dict[str, Any] | None = None,
+        *,
+        seed: int | None = None,
+        episode_id: str | None = None,
+        max_game_steps: int | None = None,
+        max_tool_calls: int | None = None,
+        scratchpad_max_chars: int | None = None,
+        debug: bool | None = None,
+    ) -> HeroObservation:
+        del seed, episode_id  # accepted but unused
+        if debug is not None:
+            self.debug = debug
+        if scratchpad_max_chars is not None:
+            self.scratchpad_max_chars = scratchpad_max_chars
+        self._scratchpad = ""
+        self._episode_stats = HeroEpisodeStats()
+        self._recent_noop_signatures.clear()
+        # An injected session is reused as-is; otherwise the world is compiled (if needed) and a new session is created.
+        if self._provided_session is not None:
+            self._session = self._provided_session
+            self._compiled = self._session.compiled
+        else:
+            selected_world = world_input if world_input is not None else self._initial_world_input
+            if selected_world is None:
+                raise ValueError("HeroEnvironment.reset requires a compiled world, world definition, or live session.")
+            self._compiled = (
+                selected_world
+                if isinstance(selected_world, CompiledWorld)
+                else self.compiler.compile(selected_world)
+            )
+            adapter = self._provided_interface_adapter or StrictCliInterfaceAdapter()
+            self._session = EpisodeSession(self._compiled, interface_adapter=adapter)
+        # Budget precedence: reset() argument, then constructor default, then a derived value; 0 counts as "not set" here.
+        self._max_game_steps = max_game_steps or self._default_max_game_steps or max(
+            1, len(self._compiled.solver_policy) * MAX_STEP_MULTIPLIER
+        )
+        self._max_tool_calls = max_tool_calls or self._default_max_tool_calls or (self._max_game_steps * 4)
+        self._state = HeroState(
+            episode_id=self._compiled.episode_id,
+            step_count=0,
+            game_steps_taken=self._session.steps_taken,  # a reused session may already have steps on it
+            tool_calls_total=0,
+            max_game_steps=self._max_game_steps,
+            max_tool_calls=self._max_tool_calls,
+            game_steps_remaining=max(0, self._max_game_steps - self._session.steps_taken),
+            tool_calls_remaining=self._max_tool_calls,
+            status="running",
+            world_title=self._compiled.world.meta.title,
+            last_command=None,
+            scratchpad_chars=0,
+        )
+        self._prepare_debug_dir()
+        reward_breakdown = self._empty_breakdown(self._progress_potential())
+        observation = self._apply_transform(
+            HeroObservation(
+                message=self._session.current_feedback(),
+                reward=0.0,
+                done=False,
+                won=None,
+                reward_breakdown=reward_breakdown,
+                aux_signals=self._progress_signals(),
+            )
+        )
+        return observation
+
+    def step(  # type: ignore[override]
+        self,
+        action: HeroAction | dict[str, object],
+        timeout_s: float | None = None,
+        **kwargs: Any,
+    ) -> StepResult[HeroObservation]:
+        del timeout_s, kwargs  # accepted but unused
+        if self._session is None or self._compiled is None:
+            raise RuntimeError("HeroEnvironment.reset must be called before step().")
+        if self._state.status != "running":  # episode already over: report a terminal observation, change no counters
+            observation = HeroObservation(
+                message="",
+                reward=1.0 if self._state.status == "won" else 0.0,  # this path bypasses _observation, so episode stats are untouched
+                done=True,
+                won=self._state.status == "won",
+                terminal_reason="episode_complete",
+                reward_breakdown=self._empty_breakdown(self._progress_potential()),
+                aux_signals=self._progress_signals(),
+            )
+            return build_step_result(self._apply_transform(observation))
+        # Every validated action, scratchpad access included, consumes one tool call.
+        parsed = validate_hero_action(action)
+        self._state.tool_calls_total += 1
+        self._state.step_count = self._state.tool_calls_total
+        self._update_remaining_counters()
+        # Scratchpad read: the notebook text itself is the observation message.
+        if isinstance(parsed, ScratchpadReadAction):
+            observation = self._observation(
+                message=self._scratchpad,
+                tool=parsed.tool,
+                tool_success=True,
+                reward_breakdown=self._empty_breakdown(self._progress_potential()),
+            )
+            return build_step_result(observation)
+        # Scratchpad write: append or replace, subject to the size limit.
+        if isinstance(parsed, ScratchpadWriteAction):
+            observation = self._handle_scratchpad_write(parsed)
+            return build_step_result(observation)
+        # Any other validated action is handled as a game command.
+        observation = self._handle_act(parsed)
+        return build_step_result(observation)
+
+    @property
+    def state(self) -> HeroState:  # current HeroState object (the live instance, not a copy)
+        return self._state
+
+    @property
+    def scratchpad(self) -> str:  # current scratchpad text
+        return self._scratchpad
+
+    @property
+    def session(self) -> EpisodeSession | None:  # None until reset() has run
+        return self._session
+
+    @property
+    def episode_stats(self) -> HeroEpisodeStats:  # running totals, replaced with a fresh object on reset()
+        return self._episode_stats
+
+    def _handle_scratchpad_write(self, action: ScratchpadWriteAction) -> HeroObservation:
+        """Apply an append/replace write unless it would exceed ``scratchpad_max_chars``.
+
+        A rejected write leaves the scratchpad untouched and reports
+        ``tool_success=False``. Both outcomes start from an empty reward
+        breakdown at the current progress potential.
+        """
+        if action.mode == "append":
+            candidate = self._scratchpad + action.content
+        else:
+            candidate = action.content
+        fits = len(candidate) <= self.scratchpad_max_chars
+        # Commit the write (and mirror it to the debug file, if any) only when it fits.
+        if fits:
+            self._scratchpad = candidate
+            self._state.scratchpad_chars = len(candidate)
+            self._persist_debug_scratchpad()
+        return self._observation(
+            message="Scratchpad updated." if fits else "Scratchpad write rejected: notebook size limit exceeded.",
+            tool=action.tool,
+            tool_success=fits,
+            reward_breakdown=self._empty_breakdown(self._progress_potential()),
+        )
+
+    def _handle_act(self, action: ActAction) -> HeroObservation:  # run one game command and score the turn
+        assert self._session is not None
+        parsed_command = parse_cli_command(action.command)
+        self._state.last_command = parsed_command.normalized_command or action.command  # raw text is kept when parsing failed
+        if not parsed_command.valid or parsed_command.normalized_command is None:
+            breakdown = self._empty_breakdown(self._progress_potential())
+            breakdown.syntax_penalty = _SYNTAX_PENALTY  # rejected by the grammar: the session is never stepped
+            return self._observation(
+                message=parsed_command.error or "That command does not match the strict CLI grammar.",
+                tool=action.tool,
+                tool_success=False,
+                reward_breakdown=breakdown,
+            )
+        # Snapshot progress and world state before stepping so the turn's effect can be measured afterwards.
+        potential_before = self._progress_potential()
+        fingerprint_before = self._session.state_fingerprint()
+        room_before = self._session.current_room_id
+        try:
+            turn = self._session.step(parsed_command.normalized_command)
+        except DMInterfaceError:  # scored like a syntax error
+            breakdown = self._empty_breakdown(potential_before)
+            breakdown.syntax_penalty = _SYNTAX_PENALTY
+            return self._observation(
+                message="The interface could not interpret that action.",
+                tool=action.tool,
+                tool_success=False,
+                reward_breakdown=breakdown,
+            )
+        # The turn ran: sync counters, then assemble the reward terms.
+        tool_success = self._turn_succeeded(turn.game_state_delta)
+        self._state.game_steps_taken = self._session.steps_taken
+        self._session.recent_normalized_commands.append(parsed_command.normalized_command)
+        potential_after = self._progress_potential()
+        breakdown = self._empty_breakdown(potential_before)
+        breakdown.progress_potential_after = potential_after
+        breakdown.dense_progress_reward = _DENSE_PROGRESS_SCALE * max(0.0, potential_after - potential_before)  # only gains are rewarded
+        if not tool_success:
+            breakdown.invalid_action_penalty = _INVALID_ACTION_PENALTY
+        if self._is_wrong_submit(turn.game_state_delta):
+            breakdown.wrong_submit_penalty = _WRONG_SUBMIT_PENALTY  # applies in addition to the invalid-action penalty
+        if self._repeat_noop(parsed_command.normalized_command, fingerprint_before, room_before):
+            breakdown.repeat_noop_penalty = _REPEAT_NOOP_PENALTY
+        return self._observation(
+            message=turn.observation,
+            tool=action.tool,
+            tool_success=tool_success,
+            reward_breakdown=breakdown,
+        )
+
+    def _update_remaining_counters(self) -> None:  # refresh both remaining budgets, never below zero
+        self._state.tool_calls_remaining = max(self._max_tool_calls - self._state.tool_calls_total, 0)
+        self._state.game_steps_remaining = max(self._max_game_steps - self._state.game_steps_taken, 0)
+
+    def _turn_succeeded(self, delta: dict[str, Any]) -> bool:
+        """Report whether a session turn counts as successful, based on its state delta."""
+        if delta.get("wrapper") == "submit_rejected":
+            return False
+        # Without an explicit "succeeded" entry the turn is treated as a success.
+        return bool(delta.get("succeeded", True))
+
+    def _observation(  # finalize a step: settle terminal status, total the reward, update stats, build the observation
+        self,
+        *,
+        message: str,
+        tool: str,
+        tool_success: bool,
+        reward_breakdown: HeroRewardBreakdown,  # mutated in place (terminal reward and total are filled in here)
+    ) -> HeroObservation:
+        assert self._session is not None
+        done = False
+        won: bool | None = None
+        terminal_reason: str | None = None
+        # Terminal checks in priority order: win, session ended, game-step budget, tool-call budget.
+        if self._session.player_won:
+            self._state.status = "won"
+            done = True
+            won = True
+            reward_breakdown.base_terminal_reward = 1.0  # a win leaves terminal_reason as None
+        elif self._session.done:
+            self._state.status = "lost"
+            done = True
+            won = False
+            terminal_reason = "session_ended"
+        elif self._state.game_steps_taken >= self._max_game_steps:
+            self._state.status = "timed_out"
+            done = True
+            won = False
+            terminal_reason = "game_step_budget_exhausted"
+        elif self._state.tool_calls_total >= self._max_tool_calls:
+            self._state.status = "timed_out"
+            done = True
+            won = False
+            terminal_reason = "tool_budget_exhausted"
+        # The total is the plain sum of the terminal reward, the dense reward, and the four penalty terms.
+        reward_breakdown.total_reward = (
+            reward_breakdown.base_terminal_reward
+            + reward_breakdown.dense_progress_reward
+            + reward_breakdown.syntax_penalty
+            + reward_breakdown.invalid_action_penalty
+            + reward_breakdown.repeat_noop_penalty
+            + reward_breakdown.wrong_submit_penalty
+        )
+        self._update_remaining_counters()
+        aux_signals = self._progress_signals()
+        self._accumulate_episode_stats(reward_breakdown, won is True)
+        # The observation passes through _apply_transform before it is returned.
+        observation = self._apply_transform(
+            HeroObservation(
+                message=message,
+                reward=reward_breakdown.total_reward,
+                done=done,
+                won=won,
+                tool=tool,
+                tool_success=tool_success,
+                terminal_reason=terminal_reason,
+                reward_breakdown=reward_breakdown,
+                aux_signals=aux_signals,
+            )
+        )
+        return observation
+
+    def _prepare_debug_dir(self) -> None:
+        """Create ``<artifacts_dir>/hero_debug`` when debugging is on; otherwise disable debug output."""
+        enabled = self.debug and self._compiled is not None
+        self._debug_dir = self._compiled.artifacts_dir / "hero_debug" if enabled else None
+        if self._debug_dir is not None:
+            self._debug_dir.mkdir(parents=True, exist_ok=True)
+            self._persist_debug_scratchpad()
+
+    def _persist_debug_scratchpad(self) -> None:
+        """Mirror the scratchpad to ``scratchpad.txt`` in the debug directory, if one is active."""
+        if self._debug_dir is not None:
+            self._debug_dir.joinpath("scratchpad.txt").write_text(self._scratchpad, encoding="utf-8")
+
+    def _progress_signals(self) -> HeroAuxSignals:
+        """Measure episode progress as per-category completion fractions.
+
+        Each fraction compares what the session has achieved so far with the
+        total the compiled world defines for that category.
+        """
+        assert self._session is not None
+        assert self._compiled is not None
+        session, compiled = self._session, self._compiled
+        # Only location and junction nodes count as rooms.
+        rooms = {node.id for node in compiled.world.nodes if node.type in {"location", "junction"}}
+        gates = {e.door_node_id for e in compiled.world.edges if e.type == "locked_passage" and e.door_node_id}
+        clues = {clue.id for clue in compiled.world.clues}
+        # Ready to answer: the world has clues, the guardian was consulted, and every clue is discovered.
+        ready = bool(clues) and session.consulted_guardian and session.discovered_clues == clues
+        return HeroAuxSignals(
+            visited_room_progress=_fraction(len(session.visited_nodes & rooms), len(rooms)),
+            clue_progress=_fraction(len(session.discovered_clues), len(clues)),
+            locked_gate_progress=_fraction(len(session.unlocked_doors), len(gates)),
+            trade_progress=_fraction(len(session.traded_npcs), len(compiled.npc_trade_map)),
+            recipe_progress=_fraction(len(session.completed_recipe_outputs), len(compiled.world.recipes)),
+            use_effect_progress=_fraction(len(session.completed_use_targets), len(compiled.use_effects)),
+            guardian_consulted_progress=1.0 if session.consulted_guardian else 0.0,
+            answer_ready_progress=float(ready),
+        )
+
+    def _progress_potential(self) -> float:
+        """Collapse the progress signals into one weighted score.
+
+        The result is clamped to the closed interval [0.0, 1.0].
+        """
+        s = self._progress_signals()
+        raw = (
+            0.10 * s.visited_room_progress + 0.35 * s.clue_progress
+            + 0.10 * s.locked_gate_progress + 0.10 * s.trade_progress
+            + 0.10 * s.recipe_progress + 0.15 * s.use_effect_progress
+            + 0.05 * s.guardian_consulted_progress + 0.05 * s.answer_ready_progress
+        )
+        return min(1.0, max(0.0, raw))
+
+    def _empty_breakdown(self, potential: float) -> HeroRewardBreakdown:
+        """Return a breakdown with only the before/after potential fields populated, both set to ``potential``."""
+        breakdown = HeroRewardBreakdown(progress_potential_before=potential, progress_potential_after=potential)
+        # Callers fill in penalties and rewards on the returned object.
+        return breakdown
+
+    def _repeat_noop(self, command: str, fingerprint_before: str, room_before: str) -> bool:
+        """Track state-preserving commands and flag when the last three are identical.
+
+        A command that changes the room or the state fingerprint clears the
+        history; the history holds at most three entries.
+        """
+        assert self._session is not None
+        after = (self._session.state_fingerprint(), self._session.current_room_id)
+        if after != (fingerprint_before, room_before):
+            self._recent_noop_signatures.clear()
+            return False
+        self._recent_noop_signatures.append((command, after[1], after[0]))
+        history = self._recent_noop_signatures
+        return len(history) == 3 and len(set(history)) == 1
+
+    @staticmethod
+    def _is_wrong_submit(delta: dict[str, Any]) -> bool:  # True only for a submit rejected as a wrong answer
+        return (delta.get("wrapper"), delta.get("reason")) == ("submit_rejected", "wrong_answer")
+
+    def _accumulate_episode_stats(self, breakdown: HeroRewardBreakdown, player_won: bool) -> None:
+        """Fold one step's reward breakdown into the running episode statistics."""
+        stats = self._episode_stats
+        stats.player_won = player_won or stats.player_won
+        stats.total_reward += breakdown.total_reward
+        stats.dense_return += breakdown.dense_progress_reward
+        for term in ("syntax_penalty", "invalid_action_penalty", "repeat_noop_penalty", "wrong_submit_penalty"):
+            setattr(stats, f"{term}_total", getattr(stats, f"{term}_total") + getattr(breakdown, term))
+        # Step and tool-call counters are copied from state rather than accumulated.
+        stats.steps_taken, stats.tool_calls_total = self._state.game_steps_taken, self._state.tool_calls_total
+
+
+def _fraction(done: int, total: int) -> float:
+    """Return ``done / total`` capped at 1.0, or 0.0 when ``total`` is not positive."""
+    ratio = done / total if total > 0 else 0.0
+    return min(1.0, ratio)
diff --git a/agents/hero/policy.py b/agents/hero/policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3f4cc2f9e6881e9d88acf6b007a88f1ca4c1570
--- /dev/null
+++ b/agents/hero/policy.py
@@ -0,0 +1,157 @@
+from __future__ import annotations
+
+from typing import Literal, Protocol
+
+from pydantic import BaseModel
+
+from agents.shared.llm_client import StructuredModelClient
+from agents.shared.model_schema import ModelMessage, StrictModel
+
+from .cli import parse_cli_command
+from .prompt import format_hero_system_prompt, format_hero_turn_prompt
+from .schema import ActAction, HeroAction, HeroObservation, HeroState, validate_hero_action
+
+
+class HeroPolicyError(RuntimeError):
+    """Raised when the hero policy cannot produce a usable action."""
+
+
+class HeroPolicy(Protocol):
+    """Structural interface for a hero policy that records a per-turn trace."""
+
+    trace_events: list["HeroTraceEvent"]
+    last_error: str | None
+
+    def reset(self) -> None:
+        """Clear per-episode state before a new run."""
+
+    def next_action(self, observation: HeroObservation, state: HeroState, scratchpad: str) -> HeroAction:
+        """Choose the next action.
+
+        Receives the latest observation, the environment state, and the notebook text.
+        """
+
+
+class HeroActionPayload(BaseModel):  # Flat action shape requested from the model; refined by validate_hero_action.
+    tool: Literal["act", "scratchpad_read", "scratchpad_write"]
+    command: str | None = None  # used by the "act" tool
+    mode: Literal["append", "replace"] | None = None  # used by the "scratchpad_write" tool
+    content: str | None = None  # used by the "scratchpad_write" tool
+
+
+class HeroActionResponse(BaseModel):  # Top-level structured response: a single "action" object.
+    action: HeroActionPayload
+
+
+class HeroTraceEvent(StrictModel):  # One entry per next_action call that returns or finally fails.
+    turn_index: int  # position of this entry within trace_events
+    observation: str  # observation.message shown to the model
+    scratchpad: str
+    state: dict[str, object]  # HeroState dumped in JSON mode
+    action: dict[str, object] | None = None  # left as None when no valid action was produced
+    repair_count: int = 0  # zero-based attempt index at which the turn ended
+    validation_error: str | None = None  # normalized error text from the final failed attempt
+
+
+class HeroLLMPolicy:
+    """Hero policy that asks a structured-output model client for one action per turn."""
+
+    def __init__(
+        self,
+        client: StructuredModelClient,
+        *,
+        model_name: str,
+        temperature: float = 0.1,
+        max_output_tokens: int = 256,
+        max_repair_attempts: int = 1,
+    ) -> None:
+        """Store the client and generation settings; start with an empty trace."""
+        self.client = client
+        self.model_name = model_name
+        self.temperature = temperature
+        self.max_output_tokens = max_output_tokens
+        self.max_repair_attempts = max_repair_attempts
+        self.trace_events: list[HeroTraceEvent] = []
+        self.last_error: str | None = None
+
+    def reset(self) -> None:
+        """Discard the recorded trace and any remembered error."""
+        self.trace_events = []
+        self.last_error = None
+
+    def next_action(self, observation: HeroObservation, state: HeroState, scratchpad: str) -> HeroAction:
+        """Return a validated action, re-prompting with the error text after a failure.
+
+        Makes up to ``max_repair_attempts + 1`` model calls. A success, or the final
+        failure, appends exactly one trace event; intermediate failures append none.
+
+        Raises:
+            HeroPolicyError: when no attempt produced a valid action.
+        """
+        feedback: str | None = None
+        for attempt in range(self.max_repair_attempts + 1):
+            try:
+                reply = self.client.generate_structured(
+                    self._messages(observation, state, scratchpad, feedback),
+                    HeroActionResponse,
+                    model_name=self.model_name,
+                    temperature=self.temperature,
+                    max_output_tokens=self.max_output_tokens,
+                )
+                payload = reply.action.model_dump(mode="json", exclude_none=True)
+                action = validate_hero_action(payload)
+                if isinstance(action, ActAction):
+                    # World commands are re-emitted in the CLI parser's normalized form.
+                    parsed = parse_cli_command(action.command)
+                    if not parsed.valid or parsed.normalized_command is None:
+                        raise ValueError(parsed.error or "Invalid strict CLI command.")
+                    action = ActAction(command=parsed.normalized_command)
+                self._record(observation, state, scratchpad, attempt, action=action.model_dump(mode="json"))
+                self.last_error = None
+                return action
+            except Exception as exc:
+                feedback = self._normalize_error(exc)
+                # Attempts remain: loop again with the error text fed back to the model.
+                if attempt < self.max_repair_attempts:
+                    continue
+                self.last_error = feedback
+                self._record(observation, state, scratchpad, attempt, validation_error=feedback)
+                raise HeroPolicyError(feedback) from exc
+        raise HeroPolicyError("Hero policy failed without a usable action.")
+
+    def _record(
+        self, observation: HeroObservation, state: HeroState, scratchpad: str, attempt: int, **outcome: object
+    ) -> None:
+        """Append one trace event; ``outcome`` carries ``action`` or ``validation_error``."""
+        self.trace_events.append(
+            HeroTraceEvent(
+                turn_index=len(self.trace_events),
+                observation=observation.message,
+                scratchpad=scratchpad,
+                state=state.model_dump(mode="json"),
+                repair_count=attempt,
+                **outcome,
+            )
+        )
+
+    def _messages(
+        self, observation: HeroObservation, state: HeroState, scratchpad: str, repair_error: str | None
+    ) -> list[ModelMessage]:
+        """Build the system and user messages, appending repair guidance when given."""
+        user_text = format_hero_turn_prompt(observation.message, state, scratchpad)
+        if repair_error is not None:
+            user_text = (
+                f"{user_text}"
+                "\nThe previous response did not match the action schema.\n"
+                f"Validation error: {repair_error}\n"
+                "Return one corrected action only.\n"
+            )
+        # The system prompt is rebuilt on every call from the state's title and budgets.
+        system_text = format_hero_system_prompt(state.world_title, state.max_game_steps, state.max_tool_calls)
+        return [ModelMessage(role="system", content=system_text), ModelMessage(role="user", content=user_text)]
+
+    @staticmethod
+    def _normalize_error(exc: Exception) -> str:
+        """Collapse whitespace in the message; fall back to the exception class name."""
+        collapsed = " ".join(str(exc).split())
+        return collapsed if collapsed else exc.__class__.__name__
diff --git a/agents/hero/prompt.py b/agents/hero/prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f67ff913c3c22805bf717cf28497ef8ad910602
--- /dev/null
+++ b/agents/hero/prompt.py
@@ -0,0 +1,134 @@
+from __future__ import annotations
+
+from .schema import HeroState
+
+
+HERO_SYSTEM_PROMPT = """You are the hero exploring a living dungeon.
+
+You can only act through tools.
+
+Rules:
+- Use `act` for any in-world action with one strict parser-style CLI command.
+- Use `scratchpad_read` and `scratchpad_write` to manage your own notebook.
+- Track rooms, objects, clues, hypotheses, and failed attempts in the notebook.
+- Do not assume the world is fair in obvious ways; verify.
+- Do not expect command hints from the environment. Use `look` and `inventory` when needed.
+- Prefer systematic play: open visible containers and doors, take portable items, read text, talk to NPCs, and backtrack when blocked.
+- When a puzzle reveals a clue, record it immediately.
+- Do not submit an answer until you have enough evidence and the guardian is ready.
+- Winning requires gathering evidence and then answering the guardian correctly.
+- Keep your notebook concise and update it when the world changes.
+- Commands must be lowercase only, with no articles, no markdown, and no conversational text.
+- Allowed command grammar:
+  look
+  inventory
+  wait
+  north|south|east|west|up|down|in|out
+  go north|go south|go east|go west|go up|go down|go in|go out
+  open <object>
+  read <object>
+  talk <npc>
+  examine <object>
+  look in <object>
+  take <item>
+  take <item> from <container>
+  unlock <door> with <key>
+  use <item> on <target>
+  combine <item_a> with <item_b>
+  give <item> to <npc>
+  submit <answer>
+- Example valid commands:
+  open entry chest
+  take brass key from entry chest
+  unlock iron door with brass key
+  east
+  use torch on ash mural
+  talk stone guardian
+  submit mira
+- Return JSON only. Never add prose, markdown fences, or explanations.
+- Valid response shapes:
+  {"action":{"tool":"act","command":"look"}}
+  {"action":{"tool":"scratchpad_read"}}
+  {"action":{"tool":"scratchpad_write","mode":"append","content":"room notes"}}
+"""  # Used by format_hero_system_prompt; asks for JSON-only replies in the shapes listed above.
+
+HERO_GRPO_SYSTEM_PROMPT = """You are the hero exploring a living dungeon.
+
+You can only act through tool calls.
+
+Rules:
+- Call exactly one tool for each turn.
+- Use `act` for any in-world action with one strict parser-style CLI command.
+- Use `scratchpad_read` and `scratchpad_write` to manage your own notebook.
+- Track rooms, objects, clues, hypotheses, and failed attempts in the notebook.
+- Do not assume the world is fair in obvious ways; verify.
+- Do not expect command hints from the environment. Use `look` and `inventory` when needed.
+- Prefer systematic play: open visible containers and doors, take portable items, read text, talk to NPCs, and backtrack when blocked.
+- When a puzzle reveals a clue, record it immediately.
+- Do not submit an answer until you have enough evidence and the guardian is ready.
+- Winning requires gathering evidence and then answering the guardian correctly.
+- Keep your notebook concise and update it when the world changes.
+- Commands must be lowercase only, with no articles, no markdown, and no conversational text.
+- Allowed command grammar:
+  look
+  inventory
+  wait
+  north|south|east|west|up|down|in|out
+  go north|go south|go east|go west|go up|go down|go in|go out
+  open <object>
+  read <object>
+  talk <npc>
+  examine <object>
+  look in <object>
+  take <item>
+  take <item> from <container>
+  unlock <door> with <key>
+  use <item> on <target>
+  combine <item_a> with <item_b>
+  give <item> to <npc>
+  submit <answer>
+- Example valid commands:
+  open entry chest
+  take brass key from entry chest
+  unlock iron door with brass key
+  east
+  use torch on ash mural
+  talk stone guardian
+  submit mira
+- Do not write prose, plans, or plain JSON action objects.
+- The runtime provides the tool schema; emit a tool call only.
+"""  # Used by format_hero_grpo_system_prompt; same rules, but asks for tool calls instead of JSON.
+
+
+def format_hero_system_prompt(world_title: str, max_game_steps: int, max_tool_calls: int) -> str:
+    """Return HERO_SYSTEM_PROMPT followed by a short per-episode footer.
+
+    The footer lists the world title, the game-step budget, and the tool-call budget.
+    """
+    budgets = f"Game-step budget: {max_game_steps}\nTotal tool-call budget: {max_tool_calls}\n"
+    return f"{HERO_SYSTEM_PROMPT}\n\nWorld: {world_title}\n{budgets}"
+
+
+def format_hero_grpo_system_prompt(world_title: str, max_game_steps: int, max_tool_calls: int) -> str:
+    """Return HERO_GRPO_SYSTEM_PROMPT followed by a short per-episode footer.
+
+    The footer lists the world title, the game-step budget, and the tool-call budget.
+    """
+    budgets = f"Game-step budget: {max_game_steps}\nTotal tool-call budget: {max_tool_calls}\n"
+    return f"{HERO_GRPO_SYSTEM_PROMPT}\n\nWorld: {world_title}\n{budgets}"
+
+
+def format_hero_turn_prompt(message: str, state: HeroState, scratchpad: str) -> str:
+    """Render the per-turn user prompt: observation, budget counters, last command, notebook."""
+    sections = [
+        "Choose exactly one next tool call.",
+        f"Observation:\n{message.strip() or '<empty>'}\n",
+        f"World: {state.world_title}",
+        f"Status: {state.status}",
+        f"Game steps taken: {state.game_steps_taken}/{state.max_game_steps}",
+        f"Tool calls used: {state.tool_calls_total}/{state.max_tool_calls}",
+        f"Game steps remaining: {state.game_steps_remaining}\nTool calls remaining: {state.tool_calls_remaining}",
+        f"Last command: {state.last_command or '<none>'}\n",
+        f"Scratchpad:\n{scratchpad or '<empty>'}",
+    ]
+    return "\n".join(sections) + "\n"
diff --git a/agents/hero/runner.py b/agents/hero/runner.py
new file mode 100644
index 0000000000000000000000000000000000000000..d01cc0076a68ee52abc713d0c8ab6107834a49e5
--- /dev/null
+++ b/agents/hero/runner.py
@@ -0,0 +1,92 @@
+from __future__ import annotations
+
+from collections.abc import Iterable
+from typing import Protocol
+
+from agents.master.session import EpisodeSession
+
+from .env import HeroEnvironment
+from .policy import HeroPolicyError
+from .schema import HeroAction, HeroEpisodeStats, HeroObservation, HeroState
+
+
+class ToolCallingPolicy(Protocol):
+    """Structural interface the runner drives: reset once, then ask for actions."""
+
+    def reset(self) -> None:
+        """Restore the policy to its starting state."""
+
+    # Actions may be typed models or plain dicts; HeroRunner passes them to env.step unchanged.
+    def next_action(
+        self, observation: HeroObservation, state: HeroState, scratchpad: str
+    ) -> HeroAction | dict[str, object] | None:
+        """Return the next action, or ``None`` to end the run early."""
+
+
+class ScriptedToolCallingPolicy:
+    """Policy that replays a fixed list of actions in order."""
+
+    def __init__(self, actions: Iterable[HeroAction | dict[str, object]]) -> None:
+        """Snapshot ``actions`` so the script can be replayed after ``reset``."""
+        self._initial_actions = [*actions]
+        self._remaining_actions = [*self._initial_actions]
+
+    def reset(self) -> None:
+        """Rewind to the first scripted action."""
+        self._remaining_actions = [*self._initial_actions]
+
+    def next_action(
+        self, observation: HeroObservation, state: HeroState, scratchpad: str
+    ) -> HeroAction | dict[str, object] | None:
+        """Pop the next scripted action, ignoring the inputs; ``None`` once exhausted."""
+        del observation, state, scratchpad
+        return self._remaining_actions.pop(0) if self._remaining_actions else None
+
+
+class HeroRunner:
+    """Drives one hero episode by alternating policy decisions and environment steps."""
+
+    def __init__(
+        self,
+        policy: ToolCallingPolicy,
+        *,
+        max_game_steps: int | None = 40,
+        max_tool_calls: int | None = None,
+        scratchpad_max_chars: int = 8000,
+        debug: bool = False,
+    ) -> None:
+        """Store the policy and limits; result fields start as ``None``."""
+        self.policy = policy
+        self.max_game_steps = max_game_steps
+        self.max_tool_calls = max_tool_calls
+        self.scratchpad_max_chars = scratchpad_max_chars
+        self.debug = debug
+        self.last_error: str | None = None
+        self.last_observation: HeroObservation | None = None
+        self.episode_stats: HeroEpisodeStats | None = None
+
+    def run(self, session: EpisodeSession, max_steps: int) -> None:
+        """Play ``session`` until done, a policy error, or the policy returns ``None``."""
+        self.last_error = self.last_observation = self.episode_stats = None
+        self.policy.reset()
+        # The caller's max_steps is capped by this runner's own limit when it has one.
+        step_cap = max_steps if self.max_game_steps is None else min(max_steps, self.max_game_steps)
+        env = HeroEnvironment.from_session(
+            session,
+            max_game_steps=step_cap,
+            max_tool_calls=self.max_tool_calls,
+            scratchpad_max_chars=self.scratchpad_max_chars,
+            debug=self.debug,
+        )
+        observation = self.last_observation = env.reset()
+        while not observation.done:
+            try:
+                action = self.policy.next_action(observation, env.state, env.scratchpad)
+            except HeroPolicyError as exc:
+                self.last_error = str(exc)
+                break
+            if action is None:
+                break
+            observation = self.last_observation = env.step(action).observation
+        # Normal completion and both early exits publish the environment's statistics.
+        self.episode_stats = env.episode_stats
diff --git a/agents/hero/schema.py b/agents/hero/schema.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc7b48585bd1a31e6cd0d19afe44fab2bbf3642a
--- /dev/null
+++ b/agents/hero/schema.py
@@ -0,0 +1,103 @@
+from __future__ import annotations
+
+from typing import Any
+from typing import Annotated, Literal, TypeAlias
+
+from pydantic import Field, TypeAdapter
+
+from agents.shared.openenv_compat import Action, Observation, State
+from agents.shared.model_schema import StrictModel
+
+
+class ActAction(Action):  # In-world action carrying one CLI-style command string.
+    tool: Literal["act"] = "act"  # discriminator value for the HeroAction union
+    command: str
+
+
+class ScratchpadReadAction(Action):  # Notebook read request; carries no payload.
+    tool: Literal["scratchpad_read"] = "scratchpad_read"  # discriminator value for the HeroAction union
+
+
+class ScratchpadWriteAction(Action):  # Notebook write request; mode and content are both required.
+    tool: Literal["scratchpad_write"] = "scratchpad_write"  # discriminator value for the HeroAction union
+    mode: Literal["append", "replace"]
+    content: str
+
+
+class HeroServerAction(Action):  # Flat all-optional shape; validate_hero_action narrows it to a concrete action.
+    tool: Literal["act", "scratchpad_read", "scratchpad_write"]
+    command: str | None = None  # required by ActAction after narrowing
+    mode: Literal["append", "replace"] | None = None  # required by ScratchpadWriteAction after narrowing
+    content: str | None = None  # required by ScratchpadWriteAction after narrowing
+
+
+HeroAction: TypeAlias = Annotated[
+    ActAction | ScratchpadReadAction | ScratchpadWriteAction,
+    Field(discriminator="tool"),  # the "tool" literal selects the concrete action class
+]
+
+HERO_ACTION_ADAPTER = TypeAdapter(HeroAction)  # built once at import; used by validate_hero_action
+
+
+def validate_hero_action(value: HeroAction | HeroServerAction | dict[str, Any]) -> HeroAction:
+    """Coerce an action model or plain dict into the matching concrete hero action."""
+    payload = value.model_dump(mode="json", exclude_none=True) if isinstance(value, Action) else value
+    return HERO_ACTION_ADAPTER.validate_python(payload)
+
+
+class HeroObservation(Observation):  # Observation the hero runner receives from env.reset() and env.step().
+    message: str = ""  # text rendered into the turn prompt's Observation section
+    won: bool | None = None
+    tool: str | None = None
+    tool_success: bool | None = None
+    terminal_reason: str | None = None
+    reward_breakdown: "HeroRewardBreakdown | None" = None  # forward reference; class is defined below
+    aux_signals: "HeroAuxSignals | None" = None  # forward reference; class is defined below
+
+
+class HeroAuxSignals(StrictModel):  # Per-category progress signals; every field defaults to 0.0.
+    visited_room_progress: float = 0.0  # NOTE(review): presumably fractions in [0, 1] -- confirm in the env
+    clue_progress: float = 0.0
+    locked_gate_progress: float = 0.0
+    trade_progress: float = 0.0
+    recipe_progress: float = 0.0
+    use_effect_progress: float = 0.0
+    guardian_consulted_progress: float = 0.0
+    answer_ready_progress: float = 0.0
+
+
+class HeroRewardBreakdown(StrictModel):  # Reward components that the env folds into HeroEpisodeStats.
+    base_terminal_reward: float = 0.0
+    dense_progress_reward: float = 0.0  # accumulated as HeroEpisodeStats.dense_return
+    syntax_penalty: float = 0.0
+    invalid_action_penalty: float = 0.0
+    repeat_noop_penalty: float = 0.0
+    wrong_submit_penalty: float = 0.0
+    total_reward: float = 0.0  # NOTE(review): presumably combines the components above -- confirm in the env
+    progress_potential_before: float = 0.0
+    progress_potential_after: float = 0.0
+
+
+class HeroEpisodeStats(StrictModel):  # Running per-episode totals; exposed by HeroRunner.episode_stats.
+    player_won: bool = False
+    total_reward: float = 0.0
+    dense_return: float = 0.0  # averaged across episodes as mean_dense_return
+    syntax_penalty_total: float = 0.0
+    invalid_action_penalty_total: float = 0.0  # averaged as mean_invalid_action_penalty
+    repeat_noop_penalty_total: float = 0.0  # averaged as mean_repeat_noop_penalty
+    wrong_submit_penalty_total: float = 0.0
+    steps_taken: int = 0
+    tool_calls_total: int = 0
+
+
+class HeroState(State):  # Budget counters and status of the hero environment; rendered in the turn prompt.
+    game_steps_taken: int = 0
+    tool_calls_total: int = 0
+    max_game_steps: int = 0  # also quoted in the system prompt footer
+    max_tool_calls: int = 0  # also quoted in the system prompt footer
+    game_steps_remaining: int = 0
+    tool_calls_remaining: int = 0
+    status: Literal["ready", "running", "won", "lost", "timed_out", "error"] = "ready"
+    world_title: str = ""
+    last_command: str | None = None  # shown as <none> in the turn prompt when unset or empty
+    scratchpad_chars: int = 0
diff --git a/agents/loop/__init__.py b/agents/loop/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8590edb0e43d527fc4992adfbb00c923edec962
--- /dev/null
+++ b/agents/loop/__init__.py
@@ -0,0 +1,11 @@
+"""Closed-loop orchestration for hero and dungeon master policies."""
+
+from .runner import ClosedLoopRunner
+from .schema import ClosedLoopEpisodeArtifacts, ClosedLoopEpisodeRecord, ClosedLoopEpisodeSummary
+
+__all__ = [
+    "ClosedLoopEpisodeArtifacts",
+    "ClosedLoopEpisodeRecord",
+    "ClosedLoopEpisodeSummary",
+    "ClosedLoopRunner",
+]
diff --git a/agents/loop/__main__.py b/agents/loop/__main__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b0691f6f2d6ff840893c9cc4f2b83a9bd107634
--- /dev/null
+++ b/agents/loop/__main__.py
@@ -0,0 +1,92 @@
+from __future__ import annotations
+
+import argparse
+import json
+from pathlib import Path
+
+from agents.hero.policy import HeroLLMPolicy
+from agents.master.interface import DEFAULT_GEMINI_MODEL
+from agents.master.env import DMEnvironment
+from agents.master.policy import DungeonMasterLLMPolicy
+from agents.shared.runtime import (
+    build_interface_adapter,
+    create_structured_client,
+    resolve_interface_config,
+    resolve_structured_client_config,
+)
+
+from .runner import ClosedLoopRunner
+
+
+def main(argv: list[str] | None = None) -> int:  # CLI entry point; returns 0 after all episodes have run.
+    parser = argparse.ArgumentParser(description="Closed-loop dungeon master and hero harness")
+    parser.add_argument("--episodes", type=int, default=1)
+    parser.add_argument("--seed", type=int)  # base seed; episode i runs with seed + i
+    parser.add_argument("--target-ratio", type=float)
+    parser.add_argument("--dm-provider", choices=["gemini", "hf_local"])
+    parser.add_argument("--dm-model")
+    parser.add_argument("--dm-adapter-path")
+    parser.add_argument("--hero-provider", choices=["gemini", "hf_local"])
+    parser.add_argument("--hero-model")
+    parser.add_argument("--hero-adapter-path")
+    parser.add_argument("--interface-provider", choices=["strict", "simple", "gemini"])
+    parser.add_argument("--interface-model", default=DEFAULT_GEMINI_MODEL)
+    parser.add_argument("--interface-narrate", action="store_true")
+    parser.add_argument(
+        "--translate-corporate-env",
+        action="store_true",
+        help="Rewrite hero-facing observations into a corporate app metaphor and map translated commands back through Gemini.",
+    )
+    parser.add_argument("--artifacts-root", type=Path)
+    parser.add_argument("--dm-artifacts-root", type=Path)
+    parser.add_argument("--dm-repair-attempts", type=int, default=2)  # retries allowed after the first attempt
+    parser.add_argument("--hero-max-game-steps", type=int, default=40)
+    parser.add_argument("--hero-max-tool-calls", type=int, default=80)
+    parser.add_argument("--live", action="store_true")
+    parser.add_argument("--live-dir", type=Path)
+    args = parser.parse_args(argv)
+
+    dm_config = resolve_structured_client_config(
+        "dm",
+        provider=args.dm_provider,
+        model_name=args.dm_model,
+        adapter_path=args.dm_adapter_path,
+    )
+    hero_config = resolve_structured_client_config(
+        "hero",
+        provider=args.hero_provider,
+        model_name=args.hero_model,
+        adapter_path=args.hero_adapter_path,
+    )
+    interface_config = resolve_interface_config(
+        provider=args.interface_provider,
+        model_name=args.interface_model,
+        narrate_observations=args.interface_narrate,
+        translation_mode="corporate_app" if args.translate_corporate_env else None,
+    )
+    runner = ClosedLoopRunner(
+        dm_env=DMEnvironment(artifacts_root=args.dm_artifacts_root),
+        dm_policy=DungeonMasterLLMPolicy(create_structured_client(dm_config), model_name=dm_config.model_name),
+        hero_policy=HeroLLMPolicy(create_structured_client(hero_config), model_name=hero_config.model_name),
+        artifacts_root=args.artifacts_root,
+        live_dir=args.live_dir,
+        max_dm_repair_attempts=args.dm_repair_attempts,
+        hero_runner_kwargs={
+            "max_game_steps": args.hero_max_game_steps,
+            "max_tool_calls": args.hero_max_tool_calls,
+        },
+        hero_interface_adapter=build_interface_adapter(interface_config),
+    )
+    records = []
+    for index in range(args.episodes):
+        seed = None if args.seed is None else args.seed + index  # unseeded runs stay unseeded
+        record = runner.run_episode(seed=seed, target_ratio=args.target_ratio, live=args.live)
+        records.append(record)
+        print(json.dumps(ClosedLoopRunner.summary(record).model_dump(mode="json")))  # one JSON line per episode
+    if records:  # nothing to aggregate when --episodes is zero or negative
+        print(json.dumps(ClosedLoopRunner.aggregate(records).model_dump(mode="json")))  # final aggregate JSON line
+    return 0
+
+
+if __name__ == "__main__":  # Run only when executed as a script or via python -m.
+    raise SystemExit(main())  # main()'s return value becomes the process exit code
diff --git a/agents/loop/runner.py b/agents/loop/runner.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b24c876d898a44aa82bbab385931e6cc9b95e64
--- /dev/null
+++ b/agents/loop/runner.py
@@ -0,0 +1,253 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from agents.hero.policy import HeroPolicy
+from agents.hero.runner import HeroRunner
+from agents.master.env import DMEnvironment
+from agents.master.interface import InterfaceAdapter, StrictCliInterfaceAdapter
+from agents.master.policy import DMRepairContext, DungeonMasterPolicy, DungeonMasterPolicyError
+from agents.master.schema import DMObservation, DMRewardBreakdown, WorldDefinition
+from agents.master.snapshots import LiveObserver, LiveSnapshotWriter
+
+from .schema import (
+    ClosedLoopAggregateReport,
+    ClosedLoopEpisodeArtifacts,
+    ClosedLoopEpisodeRecord,
+    ClosedLoopEpisodeSummary,
+)
+
+DEFAULT_CLOSED_LOOP_ROOT = Path(__file__).resolve().parents[2] / ".play_runs" / "closed_loop"  # two levels above this package dir
+
+
+class ClosedLoopRunner:
+    def __init__(
+        self,
+        *,
+        dm_env: DMEnvironment,
+        dm_policy: DungeonMasterPolicy,
+        hero_policy: HeroPolicy,
+        artifacts_root: Path | None = None,
+        live_dir: Path | None = None,
+        max_dm_repair_attempts: int = 2,
+        hero_runner_kwargs: dict[str, object] | None = None,
+        hero_interface_adapter: InterfaceAdapter | None = None,
+    ) -> None:
+        """Store collaborators; only ``None`` selects a default, so an explicit empty dict is kept."""
+        self.dm_env, self.dm_policy, self.hero_policy = dm_env, dm_policy, hero_policy
+        self.artifacts_root = DEFAULT_CLOSED_LOOP_ROOT if artifacts_root is None else artifacts_root
+        self.live_dir, self.max_dm_repair_attempts = live_dir, max_dm_repair_attempts
+        default_kwargs: dict[str, object] = {"max_game_steps": 40, "max_tool_calls": 80}
+        self.hero_runner_kwargs = default_kwargs if hero_runner_kwargs is None else hero_runner_kwargs
+        adapter = StrictCliInterfaceAdapter() if hero_interface_adapter is None else hero_interface_adapter
+        self.hero_interface_adapter = adapter
+
+    def run_episode(
+        self,
+        *,
+        seed: int | None = None,
+        target_ratio: float | None = None,
+        live: bool = False,
+    ) -> ClosedLoopEpisodeRecord:  # One episode: DM generates a world, it is compiled, then the hero plays it.
+        self.dm_env.reset(seed=seed, difficulty_hint=target_ratio)
+        episode_id = self.dm_env.state.episode_id
+        if episode_id is None:
+            raise RuntimeError("DM environment did not assign an episode id.")
+        episode_dir = self.artifacts_root / episode_id
+        episode_dir.mkdir(parents=True, exist_ok=True)
+        artifacts = ClosedLoopEpisodeArtifacts.from_episode_dir(episode_dir)
+        observer = self._observer(live)
+
+        world: WorldDefinition | None = None
+        errors: list[str] = []
+        compile_attempts = 0
+        repair_context: DMRepairContext | None = None
+        previous_candidate_json: str | None = None
+        attempt_rows: list[dict[str, object]] = []
+
+        for attempt in range(1, self.max_dm_repair_attempts + 2):  # 1-based: first try plus the allowed repairs
+            compile_attempts = attempt
+            try:
+                candidate = self.dm_policy.generate_world(
+                    target_ratio=self.dm_env.state.target_ratio,
+                    repair_context=repair_context,
+                )
+                previous_candidate_json = candidate.model_dump_json(indent=2)
+                self._write_json(Path(artifacts.world_definition_path), previous_candidate_json)  # saved before compiling
+                self.dm_env.compile_world(candidate, episode_id=episode_id)
+                world = candidate
+                attempt_rows.append(
+                    {
+                        "attempt_number": attempt,
+                        "status": "compiled",
+                        "world_title": candidate.meta.title,
+                        "difficulty_target": candidate.meta.difficulty_target,
+                    }
+                )
+                break
+            except Exception as exc:  # any generation or compile failure becomes feedback for the next attempt
+                normalized_error = self._normalize_error(exc)
+                errors.append(normalized_error)
+                attempt_rows.append(
+                    {
+                        "attempt_number": attempt,
+                        "status": "failed",
+                        "error": normalized_error,
+                    }
+                )
+                repair_context = DMRepairContext(
+                    attempt_number=attempt,
+                    error_message=normalized_error,
+                    previous_candidate_json=previous_candidate_json,  # may predate this attempt if generation failed
+                )
+
+        self._write_jsonl(Path(artifacts.world_generation_attempts_path), attempt_rows)
+
+        if world is None:  # every attempt failed: record the failure and write empty trace files
+            observation = self._compile_failure_observation(errors[-1] if errors else "world compilation failed")
+            record = ClosedLoopEpisodeRecord(
+                episode_id=episode_id,
+                status="compile_failed",
+                target_ratio=self.dm_env.state.target_ratio,
+                compile_attempts=compile_attempts,
+                dm_repair_errors=errors,
+                world_definition=None,
+                declared_difficulty_target=None,
+                difficulty_target_matches_target_ratio=None,
+                observation=observation,
+                artifacts=artifacts,
+            )
+            self._persist_record(record)
+            self._write_jsonl(Path(artifacts.hero_trace_path), [])
+            self._write_jsonl(Path(artifacts.transcript_path), [])
+            return record
+
+        hero_runner = HeroRunner(policy=self.hero_policy, **self.hero_runner_kwargs)
+        previous_adapter = self.dm_env.interface_adapter
+        self.dm_env.interface_adapter = self.hero_interface_adapter  # swapped in only for this step
+        try:
+            result = self.dm_env.step(world, runner=hero_runner, observer=observer)
+        finally:
+            self.dm_env.interface_adapter = previous_adapter  # restored even if the step raises
+        observation = result.observation
+        status = "policy_error" if hero_runner.last_error else ("complete" if observation.player_won else "failed")
+        record = ClosedLoopEpisodeRecord(
+            episode_id=episode_id,
+            status=status,
+            target_ratio=self.dm_env.state.target_ratio,
+            compile_attempts=compile_attempts,
+            dm_repair_errors=errors,
+            hero_policy_error=hero_runner.last_error,
+            hero_episode_stats=hero_runner.episode_stats,
+            world_definition=world,
+            declared_difficulty_target=world.meta.difficulty_target,
+            difficulty_target_matches_target_ratio=(world.meta.difficulty_target == self.dm_env.state.target_ratio),
+            observation=observation,
+            artifacts=artifacts,
+        )
+        self._persist_record(record)
+        self._write_jsonl(
+            Path(artifacts.hero_trace_path),
+            [event.model_dump(mode="json") for event in self.hero_policy.trace_events],
+        )
+        self._write_jsonl(
+            Path(artifacts.transcript_path),
+            [turn.model_dump(mode="json") for turn in observation.episode_transcript],
+        )
+        return record
+
+    @staticmethod
+    def summary(record: ClosedLoopEpisodeRecord) -> ClosedLoopEpisodeSummary:
+        """Condense an episode record into its summary model."""
+        observed = ("reward", "player_won", "ratio", "compile_error")
+        from_observation = {name: getattr(record.observation, name) for name in observed}
+        return ClosedLoopEpisodeSummary(
+            episode_id=record.episode_id,
+            status=record.status,
+            hero_policy_error=record.hero_policy_error,
+            **from_observation,
+        )
+
+    @staticmethod
+    def aggregate(records: list[ClosedLoopEpisodeRecord]) -> ClosedLoopAggregateReport:
+        """Summarize a batch of episode records as rates and means.
+
+        Rates are fractions of ``len(records)`` and are 0.0 for an empty batch.
+        Means cover only records that carry hero episode statistics and are 0.0
+        when no record has them.
+
+        Args:
+            records: Episode records to summarize.
+
+        Returns:
+            The aggregate report for the batch.
+        """
+        episodes = len(records)
+        # Records built on the compile-failure path carry no hero statistics.
+        stats = [record.hero_episode_stats for record in records if record.hero_episode_stats is not None]
+        statuses = [record.status for record in records]
+        return ClosedLoopAggregateReport(
+            episodes=episodes,
+            compile_valid_rate=_rate(sum(status != "compile_failed" for status in statuses), episodes),
+            policy_error_rate=_rate(statuses.count("policy_error"), episodes),
+            playable_rate=_rate(sum(record.world_definition is not None for record in records), episodes),
+            solve_rate=_rate(statuses.count("complete"), episodes),
+            mean_dense_return=_mean([entry.dense_return for entry in stats]),
+            mean_invalid_action_penalty=_mean([entry.invalid_action_penalty_total for entry in stats]),
+            mean_repeat_noop_penalty=_mean([entry.repeat_noop_penalty_total for entry in stats]),
+        )
+
+    def _compile_failure_observation(self, error: str) -> DMObservation:
+        """Build the terminal, zero-reward observation used when no world compiled."""
+        target = self.dm_env.state.target_ratio
+        # Despite the "penalty" mode name, the reward recorded here is exactly 0.0.
+        zero_breakdown = DMRewardBreakdown(
+            reward_mode="compile_failure_penalty", player_won=False,
+            target_ratio=target, quality_score=0.0, reward=0.0,
+        )
+        return DMObservation(
+            player_won=False,
+            compile_error=error,
+            reward=0.0,
+            done=True,
+            reward_breakdown=zero_breakdown,
+            target_ratio_used=target,
+        )
+
+    def _observer(self, live: bool) -> LiveObserver | None:
+        """Return a snapshot writer targeting ``live_dir`` when ``live`` is set, else ``None``."""
+        writer = LiveSnapshotWriter(live_dir=self.live_dir, runner_name="hero_llm") if live else None
+        return writer
+
+    def _persist_record(self, record: ClosedLoopEpisodeRecord) -> None:  # Save the record as indented JSON.
+        self._write_json(path=Path(record.artifacts.run_record_path), payload=record.model_dump_json(indent=2))
+
+    @staticmethod
+    def _write_json(path: Path, payload: str) -> None:  # Write payload plus a trailing newline as UTF-8.
+        path.parent.mkdir(exist_ok=True, parents=True)
+        path.write_text("".join((payload, "\n")), encoding="utf-8")
+
+    @staticmethod
+    def _write_jsonl(path: Path, rows: list[dict[str, object]]) -> None:  # One JSON object per line; [] gives an empty file.
+        path.parent.mkdir(exist_ok=True, parents=True)
+        lines = [f"{json.dumps(row)}\n" for row in rows]
+        path.write_text("".join(lines), encoding="utf-8")
+
+    @staticmethod
+    def _normalize_error(exc: Exception) -> str:
+        """Keep DM policy error text verbatim; collapse whitespace for everything else."""
+        text = str(exc)
+        return text if isinstance(exc, DungeonMasterPolicyError) else " ".join(text.split()) or exc.__class__.__name__
+
+
+def _mean(values: list[float]) -> float:
+    """Return the arithmetic mean of ``values``, or 0.0 for an empty list."""
+    total = sum(values)
+    return total / len(values) if values else 0.0
+
+
+def _rate(count: int, total: int) -> float:
+    """Return ``count / total``, or 0.0 when ``total`` is not positive."""
+    has_population = total > 0
+    return count / total if has_population else 0.0
diff --git a/agents/loop/schema.py b/agents/loop/schema.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd2034ee3845166229d5b3e36e735da19148381e
--- /dev/null
+++ b/agents/loop/schema.py
@@ -0,0 +1,64 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Literal
+
+from agents.hero.schema import HeroEpisodeStats
+from agents.master.schema import DMObservation, WorldDefinition
+from agents.shared.model_schema import StrictModel
+
+
+class ClosedLoopEpisodeArtifacts(StrictModel):  # string paths of one episode's artifact files
+    episode_dir: str  # the directory itself; file names below apply when built by from_episode_dir
+    world_generation_attempts_path: str  # <episode_dir>/world_generation_attempts.jsonl
+    world_definition_path: str  # <episode_dir>/world_definition.json
+    run_record_path: str  # <episode_dir>/run_record.json
+    hero_trace_path: str  # <episode_dir>/hero_trace.jsonl
+    transcript_path: str  # <episode_dir>/transcript.jsonl
+
+    @classmethod
+    def from_episode_dir(cls, episode_dir: Path) -> "ClosedLoopEpisodeArtifacts":  # alternate constructor
+        return cls(  # every path is stored as str, not Path
+            episode_dir=str(episode_dir),
+            world_generation_attempts_path=str(episode_dir / "world_generation_attempts.jsonl"),
+            world_definition_path=str(episode_dir / "world_definition.json"),
+            run_record_path=str(episode_dir / "run_record.json"),
+            hero_trace_path=str(episode_dir / "hero_trace.jsonl"),
+            transcript_path=str(episode_dir / "transcript.jsonl"),
+        )
+
+
+class ClosedLoopEpisodeRecord(StrictModel):  # full record of one closed-loop episode
+    episode_id: str
+    status: Literal["complete", "failed", "compile_failed", "policy_error"]  # closed set of outcome labels
+    target_ratio: float
+    compile_attempts: int
+    dm_repair_errors: list[str]  # required; no default, so callers must pass a list even if empty
+    hero_policy_error: str | None = None  # the fields from here to world_definition default to None
+    hero_episode_stats: HeroEpisodeStats | None = None
+    declared_difficulty_target: float | None = None
+    difficulty_target_matches_target_ratio: bool | None = None
+    world_definition: WorldDefinition | None = None
+    observation: DMObservation  # required
+    artifacts: ClosedLoopEpisodeArtifacts  # required; holds the file paths for this episode
+
+
+class ClosedLoopEpisodeSummary(StrictModel):  # flat per-episode summary; only episode_id and status are required
+    episode_id: str
+    status: str  # plain str here, unlike the Literal used by ClosedLoopEpisodeRecord.status
+    reward: float | None = None
+    player_won: bool | None = None
+    ratio: float | None = None
+    compile_error: str | None = None
+    hero_policy_error: str | None = None
+
+
+class ClosedLoopAggregateReport(StrictModel):  # aggregate metrics over a batch of episodes; every field is required
+    episodes: int  # presumably the number of episodes aggregated - TODO confirm against the producer
+    compile_valid_rate: float  # NOTE(review): the *_rate fields look like fractions in [0, 1] - confirm
+    policy_error_rate: float
+    playable_rate: float
+    solve_rate: float
+    mean_dense_return: float  # NOTE(review): the mean_* fields look like per-episode averages - confirm
+    mean_invalid_action_penalty: float
+    mean_repeat_noop_penalty: float
diff --git a/agents/master/__init__.py b/agents/master/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..68d915f1860a5810fe19b630f2dd7c09b9214715
--- /dev/null
+++ b/agents/master/__init__.py
@@ -0,0 +1,15 @@
+"""DM environment source package."""
+
+from .policy import (
+    DMRepairContext,
+    DungeonMasterLLMPolicy,
+    DungeonMasterPolicy,
+    DungeonMasterPolicyError,
+)
+
+__all__ = [
+    "DMRepairContext",
+    "DungeonMasterLLMPolicy",
+    "DungeonMasterPolicy",
+    "DungeonMasterPolicyError",
+]
diff --git a/agents/master/__main__.py b/agents/master/__main__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2fb3861e6c1b330e1930ed00632a66d1ec669e4d
--- /dev/null
+++ b/agents/master/__main__.py
@@ -0,0 +1,5 @@
+from .main import main
+
+
+if __name__ == "__main__":  # true when run as `python -m agents.master`
+    raise SystemExit(main())  # main()'s return value becomes the process exit status
diff --git a/agents/master/base.py b/agents/master/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..11399be85b6d4e77eae7e63fa9d78f465a50d000
--- /dev/null
+++ b/agents/master/base.py
@@ -0,0 +1,84 @@
+from __future__ import annotations
+
+from contextlib import contextmanager
+import re
+import warnings
+from pathlib import Path
+
+
+MAX_NODES = 40  # upper bound on len(world.nodes)
+MAX_ITEMS = 32  # upper bound on len(world.items)
+MAX_QUEST_STEPS = 64  # upper bound on len(world.quest_chain)
+MIN_NODES = 5  # lower bound on len(world.nodes)
+MIN_QUEST_STEPS = 2  # lower bound on len(world.quest_chain)
+MIN_CLUES = 3  # lower bound on len(world.clues)
+MAX_CLUES = 5  # upper bound on len(world.clues)
+TARGET_RATIO = 1.5
+TARGET_RATIO_SIGMA = 0.4
+MAX_STEP_MULTIPLIER = 5
+INVENTORY_ID = "__inventory__"
+STORED_ID = "__stored__"
+ROOT_DIR = Path(__file__).resolve().parents[2]  # two levels above this file's directory (agents/master/)
+ARTIFACTS_ROOT = ROOT_DIR / ".artifacts" / "dm_env"  # default artifacts root for WorldCompiler
+CUSTOM_LOGIC_DIR = ROOT_DIR / "textworld_data" / "dnd" / "logic"
+CUSTOM_GRAMMAR_DIR = ROOT_DIR / "textworld_data" / "dnd" / "text_grammars"
+SUPPORTED_DIRECTIONS = ("north", "south", "east", "west", "up", "down", "in", "out")
+OPPOSITE_DIRECTION = {  # every supported direction maps to its reverse
+    "north": "south",
+    "south": "north",
+    "east": "west",
+    "west": "east",
+    "up": "down",
+    "down": "up",
+    "in": "out",
+    "out": "in",
+}
+
+GO_RE = re.compile(r"^go\((?P<target>[a-z0-9_]+)\)$")  # the patterns below match verb(arg[,arg]) over [a-z0-9_] ids
+OPEN_RE = re.compile(r"^open\((?P<target>[a-z0-9_]+)\)$")
+UNLOCK_RE = re.compile(r"^unlock\((?P<door>[a-z0-9_]+),(?P<key>[a-z0-9_]+)\)$")  # no whitespace allowed after the comma
+TAKE_RE = re.compile(r"^take\((?P<item>[a-z0-9_]+),(?P<source>[a-z0-9_]+)\)$")
+READ_RE = re.compile(r"^read\((?P<target>[a-z0-9_]+)\)$")
+USE_RE = re.compile(r"^use\((?P<item>[a-z0-9_]+),(?P<target>[a-z0-9_]+)\)$")
+COMBINE_RE = re.compile(r"^combine\((?P<item_a>[a-z0-9_]+),(?P<item_b>[a-z0-9_]+)\)$")
+GIVE_RE = re.compile(r"^give\((?P<item>[a-z0-9_]+),(?P<npc>[a-z0-9_]+)\)$")
+TALK_RE = re.compile(r"^talk\((?P<target>[a-z0-9_]+)\)$")
+SUBMIT_RE = re.compile(r"^submit\((?P<quote>[\"'])(?P<answer>.+)(?P=quote)\)$")  # answer must sit in matching quotes
+
+
+class DMCompileError(RuntimeError):
+    """Raised when a world definition is rejected during validation or compilation."""
+
+
+class DMInterfaceError(RuntimeError):
+    """Runtime error type for DM interface failures (its raise sites live outside this module)."""
+
+
+@contextmanager
+def suppress_unsupported_game_warning():
+    """Context manager that ignores "Game '...' is not fully supported." warnings.
+
+    The filter is installed inside ``warnings.catch_warnings()``, so it only applies within the ``with`` body.
+    """
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", message=r"Game '.*' is not fully supported\..*", category=Warning)
+        yield
+
+
+def normalize_snake_id(value: str, kind: str) -> str:  # returns value unchanged; kind only labels the error
+    if re.fullmatch(r"[a-z][a-z0-9_]*", value) is not None:
+        return value
+    raise DMCompileError(f"{kind} '{value}' must be snake_case.")
+
+
+def parser_safe_text(value: str) -> str:
+    """Lowercase value, keep only ASCII alphanumeric words joined by single spaces; raise DMCompileError if none remain."""
+    safe_name = " ".join(re.sub(r"[^A-Za-z0-9 ]+", " ", value).lower().split())
+    if safe_name:
+        return safe_name
+    raise DMCompileError(f"Unable to derive a parser-safe name from '{value}'.")
+
+
+def normalize_answer_text(value: str) -> str:
+    """Lowercase value and keep only ASCII alphanumeric words joined by single spaces (may return "")."""
+    return " ".join(re.sub(r"[^A-Za-z0-9 ]+", " ", value).lower().split())
diff --git a/agents/master/build.py b/agents/master/build.py
new file mode 100644
index 0000000000000000000000000000000000000000..b645ea9ba889543aa0f6f87ea2c626200bb87621
--- /dev/null
+++ b/agents/master/build.py
@@ -0,0 +1,287 @@
+from __future__ import annotations
+
+import uuid
+from collections import defaultdict
+from pathlib import Path
+from typing import Any
+
+from textworld.generator import GameMaker, GameOptions, compile_game
+from textworld.generator.data import KnowledgeBase
+
+from .base import ARTIFACTS_ROOT, DMCompileError, parser_safe_text
+from .check import validate_and_normalize
+from .graph import (
+    door_room_mapping,
+    hidden_readable_ids,
+    npc_trade_mapping,
+    produced_item_ids,
+    readable_clue_mapping,
+    recipe_mapping,
+    use_effect_mapping,
+)
+from .logic import build_grammar_dir, build_logic_dir, solver_policy, submit_command_text, write_artifacts
+from .quest import parse_quest_action, simulate_walkthrough, topological_linearize
+from .schema import CompiledWorld, WorldDefinition
+
+
+class WorldCompiler:
+    def __init__(self, artifacts_root: Path | None = None) -> None:  # None (or any falsy value) selects ARTIFACTS_ROOT
+        self.artifacts_root = artifacts_root or ARTIFACTS_ROOT  # compile() creates one sub-directory per episode here
+
+    def compile(self, world_input: WorldDefinition | dict[str, Any], episode_id: str | None = None) -> CompiledWorld:  # validate, build, compile to game.z8
+        world = validate_and_normalize(world_input)  # raises DMCompileError on invalid input
+        episode_id = episode_id or uuid.uuid4().hex[:12]  # random 12-hex-char id when none is given
+        artifacts_dir = self.artifacts_root / episode_id
+        artifacts_dir.mkdir(parents=True, exist_ok=True)
+        parsed_steps = [parse_quest_action(step.action) for step in topological_linearize(world.quest_chain)]
+        entity_names = self._assign_command_names(world)  # id -> parser-safe display name
+
+        options = GameOptions()
+        options.kb = KnowledgeBase.load(  # logic and grammar directories are generated under artifacts_dir
+            logic_path=str(build_logic_dir(artifacts_dir, world)),
+            grammar_path=str(build_grammar_dir(artifacts_dir)),
+        )
+        options.path = str(artifacts_dir / "game.z8")
+        options.force_recompile = True
+        maker = GameMaker(options=options)
+
+        rooms, entities = self._build_entities(maker, world, entity_names)
+        maker.set_player(rooms[world.meta.start_node_id])
+        self._compile_edges(maker, world, rooms, entities)
+        self._compile_clue_sources(maker, world, entities)
+        self._compile_fixtures(maker, world, entities)
+        self._compile_npcs(maker, world, entities)
+        self._compile_recipes(maker, world, entities)
+
+        guardian = entities[world.meta.win_condition.target_npc_id]
+        answer = maker.new(type="answer", name="final answer token")  # placed in maker.nowhere, not in any room
+        maker.nowhere.append(answer)
+        entities["__answer__"] = answer
+        maker.add_fact("guardian", guardian)
+        maker.add_fact("correct", answer, guardian)
+
+        walkthrough_commands = simulate_walkthrough(world, parsed_steps, entity_names)  # runs before maker.build()
+        game = maker.build()
+        game.objective = (
+            f"Explore {world.meta.title}, manipulate the dungeon's tools, gather every clue, "
+            f"speak to {entities[world.meta.win_condition.target_npc_id].name}, and submit the answer."
+        )
+        game.metadata.update(
+            {"episode_id": episode_id, "dm_title": world.meta.title, "start_node_id": world.meta.start_node_id}
+        )
+        compile_game(game, options)
+        write_artifacts(artifacts_dir, world, walkthrough_commands)
+        policy = solver_policy(str(options.path))
+        if not policy:  # fall back to the simulated walkthrough when solver_policy returns nothing
+            policy = list(walkthrough_commands)
+        return self._compiled_world(
+            episode_id,
+            artifacts_dir,
+            Path(options.path),
+            world,
+            entity_names,
+            walkthrough_commands,
+            policy,
+        )
+
+    def _build_entities(  # returns (rooms by node id, non-room entities by node/item id)
+        self,
+        maker: GameMaker,
+        world: WorldDefinition,
+        entity_names: dict[str, str],
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        rooms = {  # location and junction nodes become rooms
+            node.id: maker.new(type="r", name=entity_names[node.id], desc=node.description)
+            for node in world.nodes
+            if node.type in {"location", "junction"}
+        }
+        entities: dict[str, Any] = {}
+        hidden_readables = hidden_readable_ids(world)
+        recipe_outputs = {recipe.output_item_id for recipe in world.recipes}
+        produced_items = produced_item_ids(world)
+
+        for node in world.nodes:
+            if node.type in {"location", "junction"}:
+                continue
+            entity = self._make_node_entity(maker, node, entity_names[node.id])
+            entities[node.id] = entity
+            if node.type == "door":  # doors are not placed in a room; _compile_edges links them
+                maker.nowhere.append(entity)
+            elif node.type == "readable" and node.id in hidden_readables:  # hidden readables start in nowhere
+                maker.nowhere.append(entity)
+                maker.add_fact("hidden_readable", entity)
+            else:
+                rooms[node.parent_id].add(entity)
+
+        for item in world.items:
+            item_type = "k" if item.subtype == "key" else "o"
+            entity = maker.new(type=item_type, name=entity_names[item.id], desc=item.description)
+            entities[item.id] = entity
+            if item.id in produced_items:  # produced items start in nowhere
+                maker.nowhere.append(entity)
+                if item.id in recipe_outputs:
+                    maker.add_fact("fresh", entity)
+                else:
+                    maker.add_fact("stored_item", entity)
+                continue
+            holder = item.start_node_id
+            if holder is None:
+                raise DMCompileError(f"Placed item '{item.id}' is missing start_node_id.")
+            if holder in rooms:
+                rooms[holder].add(entity)
+            else:  # holder is a non-room entity created in the node loop above
+                entities[holder].add(entity)
+
+        return rooms, entities
+
+    @staticmethod
+    def _make_node_entity(maker: GameMaker, node: object, name: str) -> Any:
+        """Create the maker entity for a non-room node.
+
+        Containers and doors additionally receive exactly one state property:
+        "open", else "locked", else "closed". Raises DMCompileError for any
+        node type outside container/door/readable/fixture/npc.
+        """
+        type_codes = {"container": "c", "door": "d", "readable": "readable", "fixture": "fixture", "npc": "npc"}
+        if node.type not in type_codes:
+            raise DMCompileError(f"Unsupported node type '{node.type}'.")
+        entity = maker.new(type=type_codes[node.type], name=name, desc=node.description)
+        # Only lockable nodes carry an initial state property.
+        if node.type in ("container", "door"):
+            state = "open" if node.open else "locked" if node.locked else "closed"
+            entity.add_property(state)
+        return entity
+
+    def _compile_clue_sources(
+        self,
+        maker: GameMaker,
+        world: WorldDefinition,
+        entities: dict[str, Any],
+    ) -> None:
+        """Add read-gating facts for each readable: ``read_requires`` plus a consume/keep
+        marker when an item is needed, otherwise ``free_read`` (hidden readables included).
+        """
+        for node in world.nodes:
+            if node.type != "readable":
+                continue
+            readable = entities[node.id]
+            if node.requires_item_id:
+                maker.add_fact("read_requires", readable, entities[node.requires_item_id])
+                maker.add_fact("read_consumes_use" if node.consumes_item else "read_keeps_use", readable)
+            else:
+                maker.add_fact("free_read", readable)
+
+    def _compile_fixtures(self, maker: GameMaker, world: WorldDefinition, entities: dict[str, Any]) -> None:
+        """Add the requires/sealed/consume-or-keep facts and any reveal facts for every fixture node."""
+        for fixture_node in (node for node in world.nodes if node.type == "fixture"):
+            fixture = entities[fixture_node.id]
+            maker.add_fact("fixture_requires", fixture, entities[fixture_node.requires_item_id])
+            maker.add_fact("sealed", fixture)
+            usage_fact = "fixture_consumes_use" if fixture_node.consumes_item else "fixture_keeps_use"
+            maker.add_fact(usage_fact, fixture)
+            if fixture_node.reveals_item_id:
+                maker.add_fact("reveals_item", fixture, entities[fixture_node.reveals_item_id])
+            if fixture_node.reveals_readable_id:
+                maker.add_fact("reveals_readable", fixture, entities[fixture_node.reveals_readable_id])
+
+    def _compile_npcs(
+        self,
+        maker: GameMaker,
+        world: WorldDefinition,
+        entities: dict[str, Any],
+    ) -> None:
+        """Add trade facts for every NPC other than the guardian: ``trade_pending``,
+        ``trade_requires`` and, when set, ``trade_gives_item`` / ``trade_gives_clue``.
+        """
+        guardian_id = world.meta.win_condition.target_npc_id
+        traders = [node for node in world.nodes if node.type == "npc" and node.id != guardian_id]
+        for trader in traders:
+            npc = entities[trader.id]
+            maker.add_fact("trade_pending", npc)
+            maker.add_fact("trade_requires", npc, entities[trader.requires_item_id])
+            if trader.gives_item_id:
+                maker.add_fact("trade_gives_item", npc, entities[trader.gives_item_id])
+            if trader.gives_clue_id:
+                maker.add_fact("trade_gives_clue", npc)
+
+    def _compile_recipes(self, maker: GameMaker, world: WorldDefinition, entities: dict[str, Any]) -> None:
+        """Register each recipe under both input orders via two ``combines_with`` facts."""
+        for recipe in world.recipes:
+            first, second = (entities[item_id] for item_id in recipe.input_item_ids)
+            maker.add_fact("combines_with", first, second, entities[recipe.output_item_id])
+            maker.add_fact("combines_with", second, first, entities[recipe.output_item_id])
+
+    @staticmethod
+    def _compile_edges(
+        maker: GameMaker,
+        world: WorldDefinition,
+        rooms: dict[str, Any],
+        entities: dict[str, Any],
+    ) -> None:
+        """Add direction, door-link, key-match and free-passage facts for each room pair.
+
+        Every pair must hold exactly two edges; door and key come from the lower-id edge.
+        """
+        edges_by_pair: dict[frozenset[str], list[Any]] = defaultdict(list)
+        for edge in world.edges:
+            edges_by_pair[frozenset({edge.from_node_id, edge.to_node_id})].append(edge)
+        for pair_edges in edges_by_pair.values():
+            primary, reverse = sorted(pair_edges, key=lambda edge: edge.id)
+            for edge in (primary, reverse):
+                maker.add_fact(f"{edge.direction}_of", rooms[edge.to_node_id], rooms[edge.from_node_id])
+            room_a, room_b = rooms[primary.from_node_id], rooms[primary.to_node_id]
+            door = entities[primary.door_node_id] if primary.door_node_id else None
+            if door is not None:
+                maker.add_fact("link", room_a, door, room_b)
+                maker.add_fact("link", room_b, door, room_a)
+                if primary.required_item_id:
+                    maker.add_fact("match", entities[primary.required_item_id], door)
+            # Passage is free when there is no door or the door starts open.
+            if door is None or door.has_property("open"):
+                maker.add_fact("free", room_a, room_b)
+                maker.add_fact("free", room_b, room_a)
+
+    def _compiled_world(  # bundle the compile outputs and derived lookup tables into a CompiledWorld
+        self,
+        episode_id: str,
+        artifacts_dir: Path,
+        game_file: Path,
+        world: WorldDefinition,
+        entity_names: dict[str, str],
+        walkthrough_commands: list[str],
+        policy: list[str],
+    ) -> CompiledWorld:
+        node_by_id = {node.id: node for node in world.nodes}
+        return CompiledWorld(
+            episode_id=episode_id,
+            world=world,
+            artifacts_dir=artifacts_dir,
+            game_file=game_file,
+            walkthrough_commands=walkthrough_commands,
+            solver_policy=policy,
+            correct_answer_normalized=submit_command_text(world).replace("submit ", "", 1),  # drops the first "submit " only
+            correct_submit_command=submit_command_text(world),
+            guardian_id=world.meta.win_condition.target_npc_id,
+            guardian_room_id=node_by_id[world.meta.win_condition.target_npc_id].parent_id,
+            room_name_to_id={  # keyed by display name, so rooms sharing a name would overwrite each other
+                entity_names[node.id]: node.id for node in world.nodes if node.type in {"location", "junction"}
+            },
+            node_command_names={node.id: entity_names[node.id] for node in world.nodes},
+            item_command_names={item.id: entity_names[item.id] for item in world.items},
+            item_start_locations={item.id: item.start_node_id for item in world.items},  # None for unplaced items
+            clue_text_by_id={clue.id: clue.text for clue in world.clues},
+            readable_clue_by_id=readable_clue_mapping(world),
+            npc_trade_map=npc_trade_mapping(world),
+            recipe_map=recipe_mapping(world),
+            use_effects=use_effect_mapping(world),
+            produced_item_ids=produced_item_ids(world),
+            room_edges_by_target={(edge.from_node_id, edge.to_node_id): edge for edge in world.edges},
+            room_edges_by_direction={(edge.from_node_id, edge.direction): edge for edge in world.edges},
+            door_rooms=door_room_mapping(world),
+        )
+
+    @staticmethod
+    def _assign_command_names(world: WorldDefinition) -> dict[str, str]:
+        """Map every node id and item id to the parser-safe form of its label (items win on id clashes)."""
+        labelled = [*world.nodes, *world.items]
+        return {entry.id: parser_safe_text(entry.label) for entry in labelled}
diff --git a/agents/master/check.py b/agents/master/check.py
new file mode 100644
index 0000000000000000000000000000000000000000..7392f40ac9b1d6b7bf53bb9083cae10e41f91f60
--- /dev/null
+++ b/agents/master/check.py
@@ -0,0 +1,435 @@
+from __future__ import annotations
+
+from collections import defaultdict, deque
+from typing import Any
+
+from pydantic import ValidationError
+
+from .base import (
+    DMCompileError,
+    MAX_CLUES,
+    MAX_ITEMS,
+    MAX_NODES,
+    MAX_QUEST_STEPS,
+    MIN_CLUES,
+    MIN_NODES,
+    MIN_QUEST_STEPS,
+    OPPOSITE_DIRECTION,
+    normalize_answer_text,
+    normalize_snake_id,
+    parser_safe_text,
+)
+from .graph import hidden_readable_ids, produced_item_ids
+from .quest import parse_quest_action, simulate_walkthrough, topological_linearize
+from .schema import (
+    CombineAction,
+    ContainerNode,
+    DoorNode,
+    GiveAction,
+    NpcNode,
+    ReadableNode,
+    SubmitAction,
+    TakeAction,
+    TalkAction,
+    UnlockAction,
+    UseAction,
+    WorldDefinition,
+)
+
+
+def validate_and_normalize(world_input: WorldDefinition | dict[str, Any]) -> WorldDefinition:
+    """Parse world_input into a WorldDefinition and run every validation pass on it.
+
+    Raises DMCompileError for legacy shapes, schema violations, or any failed check.
+    """
+    if isinstance(world_input, dict):
+        _reject_legacy_shapes(world_input)
+    try:
+        world = WorldDefinition.model_validate(world_input)
+    except ValidationError as exc:  # pragma: no cover - exercised indirectly in compile paths
+        raise DMCompileError(str(exc)) from exc
+    # Checks run in this fixed order; the first one that fails raises.
+    checks = (
+        _validate_ids, _validate_shape, _validate_nodes, _validate_edges,
+        _validate_items, _validate_clues, _validate_visibility, _validate_answer_leaks,
+        _validate_guardian_path, _validate_clue_gates, _validate_item_usage, _validate_quest_shape,
+    )
+    for check in checks:
+        check(world)
+    return world
+
+
+def infer_start_room(world: WorldDefinition) -> str:  # nothing is inferred: returns the declared start node id
+    return world.meta.start_node_id
+
+
+def _reject_legacy_shapes(world_input: dict[str, Any]) -> None:  # null lists and non-dict entries are left for schema validation
+    for node in (entry for entry in (world_input.get("nodes") or ()) if isinstance(entry, dict)):
+        if node.get("type") == "clue":
+            raise DMCompileError("Legacy clue nodes are not supported in v2. Use top-level clues[].")
+        if isinstance(node.get("state"), dict) and node["state"].get("npc_dialogue") is not None:
+            raise DMCompileError("Legacy npc_dialogue is not supported in v2.")
+    for edge in (entry for entry in (world_input.get("edges") or ()) if isinstance(entry, dict)):
+        if edge.get("type") == "conditional_passage":
+            raise DMCompileError("conditional_passage is not supported in v2.")
+
+
+def _validate_ids(world: WorldDefinition) -> None:
+    """Require snake_case ids that are unique within each collection and across all collections."""
+    owner_by_id: dict[str, str] = {}
+    ids_by_kind = {
+        "node": [node.id for node in world.nodes],
+        "item": [item.id for item in world.items],
+        "clue": [clue.id for clue in world.clues],
+        "recipe": [recipe.id for recipe in world.recipes],
+        "quest step": [step.step_id for step in world.quest_chain],
+    }
+    for kind, values in ids_by_kind.items():
+        for value in values:
+            normalize_snake_id(value, kind)
+            previous_kind = owner_by_id.get(value)
+            if previous_kind == kind:
+                raise DMCompileError(f"Duplicate {kind} id '{value}'.")
+            if previous_kind is not None:
+                raise DMCompileError(f"Duplicate world id '{value}' across collections.")
+            owner_by_id[value] = kind
+
+
+def _validate_shape(world: WorldDefinition) -> None:
+    """Enforce the size limits, a room start node, a deduce win condition and a non-empty normalized answer."""
+    if len(world.nodes) < MIN_NODES:
+        raise DMCompileError(f"Worlds need at least {MIN_NODES} nodes.")
+    if len(world.nodes) > MAX_NODES:
+        raise DMCompileError(f"Worlds support at most {MAX_NODES} nodes.")
+    if len(world.items) > MAX_ITEMS:
+        raise DMCompileError(f"Worlds support at most {MAX_ITEMS} items.")
+    if not MIN_CLUES <= len(world.clues) <= MAX_CLUES:
+        raise DMCompileError(f"Worlds must define between {MIN_CLUES} and {MAX_CLUES} clues.")
+    if not MIN_QUEST_STEPS <= len(world.quest_chain) <= MAX_QUEST_STEPS:
+        raise DMCompileError(f"quest_chain must contain between {MIN_QUEST_STEPS} and {MAX_QUEST_STEPS} steps.")
+    if world.meta.start_node_id not in {node.id for node in world.nodes if node.type in {"location", "junction"}}:
+        raise DMCompileError("meta.start_node_id must reference a location or junction.")
+    if world.meta.win_condition.type != "deduce":
+        raise DMCompileError("Only deduce win conditions are supported in v2.")
+    if not normalize_answer_text(world.meta.win_condition.answer_string):
+        raise DMCompileError("answer_string cannot normalize to an empty command.")
+
+
+def _validate_nodes(world: WorldDefinition) -> None:  # per-node reference checks; also requires the guardian NPC to exist
+    node_by_id = {node.id: node for node in world.nodes}
+    item_ids = {item.id for item in world.items}
+    clue_ids = {clue.id for clue in world.clues}
+    hidden_readables = hidden_readable_ids(world)
+    guardian_id = world.meta.win_condition.target_npc_id
+
+    guardian_seen = False
+    for node in world.nodes:
+        if node.type in {"location", "junction"}:  # rooms need no per-node checks here
+            continue
+        if node.type == "door":  # doors skip the parent-room check below
+            _validate_lockable(node, item_ids)
+            continue
+        parent = node_by_id.get(node.parent_id)
+        if parent is None or parent.type not in {"location", "junction"}:
+            raise DMCompileError(f"Node '{node.id}' must live in a location or junction.")
+        if node.type == "container":
+            _validate_lockable(node, item_ids)
+        elif node.type == "readable":
+            if node.clue_id not in clue_ids:
+                raise DMCompileError(f"Readable '{node.id}' references unknown clue '{node.clue_id}'.")
+            if node.requires_item_id and node.requires_item_id not in item_ids:
+                raise DMCompileError(f"Readable '{node.id}' references unknown item '{node.requires_item_id}'.")
+        elif node.type == "fixture":
+            if node.requires_item_id not in item_ids:
+                raise DMCompileError(f"Fixture '{node.id}' references unknown item '{node.requires_item_id}'.")
+            if bool(node.reveals_item_id) == bool(node.reveals_readable_id):  # both set or both unset
+                raise DMCompileError(f"Fixture '{node.id}' must reveal exactly one item or readable.")
+            if node.reveals_item_id and node.reveals_item_id not in item_ids:
+                raise DMCompileError(f"Fixture '{node.id}' reveals unknown item '{node.reveals_item_id}'.")
+            if node.reveals_readable_id and node.reveals_readable_id not in node_by_id:
+                raise DMCompileError(f"Fixture '{node.id}' reveals unknown readable '{node.reveals_readable_id}'.")
+            if node.reveals_readable_id:
+                readable = node_by_id[node.reveals_readable_id]
+                if not isinstance(readable, ReadableNode):
+                    raise DMCompileError(f"Fixture '{node.id}' can only reveal readable nodes.")
+                if readable.parent_id != node.parent_id:
+                    raise DMCompileError(
+                        f"Fixture '{node.id}' must reveal readable '{readable.id}' in the same room."
+                    )
+        elif node.type == "npc":
+            if node.id == guardian_id:  # the guardian takes no part in trades
+                guardian_seen = True
+                if node.requires_item_id or node.gives_item_id or node.gives_clue_id:
+                    raise DMCompileError("Guardian NPC cannot have trade fields.")
+            else:
+                if not node.requires_item_id:
+                    raise DMCompileError(f"NPC '{node.id}' requires requires_item_id in v2.")
+                if node.requires_item_id not in item_ids:
+                    raise DMCompileError(f"NPC '{node.id}' references unknown item '{node.requires_item_id}'.")
+                if bool(node.gives_item_id) == bool(node.gives_clue_id):  # both set or both unset
+                    raise DMCompileError(
+                        f"NPC '{node.id}' must define exactly one of gives_item_id or gives_clue_id."
+                    )
+                if node.gives_item_id and node.gives_item_id not in item_ids:
+                    raise DMCompileError(f"NPC '{node.id}' gives unknown item '{node.gives_item_id}'.")
+                if node.gives_clue_id and node.gives_clue_id not in clue_ids:
+                    raise DMCompileError(f"NPC '{node.id}' gives unknown clue '{node.gives_clue_id}'.")
+        else:  # pragma: no cover
+            raise AssertionError(f"Unhandled node type {node.type}")
+
+    if not guardian_seen:  # also reached when guardian_id names a node that is not an NPC
+        raise DMCompileError(f"Guardian NPC '{guardian_id}' does not exist.")
+    for readable_id in hidden_readables:
+        readable = node_by_id[readable_id]
+        if not isinstance(readable, ReadableNode):
+            raise DMCompileError(f"Only readable nodes can be hidden, not '{readable_id}'.")
+
+
+def _validate_lockable(node: ContainerNode | DoorNode, item_ids: set[str]) -> None:  # shared by containers and doors
+    if node.open and node.locked:  # the two flags are mutually exclusive
+        raise DMCompileError(f"Lockable node '{node.id}' cannot be both open and locked.")
+    if node.locked and not node.lock_key_id:
+        raise DMCompileError(f"Lockable node '{node.id}' is locked but has no lock_key_id.")
+    if node.lock_key_id and node.lock_key_id not in item_ids:  # checked even when the node is not locked
+        raise DMCompileError(f"Lockable node '{node.id}' references unknown key '{node.lock_key_id}'.")
+
+
+def _validate_edges(world: WorldDefinition) -> None:
+    """Validate each edge, require one A->B and one B->A edge per connected room pair, then check reachability."""
+    room_ids = {node.id for node in world.nodes if node.type in {"location", "junction"}}
+    node_by_id = {node.id: node for node in world.nodes}
+    item_ids = {item.id for item in world.items}
+    key_item_ids = {item.id for item in world.items if item.subtype == "key"}  # built once, not per edge
+    pair_groups: dict[frozenset[str], list[Any]] = defaultdict(list)
+    graph: dict[str, set[str]] = defaultdict(set)
+    direction_map: dict[tuple[str, str], str] = {}
+
+    for edge in world.edges:
+        if edge.from_node_id not in room_ids or edge.to_node_id not in room_ids:
+            raise DMCompileError(f"Edge '{edge.id}' must connect location or junction nodes only.")
+        if edge.from_node_id == edge.to_node_id:
+            raise DMCompileError(f"Edge '{edge.id}' cannot be self-referential.")
+        if edge.required_item_id and edge.required_item_id not in item_ids:
+            raise DMCompileError(f"Edge '{edge.id}' references unknown item '{edge.required_item_id}'.")
+        if edge.required_item_id and edge.required_item_id not in key_item_ids:
+            raise DMCompileError(f"Edge '{edge.id}' must use a key item, not '{edge.required_item_id}'.")
+        if edge.type == "locked_passage":
+            if not edge.door_node_id:
+                raise DMCompileError(f"Locked edge '{edge.id}' requires door_node_id.")
+            if not edge.required_item_id:
+                raise DMCompileError(f"Locked edge '{edge.id}' requires required_item_id.")
+        elif edge.required_item_id is not None:
+            raise DMCompileError(f"Only locked_passage edges can reference required_item_id (edge '{edge.id}').")
+        if edge.door_node_id:
+            door = node_by_id.get(edge.door_node_id)
+            if not isinstance(door, DoorNode):
+                raise DMCompileError(f"Edge '{edge.id}' references unknown door '{edge.door_node_id}'.")
+            if edge.required_item_id and door.lock_key_id != edge.required_item_id:
+                raise DMCompileError(f"Edge '{edge.id}' and door '{door.id}' disagree on the key.")
+        key = (edge.from_node_id, edge.direction)
+        if key in direction_map:
+            raise DMCompileError(
+                f"Edges '{direction_map[key]}' and '{edge.id}' both leave '{edge.from_node_id}' via '{edge.direction}'."
+            )
+        direction_map[key] = edge.id
+        graph[edge.from_node_id].add(edge.to_node_id)
+        pair_groups[frozenset({edge.from_node_id, edge.to_node_id})].append(edge)
+
+    for pair, edges in pair_groups.items():
+        if len(edges) != 2 or edges[0].from_node_id != edges[1].to_node_id:  # two A->B edges are not a return path
+            raise DMCompileError(f"Edges between {', '.join(sorted(pair))} must be explicitly bidirectional.")
+        a, b = edges
+        if OPPOSITE_DIRECTION[a.direction] != b.direction:
+            raise DMCompileError(f"Edges '{a.id}' and '{b.id}' must use opposite directions.")
+        if a.type != b.type or a.required_item_id != b.required_item_id or a.door_node_id != b.door_node_id:
+            raise DMCompileError(f"Edge pair '{a.id}'/'{b.id}' must agree on type, key, and door.")
+
+    reachable = _reachable_rooms(graph, world.meta.start_node_id)
+    if reachable != room_ids:
+        raise DMCompileError(f"Some rooms are unreachable from the start node: {sorted(room_ids - reachable)}")
+
+
+def _validate_items(world: WorldDefinition) -> None:
+    """Validate recipe definitions and the initial placement of every item.
+
+    Recipe rules: each recipe combines exactly two distinct input items, no two
+    recipes share the same input pair, and no item is the output of more than
+    one recipe.
+
+    Placement rules: an item returned by ``produced_item_ids`` must not have a
+    ``start_node_id``; every other item must have one, and it must name a known
+    node of type ``location``, ``junction`` or ``container``.
+
+    Raises:
+        DMCompileError: On the first rule that is violated.
+    """
+    nodes = {node.id: node for node in world.nodes}
+    produced = produced_item_ids(world)
+
+    seen_input_pairs: set[frozenset[str]] = set()
+    seen_outputs: set[str] = set()
+    for recipe in world.recipes:
+        # A frozenset collapses repeats, so "a + a" fails the size check too.
+        input_pair = frozenset(recipe.input_item_ids)
+        if len(input_pair) != 2:
+            raise DMCompileError(f"Recipe '{recipe.id}' must have exactly two distinct input items.")
+        if input_pair in seen_input_pairs:
+            raise DMCompileError(f"Duplicate recipe inputs in '{recipe.id}'.")
+        seen_input_pairs.add(input_pair)
+        output_id = recipe.output_item_id
+        if output_id in seen_outputs:
+            raise DMCompileError(f"Item '{output_id}' is produced by multiple recipes.")
+        seen_outputs.add(output_id)
+
+    for item in world.items:
+        is_produced = item.id in produced
+        is_placed = item.start_node_id is not None
+        if is_produced and is_placed:
+            raise DMCompileError(f"Produced item '{item.id}' must not be initially placed.")
+        if not (is_produced or is_placed):
+            raise DMCompileError(f"Placed item '{item.id}' requires start_node_id.")
+        if not is_placed:
+            continue
+        # NOTE(review): the checks below only run for initially placed items, so
+        # the subtype check is skipped for produced items - confirm that is intended.
+        holder = nodes.get(item.start_node_id)
+        if holder is None:
+            raise DMCompileError(f"Item '{item.id}' starts in unknown node '{item.start_node_id}'.")
+        if holder.type not in {"location", "junction", "container"}:
+            raise DMCompileError(f"Item '{item.id}' must start in a room or container.")
+        if item.subtype not in {"key", "puzzle"}:
+            raise DMCompileError(f"Item '{item.id}' uses unsupported subtype '{item.subtype}'.")
+
+
+def _validate_clues(world: WorldDefinition) -> None:
+    """Ensure every declared clue is revealed by exactly one node.
+
+    A clue source is a ``ReadableNode`` (through ``clue_id``) or an ``NpcNode``
+    with a truthy ``gives_clue_id``.
+
+    Raises:
+        DMCompileError: If a declared clue has no source, if a source refers to
+            a clue id that is not declared in ``world.clues``, or if a clue has
+            more than one source.
+    """
+    clue_sources: dict[str, list[str]] = defaultdict(list)
+    for node in world.nodes:
+        if isinstance(node, ReadableNode):
+            clue_sources[node.clue_id].append(node.id)
+        elif isinstance(node, NpcNode) and node.gives_clue_id:
+            clue_sources[node.gives_clue_id].append(node.id)
+
+    clue_ids = {clue.id for clue in world.clues}
+    sourced_ids = set(clue_sources)
+    missing = sorted(clue_ids - sourced_ids)
+    if missing:
+        raise DMCompileError(f"Every clue needs exactly one source. Missing: {missing}")
+    # The two sets can also differ because a node points at a clue that was
+    # never declared; report those ids instead of an empty "Missing" list.
+    undefined = sorted(sourced_ids - clue_ids)
+    if undefined:
+        raise DMCompileError(f"Clue sources reference undefined clues: {undefined}")
+    for clue_id, source_ids in sorted(clue_sources.items()):
+        if len(source_ids) > 1:
+            raise DMCompileError(
+                f"Clue '{clue_id}' has multiple sources: {', '.join(sorted(source_ids))}."
+            )
+
+
+def _validate_visibility(world: WorldDefinition) -> None:
+    """Reject worlds where two visible labels share one parser-safe name.
+
+    Node labels are checked first, then item labels; each label is passed
+    through ``parser_safe_text`` and compared with all labels seen before it.
+
+    Raises:
+        DMCompileError: Naming the two labels that collide and the shared name.
+    """
+    label_by_parser_name: dict[str, str] = {}
+    visible_labels = [node.label for node in world.nodes]
+    visible_labels += [item.label for item in world.items]
+    for label in visible_labels:
+        parser_name = parser_safe_text(label)
+        if parser_name in label_by_parser_name:
+            earlier = label_by_parser_name[parser_name]
+            raise DMCompileError(
+                f"Visible labels '{label}' and '{earlier}' collapse to the same parser name '{parser_name}'."
+            )
+        label_by_parser_name[parser_name] = label
+
+
+def _validate_answer_leaks(world: WorldDefinition) -> None:
+    """Reject worlds whose visible text states the final answer outright.
+
+    The world title, every clue text, every node label and description, and the
+    text of every readable are normalized with ``normalize_answer_text`` and
+    searched for "the answer is <answer>", "answer is <answer>" and
+    "submit <answer>", where <answer> is the normalized win-condition answer.
+
+    Raises:
+        DMCompileError: If any of those texts contains one of the phrases.
+    """
+    answer = normalize_answer_text(world.meta.win_condition.answer_string)
+    leak_phrases = tuple(
+        f"{prefix} {answer}" for prefix in ("the answer is", "answer is", "submit")
+    )
+
+    candidate_texts = [world.meta.title, *(clue.text for clue in world.clues)]
+    for node in world.nodes:
+        candidate_texts.append(node.label)
+        candidate_texts.append(node.description)
+        if isinstance(node, ReadableNode):
+            candidate_texts.append(node.text_content)
+
+    for text in candidate_texts:
+        normalized = normalize_answer_text(text)
+        for phrase in leak_phrases:
+            if phrase in normalized:
+                raise DMCompileError("World leaks the final answer too directly. Clues must stay partial.")
+
+
+def _validate_guardian_path(world: WorldDefinition) -> None:
+    """Require the guardian's room to be reachable over plain passages only.
+
+    The guardian is the node named by ``world.meta.win_condition.target_npc_id``;
+    its ``parent_id`` must be among the rooms reachable from the start node
+    using only edges of type ``passage``.
+
+    Raises:
+        DMCompileError: If the guardian's room is not in that set.
+    """
+    nodes = {node.id: node for node in world.nodes}
+    guardian = nodes[world.meta.win_condition.target_npc_id]
+    # Same passage-only reachability that the clue-gate check relies on.
+    open_rooms = _reachable_zero_item_rooms(world)
+    if guardian.parent_id in open_rooms:
+        return
+    raise DMCompileError("Guardian room must be reachable from the start without item gates.")
+
+
+def _validate_clue_gates(world: WorldDefinition) -> None:
+    """Require an item interaction before any clue can be obtained.
+
+    A readable passes when it is listed by ``hidden_readable_ids``, when its
+    parent room is not reachable over plain passages, or when it requires an
+    item itself. An NPC that gives a clue must require an item.
+
+    Raises:
+        DMCompileError: For the first readable or NPC that exposes its clue
+            without any item gate.
+    """
+    open_rooms = _reachable_zero_item_rooms(world)
+    hidden = hidden_readable_ids(world)
+    for node in world.nodes:
+        if isinstance(node, ReadableNode):
+            is_gated = (
+                node.id in hidden
+                or node.parent_id not in open_rooms
+                or bool(node.requires_item_id)
+            )
+            if is_gated:
+                continue
+            raise DMCompileError(
+                f"Readable '{node.id}' exposes clue '{node.clue_id}' without any item interaction."
+            )
+        if not isinstance(node, NpcNode):
+            continue
+        if node.gives_clue_id and not node.requires_item_id:
+            raise DMCompileError(f"NPC '{node.id}' gives clue '{node.gives_clue_id}' without an item gate.")
+
+
+def _validate_item_usage(world: WorldDefinition) -> None:
+    """Reject items that no quest step and no world mechanic refers to.
+
+    An item counts as used when it appears in an unlock/use/give/combine/take
+    quest action, is required by an edge, is the key of a container or door, is
+    required, revealed or given by a readable, fixture or NPC, or is an input
+    or output of a recipe.
+
+    Raises:
+        DMCompileError: For the first item that is referenced nowhere.
+    """
+    used_item_ids: set[str] = set()
+
+    # Items named by quest steps, visited in linearized quest order.
+    for step in topological_linearize(world.quest_chain):
+        action = parse_quest_action(step.action)
+        if isinstance(action, UnlockAction):
+            named = (action.key_id,)
+        elif isinstance(action, (UseAction, GiveAction)):
+            named = (action.item_id,)
+        elif isinstance(action, CombineAction):
+            named = (action.item_a_id, action.item_b_id)
+        elif isinstance(action, TakeAction):
+            named = (action.item_id,)
+        else:
+            named = ()
+        used_item_ids.update(named)
+
+    # Items wired into the world's mechanics.
+    used_item_ids.update(edge.required_item_id for edge in world.edges if edge.required_item_id)
+    for node in world.nodes:
+        kind = node.type
+        if kind in ("container", "door"):
+            if node.lock_key_id:
+                used_item_ids.add(node.lock_key_id)
+        elif kind == "readable":
+            if node.requires_item_id:
+                used_item_ids.add(node.requires_item_id)
+        elif kind == "fixture":
+            # A fixture's required item is recorded without a truthiness check.
+            used_item_ids.add(node.requires_item_id)
+            if node.reveals_item_id:
+                used_item_ids.add(node.reveals_item_id)
+        elif kind == "npc":
+            if node.requires_item_id:
+                used_item_ids.add(node.requires_item_id)
+            if node.gives_item_id:
+                used_item_ids.add(node.gives_item_id)
+    for recipe in world.recipes:
+        used_item_ids.update(recipe.input_item_ids)
+        used_item_ids.add(recipe.output_item_id)
+
+    for item in world.items:
+        if item.id not in used_item_ids:
+            raise DMCompileError(f"Unused decorative items are not supported in v2: '{item.id}'.")
+
+
+def _validate_quest_shape(world: WorldDefinition) -> None:
+    """Check how the quest chain ends, then simulate the whole walkthrough.
+
+    After linearizing ``world.quest_chain`` the last step must be a submit
+    action whose normalized text equals the normalized win-condition answer,
+    and the step before it must be a talk action aimed at the guardian NPC.
+    The parsed steps are then passed to ``simulate_walkthrough`` together with
+    the parser-safe label of every node and item.
+
+    Raises:
+        DMCompileError: If the chain is empty or does not end with
+            talk(guardian) followed by the matching submit. Anything raised by
+            ``simulate_walkthrough`` propagates unchanged.
+    """
+    ordered = topological_linearize(world.quest_chain)
+    parsed = [parse_quest_action(step.action) for step in ordered]
+    # An empty chain has no final step; report it as a compile error instead of
+    # letting ``parsed[-1]`` raise an IndexError.
+    if not parsed or not isinstance(parsed[-1], SubmitAction):
+        raise DMCompileError('The final quest step must be submit("answer").')
+    if len(parsed) < 2 or not isinstance(parsed[-2], TalkAction):
+        raise DMCompileError("The penultimate quest step must be talk(guardian).")
+    if parsed[-2].target_node_id != world.meta.win_condition.target_npc_id:
+        raise DMCompileError("The final talk step must target the guardian NPC.")
+    if normalize_answer_text(parsed[-1].answer_text) != normalize_answer_text(world.meta.win_condition.answer_string):
+        raise DMCompileError("The final submit step must match win_condition.answer_string.")
+    entity_names = {node.id: parser_safe_text(node.label) for node in world.nodes}
+    entity_names.update({item.id: parser_safe_text(item.label) for item in world.items})
+    simulate_walkthrough(world, parsed, entity_names)
+
+
+def _reachable_rooms(graph: dict[str, set[str]], start: str) -> set[str]:
+    """Return every node reachable from ``start``, including ``start`` itself.
+
+    ``graph`` maps a node id to the ids it links to. Nodes without an entry are
+    treated as having no outgoing links, and ``graph`` is never modified.
+    """
+    visited = {start}
+    frontier = deque((start,))
+    while frontier:
+        room = frontier.popleft()
+        # ``get`` keeps a defaultdict from growing an entry for dead-end rooms.
+        unseen = [neighbour for neighbour in graph.get(room, ()) if neighbour not in visited]
+        visited.update(unseen)
+        frontier.extend(unseen)
+    return visited
+
+
+def _reachable_zero_item_rooms(world: WorldDefinition) -> set[str]:
+    """Return the rooms reachable from the start node over ``passage`` edges only."""
+    open_graph: dict[str, set[str]] = defaultdict(set)
+    for edge in world.edges:
+        if edge.type != "passage":
+            continue
+        open_graph[edge.from_node_id].add(edge.to_node_id)
+    return _reachable_rooms(open_graph, world.meta.start_node_id)
diff --git a/agents/master/env.py b/agents/master/env.py
new file mode 100644
index 0000000000000000000000000000000000000000..240b6010c86bacaff06c2dfdd7fcce63af1e221f
--- /dev/null
+++ b/agents/master/env.py
@@ -0,0 +1,236 @@
+from __future__ import annotations
+
+import math
+import uuid
+from pathlib import Path
+from typing import Any
+
+from .base import DMCompileError, DMInterfaceError, MAX_STEP_MULTIPLIER, TARGET_RATIO, TARGET_RATIO_SIGMA
+from .build import WorldCompiler
+from .interface import InterfaceAdapter, SimpleInterfaceAdapter
+from .play import EpisodeRunner, WalkthroughRunner
+from .schema import (
+    CompiledWorld,
+    DMAction,
+    DMFeedback,
+    DMObservation,
+    DMRewardBreakdown,
+    DMState,
+    Turn,
+    WorldDefinition,
+)
+from .session import EpisodeSession
+from .snapshots import LiveObserver
+from agents.shared.openenv_compat import Environment, StepResult, build_step_result
+
+
+class DMEnvironment(Environment[DMAction, DMObservation, DMState]):
+    """Single-step environment that scores a submitted world definition.
+
+    Each ``step`` compiles the submitted world, plays one episode in it with a
+    runner, and returns a terminal observation. The reward is a Gaussian of the
+    distance between the ``steps_taken / min_steps`` ratio (clamped to at least
+    1.0) and the episode's target ratio, and is 0.0 when the player does not
+    win. The instance also keeps running episode and success counters.
+    """
+
+    def __init__(
+        self,
+        artifacts_root: Path | None = None,
+        target_ratio: float = TARGET_RATIO,
+        reward_sigma: float = TARGET_RATIO_SIGMA,
+        max_step_multiplier: int = MAX_STEP_MULTIPLIER,
+        interface_adapter: InterfaceAdapter = SimpleInterfaceAdapter(),
+        default_runner: EpisodeRunner | None = None,
+    ) -> None:
+        """Create the environment.
+
+        Args:
+            artifacts_root: Forwarded to ``WorldCompiler``.
+            target_ratio: Default target for ``steps_taken / min_steps``.
+            reward_sigma: Width of the Gaussian reward around the target ratio.
+            max_step_multiplier: The step budget of an episode is the solver
+                policy length times this value (at least 1).
+            interface_adapter: Adapter handed to every ``EpisodeSession``.
+            default_runner: Runner used when ``step`` receives none; a
+                ``WalkthroughRunner`` is created when omitted.
+
+        Raises:
+            ValueError: If ``interface_adapter`` is ``None``.
+        """
+        super().__init__()
+        if interface_adapter is None:
+            raise ValueError("interface_adapter must not be None.")
+        self.compiler = WorldCompiler(artifacts_root=artifacts_root)
+        self.target_ratio = target_ratio
+        self.reward_sigma = reward_sigma
+        self.max_step_multiplier = max_step_multiplier
+        self.interface_adapter = interface_adapter
+        self.default_runner = default_runner or WalkthroughRunner()
+        self.episode_count = 0
+        self.success_count = 0
+        self._state = DMState(
+            episode_id=uuid.uuid4().hex[:12],
+            target_ratio=target_ratio,
+        )
+        self.last_compiled_world: CompiledWorld | None = None
+
+    def reset(self, difficulty_hint: float | None = None, seed: int | None = None) -> DMObservation:
+        """Start a new episode and return its initial, non-terminal observation.
+
+        Args:
+            difficulty_hint: When given, used as this episode's target ratio in
+                place of the environment default.
+            seed: Accepted but ignored.
+        """
+        del seed
+        episode_target_ratio = self.target_ratio if difficulty_hint is None else difficulty_hint
+        self._state = DMState(
+            episode_id=uuid.uuid4().hex[:12],
+            compile_status="pending",
+            episode_status="running",
+            cumulative_success_rate=self._running_success_rate(),
+            target_ratio=episode_target_ratio,
+            difficulty_hint=difficulty_hint,
+        )
+        self.last_compiled_world = None
+        return self._apply_transform(
+            DMObservation(
+                done=False,
+                reward=None,
+                target_ratio_used=episode_target_ratio,
+            )
+        )
+
+    def step(  # type: ignore[override]
+        self,
+        action: DMAction | WorldDefinition | dict[str, Any],
+        runner: EpisodeRunner | None = None,
+        observer: LiveObserver | None = None,
+        timeout_s: float | None = None,
+    ) -> StepResult[DMObservation]:
+        """Compile the submitted world, play one episode, and score it.
+
+        Args:
+            action: A ``DMAction`` (its ``world_definition`` is used) or the
+                world definition itself.
+            runner: Runner for this episode; falls back to ``default_runner``.
+            observer: Optional listener notified on run start, compile success,
+                every turn, completion, and error.
+            timeout_s: Accepted but ignored.
+
+        Returns:
+            A step result whose observation always has ``done=True``. A
+            ``DMCompileError``, ``DMInterfaceError`` or ``ValueError`` raised
+            while compiling or playing yields a zero-reward observation that
+            carries the error text; any other exception propagates.
+        """
+        del timeout_s
+        world_input = action.world_definition if isinstance(action, DMAction) else action
+        compiled: CompiledWorld | None = None
+        session: EpisodeSession | None = None
+        if observer is not None:
+            observer.on_run_start(self._state.episode_id, world_input)
+        self.last_compiled_world = None
+        self._state.current_world = None
+        try:
+            compiled = self.compiler.compile(world_input, episode_id=self._state.episode_id)
+            self.last_compiled_world = compiled
+            self._state.current_world = compiled.world
+            self._state.compile_status = "valid"
+            max_steps = max(1, len(compiled.solver_policy) * self.max_step_multiplier)
+
+            def on_turn(current_session: EpisodeSession, turn: Turn) -> None:
+                # Keep the public state's step counter current while the episode runs.
+                self._state.step_count = current_session.steps_taken
+                if observer is not None:
+                    observer.on_turn(current_session, turn)
+
+            session = EpisodeSession(
+                compiled,
+                interface_adapter=self.interface_adapter,
+                turn_listener=on_turn,
+            )
+            if observer is not None:
+                observer.on_compile_success(compiled, session)
+            (runner or self.default_runner).run(session, max_steps=max_steps)
+            player_won = bool(session.player_won)
+            # The solver policy length serves as the minimum step count.
+            min_steps = len(compiled.solver_policy)
+            reward_breakdown = self._reward_breakdown(player_won, session.steps_taken, min_steps)
+            reward = reward_breakdown.reward
+            # Only episodes that ran to the end are counted; the error path
+            # below leaves both counters untouched.
+            self.episode_count += 1
+            self.success_count += int(player_won)
+            self._state.step_count = session.steps_taken
+            self._state.episode_status = "complete" if player_won else "failed"
+            self._state.cumulative_success_rate = self._running_success_rate()
+            observation = self._apply_transform(
+                DMObservation(
+                    episode_transcript=session.transcript,
+                    player_won=player_won,
+                    steps_taken=session.steps_taken,
+                    min_steps=min_steps,
+                    ratio=(session.steps_taken / min_steps) if min_steps else None,
+                    reward=reward,
+                    done=True,
+                    feedback=self._build_feedback(compiled, session),
+                    reward_breakdown=reward_breakdown,
+                    target_ratio_used=self._state.target_ratio,
+                )
+            )
+            if observer is not None:
+                observer.on_complete(compiled, session, observation)
+            return build_step_result(observation)
+        except (DMCompileError, DMInterfaceError, ValueError) as exc:
+            self.last_compiled_world = None
+            self._state.current_world = None
+            self._state.compile_status = "invalid"
+            self._state.episode_status = "failed"
+            if observer is not None:
+                observer.on_error(
+                    episode_id=self._state.episode_id,
+                    error=str(exc),
+                    world_input=world_input,
+                    compiled=compiled,
+                    session=session,
+                )
+            observation = self._apply_transform(
+                DMObservation(
+                    player_won=False,
+                    compile_error=str(exc),
+                    reward=0.0,
+                    done=True,
+                    reward_breakdown=DMRewardBreakdown(
+                        reward_mode="compile_failure_penalty",
+                        player_won=False,
+                        target_ratio=self._state.target_ratio,
+                        quality_score=0.0,
+                        reward=0.0,
+                    ),
+                    target_ratio_used=self._state.target_ratio,
+                )
+            )
+            return build_step_result(observation)
+        finally:
+            # Close the session on every path, including unexpected exceptions.
+            if session is not None:
+                session.close()
+
+    def compile_world(
+        self,
+        world_input: WorldDefinition | dict[str, Any],
+        *,
+        episode_id: str | None = None,
+    ) -> CompiledWorld:
+        """Compile ``world_input`` with this environment's compiler, without playing it."""
+        return self.compiler.compile(world_input, episode_id=episode_id)
+
+    def play(
+        self,
+        world_input: WorldDefinition | dict[str, Any],
+        runner: EpisodeRunner | None = None,
+        observer: LiveObserver | None = None,
+    ) -> StepResult[DMObservation]:
+        """Reset the environment and run a single ``step`` on ``world_input``."""
+        self.reset()
+        return self.step(world_input, runner=runner, observer=observer)
+
+    @property
+    def state(self) -> DMState:
+        """The state object of the current episode."""
+        return self._state
+
+    def _reward_breakdown(
+        self,
+        player_won: bool,
+        steps_taken: int | None,
+        min_steps: int | None,
+    ) -> DMRewardBreakdown:
+        """Compute the reward and the intermediate values that lead to it.
+
+        The ratio fields are filled only when both step counts are given and
+        ``min_steps`` is positive. The quality score, and therefore the reward,
+        is non-zero only when the player won with at least one step taken.
+        """
+        raw_ratio: float | None = None
+        clamped_ratio: float | None = None
+        target_ratio_delta: float | None = None
+        efficiency_score: float | None = None
+        quality_score = 0.0
+        if steps_taken is not None and min_steps is not None and min_steps > 0:
+            raw_ratio = steps_taken / min_steps
+            # Ratios below 1.0 are scored as if they were exactly 1.0.
+            clamped_ratio = max(raw_ratio, 1.0)
+            target_ratio_delta = abs(clamped_ratio - self._state.target_ratio)
+            if player_won and steps_taken > 0:
+                efficiency_score = min(1.0, min_steps / steps_taken)
+                # Floor sigma so a zero width cannot cause a division by zero.
+                sigma_sq = max(self.reward_sigma, 1e-6) ** 2
+                quality_score = math.exp(-((clamped_ratio - self._state.target_ratio) ** 2) / (2.0 * sigma_sq))
+        reward = quality_score if player_won else 0.0
+        return DMRewardBreakdown(
+            reward_mode="gaussian_target_ratio",
+            player_won=player_won,
+            raw_ratio=raw_ratio,
+            clamped_ratio=clamped_ratio,
+            target_ratio=self._state.target_ratio,
+            target_ratio_delta=target_ratio_delta,
+            efficiency_score=efficiency_score,
+            quality_score=quality_score,
+            reward=reward,
+        )
+
+    def _build_feedback(self, compiled: CompiledWorld, session: EpisodeSession) -> DMFeedback:
+        """Summarize which rooms, items and clues the episode left untouched.
+
+        ``unreachable_nodes`` lists the ``location``/``junction`` nodes that do
+        not appear in ``session.visited_nodes``.
+        """
+        # Sets give constant-time membership tests while filtering visited nodes.
+        room_ids = {node.id for node in compiled.world.nodes if node.type in {"location", "junction"}}
+        clue_ids = [clue.id for clue in compiled.world.clues]
+        visited_rooms = {node_id for node_id in session.visited_nodes if node_id in room_ids}
+        return DMFeedback(
+            unreachable_nodes=sorted(room_ids - visited_rooms),
+            unused_items=sorted({item.id for item in compiled.world.items} - session.used_items),
+            clues_missed=sorted(set(clue_ids) - session.discovered_clues),
+            mean_steps_per_room=session.steps_taken / max(1, len(visited_rooms)),
+            invalid_command_count=session.invalid_command_count,
+            wrong_submit_count=session.wrong_submit_count,
+        )
+
+    def _running_success_rate(self) -> float:
+        """Fraction of counted episodes that were won; 0.0 before the first one."""
+        return 0.0 if self.episode_count == 0 else self.success_count / self.episode_count
diff --git a/agents/master/graph.py b/agents/master/graph.py
new file mode 100644
index 0000000000000000000000000000000000000000..1609cdf2a4a91e37e326a01c0459b495a8123838
--- /dev/null
+++ b/agents/master/graph.py
@@ -0,0 +1,87 @@
+from __future__ import annotations
+
+from collections import defaultdict
+
+from .schema import DoorNode, Edge, NpcTrade, ReadableNode, UseEffect, WorldDefinition
+
+
+def readable_clue_mapping(world: WorldDefinition) -> dict[str, str]:
+    """Map the id of every readable node to the id of the clue it carries."""
+    clue_by_readable: dict[str, str] = {}
+    for node in world.nodes:
+        if isinstance(node, ReadableNode):
+            clue_by_readable[node.id] = node.clue_id
+    return clue_by_readable
+
+
+def clue_source_mapping(world: WorldDefinition) -> dict[str, str]:
+    """Map every clue id to the id of the node that reveals it.
+
+    Readable nodes are recorded first. NPC nodes with a truthy ``gives_clue_id``
+    are applied afterwards, so an NPC replaces a readable registered for the
+    same clue.
+    """
+    source_by_clue: dict[str, str] = {}
+    for node in world.nodes:
+        if isinstance(node, ReadableNode):
+            source_by_clue[node.clue_id] = node.id
+    # Second pass, so NPC sources win regardless of node order.
+    source_by_clue.update(
+        (node.gives_clue_id, node.id)
+        for node in world.nodes
+        if node.type == "npc" and node.gives_clue_id
+    )
+    return source_by_clue
+
+
+def npc_trade_mapping(world: WorldDefinition) -> dict[str, NpcTrade]:
+    """Build the trade offered by every NPC except the win-condition target.
+
+    An NPC without a required item gets an empty string as ``required_item_id``.
+    """
+    return {
+        node.id: NpcTrade(
+            required_item_id=node.requires_item_id or "",
+            gives_item_id=node.gives_item_id,
+            gives_clue_id=node.gives_clue_id,
+        )
+        for node in world.nodes
+        if node.type == "npc" and node.id != world.meta.win_condition.target_npc_id
+    }
+
+
+def use_effect_mapping(world: WorldDefinition) -> dict[str, UseEffect]:
+    """Describe what using an item on each item-gated readable or fixture does.
+
+    Readables are included only when they require an item; every fixture is
+    included. Other node types are ignored.
+    """
+    effects: dict[str, UseEffect] = {}
+    for node in world.nodes:
+        kind = node.type
+        if kind == "fixture":
+            effect = UseEffect(
+                required_item_id=node.requires_item_id,
+                reveals_item_id=node.reveals_item_id,
+                reveals_readable_id=node.reveals_readable_id,
+                consumes_item=node.consumes_item,
+            )
+        elif kind == "readable" and node.requires_item_id:
+            effect = UseEffect(
+                required_item_id=node.requires_item_id,
+                clue_id=node.clue_id,
+                consumes_item=node.consumes_item,
+            )
+        else:
+            continue
+        effects[node.id] = effect
+    return effects
+
+
+def recipe_mapping(world: WorldDefinition) -> dict[frozenset[str], str]:
+    """Map each recipe's set of input item ids to its output item id.
+
+    When two recipes share the same input set, the later one wins.
+    """
+    output_by_inputs: dict[frozenset[str], str] = {}
+    for recipe in world.recipes:
+        output_by_inputs[frozenset(recipe.input_item_ids)] = recipe.output_item_id
+    return output_by_inputs
+
+
+def produced_item_ids(world: WorldDefinition) -> set[str]:
+    """Return the ids of items that come into existence during play.
+
+    These are recipe outputs, items given by NPCs, and items revealed by
+    fixtures.
+    """
+    produced: set[str] = set()
+    for recipe in world.recipes:
+        produced.add(recipe.output_item_id)
+    for node in world.nodes:
+        if node.type == "npc":
+            granted = node.gives_item_id
+        elif node.type == "fixture":
+            granted = node.reveals_item_id
+        else:
+            continue
+        if granted:
+            produced.add(granted)
+    return produced
+
+
+def hidden_readable_ids(world: WorldDefinition) -> set[str]:
+    """Return the ids of readables that a fixture reveals."""
+    hidden: set[str] = set()
+    for node in world.nodes:
+        if node.type != "fixture":
+            continue
+        revealed = node.reveals_readable_id
+        if revealed:
+            hidden.add(revealed)
+    return hidden
+
+
+def door_room_mapping(world: WorldDefinition) -> dict[str, frozenset[str]]:
+    """Map each door id to the set of rooms joined by edges that reference it."""
+    rooms_by_door: dict[str, set[str]] = defaultdict(set)
+    for edge in world.edges:
+        door_id = edge.door_node_id
+        if not door_id:
+            continue
+        rooms_by_door[door_id].update((edge.from_node_id, edge.to_node_id))
+    return {door_id: frozenset(rooms) for door_id, rooms in rooms_by_door.items()}
+
+
+def edge_for_door(world: WorldDefinition, door_id: str) -> Edge | None:
+    """Return the first edge that references ``door_id``, or ``None`` if there is none."""
+    return next((edge for edge in world.edges if edge.door_node_id == door_id), None)
+
+
+def door_nodes(world: WorldDefinition) -> dict[str, DoorNode]:
+    """Index the world's door nodes by id."""
+    doors: dict[str, DoorNode] = {}
+    for node in world.nodes:
+        if isinstance(node, DoorNode):
+            doors[node.id] = node
+    return doors
diff --git a/agents/master/interface.py b/agents/master/interface.py
new file mode 100644
index 0000000000000000000000000000000000000000..f309ddf437b461a9887a064ef84a26f5e05ffb30
--- /dev/null
+++ b/agents/master/interface.py
@@ -0,0 +1,831 @@
+from __future__ import annotations
+
+import json
+import os
+import re
+from dataclasses import dataclass
+from pathlib import Path
+from typing import TYPE_CHECKING, Literal, Protocol
+
+from dotenv import load_dotenv
+from google import genai
+from google.genai import types
+from textworld.core import GameState
+
+from agents.hero.cli import parse_cli_command
+
+from .base import DMInterfaceError, SUPPORTED_DIRECTIONS
+
+if TYPE_CHECKING:
+    from .session import EpisodeSession
+
+
+# Model name used when GeminiInterfaceAdapter is constructed without an explicit ``model``.
+DEFAULT_GEMINI_MODEL = "gemini-2.5-flash-lite"
+# A line that opens with a ">" prompt marker and contains a "-= ... =-" header,
+# optionally followed directly by an "N/M" counter.
+_TEXTWORLD_PROMPT_LINE_RE = re.compile(r"^\s*>\s.*-\=\s.*=\-(?:\d+/\d+)?\s*$")
+# Any single one of the characters \ | $ _ /
+# (presumably used to spot ASCII-art banner lines - confirm at the use site).
+_TEXTWORLD_BANNER_CHAR_RE = re.compile(r"[\\|$_/]")
+# A whole line of the form "-= <label> =-"; the text between the markers is
+# captured in the "label" group.
+_TEXTWORLD_ROOM_HEADER_RE = re.compile(r"^\s*-\=\s*(?P<label>.+?)\s*\=-\s*$")
+# A line that begins (case-insensitively) with "score:", "moves:",
+# "available commands:" or "type 'help'".
+_TEXTWORLD_META_LINE_RE = re.compile(r"^\s*(?:score:|moves:|available commands:|type 'help')", re.IGNORECASE)
+
+
+class InterfaceAdapter(Protocol):
+    """Structural interface for translating player input and rendering game feedback."""
+
+    def translate_command(self, raw_command: str, session: EpisodeSession) -> str:
+        """Return the command string to run for the player's ``raw_command``."""
+        ...
+
+    def render_observation(self, feedback: str, state: GameState | None, session: EpisodeSession) -> str:
+        """Return the text to show the player for ``feedback``; ``state`` may be ``None``."""
+        ...
+
+
+class SimpleInterfaceAdapter:
+    """A deterministic parser for explicit non-LLM play."""
+
+    _ARTICLE_RE = re.compile(r"\b(the|a|an)\b", re.IGNORECASE)
+    # Two-operand commands: (command prefix, separator the player types,
+    # separator written into the canonical command).
+    _PAIR_COMMANDS = (
+        ("use ", "on", "on"),
+        ("give ", "to", "to"),
+        ("combine ", "with", "with"),
+        ("combine ", "and", "with"),
+    )
+
+    def translate_command(self, raw_command: str, session: EpisodeSession) -> str:
+        """Rewrite a player's command into the canonical command form.
+
+        Bare directions become ``go <direction>``; a few synonyms map to
+        ``look``, ``inventory``, ``wait``, ``submit`` and ``talk``. Two-operand
+        commands (use/give/combine) and the single-object verbs have articles
+        removed and whitespace collapsed. ``examine`` turns into ``read`` or
+        ``talk`` when the session resolves the object to a readable or an NPC.
+        Anything that is not recognized is returned lower-cased.
+
+        A two-operand command with a missing operand (for example
+        ``"use on door"``) is treated as unrecognized instead of raising.
+        """
+        command = raw_command.strip()
+        lowered = command.lower()
+        if lowered in SUPPORTED_DIRECTIONS:
+            return "go " + lowered
+        if lowered in {"look", "look around"}:
+            return "look"
+        if lowered in {"inventory", "check inventory", "show inventory"}:
+            return "inventory"
+        if lowered in {"wait", "pass"}:
+            return "wait"
+        if lowered.startswith("answer "):
+            return "submit " + command[7:].strip()
+        if lowered.startswith("say "):
+            return "submit " + command[4:].strip().strip("\"'")
+        if lowered.startswith("talk to "):
+            return "talk " + command[8:].strip()
+        if lowered.startswith("speak to "):
+            return "talk " + command[9:].strip()
+
+        for prefix, typed_separator, canonical_separator in self._PAIR_COMMANDS:
+            if not lowered.startswith(prefix) or f" {typed_separator} " not in lowered:
+                continue
+            operands = re.split(
+                rf"\s+{typed_separator}\s+",
+                command[len(prefix):].strip(),
+                maxsplit=1,
+                flags=re.IGNORECASE,
+            )
+            if len(operands) != 2:
+                # The separator directly follows the verb, so one operand is
+                # missing. Fall through to the generic handling below rather
+                # than failing on tuple unpacking.
+                continue
+            first, second = (self._normalize_object_text(operand) for operand in operands)
+            return f"{prefix}{first} {canonical_separator} {second}"
+
+        parts = command.split(maxsplit=1)
+        if len(parts) != 2:
+            return lowered
+
+        verb = parts[0].lower()
+        if verb not in {"read", "talk", "open", "take", "unlock", "examine"}:
+            return lowered
+
+        normalized = self._normalize_object_text(parts[1])
+        if verb == "examine":
+            if session.node_id_for_command_name(normalized, node_types={"readable"}):
+                return "read " + normalized
+            if session.node_id_for_command_name(normalized, node_types={"npc"}):
+                return "talk " + normalized
+
+        return verb + " " + normalized
+
+    def _normalize_object_text(self, text: str) -> str:
+        """Drop the articles "the", "a", "an", collapse whitespace, and lower-case."""
+        object_text = self._ARTICLE_RE.sub(" ", text)
+        return re.sub(r"\s+", " ", object_text).strip().lower()
+
+    def render_observation(self, feedback: str, state: GameState | None, session: EpisodeSession) -> str:
+        """Return ``feedback`` sanitized and then enriched for ``session``; ``state`` is unused."""
+        del state
+        return enrich_feedback_text(sanitize_feedback_text(feedback), session)
+
+
+class StrictCliInterfaceAdapter:
+    """A deterministic adapter for parser-style CLI commands."""
+
+    def translate_command(self, raw_command: str, session: EpisodeSession) -> str:
+        """Return the normalized form of a command accepted by ``parse_cli_command``.
+
+        Raises:
+            DMInterfaceError: If the parse result is invalid or carries no
+                normalized command; the parser's own error text is used when
+                it provides one.
+        """
+        del session
+        result = parse_cli_command(raw_command)
+        if result.valid and result.normalized_command is not None:
+            return result.normalized_command
+        raise DMInterfaceError(result.error or "Command does not match the strict CLI grammar.")
+
+    def render_observation(self, feedback: str, state: GameState | None, session: EpisodeSession) -> str:
+        """Return ``feedback`` sanitized and then enriched for ``session``; ``state`` is unused."""
+        del state
+        cleaned = sanitize_feedback_text(feedback)
+        return enrich_feedback_text(cleaned, session)
+
+
+@dataclass(frozen=True)
+class _TranslationGlossary:
+    """Frozen pair of lookup tables between canonical terms and their aliases."""
+
+    # canonical term -> alias (direction taken from the field name; confirm at the use site)
+    canonical_to_alias: dict[str, str]
+    # alias -> canonical term (presumably the inverse of ``canonical_to_alias`` - TODO confirm)
+    alias_to_canonical: dict[str, str]
+
+
+class GeminiInterfaceAdapter:
+    _ARTICLE_RE = re.compile(r"\b(the|a|an)\b", re.IGNORECASE)
+    _PARSER_SAFE_NAME_RE = re.compile(r"^[a-z0-9]+(?: [a-z0-9]+)*$")
+    _TRAILING_POLITENESS_RE = re.compile(r"(?:\s+(?:please|for me|thanks|thank you))+[.!?]*$", re.IGNORECASE)
+    _COMMAND_SYSTEM = (
+        "Translate the player's text into exactly one canonical dungeon command. "
+        "Return only the command and nothing else."
+    )
+    _OBSERVATION_SYSTEM = (
+        "Rewrite dungeon feedback in at most two short sentences. "
+        "Preserve facts exactly. Do not infer, solve, explain, or add implications."
+    )
+    _TRANSLATED_COMMAND_SYSTEM = (
+        "The player is using a corporate app metaphor layered over a fantasy dungeon. "
+        "Translate the player's text back into exactly one canonical dungeon command from the underlying fantasy world. "
+        "Return only the canonical command and nothing else."
+    )
+    _TRANSLATED_OBSERVATION_SYSTEM = (
+        "Rewrite the dungeon observation as a corporate app interface while preserving facts one-to-one. "
+        "Use the provided aliases exactly, keep directions unchanged, and do not add hints, solutions, or new mechanics."
+    )
+    _TRANSLATION_GLOSSARY_SYSTEM = (
+        "Create a one-to-one alias glossary that maps fantasy dungeon terms into a corporate app metaphor. "
+        "Return JSON only."
+    )
+
+    def __init__(
+        self,
+        api_key: str | None = None,
+        model: str = DEFAULT_GEMINI_MODEL,
+        narrate_observations: bool = False,
+        translation_mode: Literal["none", "corporate_app"] = "none",
+        max_admissible_commands: int = 18,
+    ) -> None:
+        """Store the adapter settings and create the Gemini client.
+
+        Args:
+            api_key: Explicit API key, passed on to ``_create_client``.
+            model: Name of the Gemini model to use.
+            narrate_observations: Whether observations may be rewritten by the
+                model.
+            translation_mode: ``"none"`` or ``"corporate_app"``.
+            max_admissible_commands: Upper bound on the admissible commands
+                listed in a command prompt.
+
+        Raises:
+            ValueError: If ``translation_mode`` is not a supported value.
+        """
+        supported_modes = {"none", "corporate_app"}
+        if translation_mode not in supported_modes:
+            raise ValueError(f"Unsupported Gemini translation mode: {translation_mode}")
+
+        self.model = model
+        self.narrate_observations = narrate_observations
+        self.translation_mode = translation_mode
+        self.max_admissible_commands = max_admissible_commands
+
+        # Both caches start empty for every adapter instance.
+        self._translation_glossary_cache: dict[str, _TranslationGlossary] = {}
+        self._translation_observation_cache: dict[tuple[str, str], str] = {}
+        self._client = self._create_client(api_key)
+
+    def translate_command(self, raw_command: str, session: EpisodeSession) -> str:
+        """Resolve free-form player text to one canonical dungeon command.
+
+        Three stages are tried and the first success is returned: the player's
+        text after local preprocessing, a direction command extracted from the
+        text, and finally a command generated by the Gemini model.
+
+        Raises:
+            DMInterfaceError: If the text is empty, or if the generated command
+                cannot be resolved.
+        """
+        player_text = raw_command.strip().lower()
+        if not player_text:
+            raise DMInterfaceError("Command must not be empty.")
+        admissible = set(session.available_commands())
+
+        # Stage 1: the player's own wording, normalized locally.
+        local_guess = self._normalize_generated_command(self._preprocess_player_text(player_text))
+        resolved = self._resolve_candidate_command(local_guess, session, admissible)
+        if resolved:
+            return resolved
+
+        # Stage 2: a direction mentioned in the text.
+        movement = self._extract_direction_command(player_text, admissible)
+        if movement is not None:
+            return movement
+
+        # Stage 3: ask the model and accept its answer only if it resolves.
+        prompt = self._command_prompt(raw_command, session, admissible)
+        if self._translation_enabled():
+            system_instruction = self._TRANSLATED_COMMAND_SYSTEM
+        else:
+            system_instruction = self._COMMAND_SYSTEM
+        generated = self._generate_command(
+            system_instruction=system_instruction,
+            prompt=prompt,
+            max_output_tokens=48,
+            temperature=0.1,
+        )
+        resolved = self._resolve_candidate_command(generated, session, admissible)
+        if resolved:
+            return resolved
+        raise DMInterfaceError(f"Gemini returned an invalid command: {generated or '<empty>'}")
+
+    def render_observation(self, feedback: str, state: GameState | None, session: EpisodeSession) -> str:
+        """Turn raw game feedback into the text shown to the player.
+
+        The feedback is sanitized and enriched first. If the sanitized text is
+        empty, the enriched text is returned unchanged. With translation
+        enabled, the enriched text is rewritten by the model and cached per
+        (translation cache key, enriched text). Without translation, the
+        enriched text is returned unless narration is on and the feedback need
+        not be preserved; in that case the model rewrites the sanitized text and
+        the result is enriched again.
+
+        Raises:
+            DMInterfaceError: If the model returns an empty observation.
+        """
+        sanitized = sanitize_feedback_text(feedback)
+        enriched = enrich_feedback_text(sanitized, session)
+        if not sanitized:
+            return enriched
+
+        if not self._translation_enabled():
+            if not self.narrate_observations or self._should_preserve_feedback(sanitized, state):
+                return enriched
+            narrated = self._generate_observation(
+                system_instruction=self._OBSERVATION_SYSTEM,
+                prompt=self._observation_prompt(sanitized, session),
+                max_output_tokens=80,
+                temperature=0.2,
+            )
+            if not narrated:
+                raise DMInterfaceError("Gemini returned an empty observation.")
+            return enrich_feedback_text(narrated, session)
+
+        cache = self._translation_observation_cache
+        cache_key = (self._translation_cache_key(session), enriched)
+        cached = cache.get(cache_key)
+        if cached is not None:
+            return cached
+        # Narrated translations get the smaller token budget.
+        token_budget = 120 if self.narrate_observations else 220
+        translated = self._generate_observation(
+            system_instruction=self._TRANSLATED_OBSERVATION_SYSTEM,
+            prompt=self._observation_prompt(enriched, session),
+            max_output_tokens=token_budget,
+            temperature=0.2,
+        )
+        if not translated:
+            raise DMInterfaceError("Gemini returned an empty translated observation.")
+        cache[cache_key] = translated
+        return translated
+
+    def _create_client(self, api_key: str | None) -> genai.Client:
+        """Build the Gemini client from the first available API key.
+
+        The repo-level ``.env`` file is loaded without overriding variables
+        that are already set. Keys are tried in order: the explicit argument,
+        ``GEMINI_API_KEY``, then ``GOOGLE_API_KEY``.
+
+        Raises:
+            DMInterfaceError: If no key is available.
+        """
+        load_dotenv(self._repo_root() / ".env", override=False)
+        for candidate in (api_key, os.getenv("GEMINI_API_KEY"), os.getenv("GOOGLE_API_KEY")):
+            if candidate:
+                return genai.Client(api_key=candidate)
+        raise DMInterfaceError("Missing GEMINI_API_KEY or GOOGLE_API_KEY.")
+
+    @staticmethod
+    def _repo_root() -> Path:
+        """Return the directory two levels above this module's directory."""
+        module_path = Path(__file__).resolve()
+        return module_path.parents[2]
+
+    def _command_prompt(self, raw_command: str, session: EpisodeSession, admissible: set[str]) -> str:
+        """Build the user prompt asking Gemini to pick a game command.
+
+        The prompt lists the current room, the admissible commands (sorted and
+        capped at ``max_admissible_commands``), nearby interactable objects
+        and the player's text. With translation enabled it is prefixed with
+        the alias => canonical glossary.
+        """
+        commands = sorted(admissible)[: self.max_admissible_commands]
+        interactables = self._interactables(session)
+        current_room = session.state.location or session.current_room_id
+        lines: list[str] = []
+        if self._translation_enabled():
+            glossary = self._translation_glossary(session)
+            lines.extend(
+                [
+                    "The player only sees the translated corporate-app interface.",
+                    "Map their request back to the underlying dungeon command.",
+                    "Treat rooms as apps/workspaces, NPCs as coworkers or reviewers, and items as files, tools, credentials, or tickets.",
+                    "Translated aliases (alias => canonical):",
+                    *[f"- {alias} => {canonical}" for alias, canonical in sorted(glossary.alias_to_canonical.items())],
+                ]
+            )
+        lines.extend(
+            [
+                "Use an exact visible command whenever possible.",
+                "Allowed verbs: go, open, unlock, take, read, use, combine, give, talk, submit, look, inventory, wait",
+                f"Room: {current_room}",
+                "Visible commands:",
+                *[f"- {command}" for command in commands],
+            ]
+        )
+        if interactables:
+            lines.append(f"Objects here: {', '.join(interactables)}")
+        lines.append("If the player is answering the guardian, use: submit <answer>")
+        lines.append("If no valid mapping exists, return INVALID")
+        # The player's text goes last, after all instructions and context.
+        lines.append(f"Player text: {raw_command.strip()}")
+        return "\n".join(lines)
+
+    def _observation_prompt(self, feedback: str, session: EpisodeSession) -> str:
+        """Build the user prompt for rewriting an observation.
+
+        With translation enabled the prompt carries the canonical => alias
+        glossary and asks for a faithful rewrite. Otherwise it asks for a
+        literal description that does not reveal what a clue means.
+        """
+        current_room = session.state.location or session.current_room_id
+        if self._translation_enabled():
+            glossary = self._translation_glossary(session)
+            lines = [
+                f"Canonical room: {current_room}",
+                "Use this exact alias glossary (canonical => alias):",
+                *[f"- {canonical} => {alias}" for canonical, alias in sorted(glossary.canonical_to_alias.items())],
+                "Preserve the same facts, object counts, and navigation affordances.",
+                "Keep any 'Visible here:' and 'Exits:' sections, but rewrite the entity names with the aliases above.",
+            ]
+            if self.narrate_observations:
+                lines.append("Keep the response compact.")
+            lines.append("Canonical observation:")
+            lines.append(feedback)
+            return "\n".join(lines)
+        # Non-translated narration prompt.
+        return (
+            f"Room: {current_room}\n"
+            "Describe only what the game text explicitly says.\n"
+            "Never reveal what a clue means or what answer it implies.\n"
+            f"Feedback: {feedback}"
+        )
+
+    def _translation_glossary_prompt(self, session: EpisodeSession) -> str:
+        """Build the prompt asking Gemini for one alias per translation term.
+
+        The prompt states the expected JSON shape and the alias rules, then
+        lists every ``(kind, source)`` pair from ``_translation_terms``.
+        """
+        lines = [
+            "Return JSON with shape: {\"aliases\": [{\"source\": \"...\", \"alias\": \"...\"}]}",
+            "Rules:",
+            "- Every alias must be unique.",
+            "- Use lowercase letters, numbers, and spaces only.",
+            "- Do not use articles like a, an, or the.",
+            "- Keep aliases short and parser-safe.",
+            "- Rooms should feel like apps, dashboards, workspaces, portals, or queues.",
+            "- NPCs should feel like coworkers, reviewers, owners, admins, or operators.",
+            "- Items should feel like files, tickets, tokens, credentials, tools, or documents.",
+            "- Preserve identity one-to-one. Do not merge multiple source terms into one alias.",
+            "Terms:",
+        ]
+        for kind, source in self._translation_terms(session):
+            lines.append(f"- {kind}: {source}")
+        return "\n".join(lines)
+
+    def _interactables(self, session: EpisodeSession) -> list[str]:
+        """List up to eight command names of interactable nodes in the room.
+
+        Only nodes parented to the current room that have a command name and
+        an interactable type are included; the result is sorted.
+        """
+        interactive_kinds = {"container", "readable", "npc", "door", "fixture"}
+        lookup = session.compiled.node_command_names
+        found = [
+            label
+            for node in session.compiled.world.nodes
+            if getattr(node, "parent_id", None) == session.current_room_id
+            and (label := lookup.get(node.id)) is not None
+            and node.type in interactive_kinds
+        ]
+        found.sort()
+        return found[:8]
+
+    def _generate_response(
+        self,
+        *,
+        system_instruction: str,
+        prompt: str,
+        max_output_tokens: int,
+        temperature: float,
+    ) -> str:
+        """Send one generation request to the configured model.
+
+        The system instruction and the prompt are joined into a single
+        ``contents`` string separated by a blank line. Returns the response's
+        ``text`` attribute, or ``""`` when it is missing or falsy.
+        """
+        response = self._client.models.generate_content(
+            model=self.model,
+            contents=f"{system_instruction}\n\n{prompt}",
+            config=types.GenerateContentConfig(
+                temperature=temperature,
+                max_output_tokens=max_output_tokens,
+                candidate_count=1,
+            ),
+        )
+        return getattr(response, "text", "") or ""
+
+    def _generate_command(
+        self,
+        *,
+        system_instruction: str,
+        prompt: str,
+        max_output_tokens: int,
+        temperature: float,
+    ) -> str:
+        """Request a completion and reduce it to a single command line."""
+        raw_reply = self._generate_response(
+            system_instruction=system_instruction,
+            prompt=prompt,
+            max_output_tokens=max_output_tokens,
+            temperature=temperature,
+        )
+        return self._sanitize_command_response(raw_reply)
+
+    def _generate_observation(
+        self,
+        *,
+        system_instruction: str,
+        prompt: str,
+        max_output_tokens: int,
+        temperature: float,
+    ) -> str:
+        """Request a completion and clean it up as multi-line observation text."""
+        raw_reply = self._generate_response(
+            system_instruction=system_instruction,
+            prompt=prompt,
+            max_output_tokens=max_output_tokens,
+            temperature=temperature,
+        )
+        return self._sanitize_multiline_response(raw_reply)
+
+    def _generate_json(
+        self,
+        *,
+        system_instruction: str,
+        prompt: str,
+        max_output_tokens: int,
+        temperature: float,
+    ) -> str:
+        """Request a completion and strip any Markdown fence around the JSON."""
+        raw_reply = self._generate_response(
+            system_instruction=system_instruction,
+            prompt=prompt,
+            max_output_tokens=max_output_tokens,
+            temperature=temperature,
+        )
+        return self._sanitize_json_response(raw_reply)
+
+    def _resolve_candidate_command(
+        self,
+        candidate: str,
+        session: EpisodeSession,
+        admissible: set[str],
+    ) -> str | None:
+        """Return the first variant of ``candidate`` that is usable, else None.
+
+        A variant is usable if it maps onto an admissible command, or if it is
+        a well-formed canonical command without conversational filler. Empty
+        variants and the literal ``"invalid"`` are skipped.
+        """
+        for option in self._candidate_variants(candidate, session):
+            if not option or option == "invalid":
+                continue
+            matched = self._resolve_admissible_command(option, admissible)
+            if matched:
+                return matched
+            if self._allow_unlisted_canonical(option):
+                return option
+        return None
+
+    def _candidate_variants(self, candidate: str, session: EpisodeSession) -> list[str]:
+        """Return the normalized command variants to try, most preferred first.
+
+        With translation enabled, the alias-canonicalized form comes first
+        when it differs from the plain normalized form.
+        """
+        primary = self._normalize_generated_command(candidate)
+        if not self._translation_enabled():
+            return [primary]
+        canonical = self._canonicalize_translated_command(primary, session)
+        if canonical == primary:
+            return [primary]
+        return [canonical, primary]
+
+    def _canonicalize_translated_command(self, command: str, session: EpisodeSession) -> str:
+        """Replace translated aliases in ``command`` with canonical names.
+
+        All aliases are substituted in a single left-to-right pass, so text
+        inserted for one alias is never rewritten again by another alias that
+        happens to appear inside a canonical name. Where several aliases could
+        match at the same position the longest one wins. An alias only matches
+        when it is not adjacent to another lowercase letter or digit.
+
+        Returns:
+            The rewritten command, passed through
+            ``_normalize_generated_command``.
+        """
+        glossary = self._translation_glossary(session)
+        alias_map = glossary.alias_to_canonical
+        # Longest first so regex alternation prefers "review queue" over
+        # "review"; an empty alias would match everywhere, so drop it.
+        ordered = sorted((alias for alias in alias_map if alias), key=lambda alias: (-len(alias), alias))
+        if not ordered:
+            return self._normalize_generated_command(command)
+        pattern = re.compile(
+            r"(?<![a-z0-9])(?:" + "|".join(re.escape(alias) for alias in ordered) + r")(?![a-z0-9])"
+        )
+        # A callable replacement inserts the canonical text literally, with
+        # no backslash or group-reference processing.
+        rewritten = pattern.sub(lambda match: alias_map[match.group(0)], command)
+        return self._normalize_generated_command(rewritten)
+
+    def _translation_glossary(self, session: EpisodeSession) -> _TranslationGlossary:
+        """Return the alias glossary for ``session``, generating it on first use.
+
+        Glossaries are cached under ``_translation_cache_key(session)``. On a
+        cache miss, Gemini is asked for a JSON glossary, which is then parsed
+        against the session's translation terms.
+
+        Raises:
+            DMInterfaceError: Propagated from ``_parse_translation_glossary``
+                when the payload is not valid JSON or contains no aliases.
+        """
+        cache_key = self._translation_cache_key(session)
+        cached = self._translation_glossary_cache.get(cache_key)
+        if cached is not None:
+            return cached
+        terms = self._translation_terms(session)
+        generated = self._generate_json(
+            system_instruction=self._TRANSLATION_GLOSSARY_SYSTEM,
+            prompt=self._translation_glossary_prompt(session),
+            max_output_tokens=700,
+            temperature=0.2,
+        )
+        glossary = self._parse_translation_glossary(generated, terms)
+        self._translation_glossary_cache[cache_key] = glossary
+        return glossary
+
+    def _parse_translation_glossary(
+        self,
+        payload: str,
+        terms: list[tuple[str, str]],
+    ) -> _TranslationGlossary:
+        """Parse Gemini's glossary JSON into a two-way alias mapping.
+
+        Accepted payload shapes (top level must be a JSON object):
+        ``{"aliases": [{"source": ..., "alias": ...}, ...]}``,
+        ``{"aliases": {source: alias, ...}}``, or a bare
+        ``{source: alias, ...}`` object.
+
+        Every entry in ``terms`` receives an alias. Missing, unsafe or
+        duplicate aliases are replaced via ``_dedupe_alias``.
+
+        Raises:
+            DMInterfaceError: If the payload is not valid JSON or yields no
+                alias entries.
+        """
+        try:
+            data = json.loads(payload)
+        except json.JSONDecodeError as exc:
+            raise DMInterfaceError("Gemini returned invalid translation glossary JSON.") from exc
+
+        raw_aliases: dict[str, str] = {}
+        if isinstance(data, dict):
+            # Without an "aliases" key, treat the whole object as the mapping.
+            aliases = data.get("aliases", data)
+            if isinstance(aliases, dict):
+                raw_aliases = {
+                    self._normalize_object_text(str(source)): str(alias)
+                    for source, alias in aliases.items()
+                    if isinstance(source, str)
+                }
+            elif isinstance(aliases, list):
+                for entry in aliases:
+                    if not isinstance(entry, dict):
+                        continue
+                    source = entry.get("source")
+                    alias = entry.get("alias")
+                    if isinstance(source, str) and isinstance(alias, str):
+                        raw_aliases[self._normalize_object_text(source)] = alias
+        if not raw_aliases:
+            raise DMInterfaceError("Gemini returned an empty translation glossary.")
+
+        canonical_to_alias: dict[str, str] = {}
+        alias_to_canonical: dict[str, str] = {}
+        used_aliases: set[str] = set()
+        # Iterate the expected terms, not the payload, so every term gets
+        # exactly one alias and unexpected payload entries are ignored.
+        for _kind, source in terms:
+            requested_alias = self._normalize_parser_safe_alias(raw_aliases.get(source, ""))
+            alias = self._dedupe_alias(source, requested_alias, used_aliases)
+            canonical_to_alias[source] = alias
+            alias_to_canonical[alias] = source
+            used_aliases.add(alias)
+        return _TranslationGlossary(
+            canonical_to_alias=canonical_to_alias,
+            alias_to_canonical=alias_to_canonical,
+        )
+
+    def _translation_terms(self, session: EpisodeSession) -> list[tuple[str, str]]:
+        """Collect the sorted ``(kind, name)`` pairs that need an alias.
+
+        Nodes come first (location and junction nodes are reported as
+        ``"room"``), then items, then the normalized correct answer. A name
+        that has already been collected is not repeated.
+        """
+        compiled = session.compiled
+        collected: list[tuple[str, str]] = []
+        claimed: set[str] = set()
+
+        for node in compiled.world.nodes:
+            label = compiled.node_command_names.get(node.id)
+            if label is not None and label not in claimed:
+                claimed.add(label)
+                collected.append(("room" if node.type in {"location", "junction"} else node.type, label))
+
+        for item in compiled.world.items:
+            label = compiled.item_command_names.get(item.id)
+            if label is not None and label not in claimed:
+                claimed.add(label)
+                collected.append(("item", label))
+
+        answer = compiled.correct_answer_normalized
+        if answer and answer not in claimed:
+            collected.append(("answer", answer))
+        return sorted(collected)
+
+    def _dedupe_alias(self, source: str, alias: str, used_aliases: set[str]) -> str:
+        """Pick an unused alias for ``source``.
+
+        Preference order: the requested ``alias``, then ``source`` itself,
+        then ``"<source> 2"``, ``"<source> 3"``, ... until one is both unused
+        and matches ``_PARSER_SAFE_NAME_RE``.
+        """
+        if alias and alias not in used_aliases:
+            return alias
+        if source and source not in used_aliases:
+            return source
+        counter = 1
+        while True:
+            counter += 1
+            numbered = f"{source} {counter}"
+            if numbered in used_aliases:
+                continue
+            if self._PARSER_SAFE_NAME_RE.fullmatch(numbered):
+                return numbered
+
+    def _normalize_parser_safe_alias(self, value: str) -> str:
+        """Normalize ``value``; return ``""`` unless the result is parser-safe."""
+        alias = self._normalize_object_text(value)
+        if alias and self._PARSER_SAFE_NAME_RE.fullmatch(alias):
+            return alias
+        return ""
+
+    def _translation_cache_key(self, session: EpisodeSession) -> str:
+        """Return ``"<episode id>:<game file>"`` for keying translation caches.
+
+        A missing or falsy episode id is replaced by ``"session"``.
+        """
+        compiled = session.compiled
+        episode_id = getattr(compiled, "episode_id", "") or "session"
+        return "{}:{}".format(episode_id, compiled.game_file)
+
+    def _translation_enabled(self) -> bool:
+        """Report whether a translation mode other than ``"none"`` is set."""
+        return not self.translation_mode == "none"
+
+    @classmethod
+    def _preprocess_player_text(cls, text: str) -> str:
+        """Rewrite casual player phrasing towards the canonical verb set.
+
+        Steps, in order: collapse whitespace and lowercase; map common
+        phrasings ("pick up", "grab", "using", "talk to", "speak to") onto
+        parser verbs; repeatedly strip leading politeness prefixes; remove
+        whatever ``_TRAILING_POLITENESS_RE`` matches.
+
+        Phrase rewrites only fire at the start of a word, so words that merely
+        end in a phrase (for example "housing " ending in "using ") are left
+        intact.
+        """
+        normalized = re.sub(r"\s+", " ", text.strip().lower())
+        replacements = (
+            ("pick up ", "take "),
+            ("grab ", "take "),
+            ("using ", "with "),
+            ("talk to ", "talk "),
+            ("speak to ", "talk "),
+        )
+        for source, target in replacements:
+            # \b anchors the phrase on a word start; a plain str.replace would
+            # also rewrite the tail of a longer word.
+            normalized = re.sub(rf"\b{re.escape(source)}", target, normalized)
+
+        prefixes = (
+            "please ",
+            "please, ",
+            "can you ",
+            "could you ",
+            "would you ",
+            "will you ",
+            "go ahead and ",
+            "i want to ",
+            "i'd like to ",
+            "try to ",
+        )
+        # Prefixes can be stacked ("could you please ..."), so keep peeling
+        # until a full pass removes nothing.
+        stripped = True
+        while stripped:
+            stripped = False
+            for prefix in prefixes:
+                if normalized.startswith(prefix):
+                    normalized = normalized[len(prefix) :].strip()
+                    stripped = True
+
+        normalized = cls._TRAILING_POLITENESS_RE.sub("", normalized).strip()
+        return normalized
+
+    @staticmethod
+    def _extract_direction_command(text: str, admissible: set[str]) -> str | None:
+        """Return ``"go <direction>"`` for an unambiguous movement request.
+
+        The text must mention exactly one supported direction and a movement
+        verb, and the resulting command must be admissible; otherwise the
+        result is ``None``.
+        """
+        mentioned = [name for name in SUPPORTED_DIRECTIONS if re.search(rf"\b{name}\b", text)]
+        if len(mentioned) != 1:
+            return None
+        has_movement_verb = re.search(r"\b(go|head|move|walk|run|travel|enter|step)\b", text)
+        if not has_movement_verb:
+            return None
+        command = f"go {mentioned[0]}"
+        if command in admissible:
+            return command
+        return None
+
+    @staticmethod
+    def _allow_unlisted_canonical(command: str) -> bool:
+        """Accept a well-formed canonical command that has no chatty filler."""
+        adapter = GeminiInterfaceAdapter
+        if not adapter._is_canonical_command(command):
+            return False
+        return not adapter._contains_conversational_fluff(command)
+
+    @staticmethod
+    def _contains_conversational_fluff(command: str) -> bool:
+        """Report whether ``command`` contains a politeness or request phrase."""
+        fluff = re.search(
+            r"\b(for me|please|thanks|thank you|could you|can you|would you|will you)\b",
+            command,
+        )
+        return fluff is not None
+
+    @staticmethod
+    def _normalize_generated_command(text: str) -> str:
+        """Normalize free-form command text into the canonical command grammar.
+
+        Whitespace is collapsed, the text is lowercased, a leading
+        ``command: `` / ``response: `` label and trailing ``.!?`` are removed,
+        and then the first matching rule is applied:
+
+        * a bare direction becomes ``go <direction>``
+        * ``talk to X`` / ``speak to X`` become ``talk X``
+        * ``answer X`` / ``say X`` become ``submit X``
+        * ``combine A and B`` / ``combine A with B`` become ``combine A with B``
+        * ``unlock A with B``, ``use A on B`` and ``give A to B`` keep their
+          shape with articles removed from both objects
+        * ``open|read|talk|take|examine X`` keep their verb with articles
+          removed from the object
+
+        A two-object form is recognized only when its joining word follows a
+        first object. Input such as ``"use on door"`` or ``"unlock with key"``
+        is returned without that rewrite instead of raising. Results never
+        carry trailing whitespace.
+        """
+        clean = GeminiInterfaceAdapter._normalize_object_text
+        normalized = re.sub(r"\s+", " ", text.strip().lower())
+        normalized = normalized.removeprefix("command: ").removeprefix("response: ").strip()
+        # Dropping punctuation can expose a space ("go north ." -> "go north ").
+        normalized = normalized.rstrip(".!?").rstrip()
+        if normalized in SUPPORTED_DIRECTIONS:
+            return "go " + normalized
+        if normalized.startswith("talk to "):
+            return ("talk " + clean(normalized[8:])).strip()
+        if normalized.startswith("speak to "):
+            return ("talk " + clean(normalized[9:])).strip()
+        if normalized.startswith("answer "):
+            return ("submit " + normalized[7:].strip()).strip()
+        if normalized.startswith("say "):
+            return ("submit " + normalized[4:].strip().strip("\"'")).strip()
+
+        # (verb prefix, joiner accepted from the player, joiner emitted).
+        # Order matters: "combine A and B" is tried before "combine A with B".
+        two_object_forms = (
+            ("combine ", " and ", " with "),
+            ("unlock ", " with ", " with "),
+            ("use ", " on ", " on "),
+            ("give ", " to ", " to "),
+            ("combine ", " with ", " with "),
+        )
+        for verb, joiner, canonical_joiner in two_object_forms:
+            if not normalized.startswith(verb):
+                continue
+            # Look for the joiner in the text after the verb only: the space
+            # that ends the verb can otherwise form a false match, as in
+            # "use on door".
+            first, found, second = normalized[len(verb) :].partition(joiner)
+            if found:
+                return verb + clean(first) + canonical_joiner + clean(second)
+
+        if normalized.startswith(("open ", "read ", "talk ", "take ", "examine ")):
+            verb, obj = normalized.split(" ", 1)
+            # The object may reduce to nothing ("open the"); strip so the
+            # result is the bare verb rather than "open ".
+            return (verb + " " + clean(obj)).strip()
+        return normalized
+
+    @staticmethod
+    def _normalize_object_text(text: str) -> str:
+        """Replace ``_ARTICLE_RE`` matches with spaces, collapse whitespace, lowercase."""
+        without_articles = GeminiInterfaceAdapter._ARTICLE_RE.sub(" ", text)
+        collapsed = re.sub(r"\s+", " ", without_articles)
+        return collapsed.strip().lower()
+
+    @staticmethod
+    def _is_canonical_command(command: str) -> bool:
+        """Report whether ``command`` is well-formed in the canonical grammar.
+
+        Accepted forms:
+
+        * ``look``, ``inventory``, ``wait``
+        * ``go <supported direction>``
+        * ``open|read|talk|submit|take <object>``
+        * ``use <a> on <b>``, ``combine <a> with <b>``, ``give <a> to <b>``,
+          ``unlock <a> with <b>``
+
+        Every object must be non-blank. Malformed input such as ``"open "``
+        or ``"unlock with key"`` returns ``False`` rather than raising.
+        """
+        if command in {"look", "inventory", "wait"}:
+            return True
+        if command.startswith("go "):
+            return command[3:] in SUPPORTED_DIRECTIONS
+
+        verb, _, rest = command.partition(" ")
+        if verb in {"open", "read", "talk", "submit", "take"}:
+            return bool(rest.strip())
+
+        joiner = {
+            "use": " on ",
+            "combine": " with ",
+            "give": " to ",
+            "unlock": " with ",
+        }.get(verb)
+        if joiner is None:
+            return False
+        # Search the text after the verb only, so the space that ends the
+        # verb cannot stand in for a missing first object.
+        first, found, second = rest.partition(joiner)
+        return bool(found and first.strip() and second.strip())
+
+    @staticmethod
+    def _sanitize_command_response(text: str) -> str:
+        """Reduce a model reply to a single lowercase command line.
+
+        Removes a Markdown fence opener with a ``json``/``text`` language tag,
+        surrounding backticks and quotes, keeps only the first line, drops a
+        leading ``command:`` / ``response:`` label, and collapses whitespace.
+        Returns ``""`` when nothing is left.
+        """
+        cleaned = text.strip()
+        # Without this, "```text\ngo north\n```" would yield "text" as the
+        # command. Only a tag alone on the opening line is removed, so a
+        # single-line "```look```" still works.
+        cleaned = re.sub(r"^```(?:json|text)[ \t]*\r?\n", "", cleaned, flags=re.IGNORECASE)
+        cleaned = cleaned.strip("`").strip().strip("\"'")
+        if not cleaned:
+            return ""
+        first_line = cleaned.splitlines()[0].strip()
+        label, colon, remainder = first_line.partition(":")
+        if colon and label.lower() in {"command", "response"}:
+            first_line = remainder.strip()
+        return re.sub(r"\s+", " ", first_line).strip().lower()
+
+    @staticmethod
+    def _sanitize_multiline_response(text: str) -> str:
+        """Clean a multi-line model reply while keeping its paragraph breaks.
+
+        Strips a surrounding Markdown fence, trims each line, collapses runs
+        of blank lines to one, removes a leading ``observation:`` label from
+        any line, and strips surrounding quotes from the final text.
+        """
+        body = GeminiInterfaceAdapter._sanitize_json_response(text)
+        if not body:
+            return ""
+        kept: list[str] = []
+        previous_blank = False
+        for raw in body.splitlines():
+            current = raw.strip()
+            if not current:
+                # Keep only the first blank line of a run.
+                if not previous_blank:
+                    kept.append("")
+                previous_blank = True
+                continue
+            previous_blank = False
+            label, colon, remainder = current.partition(":")
+            if colon and label.lower() == "observation":
+                current = remainder.strip()
+            kept.append(current)
+        return "\n".join(kept).strip().strip("\"'")
+
+    @staticmethod
+    def _sanitize_json_response(text: str) -> str:
+        """Strip whitespace and an enclosing Markdown code fence, if present.
+
+        An opening fence may carry a ``json`` or ``text`` language tag.
+        """
+        trimmed = text.strip()
+        if not trimmed.startswith("```"):
+            return trimmed.strip()
+        without_opener = re.sub(r"^```(?:json|text)?\s*", "", trimmed)
+        without_closer = re.sub(r"\s*```$", "", without_opener)
+        return without_closer.strip()
+
+    @staticmethod
+    def _should_preserve_feedback(feedback: str, state: GameState | None) -> bool:
+        """Decide whether feedback must be shown verbatim instead of narrated.
+
+        True when the feedback contains a quote character, or when the last
+        command recorded on ``state`` starts with ``read``.
+        """
+        if any(mark in feedback for mark in ('"', "'")):
+            return True
+        if state is None:
+            return False
+        return (state.last_command or "").startswith("read")
+
+    @staticmethod
+    def _resolve_admissible_command(candidate: str, admissible: set[str]) -> str | None:
+        """Map ``candidate`` onto an admissible command, or return ``None``.
+
+        An exact match wins. Otherwise an admissible command matches when it
+        has the same verb and contains every content word of the candidate
+        (ignoring "from", "with", "on", "to"). The match with the fewest
+        content words is returned, ties broken alphabetically.
+        """
+        if candidate in admissible:
+            return candidate
+        verb, space, remainder = candidate.partition(" ")
+        if not space:
+            return None
+
+        fillers = {"from", "with", "on", "to"}
+
+        def content_words(phrase: str) -> list[str]:
+            return [word for word in re.split(r"\s+", phrase) if word and word not in fillers]
+
+        wanted = content_words(remainder)
+        if not wanted:
+            return None
+
+        verb_prefix = verb + " "
+        ranked: list[tuple[int, str]] = []
+        for option in admissible:
+            if not option.startswith(verb_prefix):
+                continue
+            available = content_words(option[len(verb_prefix) :])
+            if all(word in available for word in wanted):
+                ranked.append((len(available), option))
+        return min(ranked)[1] if ranked else None
+
+
+def sanitize_feedback_text(feedback: str) -> str:
+    """Strip engine noise from raw game feedback.
+
+    Works in three passes: filter and rewrite individual lines, drop leading
+    blank lines, then collapse runs of blank lines to one. Room headers are
+    rewritten as ``Location: <label>``.
+    """
+    lines = feedback.replace("\r\n", "\n").splitlines()
+    cleaned_lines: list[str] = []
+    # Pass 1: drop prompt, meta and banner lines; keep everything else.
+    for raw_line in lines:
+        line = raw_line.rstrip()
+        stripped = line.strip()
+        if not stripped:
+            cleaned_lines.append("")
+            continue
+        if _TEXTWORLD_PROMPT_LINE_RE.match(line):
+            continue
+        if stripped.startswith(">"):
+            continue
+        if _TEXTWORLD_META_LINE_RE.match(stripped):
+            continue
+        room_match = _TEXTWORLD_ROOM_HEADER_RE.match(stripped)
+        if room_match:
+            cleaned_lines.append(f"Location: {room_match.group('label').strip()}")
+            continue
+        if _is_probable_banner_line(stripped):
+            continue
+        cleaned_lines.append(stripped)
+
+    # Pass 2: start from the first non-blank line that is not a banner.
+    start_index = 0
+    for index, line in enumerate(cleaned_lines):
+        stripped = line.strip()
+        if not stripped:
+            continue
+        if stripped.startswith("Explore ") or stripped.startswith("Location: ") or not _is_probable_banner_line(stripped):
+            start_index = index
+            break
+    useful_lines = cleaned_lines[start_index:]
+
+    # Pass 3: allow at most one consecutive blank line.
+    collapsed: list[str] = []
+    blank_run = 0
+    for line in useful_lines:
+        stripped = line.strip()
+        if not stripped:
+            blank_run += 1
+            if blank_run <= 1:
+                collapsed.append("")
+            continue
+        blank_run = 0
+        collapsed.append(stripped)
+    return "\n".join(collapsed).strip()
+
+
+def enrich_feedback_text(feedback: str, session: EpisodeSession) -> str:
+    """Append the session's context lines to ``feedback``.
+
+    Context lines that already occur in the feedback text are not repeated.
+    Sections are separated by a blank line.
+    """
+    extras = _observation_context_lines(session)
+    base = feedback.strip()
+    if not extras:
+        return base
+    sections = [base] if base else []
+    sections.extend(line for line in extras if line not in base)
+    return "\n\n".join(sections).strip()
+
+
+def _observation_context_lines(session: EpisodeSession) -> list[str]:
+    """Build the "Visible here:" and "Exits:" lines for the current room.
+
+    A line is omitted when it would be empty. Exits are taken from the
+    available ``go <direction>`` commands and sorted.
+    """
+    visible = _visible_entities(session)
+    exits = [command[3:] for command in session.available_commands() if command.startswith("go ")]
+    exits.sort()
+    labelled = (("Visible here: ", visible), ("Exits: ", exits))
+    return [label + ", ".join(values) for label, values in labelled if values]
+
+
+def _visible_entities(session: EpisodeSession) -> list[str]:
+    """List the command names of everything visible in the current room.
+
+    Order: nodes parented to the room (readables only once revealed), doors
+    on edges leaving the room, then items located in the room. Each name
+    appears once, at its first occurrence.
+    """
+    compiled = session.compiled
+
+    def candidate_names():
+        for node in compiled.world.nodes:
+            if getattr(node, "parent_id", None) != session.current_room_id:
+                continue
+            if node.type == "readable" and node.id not in session.revealed_readables:
+                continue
+            yield compiled.node_command_names.get(node.id)
+        for edge in compiled.world.edges:
+            if edge.from_node_id != session.current_room_id or not edge.door_node_id:
+                continue
+            yield compiled.node_command_names.get(edge.door_node_id)
+        for item in compiled.world.items:
+            if session.item_locations.get(item.id) != session.current_room_id:
+                continue
+            yield compiled.item_command_names.get(item.id)
+
+    # A dict keeps insertion order, giving first-seen de-duplication.
+    ordered: dict[str, None] = {}
+    for name in candidate_names():
+        if name:
+            ordered.setdefault(name, None)
+    return list(ordered)
+
+
+def _is_probable_banner_line(line: str) -> bool:
+    """Guess whether ``line`` is banner art rather than game text.
+
+    Lines shorter than 12 characters, or starting with ``Explore `` or
+    ``Location: ``, are never banners. Otherwise a line is a banner when it
+    has at least ``max(4, len(line) // 6)`` banner characters.
+    """
+    if len(line) < 12 or line.startswith(("Explore ", "Location: ")):
+        return False
+    threshold = max(4, len(line) // 6)
+    return len(_TEXTWORLD_BANNER_CHAR_RE.findall(line)) >= threshold
diff --git a/agents/master/logic.py b/agents/master/logic.py
new file mode 100644
index 0000000000000000000000000000000000000000..98e2bc87b83d8198b7703390126a991a768e808e
--- /dev/null
+++ b/agents/master/logic.py
@@ -0,0 +1,92 @@
+from __future__ import annotations
+
+import json
+import shutil
+import textwrap
+from pathlib import Path
+
+import textworld
+from textworld.core import EnvInfos
+from textworld.generator.data import LOGIC_DATA_PATH, TEXT_GRAMMARS_PATH
+
+from .base import (
+    CUSTOM_GRAMMAR_DIR,
+    CUSTOM_LOGIC_DIR,
+    normalize_answer_text,
+    suppress_unsupported_game_warning,
+)
+from .schema import WorldDefinition
+
+
+def build_logic_dir(artifacts_dir: Path, world: WorldDefinition) -> Path:
+    """Assemble the logic knowledge-base directory under ``artifacts_dir``.
+
+    Built-in ``*.twl`` files are copied unless a custom file of the same name
+    exists; all custom files are then copied, and the world's submission
+    overlay is written last. Returns the ``kb_logic`` directory.
+    """
+    logic_dir = artifacts_dir / "kb_logic"
+    logic_dir.mkdir(parents=True, exist_ok=True)
+
+    custom_files = list(CUSTOM_LOGIC_DIR.glob("*.twl"))
+    shadowed = {path.name for path in custom_files}
+    for stock in Path(LOGIC_DATA_PATH).glob("*.twl"):
+        if stock.name in shadowed:
+            continue
+        shutil.copy(stock, logic_dir / stock.name)
+    for custom in custom_files:
+        shutil.copy(custom, logic_dir / custom.name)
+
+    overlay_path = logic_dir / "world_submit_overlay.twl"
+    overlay_path.write_text(submission_overlay(world), encoding="utf-8")
+    return logic_dir
+
+
+def build_grammar_dir(artifacts_dir: Path) -> Path:
+    """Assemble the grammar knowledge-base directory under ``artifacts_dir``.
+
+    Built-in ``*.twg`` files are copied unless a custom file of the same name
+    exists; all custom files are then copied. Returns the ``kb_grammar``
+    directory.
+    """
+    grammar_dir = artifacts_dir / "kb_grammar"
+    grammar_dir.mkdir(parents=True, exist_ok=True)
+
+    custom_files = list(CUSTOM_GRAMMAR_DIR.glob("*.twg"))
+    shadowed = {path.name for path in custom_files}
+    for stock in Path(TEXT_GRAMMARS_PATH).glob("*.twg"):
+        if stock.name in shadowed:
+            continue
+        shutil.copy(stock, grammar_dir / stock.name)
+    for custom in custom_files:
+        shutil.copy(custom, grammar_dir / custom.name)
+    return grammar_dir
+
+
+def submit_command_text(world: WorldDefinition) -> str:
+    """Return the ``submit <answer>`` command for the world's win condition."""
+    win_condition = world.meta.win_condition
+    normalized_answer = normalize_answer_text(win_condition.answer_string)
+    return "submit " + normalized_answer
+
+
+def submission_overlay(world: WorldDefinition) -> str:
+    """Render the logic overlay that defines the ``submit/final`` rule.
+
+    The world's submit command text is embedded in the template three times:
+    as the command string, in the ``Understand`` line, and in the
+    command-match check.
+    """
+    # Escape double quotes because the answer is placed inside quoted strings
+    # in the template below.
+    answer = submit_command_text(world).replace('"', '\\"')
+    return textwrap.dedent(
+        f'''
+        type submission {{
+            rules {{
+                submit/final :: $at(P, r) & $at(npc, r) & $guardian(npc) & $consulted(npc) & $correct(answer, npc) -> solved(answer);
+            }}
+            reverse_rules {{
+                submit/final :: submit/final;
+            }}
+            inform7 {{
+                commands {{
+                    submit/final :: "{answer}" :: "taking inventory";
+                }}
+                code :: """
+                    Understand "{answer}" as taking inventory.
+                    After taking inventory:
+                        if the player's command matches the text "{answer}":
+                            repeat with candidate running through answer-likes:
+                                now candidate is solved;
+                """;
+            }}
+        }}
+        '''
+    ).strip() + "\n"
+
+
+def write_artifacts(artifacts_dir: Path, world: WorldDefinition, walkthrough_commands: list[str]) -> None:
+    """Write the normalized world definition and the walkthrough as JSON files."""
+    outputs = {
+        "world_definition.normalized.json": world.model_dump_json(indent=2),
+        "walkthrough.json": json.dumps(walkthrough_commands, indent=2),
+    }
+    for filename, content in outputs.items():
+        (artifacts_dir / filename).write_text(content, encoding="utf-8")
+
+
+def solver_policy(game_file: str) -> list[str]:
+    """Return the solution command list reported for ``game_file``.
+
+    Starts the game with policy commands and the ``walkthrough`` extra
+    requested, resets it once, and closes the environment. The result is the
+    state's policy commands, falling back to ``extra.walkthrough`` and then
+    to an empty list.
+    """
+    with suppress_unsupported_game_warning():
+        env = textworld.start(game_file, request_infos=EnvInfos(policy_commands=True, extras=["walkthrough"]))
+        try:
+            state = env.reset()
+        finally:
+            # Close only if the environment exposes a callable close().
+            close = getattr(env, "close", None)
+            if callable(close):
+                close()
+    return list(state.policy_commands or state.get("extra.walkthrough") or [])
diff --git a/agents/master/main.py b/agents/master/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6417b26884ae621fd4f048d213169372db477e7
--- /dev/null
+++ b/agents/master/main.py
@@ -0,0 +1,72 @@
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+from .base import DMCompileError, DMInterfaceError
+from .env import DMEnvironment
+from .interface import DEFAULT_GEMINI_MODEL, GeminiInterfaceAdapter, SimpleInterfaceAdapter
+from .play import ManualRunner, RandomAdmissibleRunner, WalkthroughRunner
+from .sample import load_world, sample_world_definition
+from .server import run_server
+from .snapshots import DEFAULT_LIVE_DIR, LiveSnapshotWriter
+
+
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(description="Dungeon DM environment harness")
+    parser.add_argument("mode", choices=["validate", "play", "sample", "serve"], help="What to do.")
+    parser.add_argument("world", nargs="?", help="Path to a world-definition JSON file.")
+    parser.add_argument("--runner", choices=["walkthrough", "random", "manual"], default="walkthrough")
+    parser.add_argument("--interface", choices=["simple", "gemini"], default="simple")
+    parser.add_argument("--model", default=DEFAULT_GEMINI_MODEL)
+    parser.add_argument("--narrate", action="store_true", help="Narrate observations through Gemini.")
+    parser.add_argument("--live", action="store_true", help="Write live viewer snapshots while playing.")
+    parser.add_argument("--live-dir", type=Path, default=DEFAULT_LIVE_DIR)
+    parser.add_argument("--port", type=int, default=8000)
+    args = parser.parse_args(argv)
+    if args.mode == "serve":
+        run_server(port=args.port, live_dir=args.live_dir)
+        return 0
+
+    if args.mode == "sample":
+        print(json.dumps(sample_world_definition(), indent=2))
+        return 0
+    if not args.world:
+        parser.error("A world-definition JSON file is required for validate/play.")
+
+    try:
+        adapter = SimpleInterfaceAdapter()
+        if args.interface == "gemini":
+            adapter = GeminiInterfaceAdapter(model=args.model, narrate_observations=args.narrate)
+        env = DMEnvironment(interface_adapter=adapter)
+        world = load_world(args.world)
+        if args.mode == "validate":
+            compiled = env.compile_world(world)
+            print(f"Compiled successfully: {compiled.game_file}")
+            print(f"Solver policy: {compiled.solver_policy}")
+            return 0
+
+        runner = {"manual": ManualRunner(), "random": RandomAdmissibleRunner(), "walkthrough": WalkthroughRunner()}[
+            args.runner
+        ]
+        observer = LiveSnapshotWriter(live_dir=args.live_dir, runner_name=args.runner) if args.live else None
+        result = env.play(world, runner=runner, observer=observer)
+        if result.observation.compile_error is not None:
+            print(result.observation.compile_error, file=sys.stderr)
+            return 1
+        print(
+            json.dumps(
+                {
+                    "reward": result.reward,
+                    "done": result.done,
+                    "observation": result.observation.model_dump(),
+                },
+                indent=2,
+            )
+        )
+        return 0
+    except (DMCompileError, DMInterfaceError, ValueError) as exc:
+        print(str(exc), file=sys.stderr)
+        return 1
diff --git a/agents/master/play.py b/agents/master/play.py
new file mode 100644
index 0000000000000000000000000000000000000000..57f760310b6182593fc64eb2771dd5818088c1c5
--- /dev/null
+++ b/agents/master/play.py
@@ -0,0 +1,70 @@
+from __future__ import annotations
+
+import random
+from typing import Iterable, Protocol, TYPE_CHECKING
+
+from .base import DMInterfaceError
+
+if TYPE_CHECKING:
+    from .session import EpisodeSession
+
+
+class EpisodeRunner(Protocol):  # Structural interface shared by the runner classes in this module.
+    def run(self, session: EpisodeSession, max_steps: int) -> None:  # drive `session`; the concrete runners here stop once steps_taken reaches max_steps
+        ...
+
+
+class WalkthroughRunner:
+    def __init__(self, commands: Iterable[str] | None = None) -> None:
+        self._commands = list(commands) if commands is not None else None
+
+    def run(self, session: EpisodeSession, max_steps: int) -> None:
+        commands = list(self._commands or session.compiled.solver_policy)
+        for command in commands:
+            if session.done or session.steps_taken >= max_steps:
+                return
+            session.step(command)
+
+
+class CommandSequenceRunner:  # Replays exactly the commands it was given; there is no fallback source.
+    def __init__(self, commands: Iterable[str]) -> None:  # the iterable is copied into a list once, so it can be replayed
+        self._commands = list(commands)
+
+    def run(self, session: EpisodeSession, max_steps: int) -> None:  # step through the commands in order
+        for command in self._commands:
+            if session.done or session.steps_taken >= max_steps:  # stop early when the episode ended or the step budget is used up
+                return
+            session.step(command)
+
+
+class RandomAdmissibleRunner:  # Picks a random command from session.available_commands() on every step.
+    def __init__(self, seed: int | None = None) -> None:  # a private random.Random keeps runs reproducible for a given seed
+        self._rng = random.Random(seed)
+
+    def run(self, session: EpisodeSession, max_steps: int) -> None:  # loop until done, out of steps, or out of options
+        while not session.done and session.steps_taken < max_steps:
+            options = session.available_commands()
+            if not options:  # nothing to choose from: stop instead of looping forever
+                return
+            session.step(self._rng.choice(options))
+
+
+class ManualRunner:  # Interactive runner: reads commands from stdin and prints observations to stdout.
+    def run(self, session: EpisodeSession, max_steps: int) -> None:  # prompt until the session is done, max_steps is reached, or the user quits
+        print(session.current_feedback())  # show the current feedback before the first prompt
+        while not session.done and session.steps_taken < max_steps:
+            print()
+            print(f"Step {session.steps_taken + 1}/{max_steps}")  # 1-based step counter for display
+            command = input("> ").strip()
+            if command in {"quit", "exit"}:  # local escape words; they are never sent to the session
+                return
+            try:
+                turn = session.step(command)
+            except DMInterfaceError:  # the command was rejected: show a hint and prompt again
+                print("I'm not sure what you mean. Try rephrasing that command.")
+                if session.available_commands():
+                    print("Admissible:", ", ".join(session.available_commands()))
+                continue
+            print(turn.observation)
+            if session.available_commands():  # list the admissible commands only when there are any
+                print("Admissible:", ", ".join(session.available_commands()))
diff --git a/agents/master/policy.py b/agents/master/policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..18dfdd802e4e46bd304a69d1066aa3d773044a2a
--- /dev/null
+++ b/agents/master/policy.py
@@ -0,0 +1,147 @@
+from __future__ import annotations
+
+from typing import Protocol
+
+from pydantic import Field
+
+from agents.shared.llm_client import StructuredModelClient
+from agents.shared.model_schema import StrictModel
+
+from .schema import WorldDefinition
+
+
+class DungeonMasterPolicyError(RuntimeError):  # Raised by DungeonMasterLLMPolicy.generate_world for any generation or validation failure.
+    pass
+
+
+class DungeonMasterPolicy(Protocol):  # Structural interface for anything that can produce a WorldDefinition.
+    def generate_world(
+        self,
+        *,
+        target_ratio: float,
+        repair_context: "DMRepairContext | None" = None,  # quoted because DMRepairContext is defined later in this module
+    ) -> WorldDefinition:
+        ...
+
+
+class DMRepairContext(StrictModel):  # Feedback from a failed generation attempt; the prompt builder renders it into a repair section.
+    attempt_number: int  # rendered as "Repair attempt: N"
+    error_message: str  # rendered as "Normalized error: ..."
+    previous_candidate_json: str | None = None  # appended to the prompt only when non-empty
+
+
+class WinConditionCandidate(StrictModel):  # Win-condition payload as plain required strings; no value checks are declared here.
+    type: str
+    target_npc_id: str
+    answer_string: str
+
+
+class WorldMetaCandidate(StrictModel):  # The `meta` section of a candidate world; every field is required.
+    title: str
+    difficulty_target: float
+    start_node_id: str
+    win_condition: WinConditionCandidate
+
+
+class WorldNodeCandidate(StrictModel):  # One flat shape for every node type: four required fields plus optional per-type fields.
+    id: str
+    type: str  # free-form string here; no enumeration is enforced in this class
+    label: str
+    description: str
+    parent_id: str | None = None  # all remaining fields are optional and default to None
+    open: bool | None = None
+    locked: bool | None = None
+    lock_key_id: str | None = None
+    clue_id: str | None = None
+    requires_item_id: str | None = None
+    consumes_item: bool | None = None
+    text_content: str | None = None
+    reveals_item_id: str | None = None
+    reveals_readable_id: str | None = None
+    gives_item_id: str | None = None
+    gives_clue_id: str | None = None
+
+
+class EdgeCandidate(StrictModel):  # One directed connection between two nodes, as emitted by the model.
+    id: str
+    from_node_id: str
+    to_node_id: str
+    direction: str
+    type: str
+    required_item_id: str | None = None  # optional; the prompts require it on locked_passage edges
+    door_node_id: str | None = None  # optional; the prompts require it on locked_passage edges
+
+
+class ItemCandidate(StrictModel):  # One item entry of a candidate world.
+    id: str
+    label: str
+    description: str
+    subtype: str  # the field is `subtype`, not `type`, as the prompts insist
+    start_node_id: str | None = None  # optional starting location
+
+
+class ClueCandidate(StrictModel):  # One clue entry: an id plus its text.
+    id: str
+    text: str
+
+
+class RecipeCandidate(StrictModel):  # One crafting recipe: input item ids and the single output item id.
+    id: str
+    input_item_ids: list[str]  # list length is not constrained here
+    output_item_id: str
+
+
+class QuestStepCandidate(StrictModel):  # One quest-chain step as emitted by the model.
+    step_id: str
+    description: str
+    requires_step_ids: list[str] = Field(default_factory=list)  # defaults to a fresh empty list when omitted
+    action: str  # kept as a raw string in this class; it is not parsed here
+
+
+class WorldDefinitionCandidate(StrictModel):  # Top-level structured-output schema requested from the model client.
+    meta: WorldMetaCandidate
+    nodes: list[WorldNodeCandidate]
+    edges: list[EdgeCandidate]
+    items: list[ItemCandidate]
+    clues: list[ClueCandidate]
+    recipes: list[RecipeCandidate] = Field(default_factory=list)  # the only section with a default (empty list)
+    quest_chain: list[QuestStepCandidate]
+
+
+class DungeonMasterLLMPolicy:  # Generates a WorldDefinition by requesting a WorldDefinitionCandidate from a structured-output client.
+    def __init__(
+        self,
+        client: StructuredModelClient,
+        *,
+        model_name: str,
+        temperature: float = 0.0,
+        max_output_tokens: int = 8192,
+    ) -> None:  # stores the client and the settings forwarded on every generate_world call
+        self.client = client
+        self.model_name = model_name
+        self.temperature = temperature
+        self.max_output_tokens = max_output_tokens
+
+    def generate_world(
+        self,
+        *,
+        target_ratio: float,
+        repair_context: DMRepairContext | None = None,
+    ) -> WorldDefinition:  # raises DungeonMasterPolicyError for any failure during generation or validation
+        from .prompt import build_dm_world_messages  # imported at call time; presumably to avoid an import cycle with .prompt — confirm
+
+        try:
+            candidate = self.client.generate_structured(
+                build_dm_world_messages(target_ratio=target_ratio, repair_context=repair_context),
+                WorldDefinitionCandidate,
+                model_name=self.model_name,
+                temperature=self.temperature,
+                max_output_tokens=self.max_output_tokens,
+            )
+            return WorldDefinition.model_validate(candidate.model_dump(mode="json", exclude_none=True))  # None-valued optional fields are dropped before validation
+        except Exception as exc:  # deliberately broad: every failure is re-raised as DungeonMasterPolicyError with the cause chained
+            raise DungeonMasterPolicyError(self._normalize_error(exc)) from exc
+
+    @staticmethod
+    def _normalize_error(exc: Exception) -> str:  # one-line message for the exception
+        return " ".join(str(exc).split()) or exc.__class__.__name__  # collapse whitespace runs; fall back to the class name when the message is empty
diff --git a/agents/master/prompt.py b/agents/master/prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..cbb15ea66711e2d9950c2ad3f92b6e0ab9024e39
--- /dev/null
+++ b/agents/master/prompt.py
@@ -0,0 +1,371 @@
+from __future__ import annotations
+
+import json
+from typing import TYPE_CHECKING, Any
+
+from agents.shared.model_schema import ModelMessage
+
+from .sample import sample_world_definition
+
+if TYPE_CHECKING:
+    from .policy import DMRepairContext
+
+
+DM_WORLD_SYSTEM_PROMPT = """You are the dungeon master policy for a structured text adventure generator.
+
+Return exactly one valid WorldDefinition JSON object as minified JSON on a single line.
+Do not use markdown fences, indentation, comments, or extra prose.
+
+World requirements:
+- Build a fair, solvable mystery dungeon with 4 to 6 rooms.
+- Use only the supported schema fields and node types.
+- Use ids in snake_case.
+- Set meta.difficulty_target equal to the requested target ratio.
+- The win condition must be deduce with a short lowercase answer string.
+- The final answer must never be leaked directly in clue text.
+- The world must be mechanically consistent: all references must point to real ids and every puzzle chain must be completable.
+- Do not add unsupported fields to node variants. In particular, location, junction, and door nodes must not include `parent_id`.
+- Every readable must include `text_content`.
+- Keep the world compact enough to fit in one response: short labels, short descriptions, and a concise quest chain.
+
+Supported mechanics:
+- Containers and doors can be opened.
+- Locked doors require a real key item.
+- Readables may require an item before they become legible.
+- Fixtures may reveal an item or a readable after a correct use action.
+- NPCs may trade one required item for one item or one clue.
+- Recipes combine exactly two items into one output item.
+- Navigation uses only passage and locked_passage edges.
+
+Quest-chain rules:
+- Every quest action must be one of:
+  open(node_id)
+  take(item_id,source_node_id)
+  unlock(door_id,key_id)
+  go(room_id)
+  read(readable_id)
+  use(item_id,target_node_id)
+  combine(item_a_id,item_b_id)
+  give(item_id,npc_id)
+  talk(npc_id)
+  submit("answer")
+- Do not invent unsupported actions such as inspect(), search(), solve(), explore(), or win().
+- The quest chain must be topologically valid and correspond to a real solvable playthrough.
+- Every quest step object must use exactly these keys: step_id, description, requires_step_ids, action.
+- Use requires_step_ids (plural) even for one dependency. Never use requires_step_id.
+- Include exactly 3 clues that narrow the answer without stating it directly.
+- Include a guardian NPC for the final submission.
+- Every clue id in clues[] must have exactly one real source: either one readable.clue_id or one non-guardian npc.gives_clue_id.
+- Do not include unused clue ids and do not leave readables without clue_id.
+- Clue text and readable text must never contain the exact answer_string.
+- Every room-to-room connection must include the reverse edge explicitly.
+- Every locked_passage pair must reference a real door node id that already exists in nodes[].
+- Any item used by required_item_id or lock_key_id must have subtype key.
+- Keep descriptions and clue texts short, concrete, and under 14 words when possible.
+- Prefer 4 rooms, 9 to 11 nodes, 4 to 5 items, 3 clues, 0 recipes, and 6 to 9 quest steps.
+- Use the shortest valid quest chain that still supports the target difficulty.
+- meta must include title, difficulty_target, start_node_id, and win_condition.
+- item objects use subtype, never type.
+- clue objects use id and text, never clue_id.
+- fixture objects use reveals_item_id or reveals_readable_id.
+- NPC trade objects use requires_item_id plus gives_item_id or gives_clue_id.
+
+Reliability matters more than novelty. Stay close to the reference world's mechanical bundle unless repair feedback requires a different fix.
+"""  # Sent verbatim as the system message by build_dm_world_messages; it has no format placeholders.
+
+_DM_WORLD_USER_PROMPTS = (
+    (
+        "Generate one full WorldDefinition JSON object as minified one-line JSON.\n"
+        "Requested target ratio: {target_ratio}\n\n"
+        "Hard output requirements:\n"
+        "- Required top-level fields: meta, nodes, edges, items, clues, recipes, quest_chain.\n"
+        "- Supported node types: location, junction, container, door, readable, fixture, npc.\n"
+        "- Supported edge types: passage, locked_passage.\n"
+        "- Supported item subtypes: key, puzzle.\n"
+        "- Every locked_passage must reference a real door_node_id and a real required_item_id.\n"
+        "- Every locked door must have a matching lock_key_id.\n"
+        "- Every fixture must have requires_item_id and reveal at most one item or one readable.\n"
+        "- Location, junction, and door nodes must not include parent_id.\n"
+        "- Every readable must include text_content.\n"
+        "- Every non-guardian NPC trade must require a real item.\n"
+        "- Use 6 to 9 quest steps unless a shorter valid chain is clearly enough.\n"
+        "- meta must include title, start_node_id, and win_condition.\n"
+        "- items use subtype, not type.\n"
+        "- clues use id, not clue_id.\n"
+        "- every clue id must have exactly one readable or non-guardian npc source.\n"
+        "- fixtures use reveals_item_id or reveals_readable_id.\n"
+        "- NPC trades use requires_item_id plus gives_item_id or gives_clue_id.\n"
+        "- every locked_passage must reference a real door node id and a key item.\n"
+        "- The final answer must stay implicit until the player gathers clues and speaks to the guardian.\n\n"
+        "Compact structural snippets to mimic exactly:\n"
+        "meta={meta_example_json}\n"
+        "item={item_example_json}\n"
+        "clue={clue_example_json}\n"
+        "fixture={fixture_example_json}\n"
+        "npc={npc_example_json}\n"
+        "quest_step={quest_step_example_json}\n"
+        "edge_pair={edge_pair_example_json}\n"
+        "readable={readable_example_json}\n"
+    ),
+    (
+        "Produce a compact but fully valid WorldDefinition JSON object as minified one-line JSON.\n"
+        "Target difficulty ratio: {target_ratio}\n\n"
+        "Mechanical constraints:\n"
+        "- Output minified JSON only on one line.\n"
+        "- Keep the graph solvable and internally consistent.\n"
+        "- Keep all ids in snake_case and all references real.\n"
+        "- Preserve the supported node, edge, and item types exactly.\n"
+        "- Do not add unsupported fields to node variants.\n"
+        "- Every readable must include text_content.\n"
+        "- The world must require clue gathering before the guardian submission.\n"
+        "- Use exactly 3 clues.\n"
+        "- Every clue id must appear exactly once in a readable.clue_id or npc.gives_clue_id.\n"
+        "- Every edge pair must include both directions explicitly.\n"
+        "- Every locked_passage must reference a real door node id already present in nodes[].\n"
+        "- Any required_item_id on a locked_passage must be a key item.\n"
+        "- Quest steps must use requires_step_ids (plural).\n\n"
+        "Exact meta example:\n{meta_example_json}\n"
+        "Exact item example:\n{item_example_json}\n"
+        "Exact clue example:\n{clue_example_json}\n"
+        "Exact fixture example:\n{fixture_example_json}\n"
+        "Exact NPC example:\n{npc_example_json}\n"
+        "Exact quest step example:\n{quest_step_example_json}\n"
+        "Exact bidirectional edge example:\n{edge_pair_example_json}\n"
+        "Exact readable example:\n{readable_example_json}\n"
+    ),
+    (
+        "Return one original WorldDefinition JSON object for a mystery dungeon as minified one-line JSON.\n"
+        "Requested target ratio: {target_ratio}\n\n"
+        "Checklist:\n"
+        "- 4 to 6 rooms.\n"
+        "- 3 to 5 clues.\n"
+        "- A real guardian NPC for the final answer.\n"
+        "- A quest chain that compiles into a real walkthrough.\n"
+        "- No unsupported extra fields and no missing required fields like readable.text_content.\n"
+        "- No unsupported mechanics, no unsupported actions, no prose.\n"
+        "- Use requires_step_ids (plural), not requires_step_id.\n"
+        "- Use exactly 3 clues and explicit reverse edges.\n"
+        "- Every clue id must have exactly one source and no clue may be orphaned.\n"
+        "- Every locked_passage must use an existing door node id and a key item.\n"
+        "- Prefer 6 to 9 quest steps, not long walkthroughs.\n\n"
+        "Mini schema examples:\n"
+        "meta={meta_example_json}\n"
+        "item={item_example_json}\n"
+        "clue={clue_example_json}\n"
+        "fixture={fixture_example_json}\n"
+        "npc={npc_example_json}\n"
+        "quest_step={quest_step_example_json}\n"
+        "edge_pair={edge_pair_example_json}\n"
+        "readable={readable_example_json}\n"
+    ),
+)
+
+_DM_META_EXAMPLE = {  # Example `meta` object; serialized into {meta_example_json} in the user prompt.
+    "title": "The Ember Vault",
+    "difficulty_target": 1.75,
+    "start_node_id": "foyer",
+    "win_condition": {
+        "type": "deduce",
+        "target_npc_id": "stone_guardian",
+        "answer_string": "vesna",
+    },
+}
+
+_DM_ITEM_EXAMPLE = {  # Example item; serialized into {item_example_json}.
+    "id": "brass_key",
+    "subtype": "key",
+    "start_node_id": "entry_chest",
+    "label": "Brass Key",
+    "description": "short key description",
+}
+
+_DM_CLUE_EXAMPLE = {  # Example clue; serialized into {clue_example_json}.
+    "id": "initial_clue",
+    "text": "short clue text",
+}
+
+_DM_FIXTURE_EXAMPLE = {  # Example fixture node; serialized into {fixture_example_json}.
+    "id": "stone_well",
+    "type": "fixture",
+    "parent_id": "courtyard",
+    "requires_item_id": "full_map",
+    "consumes_item": False,
+    "reveals_item_id": None,  # json.dumps renders None as null in the prompt
+    "reveals_readable_id": "water_plaque",
+    "label": "Stone Well",
+    "description": "short fixture description",
+}
+
+_DM_NPC_EXAMPLE = {  # Example trading NPC node; serialized into {npc_example_json}.
+    "id": "cartographer",
+    "type": "npc",
+    "parent_id": "gallery",
+    "requires_item_id": "full_map",
+    "gives_item_id": "lens",
+    "gives_clue_id": None,  # json.dumps renders None as null in the prompt
+    "label": "Cartographer",
+    "description": "short npc description",
+}
+
+_DM_QUEST_STEP_EXAMPLE = {  # Example quest step; serialized into {quest_step_example_json}.
+    "step_id": "open_entry_chest",
+    "description": "open the chest",
+    "requires_step_ids": [],
+    "action": "open(entry_chest)",
+}
+
+_DM_EDGE_PAIR_EXAMPLE = [  # Example locked passage as two explicit edges, one per direction; serialized into {edge_pair_example_json}.
+    {
+        "id": "foyer_east",
+        "from_node_id": "foyer",
+        "to_node_id": "workshop",
+        "direction": "east",
+        "type": "locked_passage",
+        "required_item_id": "brass_key",
+        "door_node_id": "iron_door",
+    },
+    {
+        "id": "workshop_west",
+        "from_node_id": "workshop",
+        "to_node_id": "foyer",
+        "direction": "west",
+        "type": "locked_passage",
+        "required_item_id": "brass_key",
+        "door_node_id": "iron_door",
+    },
+]
+
+_DM_READABLE_EXAMPLE = {  # Example readable node; serialized into {readable_example_json}.
+    "id": "ash_mural",
+    "type": "readable",
+    "parent_id": "workshop",
+    "clue_id": "initial_clue",
+    "requires_item_id": "torch",
+    "consumes_item": False,
+    "label": "Ash Mural",
+    "description": "short readable description",
+    "text_content": "short readable text",
+}
+
+
+def _compress_reference_world_for_prompt(reference_world: dict[str, Any]) -> dict[str, Any]:  # Reduce a world dict to its structural fields, replacing free text with short placeholders.
+    return {
+        "meta": reference_world.get("meta", {}),  # meta is passed through unchanged
+        "nodes": [
+            _compress_world_node(node)
+            for node in reference_world.get("nodes", [])
+            if isinstance(node, dict)  # non-dict entries are skipped in every section
+        ],
+        "edges": [
+            {
+                key: edge[key]
+                for key in ("id", "from_node_id", "to_node_id", "direction", "type", "required_item_id", "door_node_id")
+                if key in edge  # only keys actually present are copied
+            }
+            for edge in reference_world.get("edges", [])
+            if isinstance(edge, dict)
+        ],
+        "items": [
+            {
+                **{
+                    key: item[key]
+                    for key in ("id", "subtype", "start_node_id")
+                    if key in item
+                },
+                "label": str(item.get("label") or item.get("id") or "item"),  # label falls back to the id, then to "item"
+                "description": "short item description",  # original description is replaced by a placeholder
+            }
+            for item in reference_world.get("items", [])
+            if isinstance(item, dict)
+        ],
+        "clues": [
+            {"id": clue["id"], "text": "short clue text"}  # clue text is replaced by a placeholder
+            for clue in reference_world.get("clues", [])
+            if isinstance(clue, dict) and "id" in clue  # clues without an id are dropped
+        ],
+        "recipes": [
+            {
+                key: recipe[key]
+                for key in ("id", "input_item_ids", "output_item_id")
+                if key in recipe
+            }
+            for recipe in reference_world.get("recipes", [])
+            if isinstance(recipe, dict)
+        ],
+        "quest_chain": [
+            {
+                **{
+                    key: step[key]
+                    for key in ("step_id", "requires_step_ids", "action")
+                    if key in step
+                },
+                "description": "short quest step",  # step description is replaced by a placeholder
+            }
+            for step in reference_world.get("quest_chain", [])
+            if isinstance(step, dict)
+        ],
+    }
+
+
+def _compress_world_node(node: dict[str, Any]) -> dict[str, Any]:  # Copy a node's structural keys and substitute placeholder label/description text.
+    compressed = {
+        key: node[key]
+        for key in (
+            "id",
+            "type",
+            "parent_id",
+            "open",
+            "locked",
+            "lock_key_id",
+            "clue_id",
+            "requires_item_id",
+            "consumes_item",
+            "reveals_item_id",
+            "reveals_readable_id",
+            "gives_item_id",
+            "gives_clue_id",
+        )
+        if key in node  # only keys actually present on the node are copied
+    }
+    compressed["label"] = str(node.get("label") or node.get("id") or node.get("type") or "node")  # label falls back to id, then type, then "node"
+    compressed["description"] = f"short {str(node.get('type') or 'node')} description"  # placeholder built from the node type
+    if node.get("type") == "readable":  # readables also get placeholder text_content
+        compressed["text_content"] = "short readable text"
+    return compressed
+
+
+def build_dm_world_messages(  # Build the [system, user] message pair for one world-generation request.
+    *,
+    target_ratio: float,
+    repair_context: "DMRepairContext | None" = None,
+    reference_world: dict[str, Any] | None = None,
+    prompt_style: int = 0,  # index into _DM_WORLD_USER_PROMPTS, wrapped with modulo
+) -> list[ModelMessage]:
+    exemplar_world = reference_world or sample_world_definition()  # None or an empty dict falls back to the sample world
+    structural_exemplar = _compress_reference_world_for_prompt(exemplar_world)
+    template = _DM_WORLD_USER_PROMPTS[prompt_style % len(_DM_WORLD_USER_PROMPTS)]
+    prompt = template.format(
+        target_ratio=target_ratio,
+        reference_world_json=json.dumps(structural_exemplar, separators=(",", ":")),  # NOTE(review): no template contains {reference_world_json}, so this value is unused — confirm intent
+        meta_example_json=json.dumps(_DM_META_EXAMPLE, separators=(",", ":")),  # separators=(",", ":") gives minified JSON
+        item_example_json=json.dumps(_DM_ITEM_EXAMPLE, separators=(",", ":")),
+        clue_example_json=json.dumps(_DM_CLUE_EXAMPLE, separators=(",", ":")),
+        fixture_example_json=json.dumps(_DM_FIXTURE_EXAMPLE, separators=(",", ":")),
+        npc_example_json=json.dumps(_DM_NPC_EXAMPLE, separators=(",", ":")),
+        quest_step_example_json=json.dumps(_DM_QUEST_STEP_EXAMPLE, separators=(",", ":")),
+        edge_pair_example_json=json.dumps(_DM_EDGE_PAIR_EXAMPLE, separators=(",", ":")),
+        readable_example_json=json.dumps(_DM_READABLE_EXAMPLE, separators=(",", ":")),
+    )
+    if repair_context is not None:  # append the repair section after the formatted template
+        prompt += (
+            "\nThe previous WorldDefinition failed schema validation or compilation.\n"
+            f"Repair attempt: {repair_context.attempt_number}\n"
+            f"Normalized error: {repair_context.error_message}\n"
+            "Return a fully corrected WorldDefinition only.\n"
+        )
+        if repair_context.previous_candidate_json:  # skipped when None or empty
+            prompt += f"Previous invalid WorldDefinition JSON:\n{repair_context.previous_candidate_json}\n"
+    return [
+        ModelMessage(role="system", content=DM_WORLD_SYSTEM_PROMPT),
+        ModelMessage(role="user", content=prompt),
+    ]
diff --git a/agents/master/quest.py b/agents/master/quest.py
new file mode 100644
index 0000000000000000000000000000000000000000..489dea07c12b89e5b24d475bf63220127846d68d
--- /dev/null
+++ b/agents/master/quest.py
@@ -0,0 +1,418 @@
+from __future__ import annotations
+
+import re
+from collections import defaultdict, deque
+
+from .base import (
+    COMBINE_RE,
+    DMCompileError,
+    GIVE_RE,
+    GO_RE,
+    INVENTORY_ID,
+    OPEN_RE,
+    READ_RE,
+    STORED_ID,
+    SUBMIT_RE,
+    TALK_RE,
+    TAKE_RE,
+    UNLOCK_RE,
+    USE_RE,
+    normalize_answer_text,
+)
+from .graph import door_room_mapping, hidden_readable_ids, recipe_mapping, use_effect_mapping
+from .schema import (
+    CombineAction,
+    ContainerNode,
+    FixtureNode,
+    GiveAction,
+    GoAction,
+    Item,
+    NpcNode,
+    OpenAction,
+    QuestAction,
+    QuestStep,
+    ReadAction,
+    ReadableNode,
+    SimulationState,
+    SubmitAction,
+    TalkAction,
+    TakeAction,
+    UnlockAction,
+    UseAction,
+    WorldDefinition,
+)
+
+
+def topological_linearize(steps: list[QuestStep]) -> list[QuestStep]:
+    """Check that ``steps`` is already listed in dependency order and return it unchanged.
+
+    Raises ``DMCompileError`` on an unknown dependency, a cycle, or a step listed before one it requires.
+    """
+    step_lookup = {step.step_id: step for step in steps}
+    for step in steps:
+        for required in step.requires_step_ids:
+            if required not in step_lookup:
+                raise DMCompileError(f"Quest step '{step.step_id}' depends on unknown step '{required}'.")
+
+    in_progress: set[str] = set()
+    finished: set[str] = set()
+
+    def explore(current_id: str) -> None:
+        if current_id in in_progress:
+            raise DMCompileError("quest_chain contains a cycle.")
+        if current_id not in finished:
+            in_progress.add(current_id)
+            for required in step_lookup[current_id].requires_step_ids:
+                explore(required)
+            in_progress.remove(current_id)
+            finished.add(current_id)
+
+    for step in steps:
+        explore(step.step_id)
+
+    emitted: set[str] = set()
+    for step in steps:
+        if late := sorted(required for required in step.requires_step_ids if required not in emitted):
+            raise DMCompileError(f"Quest step '{step.step_id}' appears before its required steps: {', '.join(late)}.")
+        emitted.add(step.step_id)
+    return steps
+
+
+def parse_quest_action(text: str) -> QuestAction:
+    """Parse one quest-action DSL string into the matching ``QuestAction``; the first full match wins.
+
+    All whitespace is removed before matching, except for ``SUBMIT_RE`` which sees ``text.strip()``.
+    Raises ``DMCompileError`` when no pattern matches.
+    """
+    squeezed = re.sub(r"\s+", "", text)
+    parsers = (
+        (GO_RE, squeezed, lambda m: GoAction(target_node_id=m.group("target"))),
+        (OPEN_RE, squeezed, lambda m: OpenAction(target_node_id=m.group("target"))),
+        (UNLOCK_RE, squeezed, lambda m: UnlockAction(door_id=m.group("door"), key_id=m.group("key"))),
+        (TAKE_RE, squeezed, lambda m: TakeAction(item_id=m.group("item"), source_node_id=m.group("source"))),
+        (READ_RE, squeezed, lambda m: ReadAction(target_node_id=m.group("target"))),
+        (USE_RE, squeezed, lambda m: UseAction(item_id=m.group("item"), target_node_id=m.group("target"))),
+        (COMBINE_RE, squeezed, lambda m: CombineAction(item_a_id=m.group("item_a"), item_b_id=m.group("item_b"))),
+        (GIVE_RE, squeezed, lambda m: GiveAction(item_id=m.group("item"), npc_id=m.group("npc"))),
+        (TALK_RE, squeezed, lambda m: TalkAction(target_node_id=m.group("target"))),
+        (SUBMIT_RE, text.strip(), lambda m: SubmitAction(answer_text=m.group("answer"))),
+    )
+    for pattern, candidate, build in parsers:
+        if (found := pattern.fullmatch(candidate)) is not None:
+            return build(found)
+    raise DMCompileError(f"Unsupported quest action DSL '{text}'.")
+
+
+def simulate_walkthrough(
+    world: WorldDefinition,
+    actions: list[QuestAction],
+    entity_names: dict[str, str],
+) -> list[str]:
+    """Replay ``actions`` against a fresh simulation of ``world`` and return the emitted commands.
+
+    Each action is validated and applied by its ``_apply_*`` helper, which raises
+    ``DMCompileError`` when the step is not legal in the current simulated state.
+    """
+    nodes = {node.id: node for node in world.nodes}
+    items = {item.id: item for item in world.items}
+    edges = {(edge.from_node_id, edge.to_node_id): edge for edge in world.edges}
+    door_rooms = door_room_mapping(world)
+    concealed = hidden_readable_ids(world)
+    use_effects = use_effect_mapping(world)
+    recipes = recipe_mapping(world)
+    clue_ids = {clue.id for clue in world.clues}
+
+    start_room = world.meta.start_node_id
+    state = SimulationState(
+        current_room_id=start_room,
+        item_locations={item.id: item.start_node_id or STORED_ID for item in world.items},
+        visited_nodes={start_room},
+        revealed_readables={n.id for n in world.nodes if n.type == "readable" and n.id not in concealed},
+    )
+
+    # Containers and doors start open and/or locked according to their own flags.
+    lockables = [node for node in world.nodes if node.type in {"container", "door"}]
+    state.open_nodes.update(node.id for node in lockables if node.open)
+    state.locked_nodes.update(node.id for node in lockables if node.locked)
+
+    commands: list[str] = []
+    # Checked in order; the first action type that matches handles the step.
+    handlers = (
+        (GoAction, lambda a: _apply_go(a, edges, state, commands)),
+        (OpenAction, lambda a: _apply_open(a, nodes, door_rooms, state, entity_names, commands)),
+        (UnlockAction, lambda a: _apply_unlock(a, nodes, items, door_rooms, state, entity_names, commands)),
+        (TakeAction, lambda a: _apply_take(a, nodes, items, state, entity_names, commands)),
+        (ReadAction, lambda a: _apply_read(a, nodes, state, entity_names, commands)),
+        (UseAction, lambda a: _apply_use(a, nodes, state, entity_names, commands, use_effects)),
+        (CombineAction, lambda a: _apply_combine(a, state, entity_names, commands, recipes)),
+        (GiveAction, lambda a: _apply_give(a, nodes, state, entity_names, commands)),
+        (TalkAction, lambda a: _apply_talk(a, nodes, state, entity_names, commands)),
+        (SubmitAction, lambda a: _apply_submit(a, world, nodes, state, commands, clue_ids)),
+    )
+    for action in actions:
+        handler = next((apply for kind, apply in handlers if isinstance(action, kind)), None)
+        if handler is None:  # pragma: no cover
+            raise AssertionError(f"Unhandled quest action {action!r}")
+        handler(action)
+
+    return commands
+
+
+def _apply_go(
+    action: GoAction, edge_by_target: dict[tuple[str, str], object], state: SimulationState, commands: list[str]
+) -> None:
+    """Follow the edge from the current room to ``action.target_node_id`` and emit the ``go`` command."""
+    origin, destination = state.current_room_id, action.target_node_id
+    edge = edge_by_target.get((origin, destination))
+    if edge is None:
+        raise DMCompileError(f"Quest moves from '{origin}' to non-adjacent room '{destination}'.")
+    door_id = edge.door_node_id
+    # A door on the edge must already be open; a locked passage must no longer be locked.
+    if door_id and door_id not in state.open_nodes:
+        raise DMCompileError(f"Quest moves through closed door '{door_id}'.")
+    if edge.type == "locked_passage" and door_id in state.locked_nodes:
+        raise DMCompileError(f"Quest moves through locked door '{door_id}'.")
+    state.current_room_id = edge.to_node_id
+    state.visited_nodes.add(edge.to_node_id)
+    # The emitted command names the edge's direction, not the target room id.
+    commands.append(f"go {edge.direction}")
+
+
+def _apply_open(
+    action: OpenAction, node_by_id: dict[str, object], door_rooms: dict[str, frozenset[str]],
+    state: SimulationState, entity_names: dict[str, str], commands: list[str],
+) -> None:
+    """Open a container or door that is not locked and can be reached from the current room.
+
+    Raises ``DMCompileError`` for an unknown target, a locked target, or one that is out of reach.
+    """
+    target = node_by_id.get(action.target_node_id)
+    if target is None or target.type not in {"container", "door"}:
+        raise DMCompileError(f"open(...) targets unknown lockable '{action.target_node_id}'.")
+    if target.id in state.locked_nodes:
+        raise DMCompileError(f"Quest opens locked '{target.id}' before unlocking it.")
+    # Containers must sit in the current room; doors must list it in ``door_rooms``.
+    if target.type != "door":
+        _require_parent_room(target.parent_id, target.id, state.current_room_id)
+    elif state.current_room_id not in door_rooms.get(target.id, frozenset()):
+        raise DMCompileError(f"Door '{target.id}' is not reachable from room '{state.current_room_id}'.")
+    state.open_nodes.add(target.id)
+    state.visited_nodes.add(target.id)
+    commands.append(f"open {entity_names[target.id]}")
+
+
+def _apply_unlock(
+    action: UnlockAction, node_by_id: dict[str, object], item_by_id: dict[str, Item],
+    door_rooms: dict[str, frozenset[str]], state: SimulationState, entity_names: dict[str, str], commands: list[str],
+) -> None:
+    """Unlock a door or container with the matching key, which must be in the inventory.
+
+    Raises ``DMCompileError`` for an unknown or unheld key, an unknown target, a key that does
+    not fit the target, or a target that cannot be reached from the current room.
+    """
+    key_id = action.key_id
+    if key_id not in item_by_id:
+        raise DMCompileError(f"Quest references unknown key '{key_id}'.")
+    if key_id not in state.inventory:
+        raise DMCompileError(f"Quest unlocks '{action.door_id}' without key '{key_id}'.")
+    target = node_by_id.get(action.door_id)
+    if target is None or target.type not in {"door", "container"}:
+        raise DMCompileError(f"unlock(...) targets unknown lockable '{action.door_id}'.")
+    if target.lock_key_id != key_id:
+        raise DMCompileError(f"'{target.id}' does not match key '{key_id}'.")
+    if target.type != "door":
+        _require_parent_room(target.parent_id, target.id, state.current_room_id)
+    elif state.current_room_id not in door_rooms.get(target.id, frozenset()):
+        raise DMCompileError(f"Door '{target.id}' is not reachable from room '{state.current_room_id}'.")
+    state.locked_nodes.discard(target.id)
+    state.visited_nodes.add(target.id)
+    commands.append(f"unlock {entity_names[target.id]} with {entity_names[key_id]}")
+
+
+def _apply_take(
+    action: TakeAction, node_by_id: dict[str, object], item_by_id: dict[str, Item],
+    state: SimulationState, entity_names: dict[str, str], commands: list[str],
+) -> None:
+    """Move an item into the inventory, taking it from the current room or an open container there.
+
+    Raises ``DMCompileError`` when the item, its stated location, or the source container fails validation.
+    """
+    item = item_by_id.get(action.item_id)
+    if item is None:
+        raise DMCompileError(f"Quest references unknown item '{action.item_id}'.")
+    source_id = action.source_node_id
+    located_at = state.item_locations.get(item.id)
+    if located_at != source_id:
+        raise DMCompileError(f"Quest expects item '{item.id}' in '{source_id}', but it is in '{located_at}'.")
+    container = None
+    if source_id != state.current_room_id:
+        container = node_by_id.get(source_id)
+        if container is None or not isinstance(container, ContainerNode):
+            raise DMCompileError(f"Quest cannot take '{item.id}' from '{source_id}'.")
+        _require_parent_room(container.parent_id, container.id, state.current_room_id)
+        if container.id not in state.open_nodes:
+            raise DMCompileError(f"Quest takes from closed container '{container.id}'.")
+    command = f"take {entity_names[item.id]}"
+    if container is not None:
+        command += f" from {entity_names[container.id]}"
+    state.inventory.add(item.id)
+    state.item_locations[item.id] = INVENTORY_ID
+    state.visited_nodes.add(item.id)
+    commands.append(command)
+
+
+def _apply_read(
+    action: ReadAction, node_by_id: dict[str, object], state: SimulationState,
+    entity_names: dict[str, str], commands: list[str],
+) -> None:
+    """Read a revealed readable in the current room and mark its clue as discovered."""
+    # _typed_node raises DMCompileError unless the target is a ReadableNode.
+    readable = _typed_node(node_by_id, action.target_node_id, ReadableNode, "read")
+    _require_parent_room(readable.parent_id, readable.id, state.current_room_id)
+    if readable.id not in state.revealed_readables:
+        raise DMCompileError(f"Readable '{readable.id}' has not been revealed yet.")
+    # A readable that names a required item must already be in prepared_readables.
+    if readable.requires_item_id and readable.id not in state.prepared_readables:
+        raise DMCompileError(f"Readable '{readable.id}' still requires item '{readable.requires_item_id}'.")
+    state.discovered_clues.add(readable.clue_id)
+    state.visited_nodes.add(readable.id)
+    commands.append(f"read {entity_names[readable.id]}")
+
+
+def _apply_use(
+    action: UseAction, node_by_id: dict[str, object], state: SimulationState,
+    entity_names: dict[str, str], commands: list[str], use_effects: dict[str, object],
+) -> None:
+    """Use an inventory item on a readable or fixture in the current room and apply its effect."""
+    # Run every check before touching the state.
+    target_id, item_id = action.target_node_id, action.item_id
+    effect = use_effects.get(target_id)
+    if effect is None:
+        raise DMCompileError(f"use(...) targets unknown use-effect node '{target_id}'.")
+    if effect.required_item_id != item_id:
+        raise DMCompileError(f"'{target_id}' does not accept item '{item_id}'.")
+    if item_id not in state.inventory:
+        raise DMCompileError(f"Quest uses item '{item_id}' before taking it.")
+    target = node_by_id.get(target_id)
+    if target is None or target.type not in {"readable", "fixture"}:
+        raise DMCompileError(f"use(...) targets unsupported node '{target_id}'.")
+    _require_parent_room(target.parent_id, target.id, state.current_room_id)
+    if isinstance(target, ReadableNode) and target.id not in state.revealed_readables:
+        raise DMCompileError(f"Readable '{target.id}' has not been revealed yet.")
+
+    # Apply the effect: consume the item, record the clue, reveal an item and/or a readable.
+    if effect.consumes_item:
+        state.inventory.remove(item_id)
+        state.item_locations[item_id] = None
+    if effect.clue_id:
+        state.prepared_readables.add(target.id)
+        state.discovered_clues.add(effect.clue_id)
+    if effect.reveals_item_id:
+        state.item_locations[effect.reveals_item_id] = state.current_room_id
+    if effect.reveals_readable_id:
+        state.revealed_readables.add(effect.reveals_readable_id)
+    if isinstance(target, FixtureNode):
+        state.used_fixtures.add(target.id)
+    state.visited_nodes.add(target.id)
+    commands.append(f"use {entity_names[item_id]} on {entity_names[target.id]}")
+
+
+def _apply_combine(
+    action: CombineAction, state: SimulationState, entity_names: dict[str, str],
+    commands: list[str], recipes: dict[frozenset[str], str],
+) -> None:
+    """Combine two held items into the recipe's output item, which replaces them in the inventory.
+
+    Raises ``DMCompileError`` if no recipe matches the pair or either input is not in the inventory.
+    """
+    first_id, second_id = action.item_a_id, action.item_b_id
+    product_id = recipes.get(frozenset({first_id, second_id}))
+    if product_id is None:
+        raise DMCompileError(f"No recipe combines '{first_id}' with '{second_id}'.")
+    if not (first_id in state.inventory and second_id in state.inventory):
+        raise DMCompileError("Quest combines items before both are in inventory.")
+    for spent_id in (first_id, second_id):
+        state.inventory.remove(spent_id)
+    state.item_locations.update({first_id: None, second_id: None})
+    state.inventory.add(product_id)
+    state.item_locations[product_id] = INVENTORY_ID
+    state.produced_items.add(product_id)
+    state.visited_nodes.add(product_id)
+    commands.append(f"combine {entity_names[first_id]} with {entity_names[second_id]}")
+
+
+def _apply_give(
+    action: GiveAction, node_by_id: dict[str, object], state: SimulationState,
+    entity_names: dict[str, str], commands: list[str],
+) -> None:
+    """Trade a held item to an NPC in the current room for the item and/or clue that NPC gives."""
+    npc = _typed_node(node_by_id, action.npc_id, NpcNode, "give")
+    _require_parent_room(npc.parent_id, npc.id, state.current_room_id)
+    offered_id = action.item_id
+    if offered_id not in state.inventory:
+        raise DMCompileError(f"Quest gives '{offered_id}' before taking it.")
+    if npc.requires_item_id != offered_id:
+        raise DMCompileError(f"NPC '{npc.id}' does not want '{offered_id}'.")
+    if npc.id in state.satisfied_npcs:
+        raise DMCompileError(f"Quest trades with NPC '{npc.id}' more than once.")
+    state.inventory.remove(offered_id)
+    state.item_locations[offered_id] = None
+    reward_id = npc.gives_item_id
+    if reward_id:
+        state.inventory.add(reward_id)
+        state.item_locations[reward_id] = INVENTORY_ID
+        state.produced_items.add(reward_id)
+    if npc.gives_clue_id:
+        state.discovered_clues.add(npc.gives_clue_id)
+    state.satisfied_npcs.add(npc.id)
+    state.visited_nodes.add(npc.id)
+    commands.append(f"give {entity_names[offered_id]} to {entity_names[npc.id]}")
+
+
+def _apply_talk(
+    action: TalkAction, node_by_id: dict[str, object], state: SimulationState,
+    entity_names: dict[str, str], commands: list[str],
+) -> None:
+    """Talk to an NPC in the current room and record that the NPC has been consulted."""
+    # _typed_node and _require_parent_room raise DMCompileError for a non-NPC or out-of-room target.
+    speaker = _typed_node(node_by_id, action.target_node_id, NpcNode, "talk")
+    _require_parent_room(speaker.parent_id, speaker.id, state.current_room_id)
+    # _apply_submit later insists that the guardian appears in consulted_npcs.
+    state.consulted_npcs.add(speaker.id)
+    state.visited_nodes.add(speaker.id)
+    commands.append(f"talk {entity_names[speaker.id]}")
+
+
+def _apply_submit(
+    action: SubmitAction, world: WorldDefinition, node_by_id: dict[str, object],
+    state: SimulationState, commands: list[str], clue_ids: set[str],
+) -> None:
+    """Submit the final answer to the win-condition NPC, which must be in the current room.
+
+    Raises ``DMCompileError`` if the guardian was not consulted, clues are missing or undefined, or the answer is wrong.
+    """
+    guardian = _typed_node(node_by_id, world.meta.win_condition.target_npc_id, NpcNode, "submit")
+    _require_parent_room(guardian.parent_id, guardian.id, state.current_room_id)
+    if guardian.id not in state.consulted_npcs:
+        raise DMCompileError("Quest submits before talking to the guardian.")
+    if missing := sorted(clue_ids - state.discovered_clues):
+        raise DMCompileError(f"Quest submits before all clues are discovered: {missing}")
+    if extra := sorted(state.discovered_clues - clue_ids, key=str):
+        raise DMCompileError(f"Quest discovers clues that the world does not define: {extra}")
+    if normalize_answer_text(action.answer_text) != normalize_answer_text(world.meta.win_condition.answer_string):
+        raise DMCompileError("The final submit step must match win_condition.answer_string.")
+    commands.append("submit " + normalize_answer_text(action.answer_text))
+
+
+def _typed_node(node_by_id: dict[str, object], node_id: str, expected: type, label: str):
+    """Return the node stored under ``node_id``; raise ``DMCompileError`` unless it is an ``expected``."""
+    if (found := node_by_id.get(node_id)) is not None and isinstance(found, expected):
+        return found
+    raise DMCompileError(f"{label}(...) targets unknown {expected.__name__.lower()} '{node_id}'.")
+
+
+def _require_parent_room(parent_id: str, node_id: str, current_room_id: str) -> None:
+    """Raise ``DMCompileError`` unless ``parent_id`` is the room the walkthrough is currently in."""
+    if parent_id != current_room_id:
+        problem = f"Quest interacts with '{node_id}' from room '{current_room_id}', but it lives in '{parent_id}'."
+        raise DMCompileError(problem)
diff --git a/agents/master/sample.py b/agents/master/sample.py
new file mode 100644
index 0000000000000000000000000000000000000000..f5b95e7ea163012a0c4177318f95bf2a2fa911e5
--- /dev/null
+++ b/agents/master/sample.py
@@ -0,0 +1,499 @@
+from __future__ import annotations
+
+import json
+import random
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+
+@dataclass(frozen=True)
+class WorldTheme:  # Display strings for one themed variant; slot names follow the first theme ("The River Ward").
+    title: str  # theme title, e.g. "The River Ward"
+    answer: str  # name the theme's clue texts point to, e.g. "mira"
+    foyer_label: str  # each *_label / *_description pair names and describes one slot (room, node or item)
+    foyer_description: str
+    shrine_label: str
+    shrine_description: str
+    workshop_label: str
+    workshop_description: str
+    courtyard_label: str
+    courtyard_description: str
+    gallery_label: str
+    gallery_description: str
+    entry_chest_label: str
+    entry_chest_description: str
+    iron_door_label: str
+    iron_door_description: str
+    ash_mural_label: str
+    ash_mural_description: str
+    ash_mural_text: str  # *_text fields hold the wording shown when the slot is read (presumably; confirm at use site)
+    iron_chest_label: str
+    iron_chest_description: str
+    stone_well_label: str
+    stone_well_description: str
+    water_plaque_label: str
+    water_plaque_description: str
+    water_plaque_text: str  # same wording as river_clue_text in "The River Ward"
+    cartographer_label: str
+    cartographer_description: str
+    faded_letter_label: str
+    faded_letter_description: str
+    faded_letter_text: str  # same wording as waterwarden_clue_text in "The River Ward"
+    stone_guardian_label: str
+    stone_guardian_description: str
+    brass_key_label: str
+    brass_key_description: str
+    torch_label: str
+    torch_description: str
+    torn_map_left_label: str
+    torn_map_left_description: str
+    torn_map_right_label: str
+    torn_map_right_description: str
+    full_map_label: str
+    full_map_description: str
+    lens_label: str
+    lens_description: str
+    initial_clue_text: str  # e.g. "The betrayer's name begins with M."
+    river_clue_text: str  # e.g. "The betrayer lived closest to the river gate."
+    waterwarden_clue_text: str  # e.g. "Of the wardens, only Mira kept quarters beside the water."
+
+
+_WORLD_THEMES: tuple[WorldTheme, ...] = (
+    WorldTheme(
+        title="The River Ward",
+        answer="mira",
+        foyer_label="Foyer",
+        foyer_description="A drafty entry hall with passages north, south, east, and west.",
+        shrine_label="Shrine",
+        shrine_description="An open shrine watched by a silent stone guardian.",
+        workshop_label="Workshop",
+        workshop_description="An ash-streaked workshop lit by a guttering lamp.",
+        courtyard_label="Courtyard",
+        courtyard_description="Rainwater gathers around a cracked stone well.",
+        gallery_label="Gallery",
+        gallery_description="Portraits of the wardens hang above a long dust-covered table.",
+        entry_chest_label="Entry Chest",
+        entry_chest_description="A squat travel chest sits beside the door.",
+        iron_door_label="Iron Door",
+        iron_door_description="A blackened iron door seals the workshop.",
+        ash_mural_label="Ash Mural",
+        ash_mural_description="An ash-dark mural is impossible to make out with the naked eye.",
+        ash_mural_text="The mural preserves one line: the betrayer's name begins with M.",
+        iron_chest_label="Iron Chest",
+        iron_chest_description="A soot-stained iron chest is tucked under a bench.",
+        stone_well_label="Stone Well",
+        stone_well_description="Etchings circle the well's rim, but they only align from the proper vantage.",
+        water_plaque_label="Water Plaque",
+        water_plaque_description="A bronze plaque slides out from the well masonry.",
+        water_plaque_text="The betrayer lived closest to the river gate.",
+        cartographer_label="Cartographer",
+        cartographer_description="The cartographer studies the walls and waits for a completed survey.",
+        faded_letter_label="Faded Letter",
+        faded_letter_description="A faded letter is still too blurred to decipher.",
+        faded_letter_text="Of the wardens, only Mira kept quarters beside the water.",
+        stone_guardian_label="Stone Guardian",
+        stone_guardian_description="The guardian asks for the betrayer's name once you are ready.",
+        brass_key_label="Brass Key",
+        brass_key_description="A brass key with soot in its teeth.",
+        torch_label="Torch",
+        torch_description="A pitch torch with a steady flame.",
+        torn_map_left_label="Torn Map Left",
+        torn_map_left_description="The left half of a survey map.",
+        torn_map_right_label="Torn Map Right",
+        torn_map_right_description="The right half of a survey map.",
+        full_map_label="Full Map",
+        full_map_description="A restored map of the ward.",
+        lens_label="Lens",
+        lens_description="A polished lens in a brass frame.",
+        initial_clue_text="The betrayer's name begins with M.",
+        river_clue_text="The betrayer lived closest to the river gate.",
+        waterwarden_clue_text="Of the wardens, only Mira kept quarters beside the water.",
+    ),
+    WorldTheme(
+        title="The Ember Vault",
+        answer="vesna",
+        foyer_label="Receiving Hall",
+        foyer_description="A warm stone hall lined with soot and copper hooks.",
+        shrine_label="Crucible Shrine",
+        shrine_description="A brass sentinel stands before a furnace-bright altar.",
+        workshop_label="Forge Annex",
+        workshop_description="Bellows creak above benches powdered with black ash.",
+        courtyard_label="Quench Yard",
+        courtyard_description="A cracked basin gathers rain beside the old quench line.",
+        gallery_label="Ledger Hall",
+        gallery_description="Burned account books rest beneath portraits of furnace wardens.",
+        entry_chest_label="Courier Trunk",
+        entry_chest_description="A courier trunk waits under a soot-marked peg rail.",
+        iron_door_label="Furnace Door",
+        iron_door_description="A scorched iron door blocks the annex.",
+        ash_mural_label="Cinder Frieze",
+        ash_mural_description="A smoke-dark frieze only sharpens under moving flame.",
+        ash_mural_text="A surviving line says the betrayer's name begins with V.",
+        iron_chest_label="Coal Locker",
+        iron_chest_description="A riveted locker is wedged beneath a slagged bench.",
+        stone_well_label="Quench Basin",
+        stone_well_description="Marks on the basin align only when seen with the full survey.",
+        water_plaque_label="Cooling Plaque",
+        water_plaque_description="A brass plate rises from a seam in the basin stone.",
+        water_plaque_text="The betrayer worked closest to the quench trench.",
+        cartographer_label="Quartermaster",
+        cartographer_description="The quartermaster trades only for a complete furnace survey.",
+        faded_letter_label="Scorched Ledger",
+        faded_letter_description="Heat has blurred the ink into copper-colored streaks.",
+        faded_letter_text="Only Vesna kept the cooling ledgers beside the trench.",
+        stone_guardian_label="Brass Sentinel",
+        stone_guardian_description="The sentinel requests the betrayer's name when the case is ready.",
+        brass_key_label="Copper Key",
+        brass_key_description="A copper key with furnace grit packed in the cuts.",
+        torch_label="Coal Torch",
+        torch_description="A coal torch that burns with a steady orange core.",
+        torn_map_left_label="Smelter Map Left",
+        torn_map_left_description="The left half of a furnace survey.",
+        torn_map_right_label="Smelter Map Right",
+        torn_map_right_description="The right half of a furnace survey.",
+        full_map_label="Furnace Survey",
+        full_map_description="A restored survey of the ember vault.",
+        lens_label="Gauge Lens",
+        lens_description="A thick gauge lens set in a brass ring.",
+        initial_clue_text="The betrayer's name begins with V.",
+        river_clue_text="The betrayer worked closest to the quench trench.",
+        waterwarden_clue_text="Only Vesna kept the cooling ledgers beside the trench.",
+    ),
+    WorldTheme(
+        title="The Astral Archive",
+        answer="selene",
+        foyer_label="Entry Rotunda",
+        foyer_description="A quiet rotunda opens toward stacked corridors and a dim observatory stair.",
+        shrine_label="Moon Chapel",
+        shrine_description="A silver warden stands beneath a ceiling of cold stars.",
+        workshop_label="Chart Room",
+        workshop_description="Tables of brass instruments glint in powdery moon dust.",
+        courtyard_label="Star Court",
+        courtyard_description="A dry fountain mirrors the constellations in chipped stone.",
+        gallery_label="Catalog Hall",
+        gallery_description="Glass cases hold the names of long-dead archivists.",
+        entry_chest_label="Porter's Case",
+        entry_chest_description="A leather case rests under the chart hooks.",
+        iron_door_label="Star Door",
+        iron_door_description="A ribbed iron door seals the chart room.",
+        ash_mural_label="Night Chart",
+        ash_mural_description="The chart is unreadable until lit from the proper angle.",
+        ash_mural_text="One surviving note says the betrayer's name begins with S.",
+        iron_chest_label="Index Chest",
+        iron_chest_description="A narrow chest sits below a shelf of cracked lenses.",
+        stone_well_label="Dry Fountain",
+        stone_well_description="Its star marks align only when the full survey is restored.",
+        water_plaque_label="Star Plaque",
+        water_plaque_description="A silver plaque slides free from the fountain rim.",
+        water_plaque_text="The betrayer slept nearest the eastern telescope.",
+        cartographer_label="Archivist",
+        cartographer_description="The archivist will trade for a complete celestial survey.",
+        faded_letter_label="Blurred Index",
+        faded_letter_description="The index script is too faint without magnification.",
+        faded_letter_text="Among the archivists, only Selene kept quarters by the east telescope.",
+        stone_guardian_label="Silver Warden",
+        stone_guardian_description="The warden will hear the accusation once you have evidence.",
+        brass_key_label="Star Key",
+        brass_key_description="A slim key engraved with a crescent notch.",
+        torch_label="Lamp Wand",
+        torch_description="A narrow lamp wand with a clean blue flame.",
+        torn_map_left_label="Celestial Map Left",
+        torn_map_left_description="The left half of a star survey.",
+        torn_map_right_label="Celestial Map Right",
+        torn_map_right_description="The right half of a star survey.",
+        full_map_label="Celestial Survey",
+        full_map_description="A restored survey of the astral archive.",
+        lens_label="Astrolabe Lens",
+        lens_description="A polished lens mounted in silver wire.",
+        initial_clue_text="The betrayer's name begins with S.",
+        river_clue_text="The betrayer slept nearest the eastern telescope.",
+        waterwarden_clue_text="Among the archivists, only Selene kept quarters by the east telescope.",
+    ),
+    WorldTheme(
+        title="The Glass Conservatory",
+        answer="liora",
+        foyer_label="Gate House",
+        foyer_description="A humid gate house opens onto vine-choked passages.",
+        shrine_label="Bloom Shrine",
+        shrine_description="A mossy guardian waits among chipped planters.",
+        workshop_label="Potting Room",
+        workshop_description="Clay dust and root knives cover the worktables.",
+        courtyard_label="Glass Court",
+        courtyard_description="A cracked basin sits beneath panes webbed with ivy.",
+        gallery_label="Seed Gallery",
+        gallery_description="Pressed flowers hang beside records of vanished caretakers.",
+        entry_chest_label="Garden Chest",
+        entry_chest_description="A cedar chest is tucked beside the rain cloaks.",
+        iron_door_label="Greenhouse Door",
+        iron_door_description="A warped iron door blocks the potting room.",
+        ash_mural_label="Vine Panel",
+        ash_mural_description="The panel's scratches only read clearly under a steady flame.",
+        ash_mural_text="A scratched line says the betrayer's name begins with L.",
+        iron_chest_label="Tool Locker",
+        iron_chest_description="A damp locker crouches under a potting bench.",
+        stone_well_label="Ivy Basin",
+        stone_well_description="The etched rings align only when the full garden survey is in hand.",
+        water_plaque_label="Root Plaque",
+        water_plaque_description="A greened plaque slides from the basin wall.",
+        water_plaque_text="The betrayer tended the beds nearest the rain cistern.",
+        cartographer_label="Head Gardener",
+        cartographer_description="The gardener will barter only for a complete bed map.",
+        faded_letter_label="Watered Note",
+        faded_letter_description="The note is blurred by old rain and fertilizer.",
+        faded_letter_text="Only Liora kept the cistern ledgers beside the rain beds.",
+        stone_guardian_label="Moss Guardian",
+        stone_guardian_description="The guardian listens when you are ready to name the betrayer.",
+        brass_key_label="Trellis Key",
+        brass_key_description="A greened key shaped like a curling vine.",
+        torch_label="Glass Lantern",
+        torch_description="A glass-sided lantern with a bright white flame.",
+        torn_map_left_label="Bed Map Left",
+        torn_map_left_description="The left half of a conservatory plan.",
+        torn_map_right_label="Bed Map Right",
+        torn_map_right_description="The right half of a conservatory plan.",
+        full_map_label="Bed Survey",
+        full_map_description="A restored survey of the conservatory beds.",
+        lens_label="Prism Lens",
+        lens_description="A prism lens wrapped in tarnished copper.",
+        initial_clue_text="The betrayer's name begins with L.",
+        river_clue_text="The betrayer tended the beds nearest the rain cistern.",
+        waterwarden_clue_text="Only Liora kept the cistern ledgers beside the rain beds.",
+    ),
+    WorldTheme(
+        title="The Salt Bastion",
+        answer="corin",
+        foyer_label="Watch Hall",
+        foyer_description="A salt-stung hall opens toward barracks, chapel, and the sea court.",
+        shrine_label="Tide Chapel",
+        shrine_description="A stone warden keeps watch over a shrine of ropes and shells.",
+        workshop_label="Signal Room",
+        workshop_description="Lantern hooks sway above benches dusted with salt ash.",
+        courtyard_label="Sea Court",
+        courtyard_description="A dry cistern sits beneath walls pitted by ocean wind.",
+        gallery_label="Roll Hall",
+        gallery_description="Roster boards hang beneath portraits of old coast captains.",
+        entry_chest_label="Harbor Chest",
+        entry_chest_description="A travel chest sits beside a rack of oilskins.",
+        iron_door_label="Beacon Door",
+        iron_door_description="A rusted iron door bars the signal room.",
+        ash_mural_label="Signal Board",
+        ash_mural_description="Salt haze hides the markings until a lamp is raised close.",
+        ash_mural_text="A surviving mark says the betrayer's name begins with C.",
+        iron_chest_label="Tar Locker",
+        iron_chest_description="A tar-black locker hides below a signal bench.",
+        stone_well_label="Dry Cistern",
+        stone_well_description="Its carved rings make sense only with the restored coast survey.",
+        water_plaque_label="Harbor Plaque",
+        water_plaque_description="A plaque rises from a crack in the cistern lip.",
+        water_plaque_text="The betrayer bunked nearest the harbor chain.",
+        cartographer_label="Harbor Clerk",
+        cartographer_description="The clerk trades only for a complete bastion survey.",
+        faded_letter_label="Salted Roll",
+        faded_letter_description="Salt has crusted over the roster names.",
+        faded_letter_text="Only Corin kept the harbor ledgers beside the chain gate.",
+        stone_guardian_label="Stone Warden",
+        stone_guardian_description="The warden asks for the betrayer's name when the proof is ready.",
+        brass_key_label="Anchor Key",
+        brass_key_description="A heavy key stamped with a worn anchor.",
+        torch_label="Signal Lamp",
+        torch_description="A shuttered lamp with a disciplined yellow flame.",
+        torn_map_left_label="Coast Map Left",
+        torn_map_left_description="The left half of a bastion survey.",
+        torn_map_right_label="Coast Map Right",
+        torn_map_right_description="The right half of a bastion survey.",
+        full_map_label="Coast Survey",
+        full_map_description="A restored survey of the salt bastion.",
+        lens_label="Captain's Lens",
+        lens_description="A salt-clear lens held in a bronze ring.",
+        initial_clue_text="The betrayer's name begins with C.",
+        river_clue_text="The betrayer bunked nearest the harbor chain.",
+        waterwarden_clue_text="Only Corin kept the harbor ledgers beside the chain gate.",
+    ),
+)
+
+
+def sample_world_definition(seed: int | None = None, difficulty_target: float = 1.5) -> dict[str, Any]:
+    """Build a sample world definition from one of the built-in themes.
+
+    Args:
+        seed: Forwarded to ``_select_theme`` to choose the theme; ``None``
+            leaves the choice to that helper's no-seed branch.
+        difficulty_target: Stored unchanged under ``meta.difficulty_target``
+            of the returned world.
+
+    Returns:
+        The plain-dict world produced by ``_build_world``.
+    """
+    return _build_world(_select_theme(seed), difficulty_target=difficulty_target)
+
+
+def load_world(path: str) -> dict[str, Any]:
+    """Read the UTF-8 encoded JSON file at ``path`` and return the decoded document."""
+    with Path(path).open(encoding="utf-8") as handle:
+        return json.load(handle)
+
+
+def _select_theme(seed: int | None) -> WorldTheme:
+    """Pick the theme that corresponds to ``seed``.
+
+    Without a seed the first entry of ``_WORLD_THEMES`` is returned. With a
+    seed, a private ``random.Random(seed)`` draws the index, so the same seed
+    always maps to the same theme and the global RNG state is left untouched.
+    """
+    if seed is not None:
+        index = random.Random(seed).randrange(len(_WORLD_THEMES))
+        return _WORLD_THEMES[index]
+    return _WORLD_THEMES[0]
+
+
+def _build_world(theme: WorldTheme, *, difficulty_target: float) -> dict[str, Any]:
+    """Assemble the fixed sample world, skinned with the labels and texts of ``theme``.
+
+    The graph structure (ids, edges, item placement, recipe, quest order) is
+    identical for every theme; only labels, descriptions, clue texts and the
+    answer string come from ``theme``.
+
+    Args:
+        theme: Source of every human-readable string and of the answer.
+        difficulty_target: Copied verbatim into ``meta.difficulty_target``.
+
+    Returns:
+        A plain dict with the keys ``meta``, ``nodes``, ``edges``, ``items``,
+        ``clues``, ``recipes`` and ``quest_chain``.
+    """
+    return {
+        "meta": {
+            "title": theme.title,
+            "difficulty_target": difficulty_target,
+            "start_node_id": "foyer",
+            # The world is won by submitting the answer to the guardian NPC.
+            "win_condition": {
+                "type": "deduce",
+                "target_npc_id": "stone_guardian",
+                "answer_string": theme.answer,
+            },
+        },
+        "nodes": [
+            # Five rooms: the foyer plus one room per compass direction (see "edges").
+            {"id": "foyer", "type": "location", "label": theme.foyer_label, "description": theme.foyer_description},
+            {"id": "shrine", "type": "location", "label": theme.shrine_label, "description": theme.shrine_description},
+            {"id": "workshop", "type": "location", "label": theme.workshop_label, "description": theme.workshop_description},
+            {"id": "courtyard", "type": "location", "label": theme.courtyard_label, "description": theme.courtyard_description},
+            {"id": "gallery", "type": "location", "label": theme.gallery_label, "description": theme.gallery_description},
+            # Unlocked chest in the foyer; the brass key starts inside it (see "items").
+            {
+                "id": "entry_chest",
+                "type": "container",
+                "label": theme.entry_chest_label,
+                "description": theme.entry_chest_description,
+                "parent_id": "foyer",
+                "open": False,
+                "locked": False,
+                "lock_key_id": None,
+            },
+            # Locked door referenced by both foyer<->workshop edges.
+            {
+                "id": "iron_door",
+                "type": "door",
+                "label": theme.iron_door_label,
+                "description": theme.iron_door_description,
+                "open": False,
+                "locked": True,
+                "lock_key_id": "brass_key",
+            },
+            # First clue: readable in the workshop that requires the torch.
+            {
+                "id": "ash_mural",
+                "type": "readable",
+                "label": theme.ash_mural_label,
+                "description": theme.ash_mural_description,
+                "parent_id": "workshop",
+                "clue_id": "initial_clue",
+                "requires_item_id": "torch",
+                "consumes_item": False,
+                "text_content": theme.ash_mural_text,
+            },
+            # Unlocked chest in the workshop; holds the left map half (see "items").
+            {
+                "id": "iron_chest",
+                "type": "container",
+                "label": theme.iron_chest_label,
+                "description": theme.iron_chest_description,
+                "parent_id": "workshop",
+                "open": False,
+                "locked": False,
+                "lock_key_id": None,
+            },
+            # Fixture that reveals the water plaque once the full map is used on it.
+            {
+                "id": "stone_well",
+                "type": "fixture",
+                "label": theme.stone_well_label,
+                "description": theme.stone_well_description,
+                "parent_id": "courtyard",
+                "requires_item_id": "full_map",
+                "reveals_item_id": None,
+                "reveals_readable_id": "water_plaque",
+                "consumes_item": False,
+            },
+            # Second clue: needs no item itself, but is revealed by the stone well.
+            {
+                "id": "water_plaque",
+                "type": "readable",
+                "label": theme.water_plaque_label,
+                "description": theme.water_plaque_description,
+                "parent_id": "courtyard",
+                "clue_id": "river_clue",
+                "requires_item_id": None,
+                "consumes_item": False,
+                "text_content": theme.water_plaque_text,
+            },
+            # NPC in the gallery that trades the full map for the lens.
+            {
+                "id": "cartographer",
+                "type": "npc",
+                "label": theme.cartographer_label,
+                "description": theme.cartographer_description,
+                "parent_id": "gallery",
+                "requires_item_id": "full_map",
+                "gives_item_id": "lens",
+                "gives_clue_id": None,
+            },
+            # Third clue: readable in the gallery that requires the lens.
+            {
+                "id": "faded_letter",
+                "type": "readable",
+                "label": theme.faded_letter_label,
+                "description": theme.faded_letter_description,
+                "parent_id": "gallery",
+                "clue_id": "waterwarden_clue",
+                "requires_item_id": "lens",
+                "consumes_item": False,
+                "text_content": theme.faded_letter_text,
+            },
+            # Win-condition NPC; it neither requires nor gives anything.
+            {
+                "id": "stone_guardian",
+                "type": "npc",
+                "label": theme.stone_guardian_label,
+                "description": theme.stone_guardian_description,
+                "parent_id": "shrine",
+                "requires_item_id": None,
+                "gives_item_id": None,
+                "gives_clue_id": None,
+            },
+        ],
+        # Every connection is listed once per direction; only the workshop
+        # pair is a locked passage (brass key + iron door).
+        "edges": [
+            {"id": "foyer_north", "from_node_id": "foyer", "to_node_id": "shrine", "direction": "north", "type": "passage", "required_item_id": None, "door_node_id": None},
+            {"id": "shrine_south", "from_node_id": "shrine", "to_node_id": "foyer", "direction": "south", "type": "passage", "required_item_id": None, "door_node_id": None},
+            {"id": "foyer_east", "from_node_id": "foyer", "to_node_id": "workshop", "direction": "east", "type": "locked_passage", "required_item_id": "brass_key", "door_node_id": "iron_door"},
+            {"id": "workshop_west", "from_node_id": "workshop", "to_node_id": "foyer", "direction": "west", "type": "locked_passage", "required_item_id": "brass_key", "door_node_id": "iron_door"},
+            {"id": "foyer_west", "from_node_id": "foyer", "to_node_id": "courtyard", "direction": "west", "type": "passage", "required_item_id": None, "door_node_id": None},
+            {"id": "courtyard_east", "from_node_id": "courtyard", "to_node_id": "foyer", "direction": "east", "type": "passage", "required_item_id": None, "door_node_id": None},
+            {"id": "foyer_south", "from_node_id": "foyer", "to_node_id": "gallery", "direction": "south", "type": "passage", "required_item_id": None, "door_node_id": None},
+            {"id": "gallery_north", "from_node_id": "gallery", "to_node_id": "foyer", "direction": "north", "type": "passage", "required_item_id": None, "door_node_id": None},
+        ],
+        # Items with start_node_id None are not placed in the world: full_map is
+        # the recipe output and lens is what the cartographer gives.
+        "items": [
+            {"id": "brass_key", "label": theme.brass_key_label, "description": theme.brass_key_description, "subtype": "key", "start_node_id": "entry_chest"},
+            {"id": "torch", "label": theme.torch_label, "description": theme.torch_description, "subtype": "puzzle", "start_node_id": "workshop"},
+            {"id": "torn_map_left", "label": theme.torn_map_left_label, "description": theme.torn_map_left_description, "subtype": "puzzle", "start_node_id": "iron_chest"},
+            {"id": "torn_map_right", "label": theme.torn_map_right_label, "description": theme.torn_map_right_description, "subtype": "puzzle", "start_node_id": "courtyard"},
+            {"id": "full_map", "label": theme.full_map_label, "description": theme.full_map_description, "subtype": "puzzle", "start_node_id": None},
+            {"id": "lens", "label": theme.lens_label, "description": theme.lens_description, "subtype": "puzzle", "start_node_id": None},
+        ],
+        # Clue ids match the clue_id values of the three readable nodes above.
+        "clues": [
+            {"id": "initial_clue", "text": theme.initial_clue_text},
+            {"id": "river_clue", "text": theme.river_clue_text},
+            {"id": "waterwarden_clue", "text": theme.waterwarden_clue_text},
+        ],
+        "recipes": [
+            {
+                "id": "restore_map",
+                "input_item_ids": ["torn_map_left", "torn_map_right"],
+                "output_item_id": "full_map",
+            }
+        ],
+        # Reference walkthrough. Each step names its prerequisite step ids; the
+        # descriptions use the lower-cased theme labels.
+        "quest_chain": [
+            {"step_id": "open_entry_chest", "description": f"Open the {theme.entry_chest_label.lower()}.", "requires_step_ids": [], "action": "open(entry_chest)"},
+            {"step_id": "take_brass_key", "description": f"Take the {theme.brass_key_label.lower()}.", "requires_step_ids": ["open_entry_chest"], "action": "take(brass_key,entry_chest)"},
+            {"step_id": "unlock_workshop", "description": f"Unlock the {theme.iron_door_label.lower()}.", "requires_step_ids": ["take_brass_key"], "action": "unlock(iron_door,brass_key)"},
+            {"step_id": "open_workshop", "description": f"Open the {theme.iron_door_label.lower()}.", "requires_step_ids": ["unlock_workshop"], "action": "open(iron_door)"},
+            {"step_id": "go_workshop", "description": f"Enter the {theme.workshop_label.lower()}.", "requires_step_ids": ["open_workshop"], "action": "go(workshop)"},
+            {"step_id": "take_torch", "description": f"Take the {theme.torch_label.lower()}.", "requires_step_ids": ["go_workshop"], "action": "take(torch,workshop)"},
+            {"step_id": "use_torch_on_mural", "description": f"Use the {theme.torch_label.lower()} on the {theme.ash_mural_label.lower()}.", "requires_step_ids": ["take_torch"], "action": "use(torch,ash_mural)"},
+            {"step_id": "open_iron_chest", "description": f"Open the {theme.iron_chest_label.lower()}.", "requires_step_ids": ["go_workshop"], "action": "open(iron_chest)"},
+            {"step_id": "take_left_map", "description": f"Take the {theme.torn_map_left_label.lower()}.", "requires_step_ids": ["open_iron_chest"], "action": "take(torn_map_left,iron_chest)"},
+            {"step_id": "return_foyer", "description": f"Return to the {theme.foyer_label.lower()}.", "requires_step_ids": ["take_left_map"], "action": "go(foyer)"},
+            {"step_id": "go_courtyard", "description": f"Head to the {theme.courtyard_label.lower()}.", "requires_step_ids": ["return_foyer"], "action": "go(courtyard)"},
+            {"step_id": "take_right_map", "description": f"Take the {theme.torn_map_right_label.lower()}.", "requires_step_ids": ["go_courtyard"], "action": "take(torn_map_right,courtyard)"},
+            {"step_id": "combine_map", "description": f"Restore the {theme.full_map_label.lower()}.", "requires_step_ids": ["take_right_map"], "action": "combine(torn_map_left,torn_map_right)"},
+            {"step_id": "use_map_on_well", "description": f"Use the {theme.full_map_label.lower()} on the {theme.stone_well_label.lower()}.", "requires_step_ids": ["combine_map"], "action": "use(full_map,stone_well)"},
+            {"step_id": "read_plaque", "description": f"Read the {theme.water_plaque_label.lower()}.", "requires_step_ids": ["use_map_on_well"], "action": "read(water_plaque)"},
+            {"step_id": "go_foyer_again", "description": f"Go back to the {theme.foyer_label.lower()}.", "requires_step_ids": ["read_plaque"], "action": "go(foyer)"},
+            {"step_id": "go_gallery", "description": f"Head to the {theme.gallery_label.lower()}.", "requires_step_ids": ["go_foyer_again"], "action": "go(gallery)"},
+            {"step_id": "give_map", "description": f"Give the map to the {theme.cartographer_label.lower()}.", "requires_step_ids": ["go_gallery"], "action": "give(full_map,cartographer)"},
+            {"step_id": "use_lens_on_letter", "description": f"Use the {theme.lens_label.lower()} on the {theme.faded_letter_label.lower()}.", "requires_step_ids": ["give_map"], "action": "use(lens,faded_letter)"},
+            {"step_id": "return_foyer_final", "description": f"Return to the {theme.foyer_label.lower()} again.", "requires_step_ids": ["use_lens_on_letter"], "action": "go(foyer)"},
+            {"step_id": "go_shrine", "description": f"Go to the {theme.shrine_label.lower()}.", "requires_step_ids": ["return_foyer_final"], "action": "go(shrine)"},
+            {"step_id": "talk_guardian", "description": f"Speak to the {theme.stone_guardian_label.lower()}.", "requires_step_ids": ["go_shrine"], "action": "talk(stone_guardian)"},
+            {"step_id": "submit_answer", "description": "Submit the betrayer's name.", "requires_step_ids": ["talk_guardian"], "action": f'submit("{theme.answer}")'},
+        ],
+    }
diff --git a/agents/master/schema.py b/agents/master/schema.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb18d4eeb316c814d49859fce8927d53e9be8d8f
--- /dev/null
+++ b/agents/master/schema.py
@@ -0,0 +1,316 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Annotated, Literal, TypeAlias
+
+from pydantic import BaseModel, ConfigDict, Field
+
+from agents.shared.openenv_compat import Action, Observation, State
+
+
+class StrictModel(BaseModel):
+    """Base pydantic model that rejects any field not declared on the model."""
+
+    model_config = ConfigDict(extra="forbid")
+
+
+class WorldMeta(StrictModel):
+    """Top-level metadata of a world: title, difficulty target, start node and win condition."""
+
+    title: str
+    difficulty_target: float
+    start_node_id: str
+    # Forward reference: WinCondition is declared after this class.
+    win_condition: "WinCondition"
+
+
+class WinCondition(StrictModel):
+    """How a world is won; ``"deduce"`` is the only accepted type."""
+
+    type: Literal["deduce"]
+    target_npc_id: str  # id of the NPC node tied to the win condition
+    answer_string: str  # the answer that counts as correct
+
+
+class BaseNode(StrictModel):
+    """Fields shared by every world node: id, display label and description."""
+
+    id: str
+    label: str
+    description: str
+
+
+class LocationNode(BaseNode):
+    """World node tagged ``"location"``; adds no fields beyond the base ones."""
+
+    type: Literal["location"]
+
+
+class JunctionNode(BaseNode):
+    """World node tagged ``"junction"``; adds no fields beyond the base ones."""
+
+    type: Literal["junction"]
+
+
+class ContainerNode(BaseNode):
+    """Container placed under a parent node, with open/locked state and an optional key."""
+
+    type: Literal["container"]
+    parent_id: str
+    open: bool = False
+    locked: bool = False
+    lock_key_id: str | None = None  # item id of the key, if any
+
+
+class DoorNode(BaseNode):
+    """Door with open/locked state and an optional key.
+
+    Unlike the other non-room nodes it has no ``parent_id``; edges point at a
+    door through ``Edge.door_node_id``.
+    """
+
+    type: Literal["door"]
+    open: bool = False
+    locked: bool = False
+    lock_key_id: str | None = None  # item id of the key, if any
+
+
+class ReadableNode(BaseNode):
+    """Readable object under a parent node that carries a clue and its text."""
+
+    type: Literal["readable"]
+    parent_id: str
+    clue_id: str
+    requires_item_id: str | None = None  # item needed for this readable, if any
+    consumes_item: bool = False
+    text_content: str
+
+
+class FixtureNode(BaseNode):
+    """Fixture under a parent node that always requires an item and may reveal something."""
+
+    type: Literal["fixture"]
+    parent_id: str
+    requires_item_id: str  # mandatory here, unlike on readables and NPCs
+    reveals_item_id: str | None = None
+    reveals_readable_id: str | None = None
+    consumes_item: bool = False
+
+
+class NpcNode(BaseNode):
+    """NPC under a parent node; may require an item and may give an item and/or a clue."""
+
+    type: Literal["npc"]
+    parent_id: str
+    requires_item_id: str | None = None
+    gives_item_id: str | None = None
+    gives_clue_id: str | None = None
+
+
+# Union of all node models; pydantic selects the concrete model from the
+# value of the ``type`` field (discriminated union).
+WorldNode: TypeAlias = Annotated[
+    LocationNode | JunctionNode | ContainerNode | DoorNode | ReadableNode | FixtureNode | NpcNode,
+    Field(discriminator="type"),
+]
+
+
+class Edge(StrictModel):
+    """One-way connection between two nodes in a named direction.
+
+    ``type`` is either a plain ``"passage"`` or a ``"locked_passage"``; the
+    optional ``required_item_id`` and ``door_node_id`` describe the gate.
+    """
+
+    id: str
+    from_node_id: str
+    to_node_id: str
+    direction: Literal["north", "south", "east", "west", "up", "down", "in", "out"]
+    type: Literal["passage", "locked_passage"]
+    required_item_id: str | None = None
+    door_node_id: str | None = None
+
+
+class Item(StrictModel):
+    """Item of subtype ``"key"`` or ``"puzzle"`` with an optional starting node."""
+
+    id: str
+    label: str
+    description: str
+    subtype: Literal["key", "puzzle"]
+    start_node_id: str | None = None  # None: the item has no starting placement
+
+
+class Clue(StrictModel):
+    """A clue: an id plus its text."""
+
+    id: str
+    text: str
+
+
+class Recipe(StrictModel):
+    """Combination of exactly two input items into one output item."""
+
+    id: str
+    # Validation enforces a list of exactly two ids.
+    input_item_ids: list[str] = Field(min_length=2, max_length=2)
+    output_item_id: str
+
+
+class QuestStep(StrictModel):
+    """One step of a quest chain: id, description, prerequisite step ids and an action string."""
+
+    step_id: str
+    description: str
+    requires_step_ids: list[str] = Field(default_factory=list)
+    action: str  # textual action; its format is not validated by this model
+
+
+class WorldDefinition(StrictModel):
+    """Complete world document: metadata, nodes, edges, items, clues, recipes and quest chain.
+
+    ``recipes`` is the only optional section and defaults to an empty list.
+    """
+
+    meta: WorldMeta
+    nodes: list[WorldNode]
+    edges: list[Edge]
+    items: list[Item]
+    clues: list[Clue]
+    recipes: list[Recipe] = Field(default_factory=list)
+    quest_chain: list[QuestStep]
+
+
+class DMAction(Action):
+    """Environment action whose payload is a full world definition."""
+
+    world_definition: WorldDefinition
+
+
+class Turn(StrictModel):
+    """Transcript entry for one step: the action, the command, the observation and a state delta."""
+
+    step: int
+    player_action: str
+    textworld_command: str
+    observation: str
+    game_state_delta: dict[str, object]
+
+
+class DMFeedback(StrictModel):
+    """Diagnostic feedback about a world and an episode played in it.
+
+    The two counters default to zero; every other field is required.
+    """
+
+    unreachable_nodes: list[str]
+    unused_items: list[str]
+    clues_missed: list[str]
+    mean_steps_per_room: float
+    invalid_command_count: int = 0
+    wrong_submit_count: int = 0
+
+
+class DMRewardBreakdown(StrictModel):
+    """Components behind a reward value.
+
+    ``reward_mode`` names which of the two reward paths produced the value.
+    The optional float fields default to ``None``.
+    NOTE(review): presumably they stay ``None`` when no ratio could be
+    computed, e.g. on the compile-failure path — confirm against the reward code.
+    """
+
+    reward_mode: Literal["gaussian_target_ratio", "compile_failure_penalty"] = "gaussian_target_ratio"
+    player_won: bool
+    raw_ratio: float | None = None
+    clamped_ratio: float | None = None
+    target_ratio: float
+    target_ratio_delta: float | None = None
+    efficiency_score: float | None = None
+    quality_score: float = 0.0
+    reward: float
+
+
+class DMObservation(Observation):
+    """Observation with the episode transcript, outcome metrics, feedback and reward breakdown.
+
+    Every field is optional: the transcript defaults to an empty list and all
+    other fields default to ``None``.
+    """
+
+    episode_transcript: list[Turn] = Field(default_factory=list)
+    player_won: bool | None = None
+    steps_taken: int | None = None
+    min_steps: int | None = None
+    ratio: float | None = None
+    compile_error: str | None = None
+    feedback: DMFeedback | None = None
+    reward_breakdown: DMRewardBreakdown | None = None
+    target_ratio_used: float | None = None
+
+
+class DMState(State):
+    """Environment state: current world, compile/episode status and running statistics.
+
+    A fresh state has no world, compile status ``"pending"`` and episode
+    status ``"running"``.
+    """
+
+    current_world: WorldDefinition | None = None
+    compile_status: Literal["valid", "invalid", "pending"] = "pending"
+    episode_status: Literal["running", "complete", "failed"] = "running"
+    cumulative_success_rate: float = 0.0
+    target_ratio: float = 0.0
+    difficulty_hint: float | None = None
+
+
+@dataclass(frozen=True)
+class GoAction:
+    """Immutable "go" quest action; holds the id of the target node."""
+
+    target_node_id: str
+
+
+@dataclass(frozen=True)
+class OpenAction:
+    """Immutable "open" quest action; holds the id of the node to open."""
+
+    target_node_id: str
+
+
+@dataclass(frozen=True)
+class UnlockAction:
+    """Immutable "unlock" quest action; holds a door id and a key id."""
+
+    door_id: str
+    key_id: str
+
+
+@dataclass(frozen=True)
+class TakeAction:
+    """Immutable "take" quest action; holds an item id and the node it is taken from."""
+
+    item_id: str
+    source_node_id: str
+
+
+@dataclass(frozen=True)
+class ReadAction:
+    """Immutable "read" quest action; holds the id of the node to read."""
+
+    target_node_id: str
+
+
+@dataclass(frozen=True)
+class UseAction:
+    """Immutable "use" quest action; holds an item id and the node it is used on."""
+
+    item_id: str
+    target_node_id: str
+
+
+@dataclass(frozen=True)
+class CombineAction:
+    """Immutable "combine" quest action; holds the ids of the two items."""
+
+    item_a_id: str
+    item_b_id: str
+
+
+@dataclass(frozen=True)
+class GiveAction:
+    """Immutable "give" quest action; holds an item id and the receiving NPC id."""
+
+    item_id: str
+    npc_id: str
+
+
+@dataclass(frozen=True)
+class TalkAction:
+    """Immutable "talk" quest action; holds the id of the node to talk to."""
+
+    target_node_id: str
+
+
+@dataclass(frozen=True)
+class SubmitAction:
+    """Immutable "submit" quest action; holds the submitted answer text."""
+
+    answer_text: str
+
+
+# Union of every quest action dataclass declared above.
+# NOTE(review): presumably the parsed form of ``QuestStep.action`` strings —
+# confirm against the parser.
+QuestAction = (
+    GoAction
+    | OpenAction
+    | UnlockAction
+    | TakeAction
+    | ReadAction
+    | UseAction
+    | CombineAction
+    | GiveAction
+    | TalkAction
+    | SubmitAction
+)
+
+
+@dataclass(frozen=True)
+class NpcTrade:
+    """Immutable trade record: the item required and the item and/or clue given in return.
+
+    Both ``gives_*`` fields may be ``None`` but have no default, so they must
+    always be passed.
+    """
+
+    required_item_id: str
+    gives_item_id: str | None
+    gives_clue_id: str | None
+
+
+@dataclass(frozen=True)
+class UseEffect:
+    """Immutable record of what using a required item can yield.
+
+    The yield is an optional clue, revealed item or revealed readable, plus a
+    flag for whether the item is consumed.
+    """
+
+    required_item_id: str
+    clue_id: str | None = None
+    reveals_item_id: str | None = None
+    reveals_readable_id: str | None = None
+    consumes_item: bool = False
+
+
+@dataclass
+class CompiledWorld:
+    """A world definition bundled with its build artifacts and precomputed lookup tables.
+
+    All fields are required; the dataclass is mutable (not frozen).
+    """
+
+    episode_id: str
+    world: WorldDefinition
+    artifacts_dir: Path
+    game_file: Path
+    walkthrough_commands: list[str]
+    solver_policy: list[str]
+    correct_answer_normalized: str
+    correct_submit_command: str
+    guardian_id: str
+    guardian_room_id: str
+    room_name_to_id: dict[str, str]
+    node_command_names: dict[str, str]
+    item_command_names: dict[str, str]
+    item_start_locations: dict[str, str | None]
+    clue_text_by_id: dict[str, str]
+    readable_clue_by_id: dict[str, str]
+    npc_trade_map: dict[str, NpcTrade]
+    # presumably keyed by the set of a recipe's input item ids, mapping to the
+    # output item id — TODO confirm against the compiler
+    recipe_map: dict[frozenset[str], str]
+    use_effects: dict[str, UseEffect]
+    produced_item_ids: set[str]
+    # NOTE(review): the two tuple keys look like (room id, target id) and
+    # (room id, direction) — confirm against the compiler
+    room_edges_by_target: dict[tuple[str, str], Edge]
+    room_edges_by_direction: dict[tuple[str, str], Edge]
+    # door id -> ids of the rooms associated with that door
+    door_rooms: dict[str, frozenset[str]]
+
+
+@dataclass
+class SimulationState:
+    """Mutable progress state of a simulated playthrough.
+
+    Only ``current_room_id`` is required. Every collection field gets its own
+    fresh empty container per instance via ``default_factory``.
+    """
+
+    current_room_id: str
+    inventory: set[str] = field(default_factory=set)
+    item_locations: dict[str, str | None] = field(default_factory=dict)
+    open_nodes: set[str] = field(default_factory=set)
+    locked_nodes: set[str] = field(default_factory=set)
+    discovered_clues: set[str] = field(default_factory=set)
+    consulted_npcs: set[str] = field(default_factory=set)
+    satisfied_npcs: set[str] = field(default_factory=set)
+    revealed_readables: set[str] = field(default_factory=set)
+    prepared_readables: set[str] = field(default_factory=set)
+    used_fixtures: set[str] = field(default_factory=set)
+    produced_items: set[str] = field(default_factory=set)
+    visited_nodes: set[str] = field(default_factory=set)
diff --git a/agents/master/server.py b/agents/master/server.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b50d7e9ad375800458dbe97baac0cc2a07b8ba1
--- /dev/null
+++ b/agents/master/server.py
@@ -0,0 +1,370 @@
+from __future__ import annotations
+
+import json
+import mimetypes
+import threading
+from http import HTTPStatus
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+from pathlib import Path
+from typing import Any
+from urllib.parse import urlparse
+
+from .base import DMCompileError, DMInterfaceError
+from .build import WorldCompiler
+from .interface import GeminiInterfaceAdapter, SimpleInterfaceAdapter
+from .schema import CompiledWorld, WorldDefinition
+from .session import EpisodeSession
+from .snapshots import (
+    DEFAULT_LIVE_DIR,
+    STATE_FILENAME,
+    WORLD_FILENAME,
+    LiveCurrentRoom,
+    LiveMetrics,
+    LiveRuntime,
+    LiveStateSnapshot,
+    load_live_payload,
+)
+
+
+WEB_DIST_DIR = Path(__file__).resolve().parents[2] / "www" / "dist"
+
+
+class GameSessionManager:
+    """Thread-safe container for an interactive play session."""
+
+    def __init__(self, live_dir: Path, use_gemini: bool = False) -> None:
+        self._lock = threading.Lock()
+        self._session: EpisodeSession | None = None
+        self._compiled: CompiledWorld | None = None
+        self._compiler = WorldCompiler()
+        self._live_dir = live_dir
+        self._use_gemini = use_gemini
+        self._clear_stale_files()
+
+    def _clear_stale_files(self) -> None:
+        """Remove leftover state/world JSON from a previous session."""
+        for fname in (STATE_FILENAME, WORLD_FILENAME):
+            path = self._live_dir / fname
+            path.unlink(missing_ok=True)
+
+    def start(self, world_input: WorldDefinition | dict[str, Any]) -> dict[str, Any]:
+        with self._lock:
+            if self._session is not None:
+                self._session.close()
+            compiled = self._compiler.compile(world_input)
+            adapter = self._make_adapter()
+            session = EpisodeSession(compiled, interface_adapter=adapter)
+            self._compiled = compiled
+            self._session = session
+            self._write_world(compiled.world)
+            self._write_state("running")
+            return {
+                "ok": True,
+                "episode_id": compiled.episode_id,
+                "observation": session.current_feedback(),
+                "available_commands": session.available_commands(),
+                "room": self._room_info(session),
+            }
+
+    def reset(self) -> dict[str, Any]:
+        with self._lock:
+            if self._session is not None:
+                self._session.close()
+            self._session = None
+            self._compiled = None
+            self._clear_stale_files()
+            return {"ok": True}
+
+    def command(self, raw_command: str) -> dict[str, Any]:
+        with self._lock:
+            session = self._session
+            if session is None:
+                return {"ok": False, "error": "No active session. POST /api/start first."}
+            if session.done:
+                return {
+                    "ok": False,
+                    "error": "Episode is complete.",
+                    "done": True,
+                    "player_won": session.player_won,
+                }
+            try:
+                turn = session.step(raw_command)
+            except (DMInterfaceError, RuntimeError) as exc:
+                return {"ok": False, "error": str(exc)}
+
+            status = "complete" if session.done and session.player_won else (
+                "failed" if session.done else "running"
+            )
+            self._write_state(status)
+            return {
+                "ok": True,
+                "step": turn.step,
+                "command": turn.textworld_command,
+                "observation": turn.observation,
+                "done": session.done,
+                "player_won": session.player_won,
+                "available_commands": [] if session.done else session.available_commands(),
+                "room": self._room_info(session),
+            }
+
+    def get_state_payload(self) -> dict[str, Any] | None:
+        with self._lock:
+            session = self._session
+            compiled = self._compiled
+            if session is None or compiled is None:
+                return None
+            return self._snapshot(session, compiled).model_dump()
+
+    def _make_adapter(self) -> SimpleInterfaceAdapter | GeminiInterfaceAdapter:
+        if self._use_gemini:
+            try:
+                return GeminiInterfaceAdapter(narrate_observations=True)
+            except DMInterfaceError:
+                pass
+        return SimpleInterfaceAdapter()
+
+    def _write_world(self, world: WorldDefinition) -> None:
+        self._write_json(WORLD_FILENAME, world.model_dump_json(indent=2))
+
+    def _write_state(self, status: str) -> None:
+        session = self._session
+        compiled = self._compiled
+        if session is None or compiled is None:
+            return
+        snapshot = self._snapshot(session, compiled, status=status)
+        self._write_json(STATE_FILENAME, snapshot.model_dump_json(indent=2))
+
+    def _snapshot(
+        self,
+        session: EpisodeSession,
+        compiled: CompiledWorld,
+        status: str | None = None,
+    ) -> LiveStateSnapshot:
+        from datetime import datetime, timezone
+
+        room_ids = {
+            node.id for node in compiled.world.nodes if node.type in {"location", "junction"}
+        }
+        commands = [] if session.done else session.available_commands()
+
+        if status is None:
+            if session.done:
+                status = "complete" if session.player_won else "failed"
+            else:
+                status = "running"
+
+        return LiveStateSnapshot(
+            episode_id=compiled.episode_id,
+            status=status,
+            updated_at=datetime.now(timezone.utc).isoformat(),
+            title=compiled.world.meta.title,
+            transcript=list(session.transcript),
+            metrics=LiveMetrics(
+                steps_taken=session.steps_taken,
+                min_steps=len(compiled.solver_policy),
+                ratio=session.steps_taken / len(compiled.solver_policy) if compiled.solver_policy else None,
+                player_won=session.player_won if session.done else None,
+            ),
+            runtime=LiveRuntime(
+                current_room_id=session.current_room_id,
+                inventory_item_ids=sorted(session.inventory),
+                discovered_clue_ids=sorted(session.discovered_clues),
+                traded_npc_ids=sorted(session.traded_npcs),
+                visited_room_ids=sorted(room_ids & session.visited_nodes),
+                available_commands=commands,
+                invalid_command_count=session.invalid_command_count,
+                wrong_submit_count=session.wrong_submit_count,
+                open_node_ids=sorted(session.open_nodes),
+                locked_node_ids=sorted(session.locked_nodes),
+            ),
+            current_room=self._current_room_snapshot(session),
+        )
+
+    @staticmethod
+    def _current_room_snapshot(session: EpisodeSession) -> LiveCurrentRoom | None:
+        node_by_id = {node.id: node for node in session.compiled.world.nodes}
+        room = node_by_id.get(session.current_room_id)
+        if room is None:
+            return None
+        visible_nodes = [
+            node.id
+            for node in session.compiled.world.nodes
+            if getattr(node, "parent_id", None) == session.current_room_id
+            and (node.type != "readable" or node.id in session.revealed_readables)
+        ]
+        visible_nodes.extend(
+            sorted(
+                door_id
+                for door_id, rooms in session.compiled.door_rooms.items()
+                if session.current_room_id in rooms
+            )
+        )
+        visible_items = sorted(
+            item_id
+            for item_id, location in session.item_locations.items()
+            if location == session.current_room_id
+        )
+        return LiveCurrentRoom(
+            id=room.id,
+            label=room.label,
+            description=room.description,
+            visible_node_ids=sorted(set(visible_nodes)),
+            visible_item_ids=visible_items,
+        )
+
+    @staticmethod
+    def _room_info(session: EpisodeSession) -> dict[str, Any]:
+        """Return ``id``, ``label`` and ``description`` of the session's current room.
+
+        When the current room id is not a node of the compiled world, the id
+        itself is used as the label and the description is empty.
+        """
+        room_id = session.current_room_id
+        room = {node.id: node for node in session.compiled.world.nodes}.get(room_id)
+        if room:
+            label, description = room.label, room.description
+        else:
+            label, description = room_id, ""
+        return {"id": room_id, "label": label, "description": description}
+
+    def _write_json(self, filename: str, payload: str) -> None:
+        """Write ``payload`` plus a trailing newline to ``filename`` in the live directory.
+
+        The directory is created on demand. The text is written to a sibling
+        ``<filename>.tmp`` file first and then renamed over the target path.
+        """
+        live_dir = self._live_dir
+        live_dir.mkdir(parents=True, exist_ok=True)
+        target = live_dir / filename
+        staging = target.with_suffix(target.suffix + ".tmp")
+        staging.write_text(payload + "\n", encoding="utf-8")
+        staging.replace(target)
+
+
+def create_server(
+    *,
+    live_dir: Path | None = None,
+    host: str = "127.0.0.1",
+    port: int = 8000,
+    use_gemini: bool = False,
+) -> ThreadingHTTPServer:
+    """Build (but do not start) the HTTP server behind the live viewer.
+
+    Args:
+        live_dir: Directory holding the live JSON files; ``DEFAULT_LIVE_DIR``
+            is used when omitted.
+        host: Interface to bind.
+        port: TCP port to bind.
+        use_gemini: Passed through to ``GameSessionManager``.
+
+    Returns:
+        A ``ThreadingHTTPServer`` bound to ``(host, port)``. The caller runs
+        ``serve_forever()`` and is responsible for ``server_close()``.
+    """
+    resolved_live_dir = live_dir or DEFAULT_LIVE_DIR
+    game = GameSessionManager(resolved_live_dir, use_gemini=use_gemini)
+
+    class LiveViewerHandler(BaseHTTPRequestHandler):
+        """Serves the live JSON files, the web bundle and the game-control API."""
+
+        server_version = "AgentsMasterLive/1.0"
+
+        def do_GET(self) -> None:  # noqa: N802
+            """Route GET requests: live JSON, index page, static files, SPA fallback."""
+            path = urlparse(self.path).path
+            if path == "/api/state":
+                self._serve_live_file(STATE_FILENAME)
+                return
+            if path == "/api/world":
+                self._serve_live_file(WORLD_FILENAME)
+                return
+            if path == "/":
+                self._serve_index()
+                return
+            if path == "/favicon.ico":
+                self.send_response(HTTPStatus.NO_CONTENT)
+                self.end_headers()
+                return
+            if self._serve_web_file(path):
+                return
+            # Extension-less paths fall back to the index page when a web
+            # bundle exists.
+            if WEB_DIST_DIR.exists() and Path(path).suffix == "":
+                self._serve_index()
+                return
+            self._respond(HTTPStatus.NOT_FOUND, b"Not found\n", "text/plain; charset=utf-8")
+
+        def do_POST(self) -> None:  # noqa: N802
+            """Route POST requests to the reset / start / command game actions."""
+            path = urlparse(self.path).path
+            body = self._read_body()
+
+            if path == "/api/reset":
+                result = game.reset()
+                self._json_respond(HTTPStatus.OK, result)
+                return
+
+            if path == "/api/start":
+                try:
+                    world_input = json.loads(body) if body else None
+                    if world_input is None:
+                        self._json_respond(HTTPStatus.BAD_REQUEST, {"ok": False, "error": "Missing JSON body."})
+                        return
+                    result = game.start(world_input)
+                    self._json_respond(HTTPStatus.OK, result)
+                except (DMCompileError, ValueError, json.JSONDecodeError) as exc:
+                    self._json_respond(HTTPStatus.BAD_REQUEST, {"ok": False, "error": str(exc)})
+                return
+
+            if path == "/api/command":
+                try:
+                    data = json.loads(body) if body else {}
+                    # A JSON array/scalar body or a non-string "command" would
+                    # raise AttributeError below; treat both like a missing
+                    # command so the client gets a 400 instead of a dropped
+                    # connection.
+                    command = data.get("command", "") if isinstance(data, dict) else ""
+                    command = command.strip() if isinstance(command, str) else ""
+                    if not command:
+                        self._json_respond(HTTPStatus.BAD_REQUEST, {"ok": False, "error": "Missing 'command' field."})
+                        return
+                    result = game.command(command)
+                    self._json_respond(HTTPStatus.OK, result)
+                except (json.JSONDecodeError, UnicodeDecodeError) as exc:
+                    # json.loads on bytes raises UnicodeDecodeError for
+                    # undecodable input, which is not a JSONDecodeError.
+                    self._json_respond(HTTPStatus.BAD_REQUEST, {"ok": False, "error": str(exc)})
+                return
+
+            self._respond(HTTPStatus.NOT_FOUND, b"Not found\n", "text/plain; charset=utf-8")
+
+        def log_message(self, format: str, *args: object) -> None:  # noqa: A003
+            """Silence the default per-request logging."""
+            del format, args
+
+        def _read_body(self) -> bytes:
+            """Read the request body announced by ``Content-Length``.
+
+            A missing, non-numeric or non-positive header yields ``b""`` so a
+            malformed request is answered by the route's own 400 handling
+            rather than by an unhandled ``ValueError``.
+            """
+            try:
+                length = int(self.headers.get("Content-Length", 0))
+            except (TypeError, ValueError):
+                return b""
+            return self.rfile.read(length) if length > 0 else b""
+
+        def _serve_index(self) -> None:
+            """Serve the built ``index.html`` or, without one, the rendered template."""
+            index_path = WEB_DIST_DIR / "index.html"
+            if index_path.is_file():
+                self._respond(HTTPStatus.OK, index_path.read_bytes(), "text/html; charset=utf-8")
+            else:
+                from .templates import render_index
+                self._respond(HTTPStatus.OK, render_index().encode("utf-8"), "text/html; charset=utf-8")
+
+        def _serve_live_file(self, filename: str) -> None:
+            """Serve one live JSON file uncached; 204 when no payload is available."""
+            payload = load_live_payload(resolved_live_dir, filename)
+            if payload is None:
+                self.send_response(HTTPStatus.NO_CONTENT)
+                self.send_header("Cache-Control", "no-store")
+                self.end_headers()
+                return
+            self._respond(
+                HTTPStatus.OK, payload, "application/json; charset=utf-8",
+                extra_headers={"Cache-Control": "no-store"},
+            )
+
+        def _serve_web_file(self, path: str) -> bool:
+            """Serve a static file from ``WEB_DIST_DIR``; return whether one was sent."""
+            candidate = (WEB_DIST_DIR / path.lstrip("/")).resolve()
+            # Refuse anything that resolves outside the web bundle directory.
+            try:
+                candidate.relative_to(WEB_DIST_DIR.resolve())
+            except ValueError:
+                return False
+            if not candidate.is_file():
+                return False
+            content_type = mimetypes.guess_type(candidate.name)[0] or "application/octet-stream"
+            self._respond(HTTPStatus.OK, candidate.read_bytes(), content_type)
+            return True
+
+        def _json_respond(self, status: HTTPStatus, data: dict[str, Any]) -> None:
+            """Send ``data`` as an uncached JSON response."""
+            payload = json.dumps(data).encode("utf-8")
+            self._respond(status, payload, "application/json; charset=utf-8",
+                          extra_headers={"Cache-Control": "no-store"})
+
+        def _respond(
+            self, status: HTTPStatus, payload: bytes, content_type: str,
+            *, extra_headers: dict[str, str] | None = None,
+        ) -> None:
+            """Write status line, headers (with ``Content-Length``) and body."""
+            self.send_response(status)
+            self.send_header("Content-Type", content_type)
+            self.send_header("Content-Length", str(len(payload)))
+            if extra_headers:
+                for key, value in extra_headers.items():
+                    self.send_header(key, value)
+            self.end_headers()
+            self.wfile.write(payload)
+
+    return ThreadingHTTPServer((host, port), LiveViewerHandler)
+
+
+def run_server(
+    *,
+    port: int = 8000,
+    live_dir: Path | None = None,
+    host: str = "127.0.0.1",
+    use_gemini: bool = False,
+) -> None:
+    """Create the live-viewer server, announce its URL and serve until stopped.
+
+    The listening socket is closed when ``serve_forever`` exits, whether it
+    returns or raises.
+    """
+    httpd = create_server(live_dir=live_dir, host=host, port=port, use_gemini=use_gemini)
+    bound_port = httpd.server_address[1]
+    print(f"Serving live viewer on http://{host}:{bound_port}")
+    try:
+        httpd.serve_forever()
+    finally:
+        httpd.server_close()
diff --git a/agents/master/session.py b/agents/master/session.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc4c06f970318109ef74f9881e41d98f0c4ee26c
--- /dev/null
+++ b/agents/master/session.py
@@ -0,0 +1,484 @@
+from __future__ import annotations
+
+import json
+import textwrap
+from collections import deque
+from typing import TYPE_CHECKING, Any, Callable
+
+import textworld
+from textworld.core import EnvInfos, GameState
+
+from .base import INVENTORY_ID, normalize_answer_text, suppress_unsupported_game_warning
+from .interface import InterfaceAdapter, SimpleInterfaceAdapter
+from .schema import CompiledWorld, Turn
+
+if TYPE_CHECKING:
+    TurnListener = Callable[["EpisodeSession", Turn], None]
+
+
+class EpisodeSession:
+    def __init__(
+        self,
+        compiled: CompiledWorld,
+        interface_adapter: InterfaceAdapter = SimpleInterfaceAdapter(),
+        turn_listener: "TurnListener | None" = None,
+    ) -> None:
+        """Start a TextWorld episode for ``compiled`` and initialise wrapper state.
+
+        Args:
+            compiled: Compiled world. Its ``game_file`` is loaded into
+                TextWorld and its lookup tables drive the wrapper commands.
+            interface_adapter: Translates player commands and renders
+                observations. Must not be ``None``.
+            turn_listener: Optional callback invoked as
+                ``listener(session, turn)`` after every recorded turn.
+
+        Raises:
+            ValueError: If ``interface_adapter`` is ``None``.
+        """
+        # NOTE(review): the default adapter is created once, when this function
+        # is defined, and is shared by every session that omits the argument -
+        # confirm SimpleInterfaceAdapter keeps no per-session state.
+        if interface_adapter is None:
+            raise ValueError("interface_adapter must not be None.")
+        self.compiled = compiled
+        self.interface_adapter = interface_adapter
+        self.turn_listener = turn_listener
+        with suppress_unsupported_game_warning():
+            self.env = textworld.start(str(compiled.game_file), request_infos=self._requested_infos())
+            self.state = self.env.reset()
+        self._closed = False
+        self.done = False
+        self.player_won = False
+        # Counters and progress sets maintained by the step handlers.
+        self.steps_taken = 0
+        self.invalid_command_count = 0
+        self.wrong_submit_count = 0
+        self.used_items: set[str] = set()
+        self.discovered_clues: set[str] = set()
+        self.consulted_npcs: set[str] = set()
+        self.traded_npcs: set[str] = set()
+        self.prepared_readables: set[str] = set()
+        self.completed_recipe_outputs: set[str] = set()
+        self.completed_use_targets: set[str] = set()
+        self.unlocked_doors: set[str] = set()
+        self.consulted_guardian = False
+        # Readables that some use-effect reveals start hidden; every other
+        # readable is visible from the start. ``hidden_readables`` must be
+        # built first because ``revealed_readables`` is derived from it.
+        self.hidden_readables = {
+            effect.reveals_readable_id for effect in compiled.use_effects.values() if effect.reveals_readable_id
+        }
+        self.revealed_readables = {
+            node.id for node in compiled.world.nodes if node.type == "readable" and node.id not in self.hidden_readables
+        }
+        # Copy the start locations so the compiled world itself is not mutated.
+        self.item_locations = dict(compiled.item_start_locations)
+        self.inventory = {item_id for item_id, location in self.item_locations.items() if location == INVENTORY_ID}
+        # Initial open/locked state of containers and doors; nodes without the
+        # attribute count as closed / unlocked.
+        self.open_nodes = {
+            node.id for node in compiled.world.nodes if node.type in {"container", "door"} and getattr(node, "open", False)
+        }
+        self.locked_nodes = {
+            node.id for node in compiled.world.nodes if node.type in {"container", "door"} and getattr(node, "locked", False)
+        }
+        self.current_room_id = compiled.world.meta.start_node_id
+        self.visited_nodes: set[str] = {self.current_room_id}
+        self.transcript: list[Turn] = []
+        self.recent_normalized_commands: deque[str] = deque(maxlen=3)
+        # Lookup tables: node by id, label by node/item id, item id by command name.
+        self._node_by_id = {node.id: node for node in compiled.world.nodes}
+        self._label_by_id = {node.id: node.label for node in compiled.world.nodes}
+        self._label_by_id.update({item.id: item.label for item in compiled.world.items})
+        self._item_name_to_id = {name: item_id for item_id, name in compiled.item_command_names.items()}
+        # Computed last: the fingerprint reads the state attributes set above.
+        self.last_state_fingerprint = self.state_fingerprint()
+
+    @staticmethod
+    def _requested_infos() -> EnvInfos:
+        """Build the ``EnvInfos`` request that is handed to ``textworld.start``.
+
+        The listed fields are switched on, ``facts`` is explicitly switched
+        off, and the ``walkthrough`` extra is requested.
+        """
+        enabled = (
+            "feedback",
+            "description",
+            "inventory",
+            "location",
+            "won",
+            "lost",
+            "score",
+            "moves",
+            "last_action",
+            "last_command",
+            "admissible_commands",
+            "policy_commands",
+        )
+        flags = dict.fromkeys(enabled, True)
+        flags["facts"] = False
+        return EnvInfos(extras=["walkthrough"], **flags)
+
+    def available_commands(self) -> list[str]:
+        """Return the sorted, de-duplicated commands the player may issue now.
+
+        Combines the environment's admissible commands with the wrapper-level
+        commands produced by ``_custom_commands``.
+        """
+        from_env = self.state.admissible_commands or []
+        return sorted({*from_env, *self._custom_commands()})
+
+    def current_feedback(self) -> str:
+        """Render the latest environment feedback through the interface adapter.
+
+        Missing feedback is passed to the adapter as an empty string.
+        """
+        raw_feedback = self.state.feedback or ""
+        return self.interface_adapter.render_observation(raw_feedback, self.state, self)
+
+    def state_fingerprint(self) -> str:
+        """Serialise the tracked progress state into a canonical JSON string.
+
+        Covers the current room, inventory, discovered clues, opened nodes,
+        completed trades, completed use targets and recipe outputs. Set
+        members are sorted and keys are emitted in sorted order, so equal
+        states produce equal strings.
+        """
+        tracked_sets = {
+            "inventory": self.inventory,
+            "clues": self.discovered_clues,
+            "opened": self.open_nodes,
+            "traded": self.traded_npcs,
+            "use_targets": self.completed_use_targets,
+            "recipe_outputs": self.completed_recipe_outputs,
+        }
+        snapshot: dict[str, Any] = {key: sorted(values) for key, values in tracked_sets.items()}
+        snapshot["room"] = self.current_room_id
+        return json.dumps(snapshot, sort_keys=True)
+
+    def node_id_for_command_name(self, command_name: str, node_types: set[str] | None = None) -> str | None:
+        """Resolve a node's command name back to its node id.
+
+        Returns the id of the first world node whose command name equals
+        ``command_name`` and, when ``node_types`` is given, whose type is in
+        that set. Returns ``None`` when nothing matches.
+        """
+        names = self.compiled.node_command_names
+        matches = (
+            node.id
+            for node in self.compiled.world.nodes
+            if names.get(node.id) == command_name
+            and (node_types is None or node.type in node_types)
+        )
+        return next(matches, None)
+
+    def step(self, raw_command: str) -> Turn:
+        """Execute one player command and return the recorded turn.
+
+        The command is translated by the interface adapter, lower-cased and
+        stripped, then handled as a ``submit``, as a wrapper command, or by
+        the TextWorld environment - in that order of precedence.
+
+        Raises:
+            RuntimeError: If the episode is already complete.
+        """
+        if self.done:
+            raise RuntimeError("Episode is already complete.")
+
+        translated = self.interface_adapter.translate_command(raw_command, self)
+        lowered = translated.lower().strip()
+        submit_turn = self._handle_submit(raw_command, lowered)
+        if submit_turn:
+            return submit_turn
+        handler = self._step_wrapper if self._is_wrapper_command(lowered) else self._step_env
+        return handler(raw_command, lowered)
+
+    def _handle_submit(self, raw_command: str, lowered: str) -> Turn | None:
+        """Resolve a ``submit <answer>`` command entirely inside the wrapper.
+
+        Args:
+            raw_command: The command exactly as the player typed it.
+            lowered: The translated, lower-cased and stripped command.
+
+        Returns:
+            ``None`` when ``lowered`` is not a submit command. Otherwise the
+            recorded turn: a rejection (guardian not ready, clues missing, or
+            wrong answer) or the winning turn, which also ends the episode.
+        """
+        if not lowered.startswith("submit "):
+            return None
+        answer = normalize_answer_text(lowered[7:])
+        guardian_ready = (
+            self.current_room_id == self.compiled.guardian_room_id
+            and self.compiled.guardian_id in self.consulted_npcs
+        )
+        if not guardian_ready:
+            return self._wrapper_only_turn(
+                raw_command,
+                lowered,
+                "The guardian has not asked for your answer yet.",
+                {"wrapper": "submit_rejected", "reason": "guardian_not_ready"},
+            )
+        # Gate on what is actually missing rather than on set equality: an
+        # unexpected extra id in ``discovered_clues`` must not block the answer
+        # while the rejection reports an empty ``missing_clues`` list.
+        missing_clues = set(self.compiled.clue_text_by_id) - self.discovered_clues
+        if missing_clues:
+            return self._wrapper_only_turn(
+                raw_command,
+                lowered,
+                "The guardian waits. You have not gathered enough evidence yet.",
+                {
+                    "wrapper": "submit_rejected",
+                    "reason": "missing_clues",
+                    "missing_clues": sorted(missing_clues),
+                },
+            )
+        if answer != self.compiled.correct_answer_normalized:
+            # Only a well-formed but incorrect answer counts as a wrong submit.
+            self.wrong_submit_count += 1
+            return self._wrapper_only_turn(
+                raw_command,
+                lowered,
+                "The guardian shakes their head. That answer is wrong.",
+                {"wrapper": "submit_rejected", "reason": "wrong_answer", "submitted": answer},
+            )
+        self.steps_taken += 1
+        self.done = True
+        self.player_won = True
+        turn = Turn(
+            step=self.steps_taken,
+            player_action=raw_command,
+            textworld_command=self.compiled.correct_submit_command,
+            observation="The guardian weighs your answer, then nods.\n\nThe dungeon yields. You solved it.",
+            game_state_delta={"wrapper": "submit_forwarded", "won": True, "location": self.current_room_id},
+        )
+        return self._record_turn(turn)
+
+    def _step_env(self, raw_command: str, lowered: str) -> Turn:
+        """Forward a command to the TextWorld environment and record the turn.
+
+        The command is always sent to the environment. It counts as succeeded
+        only if it was among the admissible commands of the state *before*
+        the step; otherwise it is tallied as an invalid command.
+
+        Args:
+            raw_command: The command exactly as the player typed it.
+            lowered: The translated, lower-cased and stripped command.
+
+        Returns:
+            The recorded turn.
+        """
+        previous = self.state
+        # Capture admissibility before stepping: ``self.state`` is replaced below.
+        admissible = set(previous.admissible_commands or [])
+        self.state, _, env_done = self.env.step(lowered)
+        self.steps_taken += 1
+        succeeded = lowered in admissible
+        if not succeeded:
+            self.invalid_command_count += 1
+        else:
+            # Mirror the command into the wrapper's own bookkeeping.
+            self._apply_env_side_effects(lowered)
+        # NOTE(review): ``player_won`` is not updated here, only in
+        # ``_handle_submit`` - confirm an environment-side win is not expected.
+        self.done = bool(env_done or self.state.won)
+        observation = self.interface_adapter.render_observation(self.state.feedback or "", self.state, self)
+        turn = Turn(
+            step=self.steps_taken,
+            player_action=raw_command,
+            textworld_command=lowered,
+            observation=observation,
+            game_state_delta=self._compute_delta(previous, self.state, succeeded, self.current_room_id),
+        )
+        return self._record_turn(turn)
+
+    def _step_wrapper(self, raw_command: str, lowered: str) -> Turn:
+        """Run a wrapper-level command and record the resulting turn.
+
+        A delta whose ``succeeded`` entry is ``False`` is tallied as an
+        invalid command. The current room is filled in as ``location`` when
+        the handler did not set one.
+        """
+        message, delta = self._apply_wrapper_command(lowered)
+        self.steps_taken += 1
+        failed = delta.get("succeeded") is False
+        if failed:
+            self.invalid_command_count += 1
+        if "location" not in delta:
+            delta["location"] = self.current_room_id
+        shown = self.interface_adapter.render_observation(message, self.state, self)
+        return self._record_turn(
+            Turn(
+                step=self.steps_taken,
+                player_action=raw_command,
+                textworld_command=lowered,
+                observation=shown,
+                game_state_delta=delta,
+            )
+        )
+
+    def _apply_env_side_effects(self, command: str) -> None:
+        """Mirror a successful TextWorld command into the wrapper's bookkeeping.
+
+        ``_step_env`` calls this only for commands that were admissible.
+        Handled verbs are ``go``, ``open``, ``close``, ``unlock ... with``,
+        ``lock ... with``, ``take`` and ``drop``; any other command leaves
+        the wrapper state untouched.
+
+        ``close``, ``lock`` and ``drop`` are mirrored so that ``open_nodes``,
+        ``locked_nodes``, ``inventory`` and ``item_locations`` do not drift
+        from the environment after such a command succeeds.
+
+        Args:
+            command: The lower-cased command that was sent to the environment.
+        """
+        openable = {"container", "door"}
+        if command.startswith("go "):
+            direction = command[3:].strip()
+            edge = self.compiled.room_edges_by_direction.get((self.current_room_id, direction))
+            if edge is not None:
+                self.current_room_id = edge.to_node_id
+                self.visited_nodes.add(edge.to_node_id)
+            return
+        if command.startswith("open "):
+            node_id = self.node_id_for_command_name(command[5:].strip(), node_types=openable)
+            if node_id:
+                self.open_nodes.add(node_id)
+                self.visited_nodes.add(node_id)
+            return
+        if command.startswith("close "):
+            node_id = self.node_id_for_command_name(command[6:].strip(), node_types=openable)
+            if node_id:
+                self.open_nodes.discard(node_id)
+            return
+        if command.startswith("unlock "):
+            # partition (not split + unpack) so a command without " with "
+            # is ignored instead of raising.
+            target_name, separator, key_name = command[7:].partition(" with ")
+            if not separator:
+                return
+            target_id = self.node_id_for_command_name(target_name.strip(), node_types=openable)
+            if target_id:
+                self.locked_nodes.discard(target_id)
+                if self._node_by_id[target_id].type == "door":
+                    self.unlocked_doors.add(target_id)
+                self.visited_nodes.add(target_id)
+            self._mark_item_by_name(key_name.strip())
+            return
+        if command.startswith("lock "):
+            target_name, separator, _key_name = command[5:].partition(" with ")
+            if not separator:
+                return
+            target_id = self.node_id_for_command_name(target_name.strip(), node_types=openable)
+            if target_id:
+                self.locked_nodes.add(target_id)
+            return
+        if command.startswith("take "):
+            # "take X from Y" and plain "take X" both name the item first.
+            item_name = command[5:].split(" from ", 1)[0].strip()
+            item_id = self._item_name_to_id.get(item_name)
+            if item_id:
+                self.inventory.add(item_id)
+                self.item_locations[item_id] = INVENTORY_ID
+                self.used_items.add(item_id)
+                self.visited_nodes.add(item_id)
+            return
+        if command.startswith("drop "):
+            item_id = self._item_name_to_id.get(command[5:].strip())
+            if item_id:
+                # A dropped item lies in the room the player is standing in.
+                self.inventory.discard(item_id)
+                self.item_locations[item_id] = self.current_room_id
+
+    def _apply_wrapper_command(self, command: str) -> tuple[str, dict[str, Any]]:
+        """Route a wrapper command to its handler.
+
+        ``read`` and ``talk`` take a single argument. ``use``, ``combine``
+        and ``give`` need two arguments joined by ``on`` / ``with`` / ``to``.
+        A two-argument verb without its separator (for example ``use lamp``
+        or ``give to bob``) yields a failed result instead of raising, since
+        such input passes ``_is_wrapper_command`` and comes from the player.
+
+        Args:
+            command: The lower-cased, stripped command.
+
+        Returns:
+            ``(observation, delta)`` as produced by ``_success`` / ``_fail``.
+
+        Raises:
+            RuntimeError: If ``command`` starts with none of the wrapper verbs.
+        """
+        if command.startswith("read "):
+            return self._apply_read(command)
+        if command.startswith("talk "):
+            return self._apply_talk(command)
+        two_part_verbs = (
+            ("use ", " on ", self._apply_use),
+            ("combine ", " with ", self._apply_combine),
+            ("give ", " to ", self._apply_give),
+        )
+        for prefix, separator, handler in two_part_verbs:
+            if not command.startswith(prefix):
+                continue
+            # Look for the separator in the argument part only. The handlers
+            # split ``command[len(prefix):]``, so a separator that overlaps
+            # the verb's trailing space would break their tuple unpacking.
+            if separator in command[len(prefix):]:
+                return handler(command)
+            return self._fail(f"Try '{prefix}<something>{separator}<something>'.", command)
+        raise RuntimeError(f"Unsupported wrapper command '{command}'.")
+
+    def _apply_read(self, command: str) -> tuple[str, dict[str, Any]]:
+        """Handle ``read <readable>`` and reveal the clue attached to it.
+
+        Fails when the name does not resolve to a revealed readable, when the
+        readable is not in the current room, or when it requires an item and
+        has not been prepared yet. On success the clue is recorded as
+        discovered and the readable as visited.
+
+        Returns:
+            ``(observation, delta)``. On success the observation is the
+            readable's description followed by the quoted clue text.
+        """
+        readable_id = self.node_id_for_command_name(command[5:].strip(), node_types={"readable"})
+        if not readable_id or readable_id not in self.revealed_readables:
+            return self._fail("You can't read that right now.", command)
+        node = self._node_by_id[readable_id]
+        if node.parent_id != self.current_room_id:
+            return self._fail("You are too far away to read that.", command)
+        # ``prepared_readables`` is filled by ``_apply_use`` for clue-bearing targets.
+        if node.requires_item_id and readable_id not in self.prepared_readables:
+            return self._fail("You still need the right tool before the text becomes legible.", command)
+        clue_id = self.compiled.readable_clue_by_id[readable_id]
+        self.discovered_clues.add(clue_id)
+        self.visited_nodes.add(readable_id)
+        # dedent runs on the already-interpolated text, so the literal's
+        # layout below is part of the rendered output.
+        return self._success(
+            textwrap.dedent(
+                f"""
+                {node.description}
+
+                "{self.compiled.clue_text_by_id[clue_id]}"
+                """
+            ).strip(),
+            command,
+        )
+
+    def _apply_talk(self, command: str) -> tuple[str, dict[str, Any]]:
+        """Handle ``talk <npc>``: mark the NPC as consulted and return its description.
+
+        Fails when the name does not resolve to an NPC or the NPC is not in
+        the current room. Talking to the guardian also sets
+        ``consulted_guardian``.
+        """
+        spoken_name = command[5:].strip()
+        npc_id = self.node_id_for_command_name(spoken_name, node_types={"npc"})
+        if not npc_id:
+            return self._fail("You can't talk to that right now.", command)
+
+        npc = self._node_by_id[npc_id]
+        if npc.parent_id != self.current_room_id:
+            return self._fail("You are too far away to talk to that.", command)
+
+        for seen in (self.consulted_npcs, self.visited_nodes):
+            seen.add(npc_id)
+        if npc_id == self.compiled.guardian_id:
+            self.consulted_guardian = True
+        return self._success(npc.description, command)
+
+    def _apply_use(self, command: str) -> tuple[str, dict[str, Any]]:
+        """Handle ``use <item> on <target>`` for readable and fixture targets.
+
+        Fails when the item is not in the inventory, the target name does not
+        resolve, the target is not in the current room, or the target's use
+        effect does not require this item. Otherwise the effect is applied:
+        it may consume the item, and it yields a clue, reveals a readable, or
+        reveals an item in the current room.
+
+        Returns:
+            ``(observation, delta)`` from ``_success`` / ``_fail``.
+        """
+        item_name, target_name = command[4:].split(" on ", 1)
+        item_id = self._item_name_to_id.get(item_name.strip())
+        target_id = self.node_id_for_command_name(target_name.strip(), node_types={"readable", "fixture"})
+        if not item_id or item_id not in self.inventory:
+            return self._fail("You don't have the item needed for that.", command)
+        if not target_id:
+            return self._fail("You can't use that here.", command)
+        target = self._node_by_id[target_id]
+        if target.parent_id != self.current_room_id:
+            return self._fail("That target is not within reach.", command)
+        effect = self.compiled.use_effects.get(target_id)
+        if effect is None or effect.required_item_id != item_id:
+            return self._fail("That item doesn't seem to work there.", command)
+        # From here on the use is accepted; state is mutated before the
+        # outcome branch is chosen.
+        if effect.consumes_item:
+            self.inventory.discard(item_id)
+            # ``None`` marks an item that is no longer anywhere in the world.
+            self.item_locations[item_id] = None
+        self.used_items.add(item_id)
+        self.visited_nodes.add(target_id)
+        self.completed_use_targets.add(target_id)
+        if effect.clue_id:
+            # Marking the target as prepared is what lets ``_apply_read``
+            # accept a readable that requires an item.
+            self.prepared_readables.add(target_id)
+            self.discovered_clues.add(effect.clue_id)
+            # dedent runs on the already-interpolated text, so the literal's
+            # layout below is part of the rendered output.
+            return self._success(
+                textwrap.dedent(
+                    f"""
+                    {target.description}
+
+                    "{self.compiled.clue_text_by_id[effect.clue_id]}"
+                    """
+                ).strip(),
+                command,
+            )
+        if effect.reveals_readable_id:
+            self.revealed_readables.add(effect.reveals_readable_id)
+            return self._success(f"The {self._label_by_id[effect.reveals_readable_id]} is revealed.", command)
+        if effect.reveals_item_id:
+            self.item_locations[effect.reveals_item_id] = self.current_room_id
+            return self._success(f"The {self._label_by_id[effect.reveals_item_id]} is revealed.", command)
+        # NOTE(review): reached only for an effect with no outcome; the state
+        # changes above are kept even though a failure is reported - confirm
+        # such effects cannot be compiled.
+        return self._fail("Nothing happens.", command)
+
+    def _apply_combine(self, command: str) -> tuple[str, dict[str, Any]]:
+        """Handle ``combine <item> with <item>`` using the compiled recipe map.
+
+        Fails when either item is unknown or not held, or when the pair has
+        no recipe. On success both inputs leave the world and the recipe
+        output is placed in the inventory.
+        """
+        left_name, right_name = command[8:].split(" with ", 1)
+        lookup = self._item_name_to_id.get
+        pair = (lookup(left_name.strip()), lookup(right_name.strip()))
+        if not all(item_id and item_id in self.inventory for item_id in pair):
+            return self._fail("You do not have both pieces required to combine those.", command)
+
+        output_id = self.compiled.recipe_map.get(frozenset(pair))
+        if not output_id:
+            return self._fail("Those items do not fit together.", command)
+
+        # Inputs are consumed: removed from the inventory and left without a location.
+        for consumed_id in pair:
+            self.inventory.discard(consumed_id)
+            self.item_locations[consumed_id] = None
+        self.inventory.add(output_id)
+        self.item_locations[output_id] = INVENTORY_ID
+        self.used_items.update((*pair, output_id))
+        self.completed_recipe_outputs.add(output_id)
+        self.visited_nodes.add(output_id)
+        return self._success(f"You assemble the {self._label_by_id[output_id]}.", command)
+
+    def _apply_give(self, command: str) -> tuple[str, dict[str, Any]]:
+        """Handle ``give <item> to <npc>``, completing that NPC's trade once.
+
+        Fails when the item is not held, the NPC name does not resolve, the
+        NPC is in another room, the NPC's trade does not ask for this item,
+        or the trade was already completed. On success the item is consumed
+        and the trade's reward - an item or a clue - is granted.
+        """
+        offered_name, recipient_name = command[5:].split(" to ", 1)
+        offered_id = self._item_name_to_id.get(offered_name.strip())
+        recipient_id = self.node_id_for_command_name(recipient_name.strip(), node_types={"npc"})
+
+        # Guards run in a fixed order; the first one that trips picks the message.
+        if not (offered_id and offered_id in self.inventory):
+            return self._fail("You do not have that item to give.", command)
+        if not recipient_id:
+            return self._fail("There is no one here by that name.", command)
+        if self._node_by_id[recipient_id].parent_id != self.current_room_id:
+            return self._fail("That person is not here.", command)
+        trade = self.compiled.npc_trade_map.get(recipient_id)
+        if trade is None or trade.required_item_id != offered_id:
+            return self._fail("They are not interested in that item.", command)
+        if recipient_id in self.traded_npcs:
+            return self._fail("That trade has already been completed.", command)
+
+        # Hand the item over and mark the trade as done before paying out.
+        self.inventory.discard(offered_id)
+        self.item_locations[offered_id] = None
+        self.used_items.add(offered_id)
+        self.traded_npcs.add(recipient_id)
+
+        reward_item_id = trade.gives_item_id
+        if reward_item_id:
+            self.inventory.add(reward_item_id)
+            self.item_locations[reward_item_id] = INVENTORY_ID
+            self.used_items.add(reward_item_id)
+            return self._success(f"You receive the {self._label_by_id[reward_item_id]}.", command)
+        reward_clue_id = trade.gives_clue_id
+        if reward_clue_id:
+            self.discovered_clues.add(reward_clue_id)
+            return self._success(f'"{self.compiled.clue_text_by_id[reward_clue_id]}"', command)
+        return self._fail("Nothing comes of the trade.", command)
+
+    def _custom_commands(self) -> set[str]:
+        """List the wrapper-level commands that apply in the current state.
+
+        Covers, for nodes in the current room: ``talk`` for every NPC,
+        ``give`` for an open trade whose required item is held, ``read`` for
+        revealed readables that need no item or are already prepared, and
+        ``use`` for readables and fixtures whose use effect asks for a held
+        item. Independently of the room, both orderings of ``combine`` are
+        offered for every recipe whose inputs are all held.
+
+        Returns:
+            The set of command strings; empty when nothing applies.
+        """
+        item_names = self.compiled.item_command_names
+        node_names = self.compiled.node_command_names
+        here = self.current_room_id
+        commands: set[str] = set()
+
+        for node in self.compiled.world.nodes:
+            # Check the type first: ``parent_id`` is only read for node types
+            # that are known to carry it.
+            if node.type not in {"npc", "readable", "fixture"} or node.parent_id != here:
+                continue
+            if node.type == "npc":
+                commands.add(f"talk {node_names[node.id]}")
+                trade = self.compiled.npc_trade_map.get(node.id)
+                if trade and node.id not in self.traded_npcs and trade.required_item_id in self.inventory:
+                    commands.add(f"give {item_names[trade.required_item_id]} to {node_names[node.id]}")
+                continue
+            if node.type == "readable" and node.id in self.revealed_readables:
+                if not node.requires_item_id or node.id in self.prepared_readables:
+                    commands.add(f"read {node_names[node.id]}")
+            # Readables and fixtures can both be the target of a use effect.
+            effect = self.compiled.use_effects.get(node.id)
+            if effect and effect.required_item_id in self.inventory:
+                commands.add(f"use {item_names[effect.required_item_id]} on {node_names[node.id]}")
+
+        for recipe_inputs in self.compiled.recipe_map:
+            item_ids = sorted(recipe_inputs)
+            # ``_apply_combine`` looks recipes up by ``frozenset({a, b})``, so
+            # only keys holding one or two ids can ever match a command.
+            if not 1 <= len(item_ids) <= 2:
+                continue
+            if not all(item_id in self.inventory for item_id in item_ids):
+                continue
+            first, second = item_names[item_ids[0]], item_names[item_ids[-1]]
+            commands.add(f"combine {first} with {second}")
+            commands.add(f"combine {second} with {first}")
+        return commands
+
+    def _is_wrapper_command(self, command: str) -> bool:
+        """Tell whether ``command`` starts with a verb the wrapper handles itself."""
+        wrapper_prefixes = ("read ", "talk ", "use ", "combine ", "give ")
+        return command.startswith(wrapper_prefixes)
+
+    def _mark_item_by_name(self, name: str) -> None:
+        """Record the item with command name ``name`` as used; unknown names are ignored."""
+        if item_id := self._item_name_to_id.get(name):
+            self.used_items.add(item_id)
+
+    def _success(self, observation: str, command: str) -> tuple[str, dict[str, Any]]:
+        """Pair ``observation`` with a delta that marks the wrapper command as succeeded."""
+        delta: dict[str, Any] = dict(
+            wrapper="custom",
+            command=command,
+            succeeded=True,
+            location=self.current_room_id,
+        )
+        return observation, delta
+
+    def _fail(self, observation: str, command: str) -> tuple[str, dict[str, Any]]:
+        """Pair ``observation`` with a delta that marks the wrapper command as failed."""
+        delta: dict[str, Any] = dict(
+            wrapper="custom",
+            command=command,
+            succeeded=False,
+            location=self.current_room_id,
+        )
+        return observation, delta
+
+    @staticmethod
+    def _compute_delta(previous: GameState, current: GameState, succeeded: bool, fallback_location: str | None) -> dict[str, Any]:
+        """Summarise an environment step as a ``game_state_delta`` dictionary.
+
+        ``previous`` is accepted but not consulted; the fact lists are always
+        empty. ``location`` falls back to ``fallback_location`` when the
+        environment reports none.
+        """
+        delta: dict[str, Any] = {"added_facts": [], "removed_facts": []}
+        delta["location"] = current.location or fallback_location
+        delta.update(
+            score=current.score,
+            won=current.won,
+            lost=current.lost,
+            succeeded=succeeded,
+        )
+        return delta
+
+    def _wrapper_only_turn(
+        self,
+        raw_command: str,
+        translated: str,
+        observation: str,
+        delta: dict[str, Any],
+    ) -> Turn:
+        """Record a turn that is answered by the wrapper alone.
+
+        Counts one step, fills in the current room as ``location`` when
+        ``delta`` has none, and records the turn with ``observation`` used
+        as given.
+        """
+        self.steps_taken += 1
+        if "location" not in delta:
+            delta["location"] = self.current_room_id
+        return self._record_turn(
+            Turn(
+                step=self.steps_taken,
+                player_action=raw_command,
+                textworld_command=translated,
+                observation=observation,
+                game_state_delta=delta,
+            )
+        )
+
+    def _record_turn(self, turn: Turn) -> Turn:
+        """Append ``turn`` to the transcript, refresh the fingerprint and notify the listener.
+
+        Returns the same ``turn`` so callers can return it directly.
+        """
+        self.transcript.append(turn)
+        self.last_state_fingerprint = self.state_fingerprint()
+        listener = self.turn_listener
+        if listener is not None:
+            listener(self, turn)
+        return turn
+
+    def close(self) -> None:
+        """Close the underlying environment once; later calls do nothing.
+
+        An environment without a callable ``close`` attribute is simply
+        marked as closed.
+        """
+        if not self._closed:
+            env_close = getattr(self.env, "close", None)
+            if callable(env_close):
+                env_close()
+            self._closed = True
diff --git a/agents/master/snapshots.py b/agents/master/snapshots.py
new file mode 100644
index 0000000000000000000000000000000000000000..e6f705351fa125ccaf5529639af59eb4bf628f50
--- /dev/null
+++ b/agents/master/snapshots.py
@@ -0,0 +1,308 @@
+from __future__ import annotations
+
+import json
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Protocol
+
+from pydantic import Field
+
+from .base import ARTIFACTS_ROOT
+from .schema import CompiledWorld, DMFeedback, DMObservation, StrictModel, Turn, WorldDefinition
+
+if TYPE_CHECKING:
+    from .session import EpisodeSession
+
+
+# File names used inside the live directory: LiveSnapshotWriter writes state
+# snapshots to STATE_FILENAME and the compiled world to WORLD_FILENAME.
+STATE_FILENAME = "state.json"
+WORLD_FILENAME = "world.json"
+# Default value of LiveStateSnapshot.schema_version.
+LIVE_SCHEMA_VERSION = 1
+# Directory LiveSnapshotWriter falls back to when no live_dir is supplied.
+DEFAULT_LIVE_DIR = ARTIFACTS_ROOT / "live"
+
+
+class LiveMetrics(StrictModel):
+    # Progress and outcome numbers stored in LiveStateSnapshot.metrics.
+    # LiveSnapshotWriter._metrics derives ``ratio`` as steps_taken / min_steps
+    # when no ratio is supplied and min_steps is truthy.
+    steps_taken: int = 0
+    min_steps: int | None = None
+    ratio: float | None = None
+    reward: float | None = None
+    player_won: bool | None = None
+
+
+class LiveRuntime(StrictModel):
+    # Session state mirrored into LiveStateSnapshot.runtime.  All fields have
+    # empty defaults, which is what LiveSnapshotWriter._runtime returns when it
+    # is given no session; otherwise it fills the id lists in sorted order.
+    current_room_id: str | None = None
+    inventory_item_ids: list[str] = Field(default_factory=list)
+    discovered_clue_ids: list[str] = Field(default_factory=list)
+    traded_npc_ids: list[str] = Field(default_factory=list)
+    visited_room_ids: list[str] = Field(default_factory=list)
+    available_commands: list[str] = Field(default_factory=list)
+    invalid_command_count: int = 0
+    wrong_submit_count: int = 0
+    open_node_ids: list[str] = Field(default_factory=list)
+    locked_node_ids: list[str] = Field(default_factory=list)
+
+
+class LiveCurrentRoom(StrictModel):
+    # Description of the room the session is currently in, as assembled by
+    # LiveSnapshotWriter._current_room (both id lists are sorted there).
+    id: str | None = None
+    label: str | None = None
+    description: str | None = None
+    visible_node_ids: list[str] = Field(default_factory=list)
+    visible_item_ids: list[str] = Field(default_factory=list)
+
+
+class LiveStateSnapshot(StrictModel):
+    # Document serialised to STATE_FILENAME by LiveSnapshotWriter.
+    # ``status`` values written in this module: "compiling", "running",
+    # "complete", "failed" and "compile_error".
+    # ``updated_at`` is an ISO-8601 UTC timestamp string.
+    schema_version: int = LIVE_SCHEMA_VERSION
+    episode_id: str
+    status: str
+    updated_at: str
+    title: str | None = None
+    runner: str | None = None
+    error: str | None = None
+    transcript: list[Turn] = Field(default_factory=list)
+    metrics: LiveMetrics = Field(default_factory=LiveMetrics)
+    feedback: DMFeedback | None = None
+    runtime: LiveRuntime = Field(default_factory=LiveRuntime)
+    current_room: LiveCurrentRoom | None = None
+
+
+class LiveObserver(Protocol):
+    """Structural interface for objects that receive episode lifecycle callbacks.
+
+    ``LiveSnapshotWriter`` in this module defines methods with these same
+    names and signatures.
+    """
+
+    def on_run_start(self, episode_id: str, world_input: WorldDefinition | dict[str, Any]) -> None:
+        """Receive the episode id and the submitted world (model or plain dict)."""
+        ...
+
+    def on_compile_success(self, compiled: CompiledWorld, session: EpisodeSession) -> None:
+        """Receive the compiled world and the session created for it."""
+        ...
+
+    def on_turn(self, session: EpisodeSession, turn: Turn) -> None:
+        """Receive the session and a single turn."""
+        ...
+
+    def on_complete(self, compiled: CompiledWorld, session: EpisodeSession, observation: DMObservation) -> None:
+        """Receive the compiled world, the session and the final observation."""
+        ...
+
+    def on_error(
+        self,
+        *,
+        episode_id: str,
+        error: str,
+        world_input: WorldDefinition | dict[str, Any],
+        compiled: CompiledWorld | None = None,
+        session: EpisodeSession | None = None,
+    ) -> None:
+        """Receive an error message; ``compiled`` and ``session`` may be absent.
+
+        All arguments are keyword-only.
+        """
+        ...
+
+
+class LiveSnapshotWriter:
+    def __init__(self, live_dir: Path | None = None, runner_name: str | None = None) -> None:
+        """Store the target directory and runner label, creating the directory eagerly.
+
+        Args:
+            live_dir: Directory for the live files; ``DEFAULT_LIVE_DIR`` is
+                used when this is falsy.
+            runner_name: Value copied into the ``runner`` field of snapshots.
+        """
+        if not live_dir:
+            live_dir = DEFAULT_LIVE_DIR
+        self.live_dir = live_dir
+        self.runner_name = runner_name
+        self.live_dir.mkdir(parents=True, exist_ok=True)
+
+    def on_run_start(self, episode_id: str, world_input: WorldDefinition | dict[str, Any]) -> None:
+        """Remove any existing world file and write a ``"compiling"`` state snapshot.
+
+        The snapshot title is taken from ``world_input`` via ``_extract_title``.
+        """
+        self._remove_world()
+        stamp = self._timestamp()
+        title = self._extract_title(world_input)
+        self._write_state_snapshot(
+            LiveStateSnapshot(
+                episode_id=episode_id,
+                status="compiling",
+                updated_at=stamp,
+                title=title,
+                runner=self.runner_name,
+            )
+        )
+
+    def on_compile_success(self, compiled: CompiledWorld, session: EpisodeSession) -> None:
+        """Write the compiled world file, then a ``"running"`` state snapshot.
+
+        ``min_steps`` in the metrics is the length of ``compiled.solver_policy``.
+        """
+        world = compiled.world
+        self._write_world(world)
+        stamp = self._timestamp()
+        metrics = self._metrics(
+            min_steps=len(compiled.solver_policy),
+            steps_taken=session.steps_taken,
+        )
+        self._write_state_snapshot(
+            LiveStateSnapshot(
+                episode_id=compiled.episode_id,
+                status="running",
+                updated_at=stamp,
+                title=world.meta.title,
+                runner=self.runner_name,
+                metrics=metrics,
+                runtime=self._runtime(session),
+                current_room=self._current_room(session),
+            )
+        )
+
+    def on_turn(self, session: EpisodeSession, turn: Turn) -> None:
+        """Write a ``"running"`` snapshot containing the session's full transcript.
+
+        ``turn`` itself is unused; the transcript is copied from ``session``.
+        """
+        del turn
+        compiled = session.compiled
+        stamp = self._timestamp()
+        metrics = self._metrics(
+            min_steps=len(compiled.solver_policy),
+            steps_taken=session.steps_taken,
+        )
+        self._write_state_snapshot(
+            LiveStateSnapshot(
+                episode_id=compiled.episode_id,
+                status="running",
+                updated_at=stamp,
+                title=compiled.world.meta.title,
+                runner=self.runner_name,
+                transcript=list(session.transcript),
+                metrics=metrics,
+                runtime=self._runtime(session),
+                current_room=self._current_room(session),
+            )
+        )
+
+    def on_complete(self, compiled: CompiledWorld, session: EpisodeSession, observation: DMObservation) -> None:
+        """Write the final snapshot: ``"complete"`` if the player won, else ``"failed"``.
+
+        Metrics and feedback are taken from ``observation``; ``steps_taken``
+        falls back to the session's counter when the observation's is falsy.
+        """
+        if observation.player_won:
+            status = "complete"
+        else:
+            status = "failed"
+        stamp = self._timestamp()
+        metrics = self._metrics(
+            min_steps=observation.min_steps,
+            steps_taken=observation.steps_taken or session.steps_taken,
+            ratio=observation.ratio,
+            reward=observation.reward,
+            player_won=observation.player_won,
+        )
+        self._write_state_snapshot(
+            LiveStateSnapshot(
+                episode_id=compiled.episode_id,
+                status=status,
+                updated_at=stamp,
+                title=compiled.world.meta.title,
+                runner=self.runner_name,
+                transcript=list(session.transcript),
+                metrics=metrics,
+                feedback=observation.feedback,
+                runtime=self._runtime(session),
+                current_room=self._current_room(session),
+            )
+        )
+
+    def on_error(
+        self,
+        *,
+        episode_id: str,
+        error: str,
+        world_input: WorldDefinition | dict[str, Any],
+        compiled: CompiledWorld | None = None,
+        session: EpisodeSession | None = None,
+    ) -> None:
+        """Write a snapshot carrying ``error`` plus whatever state is available.
+
+        The title comes from ``compiled`` when present, otherwise from
+        ``world_input``.  Transcript, metrics, runtime and current room fall
+        back to empty values when ``compiled`` or ``session`` is missing.
+        """
+        has_compiled = compiled is not None
+        has_session = session is not None
+        title = compiled.world.meta.title if has_compiled else self._extract_title(world_input)
+        stamp = self._timestamp()
+        transcript = list(session.transcript) if has_session else []
+        metrics = self._metrics(
+            min_steps=len(compiled.solver_policy) if has_compiled else None,
+            steps_taken=session.steps_taken if has_session else 0,
+        )
+        # NOTE(review): the status is "compile_error" even when a compiled
+        # world and session are supplied -- confirm that is intended.
+        self._write_state_snapshot(
+            LiveStateSnapshot(
+                episode_id=episode_id,
+                status="compile_error",
+                updated_at=stamp,
+                title=title,
+                runner=self.runner_name,
+                error=error,
+                transcript=transcript,
+                metrics=metrics,
+                runtime=self._runtime(session),
+                current_room=self._current_room(session),
+            )
+        )
+
+    def _write_world(self, world: WorldDefinition) -> None:
+        """Serialise ``world`` as indented JSON into ``WORLD_FILENAME`` in the live directory."""
+        target = self.live_dir / WORLD_FILENAME
+        payload = world.model_dump_json(indent=2)
+        self._write_json(target, payload)
+
+    def _write_state_snapshot(self, snapshot: LiveStateSnapshot) -> None:
+        """Serialise ``snapshot`` as indented JSON into ``STATE_FILENAME`` in the live directory."""
+        target = self.live_dir / STATE_FILENAME
+        payload = snapshot.model_dump_json(indent=2)
+        self._write_json(target, payload)
+
+    def _write_json(self, path: Path, payload: str) -> None:
+        """Write ``payload`` plus a trailing newline to ``path`` via a sibling ``.tmp`` file.
+
+        The text is written to ``<path>.tmp`` first and then renamed over
+        ``path``.  Missing parent directories are created.
+        """
+        parent = path.parent
+        parent.mkdir(parents=True, exist_ok=True)
+        staging = path.with_suffix(f"{path.suffix}.tmp")
+        staging.write_text(f"{payload}\n", encoding="utf-8")
+        staging.replace(path)
+
+    def _remove_world(self) -> None:
+        """Delete the live world file if it exists; a missing file is not an error."""
+        # ``missing_ok`` replaces the previous exists()/unlink() pair, which could
+        # raise FileNotFoundError if the file disappeared between the two calls.
+        (self.live_dir / WORLD_FILENAME).unlink(missing_ok=True)
+
+    @staticmethod
+    def _timestamp() -> str:
+        """Return the current UTC time as an ISO-8601 string with an explicit offset."""
+        now = datetime.now(tz=timezone.utc)
+        return now.isoformat()
+
+    @staticmethod
+    def _extract_title(world_input: WorldDefinition | dict[str, Any]) -> str | None:
+        """Return the world title from a ``WorldDefinition`` or a raw dict.
+
+        For dict input the title is read from ``world_input["meta"]["title"]``.
+        ``None`` is returned if any level is missing, is not a dict, or the
+        title is not a string.
+        """
+        if isinstance(world_input, WorldDefinition):
+            return world_input.meta.title
+        if not isinstance(world_input, dict):
+            return None
+        meta = world_input.get("meta")
+        if not isinstance(meta, dict):
+            return None
+        title = meta.get("title")
+        if isinstance(title, str):
+            return title
+        return None
+
+    @staticmethod
+    def _metrics(
+        *,
+        min_steps: int | None,
+        steps_taken: int,
+        ratio: float | None = None,
+        reward: float | None = None,
+        player_won: bool | None = None,
+    ) -> LiveMetrics:
+        """Build a ``LiveMetrics``, deriving ``ratio`` when it was not supplied.
+
+        The derived ratio is ``steps_taken / min_steps``.  It is computed only
+        when ``ratio`` is ``None`` and ``min_steps`` is truthy, so a zero or
+        missing ``min_steps`` leaves the ratio as ``None``.
+        """
+        if ratio is None and min_steps:
+            ratio = steps_taken / min_steps
+        return LiveMetrics(
+            steps_taken=steps_taken,
+            min_steps=min_steps,
+            ratio=ratio,
+            reward=reward,
+            player_won=player_won,
+        )
+
+    @staticmethod
+    def _runtime(session: EpisodeSession | None) -> LiveRuntime:
+        """Snapshot the session's mutable state into a ``LiveRuntime``.
+
+        Returns an all-default ``LiveRuntime`` when ``session`` is ``None``.
+        Visited rooms are limited to world nodes of type ``"location"`` or
+        ``"junction"``.  Available commands are left empty once the session
+        reports ``done``.
+        """
+        if session is None:
+            return LiveRuntime()
+        room_ids = set()
+        for node in session.compiled.world.nodes:
+            if node.type in {"location", "junction"}:
+                room_ids.add(node.id)
+        if session.done:
+            commands = []
+        else:
+            commands = session.available_commands()
+        visited = sorted(room_ids & session.visited_nodes)
+        return LiveRuntime(
+            current_room_id=session.current_room_id,
+            inventory_item_ids=sorted(session.inventory),
+            discovered_clue_ids=sorted(session.discovered_clues),
+            traded_npc_ids=sorted(session.traded_npcs),
+            visited_room_ids=visited,
+            available_commands=commands,
+            invalid_command_count=session.invalid_command_count,
+            wrong_submit_count=session.wrong_submit_count,
+            open_node_ids=sorted(session.open_nodes),
+            locked_node_ids=sorted(session.locked_nodes),
+        )
+
+    @staticmethod
+    def _current_room(session: EpisodeSession | None) -> LiveCurrentRoom | None:
+        """Describe the session's current room, or return ``None`` if it is unknown.
+
+        ``None`` is returned when there is no session or when the current room
+        id matches no world node.  Visible nodes are those whose ``parent_id``
+        is the current room (readables only once revealed) plus any door whose
+        room collection contains the current room.  Visible items are those
+        whose recorded location is the current room.  Both lists are sorted.
+        """
+        if session is None:
+            return None
+        here = session.current_room_id
+        world_nodes = session.compiled.world.nodes
+        room = {node.id: node for node in world_nodes}.get(here)
+        if room is None:
+            return None
+        visible = set()
+        for node in world_nodes:
+            if getattr(node, "parent_id", None) != here:
+                continue
+            # Readable nodes stay hidden until the session has revealed them.
+            if node.type == "readable" and node.id not in session.revealed_readables:
+                continue
+            visible.add(node.id)
+        for door_id, rooms in session.compiled.door_rooms.items():
+            if here in rooms:
+                visible.add(door_id)
+        items_here = [
+            item_id
+            for item_id, location in session.item_locations.items()
+            if location == here
+        ]
+        items_here.sort()
+        return LiveCurrentRoom(
+            id=room.id,
+            label=room.label,
+            description=room.description,
+            visible_node_ids=sorted(visible),
+            visible_item_ids=items_here,
+        )
+
+
+def load_live_payload(live_dir: Path, filename: str) -> bytes | None:
+    """Return the raw bytes of ``live_dir / filename``, or ``None`` if it is absent.
+
+    The file is read directly instead of being checked with ``exists()``
+    first, so a file removed between the check and the read (the writer
+    unlinks the world file at the start of each run) yields ``None`` rather
+    than an exception.
+
+    Args:
+        live_dir: Directory holding the live files.
+        filename: Name of the file inside ``live_dir``.
+
+    Returns:
+        The file contents, or ``None`` when the path does not exist.
+    """
+    try:
+        return (live_dir / filename).read_bytes()
+    except (FileNotFoundError, NotADirectoryError):
+        # NotADirectoryError covers a path component that is a regular file,
+        # which the previous exists() check also reported as "absent".
+        return None
+
+
+def load_live_state(live_dir: Path) -> dict[str, Any] | None:
+    """Load and JSON-decode the state file from ``live_dir``.
+
+    Returns ``None`` when ``load_live_payload`` reports no state file.
+    """
+    raw = load_live_payload(live_dir, STATE_FILENAME)
+    return None if raw is None else json.loads(raw)
diff --git a/agents/master/templates.py b/agents/master/templates.py
new file mode 100644
index 0000000000000000000000000000000000000000..88f7c48d5d181a17603bfb7e1e95d7f094fcb2a4
--- /dev/null
+++ b/agents/master/templates.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+
+# Built frontend entry page: <two directories above this module's directory>/www/dist/index.html.
+DIST_INDEX = Path(__file__).resolve().parents[2] / "www" / "dist" / "index.html"
+
+
+def render_index() -> str:
+    """Return the built frontend page, or a static placeholder if it is not built.
+
+    The placeholder is used whenever ``DIST_INDEX`` is not an existing file.
+    """
+    if not DIST_INDEX.is_file():
+        return """<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>Viewer Not Built</title>
+    <style>
+      body {
+        margin: 0;
+        min-height: 100vh;
+        display: grid;
+        place-items: center;
+        font-family: ui-monospace, Menlo, Monaco, monospace;
+        background: #0a0d14;
+        color: #e7ebf2;
+      }
+      main {
+        max-width: 48rem;
+        padding: 2rem;
+      }
+      code {
+        color: #ffd86b;
+      }
+    </style>
+  </head>
+  <body>
+    <main>
+      <h1>Frontend build not found.</h1>
+      <p>Run <code>npm run dev</code> for the Vite app or <code>npm run build</code> to let the Python server serve the built site from <code>www/dist</code>.</p>
+    </main>
+  </body>
+</html>
+"""
+    return DIST_INDEX.read_text(encoding="utf-8")
diff --git a/agents/openenv_server/__init__.py b/agents/openenv_server/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5333ea08491d68e5d93b47cdc9c1a01e3178d6bd
--- /dev/null
+++ b/agents/openenv_server/__init__.py
@@ -0,0 +1,2 @@
+"""OpenEnv HTTP server entrypoints for dungeon environments."""
+
diff --git a/agents/openenv_server/__main__.py b/agents/openenv_server/__main__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8aa990cb18d12b4daada68b1380771062e371a3
--- /dev/null
+++ b/agents/openenv_server/__main__.py
@@ -0,0 +1,72 @@
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+import uvicorn
+from openenv.core.env_server import create_fastapi_app
+
+from agents.hero.env import HeroEnvironment
+from agents.hero.schema import HeroObservation, HeroServerAction
+from agents.master.env import DMEnvironment
+from agents.master.sample import load_world
+from agents.master.schema import DMAction, DMObservation
+from agents.shared.runtime import build_interface_adapter, resolve_interface_config
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Parse CLI arguments and serve the chosen environment with uvicorn.
+
+    Args:
+        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.
+
+    Returns:
+        ``0`` once ``uvicorn.run`` returns.
+    """
+    parser = argparse.ArgumentParser(description="Serve dungeon environments over OpenEnv HTTP/WebSocket APIs.")
+    parser.add_argument("role", choices=["dm", "hero"])
+    parser.add_argument("--host", default="127.0.0.1")
+    parser.add_argument("--port", type=int)
+    parser.add_argument("--world", type=Path, help="Optional world definition JSON for hero serving.")
+    parser.add_argument("--artifacts-root", type=Path)
+    parser.add_argument("--max-concurrent-envs", type=int, default=1)
+    parser.add_argument("--interface-provider", choices=["strict", "simple", "gemini"])
+    parser.add_argument("--interface-model")
+    parser.add_argument("--interface-narrate", action="store_true")
+    parser.add_argument(
+        "--translate-corporate-env",
+        action="store_true",
+        help="Rewrite hero-facing observations into a corporate app metaphor and map translated commands back through Gemini.",
+    )
+    args = parser.parse_args(argv)
+
+    interface_config = resolve_interface_config(
+        provider=args.interface_provider,
+        model_name=args.interface_model,
+        narrate_observations=args.interface_narrate,
+        translation_mode="corporate_app" if args.translate_corporate_env else None,
+    )
+
+    # Each role supplies a zero-argument environment factory, its action and
+    # observation classes, and the port used when --port is not given.
+    if args.role == "dm":
+        env_factory = lambda: DMEnvironment(
+            artifacts_root=args.artifacts_root,
+            interface_adapter=build_interface_adapter(interface_config),
+        )
+        action_cls = DMAction
+        observation_cls = DMObservation
+        default_port = 8001
+    else:
+        # The world file, when given, is loaded once here and captured by the factory.
+        world_input = load_world(str(args.world)) if args.world is not None else None
+        env_factory = lambda: HeroEnvironment(
+            artifacts_root=args.artifacts_root,
+            world_input=world_input,
+            interface_adapter=build_interface_adapter(interface_config),
+        )
+        action_cls = HeroServerAction
+        observation_cls = HeroObservation
+        default_port = 8002
+
+    app = create_fastapi_app(
+        env=env_factory,
+        action_cls=action_cls,
+        observation_cls=observation_cls,
+        max_concurrent_envs=args.max_concurrent_envs,
+    )
+    # Compare against None instead of relying on truthiness so an explicit
+    # ``--port 0`` is passed through rather than replaced by the role default.
+    port = default_port if args.port is None else args.port
+    uvicorn.run(app, host=args.host, port=port)
+    return 0
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/agents/shared/__init__.py b/agents/shared/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4415f9ed8c8a558350f437861ee94800be049a97
--- /dev/null
+++ b/agents/shared/__init__.py
@@ -0,0 +1,43 @@
+"""Shared helpers for agent environments and model adapters."""
+
+from .llm_client import (
+    DEFAULT_HF_DM_MODEL,
+    DEFAULT_HF_HERO_MODEL,
+    GeminiStructuredClient,
+    HuggingFaceStructuredClient,
+    StructuredModelClient,
+)
+from .model_schema import ModelMessage
+from .openenv_compat import OPENENV_AVAILABLE
+from .runtime import (
+    DEFAULT_INTERFACE_MODEL,
+    DEFAULT_INTERFACE_PROVIDER,
+    DEFAULT_INTERFACE_TRANSLATION_MODE,
+    InterfaceConfig,
+    InterfaceTranslationMode,
+    StructuredClientConfig,
+    build_interface_adapter,
+    create_structured_client,
+    resolve_interface_config,
+    resolve_structured_client_config,
+)
+
+# Public names of this package; each one is imported above from
+# .llm_client, .model_schema, .openenv_compat or .runtime.
+__all__ = [
+    "build_interface_adapter",
+    "create_structured_client",
+    "DEFAULT_HF_DM_MODEL",
+    "DEFAULT_HF_HERO_MODEL",
+    "DEFAULT_INTERFACE_MODEL",
+    "DEFAULT_INTERFACE_PROVIDER",
+    "DEFAULT_INTERFACE_TRANSLATION_MODE",
+    "GeminiStructuredClient",
+    "HuggingFaceStructuredClient",
+    "InterfaceConfig",
+    "InterfaceTranslationMode",
+    "ModelMessage",
+    "OPENENV_AVAILABLE",
+    "resolve_interface_config",
+    "resolve_structured_client_config",
+    "StructuredModelClient",
+    "StructuredClientConfig",
+]
diff --git a/agents/shared/llm_client.py b/agents/shared/llm_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b9bdaf6161bb668b81e50f4ac7fe2b34ea7a805
--- /dev/null
+++ b/agents/shared/llm_client.py
@@ -0,0 +1,415 @@
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+from typing import Any, Protocol, TypeVar
+
+from dotenv import load_dotenv
+from google import genai
+from google.genai import types
+from pydantic import BaseModel
+
+from .model_schema import ModelMessage
+
+try:
+    from trl.chat_template_utils import qwen3_chat_template
+except Exception:  # pragma: no cover - optional runtime dependency
+    qwen3_chat_template = None  # type: ignore[assignment]
+
+# Type variable for the ``response_model`` parameters below: any pydantic BaseModel subclass.
+ResponseModelT = TypeVar("ResponseModelT", bound=BaseModel)
+
+# Default model identifiers per provider (Gemini / Hugging Face) and role (DM / hero).
+DEFAULT_GEMINI_DM_MODEL = "gemini-2.5-flash"
+DEFAULT_GEMINI_HERO_MODEL = "gemini-2.5-flash"
+DEFAULT_HF_DM_MODEL = "Qwen/Qwen3-32B"
+DEFAULT_HF_HERO_MODEL = "Qwen/Qwen3-32B"
+# Provider identifier strings.
+PROVIDER_GEMINI = "gemini"
+PROVIDER_HF_LOCAL = "hf_local"
+
+
+class StructuredModelClient(Protocol):
+    """Structural interface for clients that turn chat messages into a validated model.
+
+    ``GeminiStructuredClient`` and ``HuggingFaceStructuredClient`` in this
+    module both define ``generate_structured`` with this signature.
+    """
+
+    def generate_structured(
+        self,
+        messages: list[ModelMessage],
+        response_model: type[ResponseModelT],
+        *,
+        model_name: str,
+        temperature: float,
+        max_output_tokens: int,
+    ) -> ResponseModelT:
+        """Return an instance of ``response_model`` generated from ``messages``.
+
+        ``model_name``, ``temperature`` and ``max_output_tokens`` are keyword-only.
+        """
+        ...
+
+
+class GeminiStructuredClient:
+    def __init__(self, api_key: str | None = None) -> None:
+        """Create the underlying Gemini client via ``_create_client``.
+
+        Args:
+            api_key: Explicit API key, forwarded unchanged to ``_create_client``.
+        """
+        client = self._create_client(api_key)
+        self._client = client
+
+    def generate_structured(
+        self,
+        messages: list[ModelMessage],
+        response_model: type[ResponseModelT],
+        *,
+        model_name: str,
+        temperature: float,
+        max_output_tokens: int,
+    ) -> ResponseModelT:
+        """Try each generation strategy in turn and return the first success.
+
+        The order is response-schema mode, JSON mode, then prompt-only.  Any
+        exception from a strategy is recorded and the next one is tried.
+
+        Raises:
+            RuntimeError: If every strategy fails; the message lists each
+                strategy name with its normalised error text.
+        """
+        call_kwargs = {
+            "model_name": model_name,
+            "temperature": temperature,
+            "max_output_tokens": max_output_tokens,
+        }
+        failures: list[str] = []
+        for strategy in (
+            self._generate_with_response_schema,
+            self._generate_with_json_mode,
+            self._generate_with_prompt_only,
+        ):
+            try:
+                result = strategy(messages, response_model, **call_kwargs)
+            except Exception as exc:
+                failures.append(f"{strategy.__name__}: {self._normalize_error(exc)}")
+            else:
+                return result
+        raise RuntimeError("Gemini structured generation failed. " + " | ".join(failures))
+
+    def _generate_with_response_schema(
+        self,
+        messages: list[ModelMessage],
+        response_model: type[ResponseModelT],
+        *,
+        model_name: str,
+        temperature: float,
+        max_output_tokens: int,
+    ) -> ResponseModelT:
+        """Generate with ``response_model`` passed to Gemini as the response schema.
+
+        System messages are sent as the system instruction.  The response's
+        ``parsed`` attribute is preferred; otherwise its non-empty ``text`` is
+        validated as JSON.
+
+        Raises:
+            RuntimeError: If the response has neither a parsed value nor text.
+        """
+        system_instruction, contents = self._split_messages(messages)
+        config = types.GenerateContentConfig(
+            system_instruction=system_instruction,
+            temperature=temperature,
+            max_output_tokens=max_output_tokens,
+            response_mime_type="application/json",
+            response_schema=response_model,
+            candidate_count=1,
+        )
+        response = self._client.models.generate_content(
+            model=model_name,
+            contents=contents,
+            config=config,
+        )
+        parsed = getattr(response, "parsed", None)
+        if parsed is not None:
+            return response_model.model_validate(parsed)
+        text = getattr(response, "text", None)
+        if not isinstance(text, str) or not text.strip():
+            raise RuntimeError("Gemini returned an empty structured response.")
+        return response_model.model_validate_json(text)
+
+    def _generate_with_json_mode(
+        self,
+        messages: list[ModelMessage],
+        response_model: type[ResponseModelT],
+        *,
+        model_name: str,
+        temperature: float,
+        max_output_tokens: int,
+    ) -> ResponseModelT:
+        """Generate with the JSON mime type but no response schema.
+
+        The schema and the whole conversation are embedded in a single prompt
+        built by ``_json_prompt``.
+
+        Raises:
+            RuntimeError: If the response text is missing or blank.
+        """
+        prompt = self._json_prompt(messages, response_model)
+        config = types.GenerateContentConfig(
+            temperature=temperature,
+            max_output_tokens=max_output_tokens,
+            response_mime_type="application/json",
+            candidate_count=1,
+        )
+        response = self._client.models.generate_content(
+            model=model_name,
+            contents=prompt,
+            config=config,
+        )
+        text = getattr(response, "text", None)
+        if isinstance(text, str) and text.strip():
+            return response_model.model_validate_json(text)
+        raise RuntimeError("Gemini returned an empty JSON-mode response.")
+
+    def _generate_with_prompt_only(
+        self,
+        messages: list[ModelMessage],
+        response_model: type[ResponseModelT],
+        *,
+        model_name: str,
+        temperature: float,
+        max_output_tokens: int,
+    ) -> ResponseModelT:
+        """Generate with plain-text output and extract the JSON object from it.
+
+        No mime type or schema is set in the request.  The reply is passed
+        through ``_extract_json_object`` before validation.
+
+        Raises:
+            RuntimeError: If the response text is missing or blank.
+        """
+        prompt = self._json_prompt(messages, response_model)
+        config = types.GenerateContentConfig(
+            temperature=temperature,
+            max_output_tokens=max_output_tokens,
+            candidate_count=1,
+        )
+        response = self._client.models.generate_content(
+            model=model_name,
+            contents=prompt,
+            config=config,
+        )
+        text = getattr(response, "text", None)
+        if isinstance(text, str) and text.strip():
+            return response_model.model_validate_json(self._extract_json_object(text))
+        raise RuntimeError("Gemini returned an empty prompt-only response.")
+
+    def _create_client(self, api_key: str | None) -> genai.Client:
+        """Build a ``genai.Client`` from an explicit key or the environment.
+
+        The repository's ``.env`` file is loaded first, without overriding
+        variables that are already set.  Key precedence is ``api_key``, then
+        ``GEMINI_API_KEY``, then ``GOOGLE_API_KEY``.
+
+        Raises:
+            RuntimeError: If no key is found.
+        """
+        env_file = self._repo_root() / ".env"
+        load_dotenv(env_file, override=False)
+        key = api_key or os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
+        if key:
+            return genai.Client(api_key=key)
+        raise RuntimeError("Missing GEMINI_API_KEY or GOOGLE_API_KEY.")
+
+    @staticmethod
+    def _repo_root() -> Path:
+        """Return the directory two levels above this module's own directory."""
+        module_path = Path(__file__).resolve()
+        return module_path.parents[2]
+
+    @staticmethod
+    def _split_messages(messages: list[ModelMessage]) -> tuple[str | None, list[str]]:
+        """Separate system messages from the rest of the conversation.
+
+        Returns:
+            A pair ``(system_instruction, contents)``.  System message contents
+            are joined with blank lines, or ``None`` when there are none.
+            ``contents`` is a one-element list holding the remaining messages
+            rendered as ``ROLE:\\ncontent`` blocks joined with blank lines (an
+            empty string when there are none).
+        """
+        system_parts: list[str] = []
+        turns: list[str] = []
+        for message in messages:
+            if message.role != "system":
+                turns.append(f"{message.role.upper()}:\n{message.content}")
+            else:
+                system_parts.append(message.content)
+        system_instruction = None
+        if system_parts:
+            system_instruction = "\n\n".join(system_parts)
+        # Joining an empty list yields "", which matches the empty-conversation case.
+        contents = ["\n\n".join(turns)]
+        return system_instruction, contents
+
+    @staticmethod
+    def _json_prompt(
+        messages: list[ModelMessage],
+        response_model: type[ResponseModelT],
+    ) -> str:
+        """Build a single text prompt containing instructions, schema and conversation.
+
+        Every message, system messages included, is rendered as a
+        ``ROLE:\\ncontent`` block.  The schema text comes from
+        ``_schema_prompt_snippet``.
+        """
+        conversation = "\n\n".join(
+            f"{message.role.upper()}:\n{message.content}" for message in messages
+        )
+        schema = _schema_prompt_snippet(response_model)
+        parts = [
+            "Return exactly one valid JSON object and nothing else.\n",
+            "Do not use markdown fences.\n",
+            "Use compact JSON with no commentary.\n",
+            f"JSON Schema:\n{schema}\n\n",
+            f"Conversation:\n{conversation}\n",
+        ]
+        return "".join(parts)
+
+    @staticmethod
+    def _extract_json_object(text: str) -> str:
+        """Return the JSON object embedded in ``text`` as a string.
+
+        Surrounding backticks and a leading ``json`` tag are removed first.
+        The object starting at the first ``{`` is then decoded so that text
+        after it, even text containing ``}``, is not swept into the result.
+        If that decode fails, the span from the first ``{`` to the last ``}``
+        is returned so the caller's validation reports the real problem.
+
+        Raises:
+            RuntimeError: If ``text`` contains no ``{`` ... ``}`` span at all.
+        """
+        cleaned = text.strip()
+        if cleaned.startswith("```"):
+            cleaned = cleaned.strip("`")
+            if cleaned.startswith("json"):
+                cleaned = cleaned[4:].lstrip()
+        start = cleaned.find("{")
+        end = cleaned.rfind("}")
+        if start == -1 or end == -1 or end < start:
+            raise RuntimeError("Gemini response did not contain a JSON object.")
+        try:
+            # raw_decode stops at the end of the first complete JSON value.
+            _, stop = json.JSONDecoder().raw_decode(cleaned, start)
+        except ValueError:
+            return cleaned[start : end + 1]
+        return cleaned[start:stop]
+
+    @staticmethod
+    def _normalize_error(exc: Exception) -> str:
+        """Return the exception message with whitespace runs collapsed to single spaces.
+
+        Falls back to the exception class name when the message is empty.
+        """
+        collapsed = " ".join(str(exc).split())
+        if collapsed:
+            return collapsed
+        return exc.__class__.__name__
+
+
+class HuggingFaceStructuredClient:
+    def __init__(
+        self,
+        *,
+        adapter_path: str | None = None,
+        cache_dir: str | None = None,
+        load_in_4bit: bool = True,
+        trust_remote_code: bool = False,
+        device_map: str | None = "auto",
+    ) -> None:
+        """Store loading options; nothing is loaded here.
+
+        Model and tokenizer are loaded later by ``_ensure_model``.
+
+        Args:
+            adapter_path: Optional PEFT adapter applied on top of the base model.
+            cache_dir: Cache directory passed to ``from_pretrained``.
+            load_in_4bit: Request 4-bit quantisation (applied only when CUDA
+                is available).
+            trust_remote_code: Passed to ``from_pretrained``.
+            device_map: Device map passed to the model loader (applied only
+                when CUDA is available).
+        """
+        # Lazily-populated cache of the loaded model and tokenizer.
+        self._loaded_model_name: str | None = None
+        self._model: Any | None = None
+        self._tokenizer: Any | None = None
+        # Loading options, kept verbatim.
+        self.adapter_path = adapter_path
+        self.cache_dir = cache_dir
+        self.load_in_4bit = load_in_4bit
+        self.trust_remote_code = trust_remote_code
+        self.device_map = device_map
+
+    def generate_structured(
+        self,
+        messages: list[ModelMessage],
+        response_model: type[ResponseModelT],
+        *,
+        model_name: str,
+        temperature: float,
+        max_output_tokens: int,
+    ) -> ResponseModelT:
+        """Generate a completion with a local model and validate it as ``response_model``.
+
+        The conversation and schema are rendered into one prompt, tokenised
+        and moved to the model's device.  Only the newly generated tokens are
+        decoded.  Sampling is enabled only for ``temperature > 0``.
+
+        Raises:
+            RuntimeError: If the decoded completion is blank or contains no
+                JSON object.
+        """
+        tokenizer, model = self._ensure_model(model_name)
+        prompt = self._hf_prompt(messages, response_model)
+        rendered = self._render_prompt(tokenizer, prompt)
+        tokenized = tokenizer(rendered, return_tensors="pt")
+        tokenized = {key: value.to(model.device) for key, value in tokenized.items()}
+
+        eos_token_id = getattr(tokenizer, "eos_token_id", None)
+        pad_token_id = getattr(tokenizer, "pad_token_id", None)
+        if pad_token_id is None:
+            # Fall back to EOS only when no pad id exists.  A truthiness test
+            # would discard a legitimate pad token id of 0.
+            pad_token_id = eos_token_id
+
+        sampling = temperature > 0.0
+        generate_kwargs: dict[str, Any] = {
+            "max_new_tokens": max_output_tokens,
+            "do_sample": sampling,
+            "temperature": max(temperature, 1e-5) if sampling else None,
+            "pad_token_id": pad_token_id,
+            "eos_token_id": eos_token_id,
+        }
+        # Unset options are omitted rather than passed as None.
+        generate_kwargs = {key: value for key, value in generate_kwargs.items() if value is not None}
+
+        import torch
+
+        with torch.inference_mode():
+            output_ids = model.generate(**tokenized, **generate_kwargs)
+        # The output echoes the prompt tokens; keep only what follows them.
+        prompt_length = tokenized["input_ids"].shape[1]
+        completion_ids = output_ids[0][prompt_length:]
+        text = tokenizer.decode(completion_ids, skip_special_tokens=True)
+        if not text.strip():
+            raise RuntimeError("Hugging Face model returned an empty response.")
+        return response_model.model_validate_json(self._extract_json_object(text))
+
+    def _ensure_model(self, model_name: str) -> tuple[Any, Any]:
+        """Return ``(tokenizer, model)`` for ``model_name``, loading them if needed.
+
+        A previously loaded pair is reused when the name matches.  Otherwise
+        the repository ``.env`` is loaded, the tokenizer and model are fetched
+        with ``from_pretrained``, the optional PEFT adapter is applied, and the
+        model is put in eval mode before being cached on the instance.
+        """
+        already_loaded = (
+            self._model is not None
+            and self._tokenizer is not None
+            and self._loaded_model_name == model_name
+        )
+        if already_loaded:
+            return self._tokenizer, self._model
+
+        load_dotenv(self._repo_root() / ".env", override=False)
+
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            cache_dir=self.cache_dir,
+            trust_remote_code=self.trust_remote_code,
+            token=_hf_token(),
+        )
+        # Reuse EOS as the pad token when the tokenizer defines none.
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+        tokenizer = self._canonicalize_chat_template(tokenizer)
+
+        model_kwargs: dict[str, Any] = {
+            "cache_dir": self.cache_dir,
+            "trust_remote_code": self.trust_remote_code,
+            "token": _hf_token(),
+            **_hf_model_init_kwargs(load_in_4bit=self.load_in_4bit, device_map=self.device_map),
+        }
+        model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)
+        if self.adapter_path:
+            from peft import PeftModel
+
+            model = PeftModel.from_pretrained(model, self.adapter_path, is_trainable=False)
+        model.eval()
+
+        self._loaded_model_name = model_name
+        self._model = model
+        self._tokenizer = tokenizer
+        return tokenizer, model
+
+    @staticmethod
+    def _repo_root() -> Path:
+        """Return the directory two levels above this module's own directory."""
+        module_path = Path(__file__).resolve()
+        return module_path.parents[2]
+
+    @staticmethod
+    def _render_prompt(tokenizer: Any, prompt: str) -> str:
+        """Wrap ``prompt`` in the tokenizer's chat template when it has one.
+
+        Tokenizers without ``apply_chat_template`` get the prompt back
+        unchanged.  Otherwise a fixed system message plus the prompt as the
+        user message are rendered untokenised with a generation prompt.
+        """
+        if not hasattr(tokenizer, "apply_chat_template"):
+            return prompt
+        extra_kwargs = HuggingFaceStructuredClient._chat_template_kwargs(tokenizer)
+        conversation = [
+            {"role": "system", "content": "Return exactly one valid JSON object and nothing else."},
+            {"role": "user", "content": prompt},
+        ]
+        return tokenizer.apply_chat_template(
+            conversation,
+            tokenize=False,
+            add_generation_prompt=True,
+            **extra_kwargs,
+        )
+
+    @staticmethod
+    def _canonicalize_chat_template(tokenizer: Any) -> Any:
+        """Swap in TRL's Qwen3 chat template for templates using ``<|im_start|>``/``<|im_end|>``.
+
+        The tokenizer is returned untouched when the optional
+        ``qwen3_chat_template`` import is unavailable or when its current
+        template lacks either marker.  Otherwise it is modified in place.
+        """
+        if qwen3_chat_template is None:
+            return tokenizer
+        current = getattr(tokenizer, "chat_template", "") or ""
+        if "<|im_start|>" in current and "<|im_end|>" in current:
+            tokenizer.chat_template = qwen3_chat_template
+        return tokenizer
+
+    @staticmethod
+    def _chat_template_kwargs(tokenizer: Any) -> dict[str, Any]:
+        """Probe whether the chat template accepts ``enable_thinking=False``.
+
+        A trial render of a one-message conversation is attempted.  If it
+        succeeds, ``{"enable_thinking": False}`` is returned.  Any exception,
+        or a tokenizer without ``apply_chat_template``, yields ``{}``.
+        """
+        if not hasattr(tokenizer, "apply_chat_template"):
+            return {}
+        probe = [{"role": "user", "content": "ping"}]
+        try:
+            tokenizer.apply_chat_template(
+                probe,
+                tokenize=False,
+                add_generation_prompt=True,
+                enable_thinking=False,
+            )
+        except Exception:
+            return {}
+        else:
+            return {"enable_thinking": False}
+
+    @staticmethod
+    def _hf_prompt(
+        messages: list[ModelMessage],
+        response_model: type[ResponseModelT],
+    ) -> str:
+        """Build the user prompt: output instructions, JSON schema and conversation.
+
+        Every message is rendered as a ``ROLE:\\ncontent`` block.  The schema
+        text comes from ``_schema_prompt_snippet``.
+        """
+        schema = _schema_prompt_snippet(response_model)
+        blocks = [f"{message.role.upper()}:\n{message.content}" for message in messages]
+        conversation = "\n\n".join(blocks)
+        parts = [
+            "Respond with exactly one compact JSON object and no other text.\n",
+            "Do not use markdown fences.\n",
+            f"JSON Schema:\n{schema}\n\n",
+            f"Conversation:\n{conversation}\n",
+        ]
+        return "".join(parts)
+
+    @staticmethod
+    def _extract_json_object(text: str) -> str:
+        """Return the JSON object embedded in ``text`` as a string.
+
+        Surrounding backticks and a leading ``json`` tag are removed first.
+        The object starting at the first ``{`` is then decoded so that text
+        after it, even text containing ``}``, is not swept into the result.
+        If that decode fails, the span from the first ``{`` to the last ``}``
+        is returned so the caller's validation reports the real problem.
+
+        Raises:
+            RuntimeError: If ``text`` contains no ``{`` ... ``}`` span at all.
+        """
+        cleaned = text.strip()
+        if cleaned.startswith("```"):
+            cleaned = cleaned.strip("`")
+            if cleaned.startswith("json"):
+                cleaned = cleaned[4:].lstrip()
+        start = cleaned.find("{")
+        end = cleaned.rfind("}")
+        if start == -1 or end == -1 or end < start:
+            raise RuntimeError("Hugging Face response did not contain a JSON object.")
+        try:
+            # raw_decode stops at the end of the first complete JSON value.
+            _, stop = json.JSONDecoder().raw_decode(cleaned, start)
+        except ValueError:
+            return cleaned[start : end + 1]
+        return cleaned[start:stop]
+
+
+def _schema_prompt_snippet(response_model: type[ResponseModelT]) -> str:
+    """Return the model's JSON schema as compact JSON, summarised if it is long.
+
+    Schemas whose compact form is at most 4000 characters are returned whole.
+    Longer ones are reduced to title, type, required list, a trimmed view of
+    each top-level property and the sorted names of the ``$defs`` entries.
+    """
+    schema = response_model.model_json_schema()
+    compact = json.dumps(schema, separators=(",", ":"))
+    if len(compact) > 4000:
+        kept_keys = {"type", "title", "enum", "items", "required", "$ref", "description"}
+        properties = {}
+        for name, spec in schema.get("properties", {}).items():
+            properties[name] = {key: value for key, value in spec.items() if key in kept_keys}
+        summary = {
+            "title": schema.get("title", response_model.__name__),
+            "type": schema.get("type", "object"),
+            "required": schema.get("required", []),
+            "properties": properties,
+            "defs": sorted(schema.get("$defs", {})),
+        }
+        compact = json.dumps(summary, separators=(",", ":"))
+    return compact
+
+
+def _hf_model_init_kwargs(*, load_in_4bit: bool, device_map: str | None) -> dict[str, Any]:
+    """Build device-dependent keyword arguments for ``from_pretrained``.
+
+    ``torch_dtype`` is always set: bfloat16 when CUDA is available, float32
+    otherwise.  ``device_map`` and the 4-bit NF4 quantisation config are added
+    only when CUDA is available.
+    """
+    import torch
+
+    has_cuda = torch.cuda.is_available()
+    kwargs: dict[str, Any] = {"torch_dtype": torch.bfloat16 if has_cuda else torch.float32}
+    if has_cuda and device_map is not None:
+        kwargs["device_map"] = device_map
+    if has_cuda and load_in_4bit:
+        from transformers import BitsAndBytesConfig
+
+        kwargs["quantization_config"] = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_compute_dtype=torch.bfloat16,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_use_double_quant=True,
+        )
+    return kwargs
+
+
+def _hf_token() -> str | None:
+    """Return the Hugging Face token: ``HF_TOKEN`` first, then ``HUGGINGFACE_HUB_TOKEN``.
+
+    An unset or empty ``HF_TOKEN`` falls through to the second variable.
+    """
+    token = os.getenv("HF_TOKEN")
+    if token:
+        return token
+    return os.getenv("HUGGINGFACE_HUB_TOKEN")
diff --git a/agents/shared/model_schema.py b/agents/shared/model_schema.py
new file mode 100644
index 0000000000000000000000000000000000000000..d406e03f3251cd5eb6bef8b4182748d08970950e
--- /dev/null
+++ b/agents/shared/model_schema.py
@@ -0,0 +1,14 @@
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import BaseModel, ConfigDict
+
+
+class StrictModel(BaseModel):  # shared pydantic base configured to forbid undeclared fields
+    model_config = ConfigDict(extra="forbid")
+
+
+class ModelMessage(StrictModel):  # one chat message: a role plus its text
+    role: Literal["system", "user", "assistant"]  # only these three role names validate
+    content: str
diff --git a/agents/shared/openenv_compat.py b/agents/shared/openenv_compat.py
new file mode 100644
index 0000000000000000000000000000000000000000..99d3e30973ef8d5bd5b9351f6eee0687da2a43a3
--- /dev/null
+++ b/agents/shared/openenv_compat.py
@@ -0,0 +1,125 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, Generic, Optional, TypeVar
+
+from pydantic import BaseModel, ConfigDict, Field
+
+ObsT = TypeVar("ObsT")
+ActT = TypeVar("ActT")
+StateT = TypeVar("StateT")
+
+try:  # pragma: no cover - exercised when openenv-core is installed
+    from openenv.core.client_types import StepResult as OpenEnvStepResult
+    from openenv.core.env_server.interfaces import Environment as OpenEnvEnvironment
+    from openenv.core.env_server.types import (
+        Action as OpenEnvAction,
+        EnvironmentMetadata as OpenEnvEnvironmentMetadata,
+        Observation as OpenEnvObservation,
+        State as OpenEnvState,
+    )
+
+    OPENENV_AVAILABLE = True
+except ImportError:  # pragma: no cover - lightweight fallback for local imports/tests
+    OPENENV_AVAILABLE = False
+
+    class Action(BaseModel):  # fallback Action base, defined only when the openenv import above fails
+        model_config = ConfigDict(
+            extra="forbid",
+            validate_assignment=True,
+            arbitrary_types_allowed=True,
+        )
+
+        metadata: dict[str, Any] = Field(default_factory=dict)  # each instance gets its own empty dict
+
+    class Observation(BaseModel):  # fallback Observation base, defined only when the openenv import above fails
+        model_config = ConfigDict(
+            extra="forbid",
+            validate_assignment=True,
+            arbitrary_types_allowed=True,
+        )
+
+        done: bool = False  # build_step_result copies this into StepResult.done
+        reward: bool | int | float | None = None  # build_step_result converts non-None values to float
+        metadata: dict[str, Any] = Field(default_factory=dict)
+
+    class State(BaseModel):  # fallback State base; unlike Action/Observation it is configured with extra="allow"
+        model_config = ConfigDict(
+            extra="allow",
+            validate_assignment=True,
+            arbitrary_types_allowed=True,
+        )
+
+        episode_id: str | None = None
+        step_count: int = 0
+
+    class EnvironmentMetadata(BaseModel):  # fallback descriptor returned by Environment.get_metadata
+        model_config = ConfigDict(extra="forbid")
+
+        name: str
+        description: str
+        version: str | None = None
+
+    @dataclass
+    class StepResult(Generic[ObsT]):  # fallback step result: the observation plus its reward/done summary
+        observation: ObsT
+        reward: Optional[float] = None
+        done: bool = False
+
+    class Environment(Generic[ActT, ObsT, StateT]):  # fallback base; reset/step/state raise until overridden
+        SUPPORTS_CONCURRENT_SESSIONS: bool = False
+
+        def __init__(self, transform: Any | None = None) -> None:
+            self.transform = transform  # optional callable used by _apply_transform
+
+        def reset(
+            self,
+            seed: Optional[int] = None,
+            episode_id: Optional[str] = None,
+            **kwargs: Any,
+        ) -> ObsT:
+            raise NotImplementedError
+
+        def step(
+            self,
+            action: ActT,
+            timeout_s: Optional[float] = None,
+            **kwargs: Any,
+        ) -> ObsT:
+            raise NotImplementedError
+
+        @property
+        def state(self) -> StateT:
+            raise NotImplementedError
+
+        def get_metadata(self) -> EnvironmentMetadata:  # default metadata derived from the concrete class name
+            return EnvironmentMetadata(
+                name=self.__class__.__name__,
+                description=f"{self.__class__.__name__} environment",
+                version="1.0.0",
+            )
+
+        def _apply_transform(self, observation: ObsT) -> ObsT:  # pass-through when no transform was given
+            return observation if self.transform is None else self.transform(observation)
+
+        def close(self) -> None:  # no-op in the fallback base
+            return None
+
+else:
+    Action = OpenEnvAction
+    Observation = OpenEnvObservation
+    State = OpenEnvState
+    Environment = OpenEnvEnvironment
+    EnvironmentMetadata = OpenEnvEnvironmentMetadata
+    StepResult = OpenEnvStepResult
+
+
+def build_step_result(observation: ObsT) -> StepResult[ObsT]:
+    """Wrap ``observation`` in a StepResult, copying its ``reward`` (as float) and ``done`` flag."""
+    raw_reward = getattr(observation, "reward", None)
+    is_done = bool(getattr(observation, "done", False))
+    return StepResult(
+        observation=observation,
+        reward=None if raw_reward is None else float(raw_reward),
+        done=is_done,
+    )
diff --git a/agents/shared/runtime.py b/agents/shared/runtime.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f6e948ef3cb09cfc820def89aa850e58c79938f
--- /dev/null
+++ b/agents/shared/runtime.py
@@ -0,0 +1,165 @@
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+from typing import Literal
+
+from .llm_client import (
+    DEFAULT_GEMINI_DM_MODEL,
+    DEFAULT_GEMINI_HERO_MODEL,
+    DEFAULT_HF_DM_MODEL,
+    DEFAULT_HF_HERO_MODEL,
+    GeminiStructuredClient,
+    HuggingFaceStructuredClient,
+    PROVIDER_GEMINI,
+    PROVIDER_HF_LOCAL,
+    StructuredModelClient,
+)
+
+StructuredProvider = Literal["gemini", "hf_local"]  # backends accepted for structured model clients
+InterfaceProvider = Literal["strict", "simple", "gemini"]  # adapters build_interface_adapter can construct
+InterfaceTranslationMode = Literal["none", "corporate_app"]
+RoleName = Literal["dm", "hero"]  # upper-cased to form the DND_<ROLE>_* env-var prefix
+
+DEFAULT_INTERFACE_PROVIDER: InterfaceProvider = "strict"  # used when no provider is given and translation is "none"
+DEFAULT_INTERFACE_MODEL = "gemini-2.5-flash-lite"
+DEFAULT_INTERFACE_TRANSLATION_MODE: InterfaceTranslationMode = "none"
+
+
+@dataclass(frozen=True)
+class StructuredClientConfig:  # immutable settings describing one role's structured model client
+    role: RoleName
+    provider: StructuredProvider
+    model_name: str
+    adapter_path: str | None = None  # passed to HuggingFaceStructuredClient by create_structured_client
+    cache_dir: str | None = None  # passed to HuggingFaceStructuredClient by create_structured_client
+    load_in_4bit: bool = True  # passed to HuggingFaceStructuredClient by create_structured_client
+    trust_remote_code: bool = False  # passed to HuggingFaceStructuredClient by create_structured_client
+
+
+@dataclass(frozen=True)
+class InterfaceConfig:  # immutable settings consumed by build_interface_adapter
+    provider: InterfaceProvider
+    model_name: str = DEFAULT_INTERFACE_MODEL  # only forwarded to the "gemini" adapter
+    narrate_observations: bool = False  # only forwarded to the "gemini" adapter
+    translation_mode: InterfaceTranslationMode = DEFAULT_INTERFACE_TRANSLATION_MODE  # only forwarded to "gemini"
+
+
+def resolve_structured_client_config(
+    role: RoleName,
+    *,
+    provider: StructuredProvider | None = None,
+    model_name: str | None = None,
+    adapter_path: str | None = None,
+) -> StructuredClientConfig:
+    """Resolve a role's client settings: explicit arguments, then ``DND_<ROLE>_*`` env vars, then defaults."""
+    env_prefix = f"DND_{role.upper()}"
+    resolved_provider = provider or _structured_provider_from_env(os.getenv(f"{env_prefix}_PROVIDER")) or PROVIDER_GEMINI
+    hf_default = DEFAULT_HF_DM_MODEL if role == "dm" else DEFAULT_HF_HERO_MODEL
+    gemini_default = DEFAULT_GEMINI_DM_MODEL if role == "dm" else DEFAULT_GEMINI_HERO_MODEL
+    default_model = hf_default if resolved_provider == PROVIDER_HF_LOCAL else gemini_default
+    return StructuredClientConfig(
+        role=role,
+        provider=resolved_provider,
+        model_name=model_name or os.getenv(f"{env_prefix}_MODEL") or default_model,
+        adapter_path=adapter_path or os.getenv(f"{env_prefix}_ADAPTER_PATH") or None,  # blank -> None, like *_MODEL
+        cache_dir=os.getenv("HF_HOME") or None,  # blank HF_HOME is treated as unset
+        load_in_4bit=_env_bool("DND_LOAD_IN_4BIT", default=True),
+        trust_remote_code=_env_bool("DND_TRUST_REMOTE_CODE", default=False),
+    )
+
+
+def create_structured_client(config: StructuredClientConfig) -> StructuredModelClient:  # build the client for config.provider
+    if config.provider == PROVIDER_GEMINI:
+        return GeminiStructuredClient()  # NOTE(review): config.model_name / config.role are not forwarded here
+    if config.provider == PROVIDER_HF_LOCAL:
+        return HuggingFaceStructuredClient(  # NOTE(review): model_name is not forwarded here either - confirm intended
+            adapter_path=config.adapter_path,
+            cache_dir=config.cache_dir,
+            load_in_4bit=config.load_in_4bit,
+            trust_remote_code=config.trust_remote_code,
+        )
+    raise ValueError(f"Unsupported structured provider: {config.provider}")
+
+
+def resolve_interface_config(  # explicit arguments win, then DND_INTERFACE_* env vars, then module defaults
+    *,
+    provider: InterfaceProvider | None = None,
+    model_name: str | None = None,
+    narrate_observations: bool | None = None,
+    translation_mode: InterfaceTranslationMode | None = None,
+) -> InterfaceConfig:
+    resolved_translation = (
+        translation_mode
+        or _interface_translation_mode_from_env(os.getenv("DND_INTERFACE_TRANSLATION_MODE"))
+        or DEFAULT_INTERFACE_TRANSLATION_MODE
+    )
+    resolved_provider = provider or _interface_provider_from_env(os.getenv("DND_INTERFACE_PROVIDER"))
+    if resolved_provider is None:  # nothing chosen: any translation mode other than "none" implies "gemini"
+        resolved_provider = "gemini" if resolved_translation != "none" else DEFAULT_INTERFACE_PROVIDER
+    resolved_narrate = narrate_observations
+    if resolved_narrate is None:  # an explicit False must not be overridden, hence the None check rather than `or`
+        resolved_narrate = _env_bool("DND_INTERFACE_NARRATE", default=False)
+    if resolved_translation != "none" and resolved_provider != "gemini":  # reject an explicit non-gemini provider
+        raise ValueError("Interface translation mode requires the Gemini interface provider.")
+    return InterfaceConfig(
+        provider=resolved_provider,
+        model_name=model_name or os.getenv("DND_INTERFACE_MODEL") or DEFAULT_INTERFACE_MODEL,
+        narrate_observations=resolved_narrate,
+        translation_mode=resolved_translation,
+    )
+
+
+def build_interface_adapter(config: InterfaceConfig):
+    """Return the interface adapter instance matching ``config.provider``."""
+    from agents.master.interface import GeminiInterfaceAdapter, SimpleInterfaceAdapter, StrictCliInterfaceAdapter
+
+    if config.provider == "gemini":
+        return GeminiInterfaceAdapter(
+            model=config.model_name,
+            narrate_observations=config.narrate_observations,
+            translation_mode=config.translation_mode,
+        )
+    plain_adapters = {"strict": StrictCliInterfaceAdapter, "simple": SimpleInterfaceAdapter}
+    if config.provider in plain_adapters:
+        return plain_adapters[config.provider]()
+    raise ValueError(f"Unsupported interface provider: {config.provider}")
+
+
+def _structured_provider_from_env(value: str | None) -> StructuredProvider | None:
+    """Normalize a structured-provider env value; unset or blank yields ``None``."""
+    normalized = (value or "").strip().lower()
+    # Blank is treated like unset so the caller falls back to its default provider.
+    if normalized and normalized not in {PROVIDER_GEMINI, PROVIDER_HF_LOCAL}:
+        raise ValueError(f"Unsupported structured provider value: {value}")
+    return normalized or None  # type: ignore[return-value]
+
+
+def _interface_provider_from_env(value: str | None) -> InterfaceProvider | None:
+    """Normalize an interface-provider env value; unset or blank yields ``None``."""
+    normalized = (value or "").strip().lower()
+    # Blank is treated like unset so the caller falls back to its default provider.
+    if normalized and normalized not in {"strict", "simple", "gemini"}:
+        raise ValueError(f"Unsupported interface provider value: {value}")
+    return normalized or None  # type: ignore[return-value]
+
+
+def _interface_translation_mode_from_env(value: str | None) -> InterfaceTranslationMode | None:
+    """Normalize a translation-mode env value; unset or blank yields ``None``."""
+    normalized = (value or "").strip().lower()
+    # Blank is treated like unset; the literal string "none" is still a real mode and is returned as-is.
+    if normalized and normalized not in {"none", "corporate_app"}:
+        raise ValueError(f"Unsupported interface translation mode value: {value}")
+    return normalized or None  # type: ignore[return-value]
+
+
+def _env_bool(name: str, *, default: bool) -> bool:
+    """Read env var ``name`` as a boolean; an unset or blank value yields ``default``."""
+    raw = os.getenv(name)
+    # A blank value (`NAME=` in a container or Space config) is treated as "not configured".
+    if raw is None or not raw.strip():
+        return default
+    normalized = raw.strip().lower()
+    if normalized in {"1", "true", "yes", "on", "0", "false", "no", "off"}:
+        return normalized in {"1", "true", "yes", "on"}
+    raise ValueError(f"Environment variable {name} must be a boolean value, got {raw!r}")
diff --git a/agents/spaces/__init__.py b/agents/spaces/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9172206cc16b5fa632186ca77c44c5928fe25ba5
--- /dev/null
+++ b/agents/spaces/__init__.py
@@ -0,0 +1,13 @@
+"""Hugging Face Space wrapper apps for the dungeon environments."""
+
+from .dm_space import LatestWorldOutputStore, SpaceDMEnvironment, create_app as create_dm_space_app
+from .hero_space import SpaceHeroEnvironment, UploadedWorldStore, create_app as create_hero_space_app
+
+__all__ = [
+    "LatestWorldOutputStore",
+    "SpaceDMEnvironment",
+    "SpaceHeroEnvironment",
+    "UploadedWorldStore",
+    "create_dm_space_app",
+    "create_hero_space_app",
+]
diff --git a/agents/spaces/dm_space.py b/agents/spaces/dm_space.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8edbae59ea8f52afd6ccb5d62e357c6263090f9
--- /dev/null
+++ b/agents/spaces/dm_space.py
@@ -0,0 +1,194 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from html import escape
+from pathlib import Path
+from threading import Lock
+from typing import Any, Callable
+
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import FileResponse, HTMLResponse
+import uvicorn
+
+from agents.master.env import DMEnvironment
+from agents.master.schema import CompiledWorld, DMAction, DMObservation, WorldDefinition
+from agents.shared.openenv_compat import StepResult
+from agents.shared.runtime import build_interface_adapter, resolve_interface_config
+
+DEFAULT_ARTIFACTS_ROOT = Path("/tmp/dnd_dm_artifacts")  # default for create_app(artifacts_root=...)
+DEFAULT_HOST = "0.0.0.0"  # bind address used by main()
+DEFAULT_PORT = 8000  # port used by main()
+DEFAULT_MAX_CONCURRENT_ENVS = 1  # default for create_app(max_concurrent_envs=...)
+
+
+@dataclass(frozen=True)
+class LatestWorldSnapshot:  # immutable record of the most recently recorded compiled world
+    episode_id: str  # taken from CompiledWorld.episode_id
+    title: str  # taken from compiled.world.meta.title
+    path: Path  # world_definition.normalized.json inside the compiled world's artifacts_dir
+    updated_at: str  # UTC time of recording, in ISO format
+
+
+class LatestWorldOutputStore:  # lock-guarded holder for the latest recorded world snapshot
+    def __init__(self) -> None:
+        self._lock = Lock()  # held for every read and write of _snapshot
+        self._snapshot: LatestWorldSnapshot | None = None
+
+    def record(self, compiled: CompiledWorld) -> None:  # remember `compiled` as the latest world
+        path = compiled.artifacts_dir / "world_definition.normalized.json"
+        if not path.is_file():  # no normalized artifact on disk: leave the previous snapshot in place
+            return
+        snapshot = LatestWorldSnapshot(
+            episode_id=compiled.episode_id,
+            title=compiled.world.meta.title,
+            path=path,
+            updated_at=datetime.now(timezone.utc).isoformat(),
+        )
+        with self._lock:
+            self._snapshot = snapshot
+
+    def latest_path(self) -> Path | None:  # path of the latest snapshot, or None if nothing was recorded
+        snapshot = self.snapshot()
+        return None if snapshot is None else snapshot.path
+
+    def snapshot(self) -> LatestWorldSnapshot | None:  # current snapshot (a frozen dataclass), read under the lock
+        with self._lock:
+            return self._snapshot
+
+
+class SpaceDMEnvironment(DMEnvironment):  # DMEnvironment that also reports compiled worlds to a shared store
+    def __init__(self, *, world_output_store: LatestWorldOutputStore, **kwargs: Any) -> None:
+        super().__init__(**kwargs)  # all remaining keyword arguments go to DMEnvironment unchanged
+        self._world_output_store = world_output_store
+
+    def step(  # type: ignore[override]
+        self,
+        action: DMAction | WorldDefinition | dict[str, Any],
+        runner: Any | None = None,
+        observer: Any | None = None,
+        timeout_s: float | None = None,
+    ) -> StepResult[DMObservation]:
+        result = super().step(action, runner=runner, observer=observer, timeout_s=timeout_s)
+        observation = result.observation
+        if observation.compile_error is None and self.last_compiled_world is not None:  # record only clean compiles
+            self._world_output_store.record(self.last_compiled_world)
+        return result  # the parent's result is returned unmodified
+
+
+def create_app(  # build the DND-DM FastAPI app: OpenEnv API under /env plus index, health and download routes
+    *,
+    openenv_app_factory: Callable[..., Any] | None = None,
+    world_output_store: LatestWorldOutputStore | None = None,
+    artifacts_root: Path = DEFAULT_ARTIFACTS_ROOT,
+    max_concurrent_envs: int = DEFAULT_MAX_CONCURRENT_ENVS,
+) -> FastAPI:
+    if openenv_app_factory is None:  # openenv is imported only when no factory is injected
+        from openenv.core.env_server import create_fastapi_app as openenv_app_factory
+
+    store = world_output_store or LatestWorldOutputStore()
+    interface_adapter = build_interface_adapter(resolve_interface_config(provider="strict"))
+
+    env_app = openenv_app_factory(
+        env=lambda: SpaceDMEnvironment(  # every environment built here shares one store and one adapter
+            artifacts_root=artifacts_root,
+            interface_adapter=interface_adapter,
+            world_output_store=store,
+        ),
+        action_cls=DMAction,
+        observation_cls=DMObservation,
+        max_concurrent_envs=max_concurrent_envs,
+    )
+
+    app = FastAPI(title="DND-DM")
+    app.state.world_output_store = store  # exposed on app.state for access from outside this function
+    app.mount("/env", env_app)
+
+    @app.get("/", response_class=HTMLResponse)
+    def index() -> str:
+        return _render_index(store.snapshot())
+
+    @app.get("/healthz")
+    def healthz() -> dict[str, bool]:
+        return {"ok": True}
+
+    @app.get("/world-output/latest")
+    def latest_world_output() -> FileResponse:
+        path = store.latest_path()
+        if path is None or not path.is_file():  # nothing recorded yet, or the file is no longer on disk
+            raise HTTPException(status_code=404, detail="No successful normalized world output is available yet.")
+        return FileResponse(
+            path,
+            media_type="application/json",
+            filename="world_definition.normalized.json",
+        )
+
+    return app
+
+
+def _render_index(snapshot: LatestWorldSnapshot | None) -> str:  # render the DND-DM landing page as an HTML string
+    latest_html = (  # status paragraph; every snapshot field is passed through html.escape
+        "<p>No successful normalized world output has been recorded yet.</p>"
+        if snapshot is None
+        else (
+            "<p>"
+            f"Latest world: <strong>{escape(snapshot.title)}</strong> "
+            f"(episode <code>{escape(snapshot.episode_id)}</code>, updated {escape(snapshot.updated_at)}). "
+            '<a href="/world-output/latest">Download normalized world JSON</a>.'
+            "</p>"
+        )
+    )
+    return f"""<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>DND-DM</title>
+    <style>
+      body {{
+        font-family: "IBM Plex Sans", "Helvetica Neue", sans-serif;
+        margin: 0;
+        background: #f4efe5;
+        color: #1b1a17;
+      }}
+      main {{
+        max-width: 760px;
+        margin: 0 auto;
+        padding: 48px 24px 64px;
+      }}
+      a {{ color: #0b5c78; }}
+      code {{
+        background: rgba(11, 92, 120, 0.08);
+        padding: 0.15rem 0.35rem;
+        border-radius: 0.3rem;
+      }}
+      .panel {{
+        border: 1px solid rgba(27, 26, 23, 0.12);
+        background: rgba(255, 255, 255, 0.72);
+        border-radius: 18px;
+        padding: 20px 22px;
+        margin-top: 18px;
+      }}
+    </style>
+  </head>
+  <body>
+    <main>
+      <h1>DND-DM</h1>
+      <p>This Space hosts the dungeon DM OpenEnv environment as a CPU-only evaluator.</p>
+      <div class="panel">
+        <p>The OpenEnv API is mounted at <a href="/env"><code>/env</code></a>.</p>
+        <p>The DM evaluates submitted world definitions and writes the latest normalized JSON artifact for manual handoff to <code>DND-Hero</code>.</p>
+        {latest_html}
+      </div>
+    </main>
+  </body>
+</html>"""
+
+
+def main() -> int:  # serve the DM Space app with uvicorn and return exit code 0 once run() returns
+    uvicorn.run("agents.spaces.dm_space:create_app", factory=True, host=DEFAULT_HOST, port=DEFAULT_PORT)
+    return 0
+
+
+if __name__ == "__main__":  # script entry point; main()'s return value becomes the exit status
+    raise SystemExit(main())
diff --git a/agents/spaces/hero_space.py b/agents/spaces/hero_space.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ecdd714ae36528b43b88166a03f2fe0014100a3
--- /dev/null
+++ b/agents/spaces/hero_space.py
@@ -0,0 +1,271 @@
+from __future__ import annotations
+
+from copy import deepcopy
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from html import escape
+import json
+from pathlib import Path
+from threading import Lock
+from typing import Any, Callable
+
+from fastapi import FastAPI, File, HTTPException, Request, UploadFile
+from fastapi.responses import HTMLResponse, JSONResponse, Response
+import uvicorn
+
+from agents.hero.env import HeroEnvironment
+from agents.hero.schema import HeroObservation, HeroServerAction
+from agents.master.check import DMCompileError, validate_and_normalize
+from agents.master.schema import WorldDefinition
+from agents.shared.runtime import build_interface_adapter, resolve_interface_config
+
+DEFAULT_ARTIFACTS_ROOT = Path("/tmp/dnd_hero_artifacts")  # default for create_app(artifacts_root=...)
+DEFAULT_HOST = "0.0.0.0"  # bind address used by main()
+DEFAULT_PORT = 8000  # port used by main()
+DEFAULT_MAX_CONCURRENT_ENVS = 1  # default for create_app(max_concurrent_envs=...)
+
+
+@dataclass(frozen=True)
+class UploadedWorldSnapshot:  # immutable record of the currently uploaded default world
+    world_input: dict[str, Any]  # the world as produced by model_dump(mode="json")
+    title: str  # taken from world.meta.title
+    size_bytes: int  # UTF-8 length of json.dumps(world_input), not of the uploaded payload
+    updated_at: str  # UTC time of upload, in ISO format
+
+
+class UploadedWorldStore:  # lock-guarded holder for the default world used by hero resets
+    def __init__(self) -> None:
+        self._lock = Lock()  # held for every read and write of _snapshot
+        self._snapshot: UploadedWorldSnapshot | None = None
+
+    def set_world(self, world: WorldDefinition | dict[str, Any]) -> UploadedWorldSnapshot:  # replace the stored world
+        if isinstance(world, dict):  # only raw dicts are validated here; other inputs are used as given
+            world = validate_and_normalize(world)
+        world_input = world.model_dump(mode="json")
+        snapshot = UploadedWorldSnapshot(
+            world_input=world_input,
+            title=world.meta.title,
+            size_bytes=len(json.dumps(world_input).encode("utf-8")),
+            updated_at=datetime.now(timezone.utc).isoformat(),
+        )
+        with self._lock:
+            self._snapshot = snapshot
+        return snapshot
+
+    def clear(self) -> None:  # forget the stored world
+        with self._lock:
+            self._snapshot = None
+
+    def current_world(self) -> dict[str, Any] | None:  # deep copy of the stored world dict, or None when empty
+        snapshot = self.snapshot()
+        return None if snapshot is None else deepcopy(snapshot.world_input)
+
+    def snapshot(self) -> UploadedWorldSnapshot | None:  # current snapshot object, read under the lock
+        with self._lock:
+            return self._snapshot
+
+    def metadata(self) -> dict[str, Any]:  # JSON-friendly summary; only "configured" is present when empty
+        snapshot = self.snapshot()
+        if snapshot is None:
+            return {"configured": False}
+        return {
+            "configured": True,
+            "title": snapshot.title,
+            "size_bytes": snapshot.size_bytes,
+            "updated_at": snapshot.updated_at,
+        }
+
+
+class SpaceHeroEnvironment(HeroEnvironment):  # HeroEnvironment whose reset can fall back to an uploaded world
+    def __init__(self, *, uploaded_world_store: UploadedWorldStore, **kwargs: Any) -> None:
+        super().__init__(**kwargs)  # all remaining keyword arguments go to HeroEnvironment unchanged
+        self._uploaded_world_store = uploaded_world_store
+
+    def reset(  # type: ignore[override]
+        self,
+        world_input: Any | None = None,
+        *,
+        seed: int | None = None,
+        episode_id: str | None = None,
+        max_game_steps: int | None = None,
+        max_tool_calls: int | None = None,
+        scratchpad_max_chars: int | None = None,
+        debug: bool | None = None,
+    ) -> HeroObservation:
+        selected_world_input = world_input  # an explicitly passed world always takes precedence
+        if selected_world_input is None:
+            selected_world_input = self._uploaded_world_store.current_world()
+        if selected_world_input is None:  # neither an argument nor an uploaded world is available
+            raise ValueError(
+                "Upload a world JSON to /world-input or pass world_input explicitly before resetting DND-Hero."
+            )
+        return super().reset(
+            selected_world_input,
+            seed=seed,
+            episode_id=episode_id,
+            max_game_steps=max_game_steps,
+            max_tool_calls=max_tool_calls,
+            scratchpad_max_chars=scratchpad_max_chars,
+            debug=debug,
+        )
+
+
+def create_app(  # build the DND-Hero FastAPI app: OpenEnv API under /env plus index, health and /world-input routes
+    *,
+    openenv_app_factory: Callable[..., Any] | None = None,
+    uploaded_world_store: UploadedWorldStore | None = None,
+    artifacts_root: Path = DEFAULT_ARTIFACTS_ROOT,
+    max_concurrent_envs: int = DEFAULT_MAX_CONCURRENT_ENVS,
+) -> FastAPI:
+    if openenv_app_factory is None:  # openenv is imported only when no factory is injected
+        from openenv.core.env_server import create_fastapi_app as openenv_app_factory
+
+    store = uploaded_world_store or UploadedWorldStore()
+    interface_adapter = build_interface_adapter(resolve_interface_config(provider="strict"))
+
+    env_app = openenv_app_factory(
+        env=lambda: SpaceHeroEnvironment(  # every environment built here shares one store and one adapter
+            artifacts_root=artifacts_root,
+            uploaded_world_store=store,
+            interface_adapter=interface_adapter,
+        ),
+        action_cls=HeroServerAction,
+        observation_cls=HeroObservation,
+        max_concurrent_envs=max_concurrent_envs,
+    )
+
+    app = FastAPI(title="DND-Hero")
+    app.state.uploaded_world_store = store  # exposed on app.state for access from outside this function
+    app.mount("/env", env_app)
+
+    @app.get("/", response_class=HTMLResponse)
+    def index() -> str:
+        return _render_index(store.metadata())
+
+    @app.get("/healthz")
+    def healthz() -> dict[str, bool]:
+        return {"ok": True}
+
+    @app.post("/world-input")
+    async def upload_world_input(
+        request: Request,
+        file: UploadFile | None = File(default=None),
+    ) -> JSONResponse:
+        payload = await file.read() if file is not None else await request.body()  # file part wins over raw body
+        if not payload:
+            raise HTTPException(status_code=400, detail="Provide a world JSON file upload or a raw JSON request body.")
+        try:
+            raw_world = json.loads(payload.decode("utf-8"))
+        except UnicodeDecodeError as exc:
+            raise HTTPException(status_code=400, detail="World input must be UTF-8 JSON.") from exc
+        except json.JSONDecodeError as exc:
+            raise HTTPException(status_code=400, detail=f"Invalid JSON: {exc.msg}") from exc
+        if not isinstance(raw_world, dict):
+            raise HTTPException(status_code=400, detail="World input JSON must be an object.")
+        try:
+            world = validate_and_normalize(raw_world)
+        except DMCompileError as exc:  # only DMCompileError is mapped to a 400 here
+            raise HTTPException(status_code=400, detail=str(exc)) from exc
+        snapshot = store.set_world(world)  # set_world re-validates dict inputs only
+        return JSONResponse(
+            {
+                "configured": True,
+                "title": snapshot.title,
+                "size_bytes": snapshot.size_bytes,
+                "updated_at": snapshot.updated_at,
+            }
+        )
+
+    @app.get("/world-input")
+    def world_input_metadata() -> JSONResponse:
+        return JSONResponse(store.metadata())
+
+    @app.delete("/world-input", status_code=204)
+    def clear_world_input() -> Response:
+        store.clear()
+        return Response(status_code=204)
+
+    return app
+
+
+def _render_index(metadata: dict[str, Any]) -> str:  # render the DND-Hero landing page as an HTML string
+    current_world_html = (  # status paragraph; metadata values are stringified and passed through html.escape
+        "<p>No default world is uploaded yet.</p>"
+        if not metadata.get("configured")
+        else (
+            "<p>"
+            f"Current uploaded world: <strong>{escape(str(metadata['title']))}</strong> "
+            f"({escape(str(metadata['size_bytes']))} bytes, updated {escape(str(metadata['updated_at']))})."
+            "</p>"
+        )
+    )
+    return f"""<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>DND-Hero</title>
+    <style>
+      body {{
+        font-family: "IBM Plex Sans", "Helvetica Neue", sans-serif;
+        margin: 0;
+        background: #eef4eb;
+        color: #182118;
+      }}
+      main {{
+        max-width: 760px;
+        margin: 0 auto;
+        padding: 48px 24px 64px;
+      }}
+      a {{ color: #146042; }}
+      code {{
+        background: rgba(20, 96, 66, 0.08);
+        padding: 0.15rem 0.35rem;
+        border-radius: 0.3rem;
+      }}
+      .panel {{
+        border: 1px solid rgba(24, 33, 24, 0.12);
+        background: rgba(255, 255, 255, 0.76);
+        border-radius: 18px;
+        padding: 20px 22px;
+        margin-top: 18px;
+      }}
+      input[type="file"] {{
+        display: block;
+        margin-bottom: 12px;
+      }}
+      button {{
+        background: #146042;
+        color: white;
+        border: 0;
+        border-radius: 999px;
+        padding: 0.7rem 1rem;
+        cursor: pointer;
+      }}
+    </style>
+  </head>
+  <body>
+    <main>
+      <h1>DND-Hero</h1>
+      <p>This Space hosts the dungeon Hero OpenEnv environment as a CPU-only evaluator.</p>
+      <div class="panel">
+        <p>The OpenEnv API is mounted at <a href="/env"><code>/env</code></a>.</p>
+        <p>Upload a normalized world-definition JSON file from <code>DND-DM</code> to make it the default world for future hero resets.</p>
+        {current_world_html}
+        <form action="/world-input" method="post" enctype="multipart/form-data">
+          <input type="file" name="file" accept="application/json,.json" required>
+          <button type="submit">Upload World JSON</button>
+        </form>
+      </div>
+    </main>
+  </body>
+</html>"""
+
+
+def main() -> int:  # serve the Hero Space app with uvicorn and return exit code 0 once run() returns
+    uvicorn.run("agents.spaces.hero_space:create_app", factory=True, host=DEFAULT_HOST, port=DEFAULT_PORT)
+    return 0
+
+
+if __name__ == "__main__":  # script entry point; main()'s return value becomes the exit status
+    raise SystemExit(main())
diff --git a/agents/train/__init__.py b/agents/train/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..14d46844d4f53b6617a54650f56b31e635cb238b
--- /dev/null
+++ b/agents/train/__init__.py
@@ -0,0 +1,2 @@
+"""Training entrypoints for GRPO-based experiments."""
+
diff --git a/agents/train/__main__.py b/agents/train/__main__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef93073391ffaf1fa9b0c1d3d08976673c21a0f7
--- /dev/null
+++ b/agents/train/__main__.py
@@ -0,0 +1,361 @@
+from __future__ import annotations
+
+import argparse
+import json
+import os
+from pathlib import Path
+
+from dotenv import load_dotenv
+
+from agents.shared.llm_client import DEFAULT_HF_DM_MODEL, DEFAULT_HF_HERO_MODEL
+
+from .grpo import (
+    DMClosedLoopConfig,
+    GRPOLaunchConfig,
+    SUPPORTED_GRPO_LOSS_TYPES,
+    SUPPORTED_IMPORTANCE_SAMPLING_LEVELS,
+    build_dm_grpo_dataset,
+    build_hero_grpo_dataset,
+    run_dm_grpo,
+    run_hero_grpo,
+)
+from .joint import JointTrainingConfig, run_joint_training_loop
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Parse the training CLI and dispatch to dm-grpo, hero-grpo, joint-loop or smoke-dataset; returns 0."""
+    _load_repo_dotenv()
+    parser = argparse.ArgumentParser(description="GRPO training harnesses for dungeon agents.")
+    subparsers = parser.add_subparsers(dest="command", required=True)
+    # dm-grpo: DM generator training plus the hero/interface options forwarded to DMClosedLoopConfig.
+    dm_parser = subparsers.add_parser("dm-grpo", help="Run GRPO for the dungeon-master generator.")
+    _add_common_args(dm_parser, default_model=DEFAULT_HF_DM_MODEL, default_output_dir="artifacts/grpo/dm")
+    dm_parser.add_argument("--target-ratio", type=float, action="append")
+    dm_parser.add_argument("--artifacts-root", type=Path)
+    dm_parser.add_argument("--hero-provider", choices=["gemini", "hf_local"])
+    dm_parser.add_argument("--hero-model")
+    dm_parser.add_argument("--hero-adapter-path")
+    dm_parser.add_argument("--interface-provider", choices=["strict", "simple", "gemini"])
+    dm_parser.add_argument("--interface-model")
+    dm_parser.add_argument("--interface-narrate", action="store_true")
+    dm_parser.add_argument(
+        "--translate-corporate-env",
+        action="store_true",
+        help="Rewrite hero-facing observations into a corporate app metaphor and map translated commands back through Gemini.",
+    )
+    dm_parser.add_argument("--hero-max-game-steps", type=int, default=40)
+    dm_parser.add_argument("--hero-max-tool-calls", type=int, default=80)
+    # hero-grpo: hero policy training, optionally against a fixed world file (--world).
+    hero_parser = subparsers.add_parser("hero-grpo", help="Run GRPO for the hero tool-calling policy.")
+    _add_common_args(hero_parser, default_model=DEFAULT_HF_HERO_MODEL, default_output_dir="artifacts/grpo/hero")
+    hero_parser.add_argument("--world", type=Path)
+    hero_parser.add_argument("--artifacts-root", type=Path)
+    hero_parser.add_argument("--max-game-steps", type=int, default=40)
+    hero_parser.add_argument("--max-tool-calls", type=int, default=80)
+    hero_parser.add_argument("--max-tool-calling-iterations", type=int, default=32)
+    hero_parser.add_argument("--interface-provider", choices=["strict", "simple", "gemini"])
+    hero_parser.add_argument("--interface-model")
+    hero_parser.add_argument("--interface-narrate", action="store_true")
+    hero_parser.add_argument(
+        "--translate-corporate-env",
+        action="store_true",
+        help="Rewrite hero-facing observations into a corporate app metaphor and map translated commands back through Gemini.",
+    )
+    # joint-loop: shared episode/interface flags plus one prefixed GRPO option set per role.
+    joint_parser = subparsers.add_parser("joint-loop", help="Alternate hero and DM GRPO phases with adapter carry-over.")
+    joint_parser.add_argument("--root-dir", type=Path, required=True)
+    joint_parser.add_argument("--cycles", type=int, default=1)
+    joint_parser.add_argument("--target-ratio", type=float, action="append")
+    joint_parser.add_argument("--hero-world", type=Path)
+    joint_parser.add_argument("--interface-provider", choices=["strict", "simple", "gemini"])
+    joint_parser.add_argument("--interface-model")
+    joint_parser.add_argument("--interface-narrate", action="store_true")
+    joint_parser.add_argument(
+        "--translate-corporate-env",
+        action="store_true",
+        help="Rewrite hero-facing observations into a corporate app metaphor and map translated commands back through Gemini.",
+    )
+    joint_parser.add_argument("--hero-max-game-steps", type=int, default=40)
+    joint_parser.add_argument("--hero-max-tool-calls", type=int, default=80)
+    joint_parser.add_argument("--hero-max-tool-calling-iterations", type=int, default=32)
+    _add_prefixed_common_args(
+        joint_parser,
+        prefix="hero",
+        default_model=DEFAULT_HF_HERO_MODEL,
+        default_max_steps=24,
+        default_num_prompts=16,
+        default_max_completion_length=512,
+    )
+    _add_prefixed_common_args(
+        joint_parser,
+        prefix="dm",
+        default_model=DEFAULT_HF_DM_MODEL,
+        default_max_steps=8,
+        default_num_prompts=16,
+        default_max_completion_length=2048,
+    )
+    # smoke-dataset: prints dataset rows as JSON; it never builds a GRPOLaunchConfig.
+    dataset_parser = subparsers.add_parser("smoke-dataset", help="Print smoke dataset rows for inspection.")
+    dataset_parser.add_argument("role", choices=["dm", "hero"])
+    dataset_parser.add_argument("--num-prompts", type=int, default=2)
+    dataset_parser.add_argument("--target-ratio", type=float, action="append")
+    dataset_parser.add_argument("--world", type=Path)
+    dataset_parser.add_argument("--max-game-steps", type=int, default=40)
+    dataset_parser.add_argument("--max-tool-calls", type=int, default=80)
+    # Parse, then dispatch; smoke-dataset and joint-loop return before the single-role config is built.
+    args = parser.parse_args(argv)
+    if args.command == "smoke-dataset":
+        if args.role == "dm":
+            rows = build_dm_grpo_dataset(num_prompts=args.num_prompts, target_ratios=args.target_ratio)
+        else:
+            rows = build_hero_grpo_dataset(
+                num_prompts=args.num_prompts,
+                world_input=None if args.world is None else json.loads(args.world.read_text(encoding="utf-8")),
+                target_ratios=args.target_ratio,  # honour --target-ratio for hero rows as well as dm rows
+                max_game_steps=args.max_game_steps,
+                max_tool_calls=args.max_tool_calls,
+            )
+        print(json.dumps(rows, indent=2))
+        return 0
+    # joint-loop: one config per role from the prefixed flags, written under root_dir/hero and root_dir/dm.
+    if args.command == "joint-loop":
+        hero_config = _build_prefixed_grpo_config(args, "hero", default_output_dir=args.root_dir / "hero")
+        dm_config = _build_prefixed_grpo_config(args, "dm", default_output_dir=args.root_dir / "dm")
+        run_joint_training_loop(
+            JointTrainingConfig(
+                root_dir=args.root_dir,
+                cycles=args.cycles,
+                hero_config=hero_config,
+                dm_config=dm_config,
+                target_ratios=args.target_ratio,
+                hero_world_path=args.hero_world,
+                interface_provider=args.interface_provider,
+                interface_model=args.interface_model,
+                interface_narrate=args.interface_narrate,
+                interface_translation_mode="corporate_app" if args.translate_corporate_env else None,
+                hero_max_game_steps=args.hero_max_game_steps,
+                hero_max_tool_calls=args.hero_max_tool_calls,
+                hero_max_tool_calling_iterations=args.hero_max_tool_calling_iterations,
+            )
+        )
+        return 0
+    # Only dm-grpo and hero-grpo reach this point; both parsers carry the _add_common_args options.
+    config = GRPOLaunchConfig(
+        model_name=args.model,
+        output_dir=args.output_dir,
+        resume_adapter_path=args.resume_adapter_path,
+        max_steps=args.max_steps,
+        num_prompts=args.num_prompts,
+        learning_rate=args.learning_rate,
+        per_device_train_batch_size=args.per_device_train_batch_size,
+        gradient_accumulation_steps=args.gradient_accumulation_steps,
+        num_generations=args.num_generations,
+        max_completion_length=args.max_completion_length,
+        logging_steps=args.logging_steps,
+        save_steps=args.save_steps,
+        seed=args.seed,
+        rank=args.rank,
+        alpha=args.alpha,
+        dropout=args.dropout,
+        temperature=args.temperature,
+        top_p=args.top_p,
+        top_k=args.top_k,
+        min_p=args.min_p,
+        repetition_penalty=args.repetition_penalty,
+        use_wandb=not args.no_wandb,
+        run_name=args.run_name,
+        trust_remote_code=args.trust_remote_code,
+        load_in_4bit=not args.no_4bit,
+        loss_type=args.loss_type,
+        importance_sampling_level=args.importance_sampling_level,
+        use_transformers_paged=args.use_transformers_paged,
+        cache_implementation=args.cache_implementation,
+        use_vllm=args.use_vllm,
+        vllm_mode=args.vllm_mode,
+        vllm_gpu_memory_utilization=args.vllm_gpu_memory_utilization,
+        vllm_enable_sleep_mode=not args.no_vllm_sleep_mode,
+    )
+    # dm-grpo: the hero and interface settings for the closed loop travel in DMClosedLoopConfig.
+    if args.command == "dm-grpo":
+        run_dm_grpo(
+            config,
+            target_ratios=args.target_ratio,
+            artifacts_root=args.artifacts_root,
+            closed_loop=DMClosedLoopConfig(
+                hero_provider=args.hero_provider,
+                hero_model=args.hero_model,
+                hero_adapter_path=args.hero_adapter_path,
+                interface_provider=args.interface_provider,
+                interface_model=args.interface_model,
+                interface_narrate=args.interface_narrate,
+                interface_translation_mode="corporate_app" if args.translate_corporate_env else None,
+                hero_max_game_steps=args.hero_max_game_steps,
+                hero_max_tool_calls=args.hero_max_tool_calls,
+            ),
+        )
+        return 0
+    # hero-grpo is the only command left.
+    run_hero_grpo(
+        config,
+        world_path=args.world,
+        artifacts_root=args.artifacts_root,
+        interface_provider=args.interface_provider,
+        interface_model=args.interface_model,
+        interface_narrate=args.interface_narrate,
+        interface_translation_mode="corporate_app" if args.translate_corporate_env else None,
+        max_game_steps=args.max_game_steps,
+        max_tool_calls=args.max_tool_calls,
+        max_tool_calling_iterations=args.max_tool_calling_iterations,
+    )
+    return 0
+
+
+def _load_repo_dotenv() -> None:  # first call in main(): load .env, then normalize the W&B variables
+    load_dotenv(Path(__file__).resolve().parents[2] / ".env", override=False)  # .env two directories above this file's folder
+    _normalize_wandb_env()  # runs after loading so values read from .env are normalized as well
+
+
+def _normalize_wandb_env() -> None:
+    """Split a combined ``entity/project`` value in WANDB_PROJECT into WANDB_ENTITY and WANDB_PROJECT."""
+    combined = os.getenv("WANDB_PROJECT")
+    # Nothing to do when an entity is already set, or the project is unset or has no slash.
+    if os.getenv("WANDB_ENTITY") or not combined or "/" not in combined:
+        return
+    owner, _, name = combined.partition("/")
+    # Both halves must be non-empty; values such as "team/" or "/proj" are left untouched.
+    if not (owner and name):
+        return
+    os.environ["WANDB_ENTITY"] = owner
+    os.environ["WANDB_PROJECT"] = name
+
+
+def _add_common_args(parser: argparse.ArgumentParser, *, default_model: str, default_output_dir: str) -> None:
+    """Register the GRPO options shared by the ``dm-grpo`` and ``hero-grpo`` subcommands on ``parser``."""
+    add = parser.add_argument
+    # Model, run identity, schedule, adapter and sampling options (registration order = --help order).
+    add("--model", default=default_model)
+    add("--output-dir", type=Path, default=Path(default_output_dir))
+    add("--resume-adapter-path")
+    add("--run-name")
+    add("--max-steps", type=int, default=10)
+    add("--num-prompts", type=int, default=16)
+    add("--learning-rate", type=float, default=1e-5)
+    add("--per-device-train-batch-size", type=int, default=2)
+    add("--gradient-accumulation-steps", type=int, default=8)
+    add("--num-generations", type=int, default=2)
+    add("--max-completion-length", type=int, default=512)
+    add("--logging-steps", type=int, default=1)
+    add("--save-steps", type=int, default=10)
+    add("--seed", type=int, default=42)
+    add("--rank", type=int, default=16)
+    add("--alpha", type=int, default=32)
+    add("--dropout", type=float, default=0.05)
+    add("--temperature", type=float, default=0.6)
+    add("--top-p", type=float, default=0.95)
+    add("--top-k", type=int, default=20)
+    add("--min-p", type=float)
+    add("--repetition-penalty", type=float, default=1.0)
+    add("--loss-type", choices=SUPPORTED_GRPO_LOSS_TYPES, default="dapo")
+    add("--importance-sampling-level", choices=SUPPORTED_IMPORTANCE_SAMPLING_LEVELS, default="token")
+    # Generation backend and loading switches; the --no-* flags are inverted when the config is built.
+    add("--use-transformers-paged", action="store_true")
+    add("--cache-implementation")
+    add("--use-vllm", action="store_true")
+    add("--vllm-mode", choices=["server", "colocate"], default="colocate")
+    add("--vllm-gpu-memory-utilization", type=float, default=0.2)
+    add("--no-vllm-sleep-mode", action="store_true")
+    add("--trust-remote-code", action="store_true")
+    add("--no-4bit", action="store_true")
+    add("--no-wandb", action="store_true")
+
+
+def _add_prefixed_common_args(
+    parser: argparse.ArgumentParser,
+    *,
+    prefix: str,
+    default_model: str,
+    default_max_steps: int,
+    default_num_prompts: int,
+    default_max_completion_length: int,
+) -> None:
+    """Register one role's GRPO options on ``parser`` as ``--<prefix>-<option>`` flags (no output-dir flag)."""
+    def add(option: str, **spec) -> None:
+        parser.add_argument(f"--{prefix}-{option}", **spec)
+
+    add("model", default=default_model)
+    add("resume-adapter-path")
+    add("run-name")
+    add("max-steps", type=int, default=default_max_steps)
+    add("num-prompts", type=int, default=default_num_prompts)
+    add("learning-rate", type=float, default=1e-5)
+    add("per-device-train-batch-size", type=int, default=2)
+    add("gradient-accumulation-steps", type=int, default=8)
+    add("num-generations", type=int, default=2)
+    add("max-completion-length", type=int, default=default_max_completion_length)
+    add("logging-steps", type=int, default=1)
+    add("save-steps", type=int, default=4)
+    add("seed", type=int, default=42)
+    add("rank", type=int, default=16)
+    add("alpha", type=int, default=32)
+    add("dropout", type=float, default=0.05)
+    add("temperature", type=float, default=0.6)
+    add("top-p", type=float, default=0.95)
+    add("top-k", type=int, default=20)
+    add("min-p", type=float)
+    add("repetition-penalty", type=float, default=1.0)
+    add("loss-type", choices=SUPPORTED_GRPO_LOSS_TYPES, default="dapo")
+    add("importance-sampling-level", choices=SUPPORTED_IMPORTANCE_SAMPLING_LEVELS, default="token")
+    add("use-transformers-paged", action="store_true")
+    add("cache-implementation")
+    add("use-vllm", action="store_true")
+    add("vllm-mode", choices=["server", "colocate"], default="colocate")
+    add("vllm-gpu-memory-utilization", type=float, default=0.2)
+    add("no-vllm-sleep-mode", action="store_true")
+    add("trust-remote-code", action="store_true")
+    add("no-4bit", action="store_true")
+    add("no-wandb", action="store_true")
+
+
+def _build_prefixed_grpo_config(args: argparse.Namespace, prefix: str, *, default_output_dir: Path) -> GRPOLaunchConfig:
+    """Build a ``GRPOLaunchConfig`` from the ``<prefix>_*`` attributes of ``args``; ``output_dir`` is ``default_output_dir``."""
+    def opt(field: str):
+        return getattr(args, f"{prefix}_{field}")
+    return GRPOLaunchConfig(
+        model_name=opt("model"),
+        output_dir=default_output_dir,
+        resume_adapter_path=opt("resume_adapter_path"),
+        max_steps=opt("max_steps"),
+        num_prompts=opt("num_prompts"),
+        learning_rate=opt("learning_rate"),
+        per_device_train_batch_size=opt("per_device_train_batch_size"),
+        gradient_accumulation_steps=opt("gradient_accumulation_steps"),
+        num_generations=opt("num_generations"),
+        max_completion_length=opt("max_completion_length"),
+        logging_steps=opt("logging_steps"),
+        save_steps=opt("save_steps"),
+        seed=opt("seed"),
+        rank=opt("rank"),
+        alpha=opt("alpha"),
+        dropout=opt("dropout"),
+        temperature=opt("temperature"),
+        top_p=opt("top_p"),
+        top_k=opt("top_k"),
+        min_p=opt("min_p"),
+        repetition_penalty=opt("repetition_penalty"),
+        use_wandb=not opt("no_wandb"),  # CLI flag is the negative form
+        run_name=opt("run_name"),
+        trust_remote_code=opt("trust_remote_code"),
+        load_in_4bit=not opt("no_4bit"),  # CLI flag is the negative form
+        loss_type=opt("loss_type"),
+        importance_sampling_level=opt("importance_sampling_level"),
+        use_transformers_paged=opt("use_transformers_paged"),
+        cache_implementation=opt("cache_implementation"),
+        use_vllm=opt("use_vllm"),
+        vllm_mode=opt("vllm_mode"),
+        vllm_gpu_memory_utilization=opt("vllm_gpu_memory_utilization"),
+        vllm_enable_sleep_mode=not opt("no_vllm_sleep_mode"),  # CLI flag is the negative form
+    )
+
+
+if __name__ == "__main__":  # e.g. `python -m agents.train ...`; main()'s return value becomes the exit status
+    raise SystemExit(main())
diff --git a/agents/train/grpo.py b/agents/train/grpo.py
new file mode 100644
index 0000000000000000000000000000000000000000..32b03d2fdfa21d3bf51fa8516780a47cca7d35ef
--- /dev/null
+++ b/agents/train/grpo.py
@@ -0,0 +1,2999 @@
+from __future__ import annotations
+
+import hashlib
+import importlib.util
+import json
+import os
+import re
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Callable
+
+from agents.hero.cli import parse_cli_command
+from agents.hero.env import HeroEnvironment
+from agents.hero.policy import HeroLLMPolicy
+from agents.hero.runner import HeroRunner
+from agents.master.base import normalize_answer_text, parser_safe_text
+from agents.master.check import validate_and_normalize
+from agents.hero.prompt import format_hero_grpo_system_prompt
+from agents.hero.schema import validate_hero_action
+from agents.master.env import DMEnvironment
+from agents.master.prompt import build_dm_world_messages
+from agents.master.sample import load_world, sample_world_definition
+from agents.master.schema import WorldDefinition
+from agents.shared.runtime import (
+    build_interface_adapter,
+    create_structured_client,
+    resolve_interface_config,
+    resolve_structured_client_config,
+)
+
+try:  # training dependencies are optional: any import failure switches to the None placeholders below
+    import torch
+    from datasets import Dataset
+    from peft import LoraConfig
+    from trl.chat_template_utils import qwen3_chat_template, qwen3_schema
+    from trl.rewards import get_soft_overlong_punishment
+    from trl import GRPOConfig, GRPOTrainer
+    from transformers import AutoTokenizer, BitsAndBytesConfig
+
+    TRAINING_IMPORT_ERROR: Exception | None = None  # None means every import above succeeded
+except Exception as exc:  # pragma: no cover - exercised when train extras are unavailable
+    torch = None  # type: ignore[assignment]
+    Dataset = None  # type: ignore[assignment]
+    LoraConfig = None  # type: ignore[assignment]
+    GRPOConfig = None  # type: ignore[assignment]
+    GRPOTrainer = None  # type: ignore[assignment]
+    AutoTokenizer = None  # type: ignore[assignment]
+    BitsAndBytesConfig = None  # type: ignore[assignment]
+    qwen3_chat_template = None  # type: ignore[assignment]
+    qwen3_schema = None  # type: ignore[assignment]
+    get_soft_overlong_punishment = None  # type: ignore[assignment]
+    TRAINING_IMPORT_ERROR = exc  # the original failure is kept so the module still imports without the extras
+
+
+_DEFAULT_TARGET_RATIOS = [1.25, 1.5, 1.75, 2.0]  # fallback cycle when a dataset builder gets no target_ratios
+_DM_REQUIRED_TOP_LEVEL_FIELDS = ("meta", "nodes", "edges", "items", "clues", "recipes", "quest_chain")
+_DM_ALLOWED_NODE_TYPES = {"location", "junction", "container", "door", "readable", "fixture", "npc"}
+_DM_ALLOWED_EDGE_TYPES = {"passage", "locked_passage"}
+_DM_ALLOWED_ITEM_TYPES = {"key", "puzzle"}
+_HERO_TOOL_NAMES = {"act", "scratchpad_read", "scratchpad_write"}  # same names as HeroToolEnvironment's public tool methods
+_TOOL_CALL_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)  # captures a brace-delimited payload; non-greedy, may span lines
+_EMPTY_THINK_RE = re.compile(r"<think>\s*</think>\s*", re.DOTALL)  # a <think> pair holding only whitespace, plus trailing whitespace
+_LOWERCASE_ANSWER_RE = re.compile(r"^[a-z0-9]+(?: [a-z0-9]+)*$")  # lowercase letter/digit words separated by single spaces
+_HERO_TASK_PROMPTS = (  # user-turn texts; build_hero_grpo_dataset picks one per row by index modulo length
+    "Solve the dungeon by using tools until the episode ends.\nInitial observation:\n",
+    "Play the dungeon to completion through tool calls only.\nInitial observation:\n",
+    "Gather every clue and solve the dungeon via tools only.\nInitial observation:\n",
+)
+SUPPORTED_GRPO_LOSS_TYPES = ("grpo", "dapo", "bnpo", "dr_grpo", "cispo", "sapo", "luspo")  # validated in GRPOLaunchConfig.__post_init__
+SUPPORTED_IMPORTANCE_SAMPLING_LEVELS = ("token", "sequence")  # validated in GRPOLaunchConfig.__post_init__
+
+
+@dataclass(frozen=True)
+class GRPOLaunchConfig:
+    """Immutable launch settings for one GRPO run; ``__post_init__`` rejects invalid values and combinations."""
+    model_name: str
+    output_dir: Path
+    resume_adapter_path: str | None = None
+    max_steps: int = 10
+    num_prompts: int = 16
+    learning_rate: float = 1e-5
+    per_device_train_batch_size: int = 2
+    gradient_accumulation_steps: int = 8
+    num_generations: int = 2
+    max_completion_length: int = 512
+    logging_steps: int = 1
+    save_steps: int = 10
+    seed: int = 42
+    rank: int = 16
+    alpha: int = 32
+    dropout: float = 0.05
+    temperature: float = 0.6
+    top_p: float = 0.95
+    top_k: int = 20
+    min_p: float | None = None
+    repetition_penalty: float = 1.0
+    use_wandb: bool = True
+    run_name: str | None = None
+    trust_remote_code: bool = False
+    load_in_4bit: bool = True
+    loss_type: str = "dapo"
+    importance_sampling_level: str = "token"
+    use_transformers_paged: bool = False
+    cache_implementation: str | None = None
+    use_vllm: bool = False
+    vllm_mode: str = "colocate"
+    vllm_gpu_memory_utilization: float = 0.2
+    vllm_enable_sleep_mode: bool = True
+
+    def __post_init__(self) -> None:
+        """Validate option values, then batch geometry; raises ``ValueError`` on the first violation found."""
+        if self.loss_type not in SUPPORTED_GRPO_LOSS_TYPES:
+            raise ValueError(f"loss_type must be one of {SUPPORTED_GRPO_LOSS_TYPES!r}; got {self.loss_type!r}.")
+        if self.importance_sampling_level not in SUPPORTED_IMPORTANCE_SAMPLING_LEVELS:
+            raise ValueError(
+                f"importance_sampling_level must be one of {SUPPORTED_IMPORTANCE_SAMPLING_LEVELS!r}; "
+                f"got {self.importance_sampling_level!r}."
+            )
+        if self.loss_type == "luspo" and self.importance_sampling_level != "sequence":
+            raise ValueError("luspo requires importance_sampling_level='sequence'.")
+        lower_bounds = (
+            ("per_device_train_batch_size", 1, "per_device_train_batch_size must be at least 1."),
+            ("gradient_accumulation_steps", 1, "gradient_accumulation_steps must be at least 1."),
+            ("num_generations", 2, "num_generations must be at least 2 for GRPO."),
+            ("max_steps", 1, "max_steps must be at least 1."),
+            ("num_prompts", 1, "num_prompts must be at least 1."),
+        )
+        for attr, floor, message in lower_bounds:
+            if getattr(self, attr) < floor:
+                raise ValueError(message)
+        if self.temperature <= 0.0:
+            raise ValueError("temperature must be greater than 0.")
+        if not 0.0 < self.top_p <= 1.0:
+            raise ValueError("top_p must be in the interval (0, 1].")
+        if self.top_k < 0:
+            raise ValueError("top_k must be non-negative.")
+        if self.min_p is not None and not 0.0 <= self.min_p <= 1.0:
+            raise ValueError("min_p must be in the interval [0, 1] when provided.")
+        if self.repetition_penalty < 1.0:
+            raise ValueError("repetition_penalty must be at least 1.0.")
+        if self.vllm_mode not in {"server", "colocate"}:
+            raise ValueError("vllm_mode must be 'server' or 'colocate'.")
+        if not 0.0 < self.vllm_gpu_memory_utilization < 1.0:
+            raise ValueError("vllm_gpu_memory_utilization must be in the interval (0, 1).")
+        # Batch geometry: the WORLD_SIZE env var (default 1, floored at 1) scales the per-device batch.
+        world_size = max(1, int(os.getenv("WORLD_SIZE", "1")))
+        rollout_batch = world_size * self.per_device_train_batch_size
+        if rollout_batch % self.num_generations:
+            raise ValueError(
+                f"generation_batch_size ({rollout_batch}) must be divisible by "
+                f"num_generations ({self.num_generations}). "
+                "Increase --per-device-train-batch-size, reduce --num-generations, or launch with more processes."
+            )
+        required_rows = rollout_batch * self.gradient_accumulation_steps
+        if required_rows > self.num_prompts:
+            raise ValueError(
+                f"num_prompts ({self.num_prompts}) must be at least "
+                "generation_batch_size * gradient_accumulation_steps "
+                f"({required_rows}) so GRPO can complete one optimizer step."
+            )
+
+
+@dataclass(frozen=True)
+class DMClosedLoopConfig:  # hero and interface settings for DM training; the agents.train CLI passes it as run_dm_grpo(closed_loop=...)
+    hero_provider: str | None = None  # the CLI offers "gemini" or "hf_local"
+    hero_model: str | None = None
+    hero_adapter_path: str | None = None
+    interface_provider: str | None = None  # the CLI offers "strict", "simple" or "gemini"
+    interface_model: str | None = None
+    interface_narrate: bool = False
+    interface_translation_mode: str | None = None  # the CLI passes "corporate_app" or None
+    hero_max_game_steps: int = 40
+    hero_max_tool_calls: int = 80
+
+
+@dataclass(frozen=True)
+class DMRolloutMetrics:  # immutable record for one DM completion; dm_reward_function reads compile_error and reward
+    reward: float  # returned as the completion's reward when compile_error is None
+    compile_error: str | None  # non-None makes dm_reward_function use _compile_error_penalty(...) instead of reward
+    requested_ratio: float
+    player_won: bool
+    steps_taken: int | None
+    min_steps: int | None
+    ratio: float | None  # NOTE(review): presumably steps_taken / min_steps — confirm where the metrics are built
+    efficiency_score: float
+    quality_score: float
+    invalid_command_count: int
+    wrong_submit_count: int
+    hero_player_won: bool
+    hero_total_reward: float
+    hero_dense_return: float
+    hero_steps_taken: int
+    hero_tool_calls_total: int
+    hero_policy_error: str | None
+
+
+_DM_ROLLOUT_CACHE_STEP = -1  # NOTE(review): presumably the trainer step the cache below belongs to — confirm in _cached_dm_rollout_metrics
+_DM_ROLLOUT_CACHE: dict[tuple[Any, ...], DMRolloutMetrics] = {}  # module-level mapping from tuple keys to rollout metrics
+
+
+def build_dm_grpo_dataset(
+    *,
+    num_prompts: int = 8,
+    target_ratios: list[float] | None = None,
+) -> list[dict[str, Any]]:
+    """Return ``num_prompts`` DM rows (``prompt``, ``target_ratio``, ``seed``), cycling through the target ratios."""
+    ratio_cycle = target_ratios or _DEFAULT_TARGET_RATIOS
+    dataset: list[dict[str, Any]] = []
+    for seed in range(num_prompts):
+        ratio = ratio_cycle[seed % len(ratio_cycle)]
+        # The row index doubles as the world-sampling seed and the prompt_style argument.
+        messages = build_dm_world_messages(
+            target_ratio=ratio,
+            reference_world=sample_world_definition(seed=seed, difficulty_target=ratio),
+            prompt_style=seed,
+        )
+        # Flatten the message objects into plain role/content dicts.
+        chat = [{"role": message.role, "content": message.content} for message in messages]
+        dataset.append({"prompt": chat, "target_ratio": ratio, "seed": seed})
+    return dataset
+
+
+def build_hero_grpo_dataset(
+    *,
+    num_prompts: int = 8,
+    world_input: dict[str, Any] | None = None,
+    target_ratios: list[float] | None = None,
+    max_game_steps: int = 40,
+    max_tool_calls: int = 80,
+) -> list[dict[str, Any]]:
+    """Build hero GRPO rows: a system/user prompt plus the serialized world and the episode limits.
+
+    A truthy ``world_input`` is reused for every row; otherwise a world is sampled per row,
+    seeded with the row index and the row's target ratio.
+    """
+    ratio_cycle = target_ratios or _DEFAULT_TARGET_RATIOS
+    dataset: list[dict[str, Any]] = []
+    for seed in range(num_prompts):
+        ratio = ratio_cycle[seed % len(ratio_cycle)]
+        world = world_input or sample_world_definition(seed=seed, difficulty_target=ratio)
+        system_prompt = format_hero_grpo_system_prompt(str(world["meta"]["title"]), max_game_steps, max_tool_calls)
+        user_prompt = _HERO_TASK_PROMPTS[seed % len(_HERO_TASK_PROMPTS)]
+        row = {
+            "prompt": [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt},
+            ],
+            # Serialized without whitespace; HeroToolEnvironment.reset() parses it back with json.loads.
+            "world_definition_json": json.dumps(world, separators=(",", ":")),
+            "seed": seed,
+            "target_ratio": ratio,
+            "max_game_steps": max_game_steps,
+            "max_tool_calls": max_tool_calls,
+        }
+        # The non-prompt keys match the keyword parameters of HeroToolEnvironment.reset().
+        dataset.append(row)
+    return dataset
+
+
+class HeroToolEnvironment:  # tool-call facade over HeroEnvironment: reset() starts an episode, each public tool method forwards one action
+    def __init__(
+        self,
+        *,
+        artifacts_root: Path | None = None,
+        interface_provider: str | None = None,
+        interface_model: str | None = None,
+        interface_narrate: bool = False,
+        interface_translation_mode: str | None = None,
+    ) -> None:
+        self.artifacts_root = artifacts_root
+        self.interface_provider = interface_provider
+        self.interface_model = interface_model
+        self.interface_narrate = interface_narrate
+        self.interface_translation_mode = interface_translation_mode
+        self.hero_env: HeroEnvironment | None = None  # stays None until reset() builds an environment
+        self.last_message = ""  # most recent observation message handed back to the caller
+
+    def reset(
+        self,
+        *,
+        world_definition_json: str,
+        seed: int | None = None,
+        max_game_steps: int = 40,
+        max_tool_calls: int = 80,
+        prompt: Any | None = None,
+        **_: Any,  # extra keyword arguments are accepted and ignored
+    ) -> str:
+        del prompt  # accepted but unused
+        interface_adapter = build_interface_adapter(
+            resolve_interface_config(
+                provider=self.interface_provider,  # type: ignore[arg-type]
+                model_name=self.interface_model,
+                narrate_observations=self.interface_narrate,
+                translation_mode=self.interface_translation_mode,  # type: ignore[arg-type]
+            )
+        )
+        self.hero_env = HeroEnvironment(  # a new environment and adapter are built on every reset
+            artifacts_root=self.artifacts_root,
+            interface_adapter=interface_adapter,
+        )
+        observation = self.hero_env.reset(
+            world_input=json.loads(world_definition_json),  # the world arrives as a JSON string
+            seed=seed,
+            max_game_steps=max_game_steps,
+            max_tool_calls=max_tool_calls,
+        )
+        self.last_message = observation.message
+        return observation.message  # the initial observation text
+
+    def act(self, command: str) -> str:
+        """Act in the dungeon with one strict CLI command.
+
+        Args:
+            command: Lowercase parser-style dungeon command.
+
+        Returns:
+            The environment's next observation message.
+        """
+
+        return self._step({"tool": "act", "command": command})
+
+    def scratchpad_read(self) -> str:
+        """Read the current scratchpad contents.
+
+        Returns:
+            The scratchpad text.
+        """
+
+        return self._step({"tool": "scratchpad_read"})
+
+    def scratchpad_write(self, mode: str, content: str) -> str:
+        """Write to the scratchpad.
+
+        Args:
+            mode: Either append or replace.
+            content: Text to write.
+
+        Returns:
+            The environment's acknowledgement message.
+        """
+
+        return self._step({"tool": "scratchpad_write", "mode": mode, "content": content})
+
+    def _cumulative_reward(self) -> float:  # total reward of the current episode
+        if self.hero_env is None:
+            return -1.0  # no episode has been started
+        return float(self.hero_env.episode_stats.total_reward)
+
+    def _episode_done(self) -> bool:  # True once the session is done or the status is terminal
+        if self.hero_env is None or self.hero_env.session is None:
+            return False
+        return bool(self.hero_env.session.done or self.hero_env.state.status in {"won", "lost", "timed_out"})
+
+    def _episode_won(self) -> bool:  # False before the first reset()
+        if self.hero_env is None:
+            return False
+        return bool(self.hero_env.episode_stats.player_won)
+
+    def _step(self, action: dict[str, Any]) -> str:  # shared path for all three tool methods
+        if self.hero_env is None:
+            raise RuntimeError("HeroToolEnvironment.reset must be called before using tools.")
+        result = self.hero_env.step(action)
+        self.last_message = result.observation.message
+        return result.observation.message
+
+
+def dm_reward_function(
+    *,
+    prompts: list[Any],
+    completions: list[Any],
+    target_ratio: list[float],
+    trainer_state: Any,
+    hero_policy: Any,
+    interface_provider: str | None = None,
+    interface_model: str | None = None,
+    interface_narrate: bool = False,
+    interface_translation_mode: str | None = None,
+    hero_max_game_steps: int = 40,
+    hero_max_tool_calls: int = 80,
+    artifacts_root: str | None = None,
+    **_: Any,
+) -> list[float]:
+    """Return one reward per completion: the rollout reward, or a penalty when the rollout reports a compile error."""
+    del prompts
+    scores: list[float] = []
+    for position, (completion, requested_ratio) in enumerate(zip(completions, target_ratio, strict=True)):
+        metrics = _cached_dm_rollout_metrics(
+            completion=completion,
+            requested_ratio=requested_ratio,
+            trainer_state=trainer_state,
+            completion_index=position,
+            hero_policy=hero_policy,
+            interface_provider=interface_provider,
+            interface_model=interface_model,
+            interface_narrate=interface_narrate,
+            interface_translation_mode=interface_translation_mode,
+            hero_max_game_steps=hero_max_game_steps,
+            hero_max_tool_calls=hero_max_tool_calls,
+            artifacts_root=artifacts_root,
+        )
+        # metrics.reward is only read when no compile error was recorded.
+        compiled = metrics.compile_error is None
+        scores.append(metrics.reward if compiled else _compile_error_penalty(metrics.compile_error))
+    return scores
+
+
+def _dm_reward_artifacts_dir(
+    *,
+    artifacts_root: str | None,
+    trainer_state: Any,
+    completion_index: int,
+) -> Path | None:
+    """Build the per-sample artifact directory for a DM reward rollout.
+
+    Args:
+        artifacts_root: Base directory, or ``None`` to disable artifacts.
+        trainer_state: Object whose ``global_step`` attribute (if present) names the step.
+        completion_index: Index of the completion within the batch.
+
+    Returns:
+        ``<artifacts_root>/dm_reward_rollouts/step_XXXXX/sample_YY``, or ``None``
+        when ``artifacts_root`` is ``None``.
+    """
+
+    if artifacts_root is None:
+        return None
+    # Coerce the same way `_cached_dm_rollout_metrics` does, so a missing or
+    # None `global_step` maps to 0 instead of breaking the `:05d` format.
+    step = int(getattr(trainer_state, "global_step", 0) or 0)
+    return Path(artifacts_root) / "dm_reward_rollouts" / f"step_{step:05d}" / f"sample_{completion_index:02d}"
+
+
+def dm_hero_success_reward(
+    *,
+    prompts: list[Any],
+    completions: list[Any],
+    target_ratio: list[float],
+    trainer_state: Any,
+    hero_policy: Any,
+    interface_provider: str | None = None,
+    interface_model: str | None = None,
+    interface_narrate: bool = False,
+    interface_translation_mode: str | None = None,
+    hero_max_game_steps: int = 40,
+    hero_max_tool_calls: int = 80,
+    artifacts_root: str | None = None,
+    **_: Any,
+) -> list[float]:
+    """Score each completion by whether the hero won its rollout.
+
+    Args:
+        prompts: Unused; discarded immediately.
+        completions: Completions to score, paired one-to-one with ``target_ratio``.
+        target_ratio: Requested ratio for each completion.
+        trainer_state: Forwarded to ``_cached_dm_rollout_metrics``.
+        hero_policy: Forwarded to ``_cached_dm_rollout_metrics``.
+        interface_provider: Forwarded to ``_cached_dm_rollout_metrics``.
+        interface_model: Forwarded to ``_cached_dm_rollout_metrics``.
+        interface_narrate: Forwarded to ``_cached_dm_rollout_metrics``.
+        interface_translation_mode: Forwarded to ``_cached_dm_rollout_metrics``.
+        hero_max_game_steps: Forwarded to ``_cached_dm_rollout_metrics``.
+        hero_max_tool_calls: Forwarded to ``_cached_dm_rollout_metrics``.
+        artifacts_root: Forwarded to ``_cached_dm_rollout_metrics``.
+        **_: Any further keyword arguments are accepted and ignored.
+
+    Returns:
+        One float per completion: the compile-error penalty when the metrics
+        carry a ``compile_error``, otherwise ``float(metrics.hero_player_won)``.
+
+    Raises:
+        ValueError: If ``completions`` and ``target_ratio`` differ in length.
+    """
+
+    del prompts
+    shared_kwargs = {
+        "trainer_state": trainer_state,
+        "hero_policy": hero_policy,
+        "interface_provider": interface_provider,
+        "interface_model": interface_model,
+        "interface_narrate": interface_narrate,
+        "interface_translation_mode": interface_translation_mode,
+        "hero_max_game_steps": hero_max_game_steps,
+        "hero_max_tool_calls": hero_max_tool_calls,
+        "artifacts_root": artifacts_root,
+    }
+
+    def score(position: int, candidate: Any, ratio: float) -> float:
+        outcome = _cached_dm_rollout_metrics(
+            completion=candidate,
+            requested_ratio=ratio,
+            completion_index=position,
+            **shared_kwargs,
+        )
+        if outcome.compile_error is not None:
+            return _compile_error_penalty(outcome.compile_error)
+        return float(outcome.hero_player_won)
+
+    return [
+        score(position, candidate, ratio)
+        for position, (candidate, ratio) in enumerate(zip(completions, target_ratio, strict=True))
+    ]
+
+
+def dm_hero_efficiency_reward(
+    *,
+    prompts: list[Any],
+    completions: list[Any],
+    target_ratio: list[float],
+    trainer_state: Any,
+    hero_policy: Any,
+    interface_provider: str | None = None,
+    interface_model: str | None = None,
+    interface_narrate: bool = False,
+    interface_translation_mode: str | None = None,
+    hero_max_game_steps: int = 40,
+    hero_max_tool_calls: int = 80,
+    artifacts_root: str | None = None,
+    **_: Any,
+) -> list[float]:
+    """Score each completion by the rollout's efficiency score, for won episodes only.
+
+    Args:
+        prompts: Unused; discarded immediately.
+        completions: Completions to score, paired one-to-one with ``target_ratio``.
+        target_ratio: Requested ratio for each completion.
+        trainer_state: Forwarded to ``_cached_dm_rollout_metrics``.
+        hero_policy: Forwarded to ``_cached_dm_rollout_metrics``.
+        interface_provider: Forwarded to ``_cached_dm_rollout_metrics``.
+        interface_model: Forwarded to ``_cached_dm_rollout_metrics``.
+        interface_narrate: Forwarded to ``_cached_dm_rollout_metrics``.
+        interface_translation_mode: Forwarded to ``_cached_dm_rollout_metrics``.
+        hero_max_game_steps: Forwarded to ``_cached_dm_rollout_metrics``.
+        hero_max_tool_calls: Forwarded to ``_cached_dm_rollout_metrics``.
+        artifacts_root: Forwarded to ``_cached_dm_rollout_metrics``.
+        **_: Any further keyword arguments are accepted and ignored.
+
+    Returns:
+        One float per completion: the compile-error penalty when the metrics
+        carry a ``compile_error``; ``0.0`` when the hero did not win; otherwise
+        ``metrics.efficiency_score`` clamped to ``[0.0, 1.0]``.
+
+    Raises:
+        ValueError: If ``completions`` and ``target_ratio`` differ in length.
+    """
+
+    del prompts
+    shared_kwargs = {
+        "trainer_state": trainer_state,
+        "hero_policy": hero_policy,
+        "interface_provider": interface_provider,
+        "interface_model": interface_model,
+        "interface_narrate": interface_narrate,
+        "interface_translation_mode": interface_translation_mode,
+        "hero_max_game_steps": hero_max_game_steps,
+        "hero_max_tool_calls": hero_max_tool_calls,
+        "artifacts_root": artifacts_root,
+    }
+
+    def score(position: int, candidate: Any, ratio: float) -> float:
+        outcome = _cached_dm_rollout_metrics(
+            completion=candidate,
+            requested_ratio=ratio,
+            completion_index=position,
+            **shared_kwargs,
+        )
+        if outcome.compile_error is not None:
+            return _compile_error_penalty(outcome.compile_error)
+        if outcome.hero_player_won:
+            return _clamp(outcome.efficiency_score, 0.0, 1.0)
+        return 0.0
+
+    return [
+        score(position, candidate, ratio)
+        for position, (candidate, ratio) in enumerate(zip(completions, target_ratio, strict=True))
+    ]
+
+
+def dm_hero_cleanliness_reward(
+    *,
+    prompts: list[Any],
+    completions: list[Any],
+    target_ratio: list[float],
+    trainer_state: Any,
+    hero_policy: Any,
+    interface_provider: str | None = None,
+    interface_model: str | None = None,
+    interface_narrate: bool = False,
+    interface_translation_mode: str | None = None,
+    hero_max_game_steps: int = 40,
+    hero_max_tool_calls: int = 80,
+    artifacts_root: str | None = None,
+    **_: Any,
+) -> list[float]:
+    """Score each completion by how few invalid commands and wrong submits the rollout had.
+
+    Args:
+        prompts: Unused; discarded immediately.
+        completions: Completions to score, paired one-to-one with ``target_ratio``.
+        target_ratio: Requested ratio for each completion.
+        trainer_state: Forwarded to ``_cached_dm_rollout_metrics``.
+        hero_policy: Forwarded to ``_cached_dm_rollout_metrics``.
+        interface_provider: Forwarded to ``_cached_dm_rollout_metrics``.
+        interface_model: Forwarded to ``_cached_dm_rollout_metrics``.
+        interface_narrate: Forwarded to ``_cached_dm_rollout_metrics``.
+        interface_translation_mode: Forwarded to ``_cached_dm_rollout_metrics``.
+        hero_max_game_steps: Forwarded to ``_cached_dm_rollout_metrics``.
+        hero_max_tool_calls: Forwarded to ``_cached_dm_rollout_metrics``.
+        artifacts_root: Forwarded to ``_cached_dm_rollout_metrics``.
+        **_: Any further keyword arguments are accepted and ignored.
+
+    Returns:
+        One float per completion: the compile-error penalty when the metrics
+        carry a ``compile_error``; otherwise ``1 - (invalid + 2 * wrong_submits) / steps``
+        floored at ``0.0``, capped at ``0.25`` when ``hero_policy_error`` is set,
+        and clamped to ``[0.0, 1.0]``.
+
+    Raises:
+        ValueError: If ``completions`` and ``target_ratio`` differ in length.
+    """
+
+    del prompts
+    shared_kwargs = {
+        "trainer_state": trainer_state,
+        "hero_policy": hero_policy,
+        "interface_provider": interface_provider,
+        "interface_model": interface_model,
+        "interface_narrate": interface_narrate,
+        "interface_translation_mode": interface_translation_mode,
+        "hero_max_game_steps": hero_max_game_steps,
+        "hero_max_tool_calls": hero_max_tool_calls,
+        "artifacts_root": artifacts_root,
+    }
+
+    def score(position: int, candidate: Any, ratio: float) -> float:
+        outcome = _cached_dm_rollout_metrics(
+            completion=candidate,
+            requested_ratio=ratio,
+            completion_index=position,
+            **shared_kwargs,
+        )
+        if outcome.compile_error is not None:
+            return _compile_error_penalty(outcome.compile_error)
+        # Prefer the hero's own step count, fall back to the observed one, never divide by zero.
+        denominator = max(1, outcome.hero_steps_taken or outcome.steps_taken or 0)
+        mistakes = outcome.invalid_command_count + (2 * outcome.wrong_submit_count)
+        cleanliness = max(0.0, 1.0 - mistakes / denominator)
+        if outcome.hero_policy_error is not None:
+            cleanliness = min(cleanliness, 0.25)
+        return _clamp(cleanliness, 0.0, 1.0)
+
+    return [
+        score(position, candidate, ratio)
+        for position, (candidate, ratio) in enumerate(zip(completions, target_ratio, strict=True))
+    ]
+
+
+def dm_json_format_reward(
+    *,
+    prompts: list[Any],
+    completions: list[Any],
+    trainer_state: Any,
+    **_: Any,
+) -> list[float]:
+    """Score how cleanly each completion presents a JSON payload.
+
+    Args:
+        prompts: Unused; discarded immediately.
+        completions: Completions to score.
+        trainer_state: Unused; discarded immediately.
+        **_: Any further keyword arguments are accepted and ignored.
+
+    Returns:
+        One float per completion, clamped to ``[-0.25, 1.0]``.
+    """
+
+    del prompts, trainer_state
+
+    def raw_score(text: str) -> float:
+        has_think_tag = "<think>" in text
+        candidate, prefix, suffix = _extract_json_candidate_parts(text)
+        if candidate is None:
+            # No JSON candidate: small credit for an opening brace, small penalty for a think tag.
+            total = 0.0
+            if "{" in text:
+                total += 0.05
+            if has_think_tag:
+                total -= 0.10
+            return total
+
+        try:
+            json.loads(candidate)
+        except Exception:
+            total = 0.20
+        else:
+            total = 0.60
+
+        if (prefix + suffix).strip():
+            # Text surrounds the JSON: credit is proportional to the JSON's share of the completion.
+            share = len(candidate) / max(1, len(_strip_code_fences(text).strip()))
+            total += 0.15 * share
+        else:
+            total += 0.25
+        total += 0.10 * _compactness_score(len(candidate), 4500)
+
+        if has_think_tag:
+            total -= 0.15
+        if "```" in text:
+            total -= 0.05
+        return total
+
+    return [_clamp(raw_score(_completion_text(item)), -0.25, 1.0) for item in completions]
+
+
+def dm_schema_reward(
+    *,
+    prompts: list[Any],
+    completions: list[Any],
+    target_ratio: list[float] | None = None,
+    trainer_state: Any,
+    **_: Any,
+) -> list[float]:
+    """Score each completion's parsed JSON with the structural prior.
+
+    Args:
+        prompts: Unused; discarded immediately.
+        completions: Completions to score.
+        target_ratio: Requested ratio per completion; when missing or empty,
+            ``None`` is used for every completion.
+        trainer_state: Unused; discarded immediately.
+        **_: Any further keyword arguments are accepted and ignored.
+
+    Returns:
+        One float per completion: ``0.0`` when the completion does not parse to
+        a dict, otherwise the result of ``_dm_structural_prior_score``.
+
+    Raises:
+        ValueError: If a non-empty ``target_ratio`` and ``completions`` differ in length.
+    """
+
+    del prompts, trainer_state
+    ratios = target_ratio if target_ratio else [None] * len(completions)
+    scores: list[float] = []
+    for item, ratio in zip(completions, ratios, strict=True):
+        parsed = _try_parse_completion_json(_completion_text(item))
+        scores.append(_dm_structural_prior_score(parsed, ratio) if isinstance(parsed, dict) else 0.0)
+    return scores
+
+
+def dm_validation_reward(
+    *,
+    prompts: list[Any],
+    completions: list[Any],
+    trainer_state: Any,
+    **_: Any,
+) -> list[float]:
+    """Score each completion by whether its JSON validates as a ``WorldDefinition``.
+
+    Args:
+        prompts: Unused; discarded immediately.
+        completions: Completions to score.
+        trainer_state: Unused; discarded immediately.
+        **_: Any further keyword arguments are accepted and ignored.
+
+    Returns:
+        One float per completion: ``0.0`` when the completion does not parse to
+        a dict, ``1.0`` when validation succeeds, otherwise the result of
+        ``_validation_error_score`` on the exception's ``errors()`` (or on an
+        empty list when the exception has no ``errors`` attribute).
+    """
+
+    del prompts, trainer_state
+
+    def grade(item: Any) -> float:
+        parsed = _try_parse_completion_json(_completion_text(item))
+        if not isinstance(parsed, dict):
+            return 0.0
+        try:
+            WorldDefinition.model_validate(parsed)
+        except Exception as exc:
+            problems = exc.errors() if hasattr(exc, "errors") else []
+            return _validation_error_score(problems)
+        return 1.0
+
+    return [grade(item) for item in completions]
+
+
+def dm_compile_prior_reward(
+    *,
+    prompts: list[Any],
+    completions: list[Any],
+    trainer_state: Any,
+    **_: Any,
+) -> list[float]:
+    """Score each completion by whether it loads and normalizes as a world.
+
+    Args:
+        prompts: Unused; discarded immediately.
+        completions: Completions to score.
+        trainer_state: Unused; discarded immediately.
+        **_: Any further keyword arguments are accepted and ignored.
+
+    Returns:
+        One float per completion: ``1.0`` when both
+        ``_load_dm_world_definition`` (with repair allowed) and
+        ``validate_and_normalize`` succeed, otherwise the compile-error penalty
+        for the raised exception's message.
+    """
+
+    del prompts, trainer_state
+
+    def grade(item: Any) -> float:
+        # Loading and normalizing share one handler: either failure maps to the same penalty.
+        try:
+            loaded = _load_dm_world_definition(_completion_text(item), allow_repair=True)
+            validate_and_normalize(loaded)
+        except Exception as exc:
+            return _compile_error_penalty(str(exc))
+        return 1.0
+
+    return [grade(item) for item in completions]
+
+
+def _bind_dm_reward_function(
+    *,
+    artifacts_root: str | None,
+    hero_policy: Any,
+    interface_provider: str | None,
+    interface_model: str | None,
+    interface_narrate: bool,
+    interface_translation_mode: str | None = None,
+    hero_max_game_steps: int,
+    hero_max_tool_calls: int,
+) -> Any:
+    """Bind the rollout settings onto ``dm_reward_function``.
+
+    All arguments are passed through to ``_bind_dm_rollout_reward`` unchanged.
+
+    Returns:
+        Whatever ``_bind_dm_rollout_reward`` returns for ``dm_reward_function``.
+    """
+
+    bound_settings = {
+        "artifacts_root": artifacts_root,
+        "hero_policy": hero_policy,
+        "interface_provider": interface_provider,
+        "interface_model": interface_model,
+        "interface_narrate": interface_narrate,
+        "interface_translation_mode": interface_translation_mode,
+        "hero_max_game_steps": hero_max_game_steps,
+        "hero_max_tool_calls": hero_max_tool_calls,
+    }
+    return _bind_dm_rollout_reward(dm_reward_function, **bound_settings)
+
+
+def _bind_dm_rollout_reward(
+    reward_impl: Callable[..., list[float]],
+    *,
+    artifacts_root: str | None,
+    hero_policy: Any,
+    interface_provider: str | None,
+    interface_model: str | None,
+    interface_narrate: bool,
+    interface_translation_mode: str | None = None,
+    hero_max_game_steps: int,
+    hero_max_tool_calls: int,
+) -> Any:
+    """Wrap a rollout reward so the rollout settings are supplied automatically.
+
+    Args:
+        reward_impl: Reward callable to wrap.
+        artifacts_root: Passed to ``reward_impl`` on every call.
+        hero_policy: Passed to ``reward_impl`` on every call.
+        interface_provider: Passed to ``reward_impl`` on every call.
+        interface_model: Passed to ``reward_impl`` on every call.
+        interface_narrate: Passed to ``reward_impl`` on every call.
+        interface_translation_mode: Passed to ``reward_impl`` on every call.
+        hero_max_game_steps: Passed to ``reward_impl`` on every call.
+        hero_max_tool_calls: Passed to ``reward_impl`` on every call.
+
+    Returns:
+        A keyword-only callable that forwards its keyword arguments plus the
+        bound settings to ``reward_impl`` and carries ``reward_impl``'s ``__name__``.
+    """
+
+    bound_settings = {
+        "artifacts_root": artifacts_root,
+        "hero_policy": hero_policy,
+        "interface_provider": interface_provider,
+        "interface_model": interface_model,
+        "interface_narrate": interface_narrate,
+        "interface_translation_mode": interface_translation_mode,
+        "hero_max_game_steps": hero_max_game_steps,
+        "hero_max_tool_calls": hero_max_tool_calls,
+    }
+
+    def reward_func(**kwargs: Any) -> list[float]:
+        # A caller-supplied duplicate of a bound setting raises TypeError, as with explicit keywords.
+        return reward_impl(**bound_settings, **kwargs)
+
+    reward_func.__name__ = reward_impl.__name__
+    return reward_func
+
+
+def _make_named_overlong_reward(*, name: str, max_completion_len: int) -> Callable[..., list[float]] | None:
+    """Create a soft overlong-punishment reward and give it a custom name.
+
+    Args:
+        name: Value assigned to the returned callable's ``__name__``.
+        max_completion_len: Maximum completion length passed to the factory.
+
+    Returns:
+        The named reward callable, or ``None`` when
+        ``get_soft_overlong_punishment`` is ``None``.
+    """
+
+    if get_soft_overlong_punishment is None:
+        return None
+    # A quarter of the completion budget, kept within [16, 64].
+    quarter_budget = max_completion_len // 4
+    cache_len = min(64, max(16, quarter_budget))
+    overlong_reward = get_soft_overlong_punishment(
+        max_completion_len=max_completion_len,
+        soft_punish_cache=cache_len,
+    )
+    overlong_reward.__name__ = name
+    return overlong_reward
+
+
+def _canonicalize_qwen_chat_template(tokenizer: Any) -> Any:
+    """Replace an ``<|im_start|>``/``<|im_end|>`` chat template with ``qwen3_chat_template``.
+
+    Args:
+        tokenizer: Tokenizer whose ``chat_template`` attribute is inspected and possibly replaced.
+
+    Returns:
+        The same tokenizer object. It is left untouched when
+        ``qwen3_chat_template`` is ``None`` or the current template lacks either marker.
+    """
+
+    current_template = getattr(tokenizer, "chat_template", "") or ""
+    uses_im_markers = "<|im_start|>" in current_template and "<|im_end|>" in current_template
+    if qwen3_chat_template is not None and uses_im_markers:
+        tokenizer.chat_template = qwen3_chat_template
+    return tokenizer
+
+
+def _chat_template_kwargs(tokenizer: Any) -> dict[str, Any] | None:
+    """Probe whether the tokenizer's chat template accepts ``enable_thinking=False``.
+
+    Args:
+        tokenizer: Object that may provide ``apply_chat_template``.
+
+    Returns:
+        ``{"enable_thinking": False}`` when a trial ``apply_chat_template``
+        call with that flag succeeds; ``None`` when the method is missing or
+        the trial call raises.
+    """
+
+    if not hasattr(tokenizer, "apply_chat_template"):
+        return None
+    probe_messages = [{"role": "user", "content": "ping"}]
+    try:
+        tokenizer.apply_chat_template(
+            probe_messages,
+            tokenize=False,
+            add_generation_prompt=True,
+            enable_thinking=False,
+        )
+    except Exception:
+        return None
+    else:
+        return {"enable_thinking": False}
+
+
+def _ensure_tool_response_schema(tokenizer: Any) -> Any:
+    """Install the Qwen3 chat template and response schema on a tool-calling tokenizer.
+
+    Args:
+        tokenizer: Tokenizer to canonicalize and, where applicable, extend.
+
+    Returns:
+        The tokenizer after ``_canonicalize_qwen_chat_template``. The template
+        is set to ``qwen3_chat_template`` and a missing ``response_schema`` is
+        filled with ``qwen3_schema`` only when both module values are
+        available, the tokenizer has ``parse_response``, and its template
+        contains both ``<tool_call>`` and ``<|im_start|>``.
+    """
+
+    tokenizer = _canonicalize_qwen_chat_template(tokenizer)
+    current_template = getattr(tokenizer, "chat_template", "") or ""
+    if qwen3_chat_template is None or qwen3_schema is None:
+        return tokenizer
+    if not hasattr(tokenizer, "parse_response"):
+        return tokenizer
+    supports_tool_calls = "<tool_call>" in current_template and "<|im_start|>" in current_template
+    if not supports_tool_calls:
+        return tokenizer
+
+    tokenizer.chat_template = qwen3_chat_template
+    # An existing response schema is left as it is.
+    if getattr(tokenizer, "response_schema", None) is None:
+        tokenizer.response_schema = qwen3_schema
+    return tokenizer
+
+
+def hero_tool_format_reward(
+    *,
+    prompts: list[Any],
+    completions: list[Any],
+    trainer_state: Any,
+    **_: Any,
+) -> list[float]:
+    """Score how well each hero completion is shaped as a single tool call.
+
+    Args:
+        prompts: Unused; discarded immediately.
+        completions: Completions to score.
+        trainer_state: Unused; discarded immediately.
+        **_: Any further keyword arguments are accepted and ignored.
+
+    Returns:
+        One float per completion, clamped to ``[-0.25, 1.0]``.
+    """
+
+    del prompts, trainer_state
+
+    def grade(item: Any) -> float:
+        text = _completion_text(item)
+        calls = _completion_tool_calls(item)
+        call_count = len(calls)
+        total = 0.0
+
+        if call_count == 1:
+            only_call = calls[0]
+            total += 0.65 if only_call["source"] == "tool_call" else 0.30
+            if only_call["name"] in _HERO_TOOL_NAMES:
+                total += 0.15
+            surrounding = _normalize_outer_completion_text(text)
+            if surrounding:
+                # Less credit the larger the share of text outside the call.
+                total += 0.10 * (1.0 - min(1.0, len(surrounding) / max(1, len(text.strip()))))
+            else:
+                total += 0.15
+        elif call_count > 1:
+            total += 0.20
+            if all(call["name"] in _HERO_TOOL_NAMES for call in calls):
+                total += 0.05
+        elif "<tool_call>" in text:
+            total += 0.05
+        elif '{"action"' in text.replace(" ", ""):
+            total += 0.10
+
+        if "<think>" in text:
+            total -= 0.15
+        if "```" in text:
+            total -= 0.05
+        return _clamp(total, -0.25, 1.0)
+
+    return [grade(item) for item in completions]
+
+
+def hero_action_semantics_reward(
+    *,
+    prompts: list[Any],
+    completions: list[Any],
+    trainer_state: Any,
+    **_: Any,
+) -> list[float]:
+    """Score the arguments of each completion's single tool call.
+
+    Args:
+        prompts: Unused; discarded immediately.
+        completions: Completions to score.
+        trainer_state: Unused; discarded immediately.
+        **_: Any further keyword arguments are accepted and ignored.
+
+    Returns:
+        One float per completion. Completions without exactly one tool call get
+        ``0.10`` (several calls) or ``0.0`` (none); otherwise the per-tool score,
+        scaled by ``0.85`` when the call's source is not ``tool_call``, clamped
+        to ``[-0.25, 1.0]``.
+    """
+
+    del prompts, trainer_state
+
+    def grade(item: Any) -> float:
+        calls = _completion_tool_calls(item)
+        call_count = len(calls)
+        if call_count != 1:
+            return 0.10 if call_count > 1 else 0.0
+
+        only_call = calls[0]
+        name = only_call["name"]
+        call_args = only_call["arguments"]
+        if name == "act":
+            value = _hero_act_semantics_reward(call_args)
+        elif name == "scratchpad_read":
+            value = 0.80 if call_args else 1.0
+        elif name == "scratchpad_write":
+            value = _hero_scratchpad_write_reward(call_args)
+        else:
+            value = -0.25
+
+        if only_call["source"] != "tool_call":
+            value *= 0.85
+        return _clamp(value, -0.25, 1.0)
+
+    return [grade(item) for item in completions]
+
+
+def hero_reward_function(
+    *,
+    prompts: list[Any],
+    completions: list[Any],
+    environments: list[HeroToolEnvironment],
+    trainer_state: Any,
+    **_: Any,
+) -> list[float]:
+    """Report each environment's cumulative reward, docking unfinished episodes.
+
+    Args:
+        prompts: Unused; discarded immediately.
+        completions: Unused; discarded immediately.
+        environments: Environments whose episode reward is read.
+        trainer_state: Unused; discarded immediately.
+        **_: Any further keyword arguments are accepted and ignored.
+
+    Returns:
+        One float per environment: its cumulative reward, minus ``0.05`` when
+        the episode is not done.
+    """
+
+    del prompts, completions, trainer_state
+
+    def settle(environment: Any) -> float:
+        total = environment._cumulative_reward()
+        return total if environment._episode_done() else total - 0.05
+
+    return [settle(environment) for environment in environments]
+
+
+def create_dm_grpo_trainer(
+    config: GRPOLaunchConfig,
+    *,
+    target_ratios: list[float] | None = None,
+    artifacts_root: Path | None = None,
+    closed_loop: DMClosedLoopConfig | None = None,
+):
+    """Assemble the ``GRPOTrainer`` used to train the DM policy.
+
+    Args:
+        config: Launch configuration (model name, sampling and trainer settings).
+        target_ratios: Passed to ``build_dm_grpo_dataset``.
+        artifacts_root: Base directory handed to the rollout rewards as a string, or ``None``.
+        closed_loop: Hero/interface settings; a default ``DMClosedLoopConfig`` is used when falsy.
+
+    Returns:
+        A ``GRPOTrainer`` wired with the dataset, tokenizer, reward functions
+        and reward weights built here.
+    """
+
+    _require_training_dependencies()
+    if not closed_loop:
+        closed_loop = DMClosedLoopConfig()
+    dataset = Dataset.from_list(
+        build_dm_grpo_dataset(num_prompts=config.num_prompts, target_ratios=target_ratios)
+    )
+    tokenizer = AutoTokenizer.from_pretrained(config.model_name, trust_remote_code=config.trust_remote_code)
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+    tokenizer = _canonicalize_qwen_chat_template(tokenizer)
+    template_kwargs = _chat_template_kwargs(tokenizer)
+    hero_client_config = resolve_structured_client_config(
+        "hero",
+        provider=closed_loop.hero_provider,  # type: ignore[arg-type]
+        model_name=closed_loop.hero_model,
+        adapter_path=closed_loop.hero_adapter_path,
+    )
+    hero_policy = HeroLLMPolicy(
+        create_structured_client(hero_client_config),
+        model_name=hero_client_config.model_name,
+    )
+
+    # Every rollout-based reward is bound with the same settings.
+    rollout_binding = {
+        "artifacts_root": None if artifacts_root is None else str(artifacts_root),
+        "hero_policy": hero_policy,
+        "interface_provider": closed_loop.interface_provider,
+        "interface_model": closed_loop.interface_model,
+        "interface_narrate": closed_loop.interface_narrate,
+        "interface_translation_mode": closed_loop.interface_translation_mode,
+        "hero_max_game_steps": closed_loop.hero_max_game_steps,
+        "hero_max_tool_calls": closed_loop.hero_max_tool_calls,
+    }
+    # Each reward sits next to its weight so the two lists cannot drift apart.
+    weighted_rewards: list[tuple[Any, float]] = [
+        (dm_json_format_reward, 0.25),
+        (dm_schema_reward, 0.20),
+        (dm_validation_reward, 0.50),
+        (dm_compile_prior_reward, 0.45),
+        (_bind_dm_rollout_reward(dm_hero_success_reward, **rollout_binding), 0.0),
+        (_bind_dm_rollout_reward(dm_hero_efficiency_reward, **rollout_binding), 0.0),
+        (_bind_dm_rollout_reward(dm_hero_cleanliness_reward, **rollout_binding), 0.0),
+        (_bind_dm_reward_function(**rollout_binding), 1.0),
+    ]
+    reward_funcs: list[Any] = [reward for reward, _weight in weighted_rewards]
+    reward_weights = [weight for _reward, weight in weighted_rewards]
+
+    overlong_reward = _make_named_overlong_reward(
+        name="dm_overlong_reward",
+        max_completion_len=config.max_completion_length,
+    )
+    if overlong_reward is not None:
+        reward_funcs.append(overlong_reward)
+        reward_weights.append(0.15)
+
+    model, peft_config, include_model_init_kwargs = _build_trainable_model(config)
+    grpo_args = _build_grpo_config(
+        config,
+        max_tool_calling_iterations=None,
+        chat_template_kwargs=template_kwargs,
+        reward_weights=reward_weights,
+        include_model_init_kwargs=include_model_init_kwargs,
+    )
+    return GRPOTrainer(
+        model=model,
+        reward_funcs=reward_funcs,
+        args=grpo_args,
+        train_dataset=dataset,
+        processing_class=tokenizer,
+        peft_config=peft_config,
+    )
+
+
+def create_hero_grpo_trainer(
+    config: GRPOLaunchConfig,
+    *,
+    world_input: dict[str, Any] | None = None,
+    artifacts_root: Path | None = None,
+    interface_provider: str | None = None,
+    interface_model: str | None = None,
+    interface_narrate: bool = False,
+    interface_translation_mode: str | None = None,
+    max_game_steps: int = 40,
+    max_tool_calls: int = 80,
+    max_tool_calling_iterations: int = 32,
+):
+    """Assemble the ``GRPOTrainer`` used to train the hero policy.
+
+    Args:
+        config: Launch configuration (model name, sampling and trainer settings).
+        world_input: Passed to ``build_hero_grpo_dataset``.
+        artifacts_root: Passed to every ``HeroToolEnvironment`` the factory creates.
+        interface_provider: Passed to every ``HeroToolEnvironment`` the factory creates.
+        interface_model: Passed to every ``HeroToolEnvironment`` the factory creates.
+        interface_narrate: Passed to every ``HeroToolEnvironment`` the factory creates.
+        interface_translation_mode: Passed to every ``HeroToolEnvironment`` the factory creates.
+        max_game_steps: Passed to ``build_hero_grpo_dataset``.
+        max_tool_calls: Passed to ``build_hero_grpo_dataset``.
+        max_tool_calling_iterations: Passed to ``_build_grpo_config``.
+
+    Returns:
+        A ``GRPOTrainer`` wired with the dataset, tokenizer, reward functions,
+        reward weights and environment factory built here.
+    """
+
+    _require_training_dependencies()
+    rows = build_hero_grpo_dataset(
+        num_prompts=config.num_prompts,
+        world_input=world_input,
+        max_game_steps=max_game_steps,
+        max_tool_calls=max_tool_calls,
+    )
+    dataset = Dataset.from_list(rows)
+    tokenizer = AutoTokenizer.from_pretrained(config.model_name, trust_remote_code=config.trust_remote_code)
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+    tokenizer = _ensure_tool_response_schema(tokenizer)
+    chat_template_kwargs = _chat_template_kwargs(tokenizer)
+
+    def environment_factory() -> HeroToolEnvironment:
+        """Create a fresh hero environment with this trainer's interface settings."""
+
+        return HeroToolEnvironment(
+            artifacts_root=artifacts_root,
+            interface_provider=interface_provider,
+            interface_model=interface_model,
+            interface_narrate=interface_narrate,
+            interface_translation_mode=interface_translation_mode,
+        )
+
+    reward_funcs: list[Any] = [
+        hero_tool_format_reward,
+        hero_action_semantics_reward,
+        hero_reward_function,
+    ]
+    # Weights line up positionally with `reward_funcs`.
+    reward_weights = [0.40, 0.30, 1.0]
+    overlong_reward = _make_named_overlong_reward(
+        name="hero_overlong_reward",
+        max_completion_len=config.max_completion_length,
+    )
+    if overlong_reward is not None:
+        reward_funcs.append(overlong_reward)
+        reward_weights.append(0.15)
+    model, peft_config, include_model_init_kwargs = _build_trainable_model(config)
+    return GRPOTrainer(
+        model=model,
+        reward_funcs=reward_funcs,
+        args=_build_grpo_config(
+            config,
+            max_tool_calling_iterations=max_tool_calling_iterations,
+            chat_template_kwargs=chat_template_kwargs,
+            reward_weights=reward_weights,
+            include_model_init_kwargs=include_model_init_kwargs,
+        ),
+        train_dataset=dataset,
+        processing_class=tokenizer,
+        peft_config=peft_config,
+        environment_factory=environment_factory,
+    )
+
+
+def run_dm_grpo(
+    config: GRPOLaunchConfig,
+    *,
+    target_ratios: list[float] | None = None,
+    artifacts_root: Path | None = None,
+    closed_loop: DMClosedLoopConfig | None = None,
+) -> Path:
+    """Build the DM trainer, train it, and save the model.
+
+    Args:
+        config: Launch configuration passed to ``create_dm_grpo_trainer``.
+        target_ratios: Passed to ``create_dm_grpo_trainer``.
+        artifacts_root: Passed to ``create_dm_grpo_trainer``.
+        closed_loop: Passed to ``create_dm_grpo_trainer``.
+
+    Returns:
+        ``config.output_dir``.
+    """
+
+    trainer_options = {
+        "target_ratios": target_ratios,
+        "artifacts_root": artifacts_root,
+        "closed_loop": closed_loop,
+    }
+    dm_trainer = create_dm_grpo_trainer(config, **trainer_options)
+    dm_trainer.train()
+    dm_trainer.save_model()
+    return config.output_dir
+
+
+def run_hero_grpo(
+    config: GRPOLaunchConfig,
+    *,
+    world_path: Path | None = None,
+    artifacts_root: Path | None = None,
+    interface_provider: str | None = None,
+    interface_model: str | None = None,
+    interface_narrate: bool = False,
+    interface_translation_mode: str | None = None,
+    max_game_steps: int = 40,
+    max_tool_calls: int = 80,
+    max_tool_calling_iterations: int = 32,
+) -> Path:
+    """Build the hero trainer, train it, and save the model.
+
+    Args:
+        config: Launch configuration passed to ``create_hero_grpo_trainer``.
+        world_path: Optional path loaded with ``load_world``; when ``None`` no world input is supplied.
+        artifacts_root: Passed to ``create_hero_grpo_trainer``.
+        interface_provider: Passed to ``create_hero_grpo_trainer``.
+        interface_model: Passed to ``create_hero_grpo_trainer``.
+        interface_narrate: Passed to ``create_hero_grpo_trainer``.
+        interface_translation_mode: Passed to ``create_hero_grpo_trainer``.
+        max_game_steps: Passed to ``create_hero_grpo_trainer``.
+        max_tool_calls: Passed to ``create_hero_grpo_trainer``.
+        max_tool_calling_iterations: Passed to ``create_hero_grpo_trainer``.
+
+    Returns:
+        ``config.output_dir``.
+    """
+
+    world_input = None
+    if world_path is not None:
+        world_input = load_world(str(world_path))
+    trainer_options = {
+        "world_input": world_input,
+        "artifacts_root": artifacts_root,
+        "interface_provider": interface_provider,
+        "interface_model": interface_model,
+        "interface_narrate": interface_narrate,
+        "interface_translation_mode": interface_translation_mode,
+        "max_game_steps": max_game_steps,
+        "max_tool_calls": max_tool_calls,
+        "max_tool_calling_iterations": max_tool_calling_iterations,
+    }
+    hero_trainer = create_hero_grpo_trainer(config, **trainer_options)
+    hero_trainer.train()
+    hero_trainer.save_model()
+    return config.output_dir
+
+
+def _build_lora_config(config: GRPOLaunchConfig):
+    """Build the ``LoraConfig`` for a fresh adapter.
+
+    Args:
+        config: Supplies ``rank``, ``alpha`` and ``dropout``.
+
+    Returns:
+        A ``LoraConfig`` for a causal LM targeting ``all-linear`` modules with no bias training.
+    """
+
+    _require_training_dependencies()
+    lora_settings = {
+        "r": config.rank,
+        "lora_alpha": config.alpha,
+        "lora_dropout": config.dropout,
+        "bias": "none",
+        "task_type": "CAUSAL_LM",
+        "target_modules": "all-linear",
+    }
+    return LoraConfig(**lora_settings)
+
+
+def _build_trainable_model(config: GRPOLaunchConfig) -> tuple[Any, Any | None, bool]:
+    """Choose between training a fresh LoRA adapter and resuming an existing one.
+
+    Args:
+        config: Supplies ``model_name`` and the optional ``resume_adapter_path``.
+
+    Returns:
+        ``(model, peft_config, include_model_init_kwargs)``. Without a resume
+        path this is the model name, a new LoRA config and ``True``. With one
+        it is the loaded base model wrapped by the trainable adapter, ``None``
+        and ``False``.
+
+    Raises:
+        FileNotFoundError: If ``resume_adapter_path`` is set but does not exist.
+    """
+
+    _require_training_dependencies()
+    resume_from = config.resume_adapter_path
+    if resume_from is None:
+        return config.model_name, _build_lora_config(config), True
+
+    # Imported only on the resume path.
+    from peft import PeftModel
+    from transformers import AutoModelForCausalLM
+
+    adapter_path = Path(resume_from)
+    if not adapter_path.exists():
+        raise FileNotFoundError(f"resume_adapter_path does not exist: {adapter_path}")
+
+    base_model = AutoModelForCausalLM.from_pretrained(
+        config.model_name,
+        cache_dir=os.getenv("HF_HOME"),
+        token=os.getenv("HF_TOKEN"),
+        **_model_init_kwargs(config),
+    )
+    resumed_model = PeftModel.from_pretrained(base_model, str(adapter_path), is_trainable=True)
+    resumed_model.train()
+    return resumed_model, None, False
+
+
+def _build_grpo_config(
+    config: GRPOLaunchConfig,
+    *,
+    max_tool_calling_iterations: int | None,
+    chat_template_kwargs: dict[str, Any] | None,
+    reward_weights: list[float] | None,
+    include_model_init_kwargs: bool = True,
+):
+    """Translate the launch configuration into a ``GRPOConfig``.
+
+    Creates ``config.output_dir`` (with parents) as a side effect.
+
+    Args:
+        config: Launch configuration to translate.
+        max_tool_calling_iterations: Passed through to ``GRPOConfig``.
+        chat_template_kwargs: Passed through to ``GRPOConfig``.
+        reward_weights: Passed through to ``GRPOConfig``.
+        include_model_init_kwargs: When true, ``_model_init_kwargs(config)`` is
+            supplied as ``model_init_kwargs``; otherwise ``None`` is.
+
+    Returns:
+        The populated ``GRPOConfig``.
+    """
+
+    _require_training_dependencies()
+    _require_vllm_if_requested(config)
+    config.output_dir.mkdir(parents=True, exist_ok=True)
+    model_init_kwargs = _model_init_kwargs(config) if include_model_init_kwargs else None
+
+    run_settings = {
+        "output_dir": str(config.output_dir),
+        "run_name": config.run_name,
+        "report_to": ["wandb"] if config.use_wandb else [],
+        "logging_steps": config.logging_steps,
+        "save_steps": config.save_steps,
+        "seed": config.seed,
+        "log_completions": True,
+        "log_unique_prompts": True,
+        "num_completions_to_print": 1,
+    }
+    optimization_settings = {
+        "max_steps": config.max_steps,
+        "learning_rate": config.learning_rate,
+        "per_device_train_batch_size": config.per_device_train_batch_size,
+        "gradient_accumulation_steps": config.gradient_accumulation_steps,
+        # bf16 is enabled exactly when CUDA is available.
+        "bf16": torch.cuda.is_available(),
+        "gradient_checkpointing": True,
+        "remove_unused_columns": False,
+        "loss_type": config.loss_type,
+        "importance_sampling_level": config.importance_sampling_level,
+        "mask_truncated_completions": True,
+    }
+    generation_settings = {
+        "num_generations": config.num_generations,
+        "max_completion_length": config.max_completion_length,
+        "temperature": config.temperature,
+        "top_p": config.top_p,
+        "top_k": config.top_k,
+        "min_p": config.min_p,
+        "repetition_penalty": config.repetition_penalty,
+        "use_transformers_paged": config.use_transformers_paged,
+        "cache_implementation": config.cache_implementation,
+        "max_tool_calling_iterations": max_tool_calling_iterations,
+        "chat_template_kwargs": chat_template_kwargs,
+    }
+    vllm_settings = {
+        "use_vllm": config.use_vllm,
+        "vllm_mode": config.vllm_mode,
+        "vllm_gpu_memory_utilization": config.vllm_gpu_memory_utilization,
+        "vllm_enable_sleep_mode": config.vllm_enable_sleep_mode,
+    }
+    return GRPOConfig(
+        **run_settings,
+        **optimization_settings,
+        **generation_settings,
+        **vllm_settings,
+        reward_weights=reward_weights,
+        model_init_kwargs=model_init_kwargs,
+    )
+
+
+def _cached_dm_rollout_metrics(
+    *,
+    completion: Any,
+    requested_ratio: float,
+    trainer_state: Any,
+    completion_index: int,
+    hero_policy: Any,
+    interface_provider: str | None,
+    interface_model: str | None,
+    interface_narrate: bool,
+    interface_translation_mode: str | None,
+    hero_max_game_steps: int,
+    hero_max_tool_calls: int,
+    artifacts_root: str | None,
+) -> DMRolloutMetrics:
+    """Return rollout metrics for a completion, evaluating it at most once per step.
+
+    The module-level cache is emptied whenever ``trainer_state.global_step``
+    (treated as 0 when missing or falsy) differs from the step it was filled for.
+
+    Args:
+        completion: Completion whose text is evaluated.
+        requested_ratio: Part of the cache key; forwarded to ``_evaluate_dm_rollout``.
+        trainer_state: Source of ``global_step``; forwarded to ``_evaluate_dm_rollout``.
+        completion_index: Part of the cache key; forwarded to ``_evaluate_dm_rollout``.
+        hero_policy: Keyed by ``id()``; forwarded to ``_evaluate_dm_rollout``.
+        interface_provider: Part of the cache key; forwarded to ``_evaluate_dm_rollout``.
+        interface_model: Part of the cache key; forwarded to ``_evaluate_dm_rollout``.
+        interface_narrate: Part of the cache key; forwarded to ``_evaluate_dm_rollout``.
+        interface_translation_mode: Part of the cache key; forwarded to ``_evaluate_dm_rollout``.
+        hero_max_game_steps: Part of the cache key; forwarded to ``_evaluate_dm_rollout``.
+        hero_max_tool_calls: Part of the cache key; forwarded to ``_evaluate_dm_rollout``.
+        artifacts_root: Part of the cache key; forwarded to ``_evaluate_dm_rollout``.
+
+    Returns:
+        The cached metrics for this key, or freshly evaluated metrics that are
+        stored before being returned.
+    """
+
+    global _DM_ROLLOUT_CACHE_STEP, _DM_ROLLOUT_CACHE
+    current_step = int(getattr(trainer_state, "global_step", 0) or 0)
+    if _DM_ROLLOUT_CACHE_STEP != current_step:
+        # A new step invalidates everything cached for the previous one.
+        _DM_ROLLOUT_CACHE_STEP = current_step
+        _DM_ROLLOUT_CACHE = {}
+
+    text = _completion_text(completion)
+    text_digest = hashlib.sha1(text.encode("utf-8")).hexdigest()
+    cache_key = (
+        current_step,
+        completion_index,
+        requested_ratio,
+        text_digest,
+        id(hero_policy),
+        interface_provider,
+        interface_model,
+        interface_narrate,
+        interface_translation_mode,
+        hero_max_game_steps,
+        hero_max_tool_calls,
+        artifacts_root,
+    )
+    found = _DM_ROLLOUT_CACHE.get(cache_key)
+    if found is None:
+        found = _evaluate_dm_rollout(
+            completion_text=text,
+            requested_ratio=requested_ratio,
+            trainer_state=trainer_state,
+            completion_index=completion_index,
+            hero_policy=hero_policy,
+            interface_provider=interface_provider,
+            interface_model=interface_model,
+            interface_narrate=interface_narrate,
+            interface_translation_mode=interface_translation_mode,
+            hero_max_game_steps=hero_max_game_steps,
+            hero_max_tool_calls=hero_max_tool_calls,
+            artifacts_root=artifacts_root,
+        )
+        _DM_ROLLOUT_CACHE[cache_key] = found
+    return found
+
+
+def _evaluate_dm_rollout(
+    *,
+    completion_text: str,
+    requested_ratio: float,
+    trainer_state: Any,
+    completion_index: int,
+    hero_policy: Any,
+    interface_provider: str | None,
+    interface_model: str | None,
+    interface_narrate: bool,
+    interface_translation_mode: str | None,
+    hero_max_game_steps: int,
+    hero_max_tool_calls: int,
+    artifacts_root: str | None,
+) -> DMRolloutMetrics:
+    """Score one DM completion by loading its world and running a hero episode on it.
+
+    The completion is parsed (with repair allowed) into a world definition, a
+    ``DMEnvironment`` is reset with ``requested_ratio`` as the difficulty hint,
+    and the world is stepped with a ``HeroRunner`` built from ``hero_policy``.
+
+    Reward rules visible here:
+      * a load failure, an exception during reset/step, or a compile error
+        reported by the observation yields ``_compile_error_penalty(...)``;
+      * otherwise the observation reward is used, minus 0.25 when the world's
+        ``meta.difficulty_target`` differs from ``requested_ratio`` by more
+        than 1e-6;
+      * the reported reward is never lower than -1.0 on the success path.
+
+    Returns:
+        A ``DMRolloutMetrics`` record; failures are reported through the
+        ``compile_error`` field rather than raised. Exceptions raised while
+        building the interface adapter, environment or runner are not caught.
+    """
+
+    def failure_metrics(error: str, *, hero_policy_error: Any) -> DMRolloutMetrics:
+        # Single definition of the "no usable episode" record so both failure
+        # paths cannot drift apart.
+        return DMRolloutMetrics(
+            reward=_compile_error_penalty(error),
+            compile_error=error,
+            requested_ratio=requested_ratio,
+            player_won=False,
+            steps_taken=None,
+            min_steps=None,
+            ratio=None,
+            efficiency_score=0.0,
+            quality_score=0.0,
+            invalid_command_count=0,
+            wrong_submit_count=0,
+            hero_player_won=False,
+            hero_total_reward=0.0,
+            hero_dense_return=0.0,
+            hero_steps_taken=0,
+            hero_tool_calls_total=0,
+            hero_policy_error=hero_policy_error,
+        )
+
+    try:
+        world = _load_dm_world_definition(completion_text, allow_repair=True)
+    except Exception as exc:
+        # The hero never ran, so there is no policy error to report.
+        return failure_metrics(str(exc), hero_policy_error=None)
+
+    interface_adapter = build_interface_adapter(
+        resolve_interface_config(
+            provider=interface_provider,  # type: ignore[arg-type]
+            model_name=interface_model,
+            narrate_observations=interface_narrate,
+            translation_mode=interface_translation_mode,  # type: ignore[arg-type]
+        )
+    )
+    env = DMEnvironment(
+        artifacts_root=_dm_reward_artifacts_dir(
+            artifacts_root=artifacts_root,
+            trainer_state=trainer_state,
+            completion_index=completion_index,
+        ),
+        interface_adapter=interface_adapter,
+    )
+    runner = HeroRunner(
+        policy=hero_policy,
+        max_game_steps=hero_max_game_steps,
+        max_tool_calls=hero_max_tool_calls,
+    )
+    try:
+        env.reset(difficulty_hint=requested_ratio)
+        result = env.step(world, runner=runner)
+        observation = result.observation
+        reward = float(observation.reward or 0.0)
+        if observation.compile_error is not None:
+            # A compile error reported by the environment overrides its reward.
+            reward = _compile_error_penalty(observation.compile_error)
+        elif abs(world.meta.difficulty_target - requested_ratio) > 1e-6:
+            # The world declared a different difficulty than the one requested.
+            reward -= 0.25
+        feedback = observation.feedback
+        breakdown = observation.reward_breakdown
+        hero_stats = runner.episode_stats
+        return DMRolloutMetrics(
+            reward=max(-1.0, reward),
+            compile_error=observation.compile_error,
+            requested_ratio=requested_ratio,
+            player_won=bool(observation.player_won),
+            steps_taken=observation.steps_taken,
+            min_steps=observation.min_steps,
+            ratio=observation.ratio,
+            efficiency_score=0.0 if breakdown is None or breakdown.efficiency_score is None else float(breakdown.efficiency_score),
+            quality_score=0.0 if breakdown is None else float(breakdown.quality_score),
+            invalid_command_count=0 if feedback is None else int(feedback.invalid_command_count),
+            wrong_submit_count=0 if feedback is None else int(feedback.wrong_submit_count),
+            # Without runner statistics, fall back to the environment's own verdict.
+            hero_player_won=bool(observation.player_won) if hero_stats is None else bool(hero_stats.player_won),
+            hero_total_reward=0.0 if hero_stats is None else float(hero_stats.total_reward),
+            hero_dense_return=0.0 if hero_stats is None else float(hero_stats.dense_return),
+            hero_steps_taken=0 if hero_stats is None else int(hero_stats.steps_taken),
+            hero_tool_calls_total=0 if hero_stats is None else int(hero_stats.tool_calls_total),
+            hero_policy_error=runner.last_error,
+        )
+    except Exception as exc:
+        return failure_metrics(str(exc), hero_policy_error=runner.last_error)
+
+
+def _model_init_kwargs(config: GRPOLaunchConfig) -> dict[str, Any]:
+    """Assemble the keyword arguments used to initialise the model.
+
+    ``trust_remote_code`` is always present. ``quantization_config`` is added
+    when ``_build_quantization_config`` produces one, and ``torch_dtype`` is
+    set to bfloat16 when CUDA is available.
+    """
+    kwargs: dict[str, Any] = {"trust_remote_code": config.trust_remote_code}
+    quantization = _build_quantization_config(config)
+    if quantization is not None:
+        kwargs.update(quantization_config=quantization)
+    if torch.cuda.is_available():
+        kwargs.update(torch_dtype=torch.bfloat16)
+    return kwargs
+
+
+def _build_quantization_config(config: GRPOLaunchConfig):
+    """Return a 4-bit NF4 ``BitsAndBytesConfig``, or ``None`` when not applicable.
+
+    Quantization is only configured when ``config.load_in_4bit`` is set and
+    CUDA is available. Training dependencies are checked first in every case.
+    """
+    _require_training_dependencies()
+    use_4bit = config.load_in_4bit and torch.cuda.is_available()
+    if not use_4bit:
+        return None
+    quantization = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_compute_dtype=torch.bfloat16,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_use_double_quant=True,
+    )
+    return quantization
+
+
+def _completion_text(completion: Any) -> str:
+    """Flatten a completion into plain text.
+
+    Strings pass through unchanged. For a list, the string contents of the
+    messages whose role is ``"assistant"`` are joined with newlines. Any other
+    value is converted with ``str()``.
+    """
+    if isinstance(completion, str):
+        return completion
+    if not isinstance(completion, list):
+        return str(completion)
+    return "\n".join(
+        message.get("content")
+        for message in completion
+        if isinstance(message, dict)
+        and message.get("role") == "assistant"
+        and isinstance(message.get("content"), str)
+    )
+
+
+def _extract_json_object(text: str) -> str:
+    """Return the JSON object text found in *text*.
+
+    Raises:
+        ValueError: when no JSON object candidate is found.
+    """
+    candidate, _before, _after = _extract_json_candidate_parts(text)
+    if candidate is not None:
+        return candidate
+    raise ValueError("Completion did not contain a JSON object.")
+
+
+def _extract_json_candidate_parts(text: str) -> tuple[str | None, str, str]:
+    """Split *text* into ``(json_object, text_before, text_after)``.
+
+    Code fences are stripped and the result is trimmed before searching. When
+    no object span is found the tuple is ``(None, cleaned_text, "")``.
+    """
+    stripped = _strip_code_fences(text).strip()
+    bounds = _find_json_object_span(stripped)
+    if bounds is not None:
+        begin, stop = bounds
+        return stripped[begin:stop], stripped[:begin], stripped[stop:]
+    return None, stripped, ""
+
+
+def _try_parse_completion_json(text: str) -> Any | None:
+    """Best-effort parse of the JSON object embedded in *text*.
+
+    Returns the decoded value, or ``None`` when no candidate is found or the
+    candidate cannot be decoded.
+    """
+    candidate = _extract_json_candidate_parts(text)[0]
+    if candidate is None:
+        return None
+    try:
+        decoded = json.loads(candidate)
+    except Exception:
+        # Deliberately best-effort: any decoding failure means "no payload".
+        return None
+    return decoded
+
+
+def _repair_dm_candidate_payload(payload: Any) -> Any:
+    """Recursively normalise common field-name mistakes in a DM world payload.
+
+    Lists are repaired element by element, non-dict scalars are returned as
+    they are, and each dict is rebuilt as a new dict with alias keys moved
+    onto their canonical names. Which aliases apply depends on the dict's
+    original ``"type"`` value.
+    """
+    if isinstance(payload, list):
+        return [_repair_dm_candidate_payload(entry) for entry in payload]
+    if not isinstance(payload, dict):
+        return payload
+
+    # Read from the raw payload: the "type" key itself may be moved below.
+    declared_type = payload.get("type")
+    fixed: dict[str, Any] = {
+        ("requires_step_ids" if key == "requires_step_id" else key): _repair_dm_candidate_payload(value)
+        for key, value in payload.items()
+    }
+
+    def adopt(alias: str, canonical: str) -> None:
+        # Move an alias onto its canonical key unless the canonical key is already set.
+        if canonical not in fixed and alias in fixed:
+            fixed[canonical] = fixed.pop(alias)
+
+    # Step dependencies must be a list: null becomes empty, a bare string is wrapped.
+    if "requires_step_ids" in fixed:
+        dependencies = fixed["requires_step_ids"]
+        if dependencies is None:
+            fixed["requires_step_ids"] = []
+        elif isinstance(dependencies, str):
+            fixed["requires_step_ids"] = [dependencies]
+
+    adopt("is_open", "open")
+    adopt("is_locked", "locked")
+    if declared_type in {"container", "door"}:
+        # "closed" is always dropped; it only feeds "open" when "open" is absent.
+        closed = fixed.pop("closed", None)
+        if isinstance(closed, bool) and "open" not in fixed:
+            fixed["open"] = not closed
+    if declared_type == "fixture":
+        adopt("reveal_item_id", "reveals_item_id")
+        adopt("reveal_readable_id", "reveals_readable_id")
+    if declared_type == "npc":
+        adopt("trade_requires_item_id", "requires_item_id")
+        adopt("trade_item_id", "gives_item_id")
+        adopt("trade_clue_id", "gives_clue_id")
+
+    # An item whose kind was written under "type" instead of "subtype".
+    looks_like_item = fixed.get("type") in _DM_ALLOWED_ITEM_TYPES and "start_node_id" in fixed
+    if "subtype" not in fixed and looks_like_item:
+        fixed["subtype"] = fixed.pop("type")
+    # A clue whose identifier was written under "clue_id".
+    if "id" not in fixed and "clue_id" in fixed and "text" in fixed:
+        fixed["id"] = fixed.pop("clue_id")
+    # A recipe that listed its two inputs as separate fields.
+    if "input_item_ids" not in fixed and "input_item_a_id" in fixed and "input_item_b_id" in fixed:
+        first_input = fixed.pop("input_item_a_id")
+        second_input = fixed.pop("input_item_b_id")
+        fixed["input_item_ids"] = [first_input, second_input]
+    if declared_type == "container":
+        fixed.pop("contains_items", None)
+    has_recipe_inputs = "input_item_ids" in fixed or (
+        "input_item_a_id" in fixed and "input_item_b_id" in fixed
+    )
+    if "output_item_id" in fixed and has_recipe_inputs:
+        for extra_key in ("label", "description"):
+            fixed.pop(extra_key, None)
+
+    if declared_type in {"location", "junction", "door"}:
+        fixed.pop("parent_id", None)
+    return fixed
+
+
+def _repair_dm_world_payload(payload: dict[str, Any]) -> dict[str, Any]:
+    """Run the full repair pipeline over a raw DM world payload.
+
+    The payload is first rebuilt by ``_repair_dm_candidate_payload``. Missing
+    ``meta`` fields (title, start node, win condition) are then inferred from
+    the rest of the world, and a fixed sequence of in-place repair passes is
+    applied to the rebuilt dict. The passes run in the order written below.
+
+    Returns:
+        The repaired dict with the completed ``meta`` attached, or the original
+        ``payload`` when the field-level repair does not yield a dict.
+    """
+    repaired = _repair_dm_candidate_payload(payload)
+    if not isinstance(repaired, dict):
+        return payload
+
+    # Work on a copy of meta; it is written back only at the very end.
+    meta = repaired.get("meta")
+    if not isinstance(meta, dict):
+        meta = {}
+    else:
+        meta = dict(meta)
+
+    title = meta.get("title")
+    if not isinstance(title, str) or not title.strip():
+        meta["title"] = _infer_dm_world_title(repaired)
+
+    start_node_id = meta.get("start_node_id")
+    if not isinstance(start_node_id, str) or not start_node_id:
+        inferred_start = _infer_dm_start_node_id(repaired.get("nodes"))
+        if inferred_start is not None:
+            meta["start_node_id"] = inferred_start
+
+    # Fill in each win-condition field only when it is missing or not a non-empty string.
+    win_condition = meta.get("win_condition")
+    if not isinstance(win_condition, dict):
+        win_condition = {}
+    else:
+        win_condition = dict(win_condition)
+    if not isinstance(win_condition.get("type"), str) or not win_condition.get("type"):
+        win_condition["type"] = "deduce"
+    if not isinstance(win_condition.get("target_npc_id"), str) or not win_condition.get("target_npc_id"):
+        inferred_guardian = _infer_dm_guardian_npc_id(repaired)
+        if inferred_guardian is not None:
+            win_condition["target_npc_id"] = inferred_guardian
+    if not isinstance(win_condition.get("answer_string"), str) or not win_condition.get("answer_string"):
+        inferred_answer = _infer_dm_answer_string(repaired.get("quest_chain"))
+        if inferred_answer:
+            win_condition["answer_string"] = inferred_answer
+    if win_condition:
+        meta["win_condition"] = win_condition
+
+    # In-place repair passes over the rebuilt payload. The guardian id and start
+    # node are passed explicitly because repaired["meta"] is not updated yet.
+    _repair_guardian_trade_fields(repaired, guardian_id=win_condition.get("target_npc_id"))
+    _repair_submit_actions(repaired)
+    _repair_door_lock_keys_from_edges(repaired)
+    _repair_missing_item_references(repaired)
+    _repair_produced_item_placements(repaired, default_start_node_id=meta.get("start_node_id"))
+    _repair_required_key_item_subtypes(repaired)
+    _repair_duplicate_recipe_ids(repaired)
+    _repair_guardian_room_access(repaired, guardian_id=win_condition.get("target_npc_id"), start_node_id=meta.get("start_node_id"))
+    _repair_missing_readable_clue_ids(repaired)
+    _repair_missing_clue_sources(repaired, guardian_id=win_condition.get("target_npc_id"))
+    _repair_take_action_aliases(repaired)
+    _repair_take_sources_from_room_prereqs(repaired)
+    _repair_locked_room_entry_steps(repaired)
+    _repair_missing_take_steps(repaired)
+    _repair_guardian_ending(
+        repaired,
+        guardian_id=win_condition.get("target_npc_id"),
+        answer_string=win_condition.get("answer_string"),
+    )
+    # Guardian room access is checked a second time after the passes above.
+    _repair_guardian_room_access(repaired, guardian_id=win_condition.get("target_npc_id"), start_node_id=meta.get("start_node_id"))
+    repaired["meta"] = meta
+    return repaired
+
+
+def _infer_dm_world_title(payload: dict[str, Any]) -> str:
+    """Pick a title for a world that did not declare one.
+
+    Preference order: ``meta["name"]``, ``meta["world_name"]``, then
+    ``"The <label>"`` from the first location/junction node with a non-blank
+    label, and finally the fixed fallback ``"The Hidden Vault"``.
+    """
+
+    def is_usable(text: Any) -> bool:
+        return isinstance(text, str) and bool(text.strip())
+
+    meta = payload.get("meta")
+    if isinstance(meta, dict):
+        for candidate in (meta.get("name"), meta.get("world_name")):
+            if is_usable(candidate):
+                return candidate.strip()
+
+    nodes = payload.get("nodes")
+    for node in nodes if isinstance(nodes, list) else ():
+        if isinstance(node, dict) and node.get("type") in {"location", "junction"}:
+            room_label = node.get("label")
+            if is_usable(room_label):
+                return f"The {room_label.strip()}"
+    return "The Hidden Vault"
+
+
+def _infer_dm_start_node_id(nodes: Any) -> str | None:
+    """Return the id of the first location/junction node with a non-empty string id.
+
+    Returns ``None`` when *nodes* is not a list or no such node exists.
+    """
+    if not isinstance(nodes, list):
+        return None
+    room_ids = (
+        node.get("id")
+        for node in nodes
+        if isinstance(node, dict) and node.get("type") in {"location", "junction"}
+    )
+    return next((room_id for room_id in room_ids if isinstance(room_id, str) and room_id), None)
+
+
+def _infer_dm_guardian_npc_id(payload: dict[str, Any]) -> str | None:
+    """Guess which NPC is the guardian.
+
+    The quest chain is scanned from the last step backwards for a ``talk``
+    action and its argument wins. Otherwise the first NPC whose id contains
+    ``"guardian"`` is chosen, falling back to the first NPC with a usable id.
+    """
+    quest_chain = payload.get("quest_chain")
+    if isinstance(quest_chain, list):
+        for step in reversed(quest_chain):
+            step_action = step.get("action") if isinstance(step, dict) else None
+            talked_to = _extract_single_action_argument(step_action, "talk")
+            if talked_to:
+                return talked_to
+
+    nodes = payload.get("nodes")
+    if not isinstance(nodes, list):
+        return None
+    raw_ids = [
+        node.get("id")
+        for node in nodes
+        if isinstance(node, dict) and node.get("type") == "npc"
+    ]
+    npc_ids = [npc_id for npc_id in raw_ids if isinstance(npc_id, str) and npc_id]
+    for npc_id in npc_ids:
+        if "guardian" in npc_id:
+            return npc_id
+    return npc_ids[0] if npc_ids else None
+
+
+def _infer_dm_answer_string(quest_chain: Any) -> str | None:
+    """Recover the answer from the last ``submit`` action in the quest chain.
+
+    Steps are scanned from the end; the first submit argument that is still
+    non-empty after ``normalize_answer_text`` is returned in normalised form.
+    Returns ``None`` when *quest_chain* is not a list or nothing qualifies.
+    """
+    if not isinstance(quest_chain, list):
+        return None
+    for step in reversed(quest_chain):
+        step_action = step.get("action") if isinstance(step, dict) else None
+        submitted = _extract_single_action_argument(step_action, "submit")
+        if submitted is not None:
+            cleaned = normalize_answer_text(submitted)
+            if cleaned:
+                return cleaned
+    return None
+
+
+def _repair_missing_readable_clue_ids(payload: dict[str, Any]) -> None:
+    """Hand unused clue ids to readable nodes that have no ``clue_id``.
+
+    Clue ids not yet referenced by any readable are assigned, in declaration
+    order, to readables lacking a truthy ``clue_id``, in node order, until the
+    unused ids run out. Mutates the nodes in place.
+    """
+    nodes = payload.get("nodes")
+    clues = payload.get("clues")
+    if not (isinstance(nodes, list) and isinstance(clues, list)):
+        return
+
+    declared_ids = [
+        clue.get("id")
+        for clue in clues
+        if isinstance(clue, dict) and isinstance(clue.get("id"), str)
+    ]
+    claimed_ids = {
+        node.get("clue_id")
+        for node in nodes
+        if isinstance(node, dict) and node.get("type") == "readable" and isinstance(node.get("clue_id"), str)
+    }
+    unclaimed = iter([clue_id for clue_id in declared_ids if clue_id not in claimed_ids])
+
+    for node in nodes:
+        if not isinstance(node, dict) or node.get("type") != "readable" or node.get("clue_id"):
+            continue
+        # Declared ids are always strings, so None safely marks exhaustion.
+        next_clue_id = next(unclaimed, None)
+        if next_clue_id is None:
+            return
+        node["clue_id"] = next_clue_id
+
+
+def _repair_guardian_trade_fields(payload: dict[str, Any], *, guardian_id: Any) -> None:
+    """Clear the trade fields on the guardian NPC.
+
+    The first NPC node whose id equals *guardian_id* gets ``requires_item_id``,
+    ``gives_item_id`` and ``gives_clue_id`` set to ``None``. Nothing happens
+    when *guardian_id* is not a non-empty string or no such node exists.
+    """
+    if not (isinstance(guardian_id, str) and guardian_id):
+        return
+    nodes = payload.get("nodes")
+    if not isinstance(nodes, list):
+        return
+    guardian = next(
+        (
+            node
+            for node in nodes
+            if isinstance(node, dict) and node.get("type") == "npc" and node.get("id") == guardian_id
+        ),
+        None,
+    )
+    if guardian is not None:
+        for trade_field in ("requires_item_id", "gives_item_id", "gives_clue_id"):
+            guardian[trade_field] = None
+
+
+def _repair_submit_actions(payload: dict[str, Any]) -> None:
+    """Rewrite ``submit`` quest actions into the quoted form ``submit("...")``.
+
+    A step whose action already equals ``submit("<answer>")`` for its extracted
+    answer is left as it is; any other submit action is replaced with the
+    quoted form of the normalised answer. Mutates the steps in place.
+    """
+    quest_chain = payload.get("quest_chain")
+    if not isinstance(quest_chain, list):
+        return
+    for step in (entry for entry in quest_chain if isinstance(entry, dict)):
+        current_action = step.get("action")
+        answer = _extract_single_action_argument(current_action, "submit")
+        already_quoted = answer is not None and current_action == f'submit("{answer}")'
+        if answer is None or already_quoted:
+            continue
+        step["action"] = f'submit("{normalize_answer_text(answer)}")'
+
+
+def _repair_door_lock_keys_from_edges(payload: dict[str, Any]) -> None:
+    """Copy key requirements declared on edges onto the door nodes they use.
+
+    For each edge naming both a door and a required item, the door's
+    ``lock_key_id`` is set to that item; when edges disagree, the first key
+    seen for a door wins. If the world has exactly one door, edges that point
+    at an unknown door id are redirected to it. Mutates edges and nodes in
+    place.
+    """
+    nodes = payload.get("nodes")
+    edges = payload.get("edges")
+    if not (isinstance(nodes, list) and isinstance(edges, list)):
+        return
+
+    doors = [node for node in nodes if isinstance(node, dict) and node.get("type") == "door"]
+    door_ids = [door.get("id") for door in doors if isinstance(door.get("id"), str)]
+    only_door_id = door_ids[0] if len(door_ids) == 1 else None
+
+    key_by_door: dict[str, str] = {}
+    for edge in edges:
+        if not isinstance(edge, dict):
+            continue
+        door_ref = edge.get("door_node_id")
+        key_ref = edge.get("required_item_id")
+        if only_door_id is not None and isinstance(door_ref, str) and door_ref not in door_ids:
+            # Unknown door reference in a single-door world: point it at that door.
+            door_ref = only_door_id
+            edge["door_node_id"] = door_ref
+        if isinstance(door_ref, str) and isinstance(key_ref, str):
+            # Keeps the first key recorded for each door.
+            key_by_door.setdefault(door_ref, key_ref)
+
+    for door in doors:
+        door_id = door.get("id")
+        if isinstance(door_id, str) and door_id in key_by_door:
+            door["lock_key_id"] = key_by_door[door_id]
+
+
+def _repair_required_key_item_subtypes(payload: dict[str, Any]) -> None:
+    """Force ``subtype = "key"`` on every item used as a key.
+
+    An item counts as a key when its id appears as ``required_item_id`` on an
+    edge or as ``lock_key_id`` on any node. Mutates the items in place.
+    """
+    items = payload.get("items")
+    if not isinstance(items, list):
+        return
+
+    def referenced_ids(records: Any, field: str) -> set[str]:
+        # Non-empty string values of *field* across the dict records of a list.
+        if not isinstance(records, list):
+            return set()
+        values = (record.get(field) for record in records if isinstance(record, dict))
+        return {value for value in values if isinstance(value, str) and value}
+
+    key_ids = referenced_ids(payload.get("edges"), "required_item_id")
+    key_ids |= referenced_ids(payload.get("nodes"), "lock_key_id")
+
+    for item in items:
+        if not isinstance(item, dict):
+            continue
+        item_id = item.get("id")
+        if isinstance(item_id, str) and item_id in key_ids:
+            item["subtype"] = "key"
+
+
+def _repair_duplicate_recipe_ids(payload: dict[str, Any]) -> None:
+    """Rename recipes whose id clashes with another id in the world.
+
+    A recipe id clashes when it equals a node, item or clue ``id``, a quest
+    step ``step_id``, or the id of an earlier recipe. Clashing recipes get a
+    replacement from ``_unique_world_id``. Mutates the recipes in place.
+    """
+    recipes = payload.get("recipes")
+    if not isinstance(recipes, list):
+        return
+
+    reserved_ids: set[str] = set()
+    sections = (("nodes", "id"), ("items", "id"), ("clues", "id"), ("quest_chain", "step_id"))
+    for section, id_field in sections:
+        entries = payload.get(section)
+        if not isinstance(entries, list):
+            continue
+        section_ids = (entry.get(id_field) for entry in entries if isinstance(entry, dict))
+        reserved_ids.update(value for value in section_ids if isinstance(value, str) and value)
+
+    seen_recipe_ids: set[str] = set()
+    for recipe in recipes:
+        if not isinstance(recipe, dict):
+            continue
+        recipe_id = recipe.get("id")
+        if not (isinstance(recipe_id, str) and recipe_id):
+            continue
+        if recipe_id in reserved_ids or recipe_id in seen_recipe_ids:
+            recipe_id = _unique_world_id(recipe_id, reserved_ids | seen_recipe_ids)
+            recipe["id"] = recipe_id
+        seen_recipe_ids.add(recipe_id)
+
+
+def _repair_guardian_room_access(payload: dict[str, Any], *, guardian_id: Any, start_node_id: Any) -> None:
+    """Move the guardian NPC into a room that is reachable from the start.
+
+    If the guardian's ``parent_id`` is already one of the rooms returned by
+    ``_reachable_passage_room_ids``, nothing changes. Otherwise the guardian is
+    re-parented to the room the quest chain implies for talking to it, or to
+    the first reachable room in sorted order when that room is not reachable.
+    A ``go(...)`` step is inserted before the guardian talk when the quest does
+    not already lead to the chosen room.
+    """
+    if not (isinstance(guardian_id, str) and guardian_id):
+        return
+    nodes = payload.get("nodes")
+    edges = payload.get("edges")
+    quest_chain = payload.get("quest_chain")
+    if not (isinstance(nodes, list) and isinstance(edges, list)):
+        return
+
+    reachable = _reachable_passage_room_ids(payload, start_node_id=start_node_id)
+    if not reachable:
+        return
+
+    target_room_id = _infer_guardian_talk_room_from_quest(quest_chain, guardian_id=guardian_id)
+    if target_room_id not in reachable:
+        target_room_id = sorted(reachable)[0]
+
+    guardian = next(
+        (
+            node
+            for node in nodes
+            if isinstance(node, dict) and node.get("type") == "npc" and node.get("id") == guardian_id
+        ),
+        None,
+    )
+    if guardian is None:
+        return
+    current_home = guardian.get("parent_id")
+    if isinstance(current_home, str) and current_home in reachable:
+        return
+
+    guardian["parent_id"] = target_room_id
+    quest_room_id = _infer_guardian_talk_room_from_quest(quest_chain, guardian_id=guardian_id)
+    if quest_room_id != target_room_id:
+        _insert_quest_step_before_guardian_talk(
+            quest_chain,
+            guardian_id=guardian_id,
+            step_id_base=f"go_{target_room_id}",
+            description=f"Go to {_humanize_identifier(target_room_id).lower()}.",
+            action=f"go({target_room_id})",
+        )
+
+
+def _repair_missing_item_references(payload: dict[str, Any]) -> None:
+    """Create placeholder items for item ids that are referenced but never declared.
+
+    References are collected, in this order, from edge key requirements, node
+    fields (lock keys, required items, revealed or given items) and recipe
+    inputs and outputs. Lock and edge keys become ``"key"`` items; everything
+    else becomes a ``"puzzle"`` item. Items that something else produces are
+    created with no fallback start node. Appends to ``payload["items"]`` in
+    place.
+    """
+    items = payload.get("items")
+    if not isinstance(items, list):
+        return
+    nodes = payload.get("nodes")
+    edges = payload.get("edges")
+    quest_chain = payload.get("quest_chain")
+
+    known_item_ids = {
+        entry.get("id")
+        for entry in items
+        if isinstance(entry, dict) and isinstance(entry.get("id"), str) and entry.get("id")
+    }
+
+    def ensure_item(item_id: Any, *, subtype: str, start_node_id: str | None) -> None:
+        # Declare *item_id* once; a placement implied by the quest chain beats the fallback.
+        if not isinstance(item_id, str) or not item_id or item_id in known_item_ids:
+            return
+        placement = _infer_item_start_node_from_quest(quest_chain, item_id) or start_node_id
+        readable_name = _humanize_identifier(item_id)
+        items.append(
+            {
+                "id": item_id,
+                "label": readable_name,
+                "description": f"A {readable_name.lower()} needed to solve the dungeon.",
+                "subtype": subtype,
+                "start_node_id": placement,
+            }
+        )
+        known_item_ids.add(item_id)
+
+    fallback_room_id = _infer_dm_start_node_id(payload.get("nodes"))
+    if isinstance(edges, list):
+        for edge in edges:
+            if isinstance(edge, dict):
+                ensure_item(edge.get("required_item_id"), subtype="key", start_node_id=fallback_room_id)
+
+    if not isinstance(nodes, list):
+        return
+
+    for node in nodes:
+        if not isinstance(node, dict):
+            continue
+        kind = node.get("type")
+        if kind in {"container", "door"}:
+            ensure_item(node.get("lock_key_id"), subtype="key", start_node_id=fallback_room_id)
+            continue
+        if kind not in {"readable", "fixture", "npc"}:
+            continue
+        ensure_item(
+            node.get("requires_item_id"),
+            subtype="puzzle",
+            start_node_id=_node_room_start_node_id(node, fallback_room_id),
+        )
+        # Items handed out by a fixture or NPC are produced later, so no placement is given.
+        if kind == "fixture":
+            ensure_item(node.get("reveals_item_id"), subtype="puzzle", start_node_id=None)
+        elif kind == "npc":
+            ensure_item(node.get("gives_item_id"), subtype="puzzle", start_node_id=None)
+
+    recipes = payload.get("recipes")
+    if not isinstance(recipes, list):
+        return
+    for recipe in recipes:
+        if not isinstance(recipe, dict):
+            continue
+        ingredient_ids = recipe.get("input_item_ids")
+        for ingredient_id in ingredient_ids if isinstance(ingredient_ids, list) else ():
+            ensure_item(ingredient_id, subtype="puzzle", start_node_id=fallback_room_id)
+        ensure_item(recipe.get("output_item_id"), subtype="puzzle", start_node_id=None)
+
+
+def _repair_produced_item_placements(payload: dict[str, Any], *, default_start_node_id: Any) -> None:
+    """Fix ``start_node_id`` on items according to whether they are produced.
+
+    Items that are a recipe output, given by an NPC, or revealed by a fixture
+    get ``start_node_id = None``. Any other item whose ``start_node_id`` is
+    ``None`` is placed at *default_start_node_id* when that is a non-empty
+    string. Mutates the items in place.
+    """
+    items = payload.get("items")
+    if not isinstance(items, list):
+        return
+
+    def is_id(value: Any) -> bool:
+        return isinstance(value, str) and bool(value)
+
+    produced: set[str] = set()
+    recipes = payload.get("recipes")
+    if isinstance(recipes, list):
+        outputs = (recipe.get("output_item_id") for recipe in recipes if isinstance(recipe, dict))
+        produced.update(output for output in outputs if is_id(output))
+
+    nodes = payload.get("nodes")
+    for node in nodes if isinstance(nodes, list) else ():
+        if not isinstance(node, dict):
+            continue
+        kind = node.get("type")
+        if kind == "npc":
+            handed_out = node.get("gives_item_id")
+        elif kind == "fixture":
+            handed_out = node.get("reveals_item_id")
+        else:
+            continue
+        if is_id(handed_out):
+            produced.add(handed_out)
+
+    fallback_room_id = default_start_node_id if is_id(default_start_node_id) else None
+    for item in items:
+        if not isinstance(item, dict) or not is_id(item.get("id")):
+            continue
+        if item.get("id") in produced:
+            item["start_node_id"] = None
+        elif fallback_room_id is not None and item.get("start_node_id") is None:
+            item["start_node_id"] = fallback_room_id
+
+
+def _repair_missing_clue_sources(payload: dict[str, Any], *, guardian_id: Any) -> None:
+    """Give every declared clue an in-world source by synthesising readables.
+
+    A clue counts as sourced when a readable references it through ``clue_id``
+    or an NPC through ``gives_clue_id``. For each unsourced clue a readable
+    "inscription" node is appended, gated behind an item, and a quest step that
+    uses it is inserted before the guardian talk.
+
+    The inscriptions are placed in the guardian's room when that is a known
+    room, otherwise in the first room found by ``_infer_dm_start_node_id``, and
+    as a last resort in the smallest room id. The gating item comes from
+    ``_select_synthetic_clue_gate_item_id``; when that yields nothing an
+    ``inspection_lens`` item is added to ``payload["items"]``.
+
+    Mutates ``payload`` (nodes, items, quest chain) in place and returns nothing.
+    """
+    clues = payload.get("clues")
+    nodes = payload.get("nodes")
+    items = payload.get("items")
+    quest_chain = payload.get("quest_chain")
+    if not isinstance(clues, list) or not isinstance(nodes, list):
+        return
+
+    clue_text_by_id = {
+        clue.get("id"): clue.get("text")
+        for clue in clues
+        if isinstance(clue, dict) and isinstance(clue.get("id"), str)
+    }
+    if not clue_text_by_id:
+        return
+
+    # One pass over the nodes: collect rooms, already-sourced clues and the guardian's room.
+    sourced_clue_ids: set[str] = set()
+    room_ids: set[str] = set()
+    guardian_room_id: str | None = None
+    for node in nodes:
+        if not isinstance(node, dict):
+            continue
+        node_type = node.get("type")
+        if node_type in {"location", "junction"}:
+            node_id = node.get("id")
+            if isinstance(node_id, str) and node_id:
+                room_ids.add(node_id)
+        elif node_type == "readable":
+            clue_id = node.get("clue_id")
+            if isinstance(clue_id, str) and clue_id:
+                sourced_clue_ids.add(clue_id)
+        elif node_type == "npc":
+            clue_id = node.get("gives_clue_id")
+            if isinstance(clue_id, str) and clue_id:
+                sourced_clue_ids.add(clue_id)
+            if isinstance(guardian_id, str) and guardian_id and node.get("id") == guardian_id:
+                parent_id = node.get("parent_id")
+                if isinstance(parent_id, str) and parent_id:
+                    guardian_room_id = parent_id
+
+    missing_clue_ids = [clue_id for clue_id in clue_text_by_id if clue_id not in sourced_clue_ids]
+    if not missing_clue_ids:
+        return
+
+    target_room_id = guardian_room_id or _infer_dm_start_node_id(nodes)
+    if not isinstance(target_room_id, str) or target_room_id not in room_ids:
+        # Set iteration order is not stable across runs; take the smallest id
+        # so the same payload is always repaired the same way.
+        target_room_id = min(room_ids, default=None)
+    if target_room_id is None:
+        return
+
+    gating_item_id = _select_synthetic_clue_gate_item_id(items, quest_chain)
+    if gating_item_id is None:
+        if not isinstance(items, list):
+            return
+        gating_item_id = "inspection_lens"
+        items.append(
+            {
+                "id": gating_item_id,
+                "label": "Inspection Lens",
+                "description": "A careful lens for reading faint inscriptions.",
+                "subtype": "puzzle",
+                "start_node_id": _infer_dm_start_node_id(nodes),
+            }
+        )
+
+    existing_node_ids = {
+        node.get("id")
+        for node in nodes
+        if isinstance(node, dict) and isinstance(node.get("id"), str) and node.get("id")
+    }
+    existing_safe_labels = {
+        parser_safe_text(node.get("label"))
+        for node in nodes
+        if isinstance(node, dict) and isinstance(node.get("label"), str) and node.get("label")
+    }
+    for clue_id in missing_clue_ids:
+        readable_id = _unique_world_id(f"{clue_id}_inscription", existing_node_ids)
+        label = _unique_world_label(f"{_humanize_identifier(clue_id)} Inscription", existing_safe_labels)
+        # Register what was just generated so later inscriptions cannot reuse it.
+        # NOTE(review): harmless if the _unique_world_* helpers already record
+        # their result in the set they are given; confirm against the helpers.
+        existing_node_ids.add(readable_id)
+        existing_safe_labels.add(parser_safe_text(label))
+        nodes.append(
+            {
+                "id": readable_id,
+                "type": "readable",
+                "label": label,
+                "description": f"A {label.lower()} can only be deciphered with the right tool.",
+                "parent_id": target_room_id,
+                "clue_id": clue_id,
+                "requires_item_id": gating_item_id,
+                "consumes_item": False,
+                "text_content": clue_text_by_id[clue_id] or f"A fragment about {_humanize_identifier(clue_id).lower()}.",
+            }
+        )
+        _insert_quest_step_before_guardian_talk(
+            quest_chain,
+            guardian_id=guardian_id,
+            step_id_base=f"inspect_{readable_id}",
+            description=f"Inspect the {label.lower()}.",
+            action=(
+                f"use({gating_item_id},{readable_id})"
+                if isinstance(gating_item_id, str) and gating_item_id
+                else f"read({readable_id})"
+            ),
+        )
+
+
+def _repair_take_action_aliases(payload: dict[str, Any]) -> None:
+    """Rewrite ``take(item, source)`` steps whose source is a fixture or NPC.
+
+    * Source is a fixture that reveals the item: the step becomes a take from
+      the fixture's parent, when the fixture has one.
+    * Source is an NPC that gives the item: the step becomes
+      ``give(required_item, npc)``, when the NPC requires an item.
+
+    All other steps are left as they are. Mutates the quest steps in place.
+    """
+    quest_chain = payload.get("quest_chain")
+    nodes = payload.get("nodes")
+    if not (isinstance(quest_chain, list) and isinstance(nodes, list)):
+        return
+
+    fixtures: dict[str, dict[str, Any]] = {}
+    npcs: dict[str, dict[str, Any]] = {}
+    for node in nodes:
+        if not isinstance(node, dict):
+            continue
+        node_id = node.get("id")
+        if not (isinstance(node_id, str) and node_id):
+            continue
+        kind = node.get("type")
+        if kind == "fixture":
+            fixtures[node_id] = node
+        elif kind == "npc":
+            npcs[node_id] = node
+
+    for step in quest_chain:
+        if not isinstance(step, dict):
+            continue
+        parsed = _extract_action_arguments(step.get("action"), "take")
+        if parsed is None or len(parsed) != 2:
+            continue
+        item_id, source_id = parsed
+
+        fixture = fixtures.get(source_id)
+        if fixture is not None and fixture.get("reveals_item_id") == item_id:
+            holder_id = fixture.get("parent_id")
+            if isinstance(holder_id, str) and holder_id:
+                step["action"] = f"take({item_id},{holder_id})"
+            # A matching fixture settles the step even when it has no parent.
+            continue
+
+        npc = npcs.get(source_id)
+        if npc is not None and npc.get("gives_item_id") == item_id:
+            price_item_id = npc.get("requires_item_id")
+            if isinstance(price_item_id, str) and price_item_id:
+                step["action"] = f"give({price_item_id},{source_id})"
+
+
+def _repair_take_sources_from_room_prereqs(payload: dict[str, Any]) -> None:
+    """Align ``take`` steps with the room their prerequisite ``go`` step enters.
+
+    For a ``take(item, source)`` step whose source is not a container, the
+    step's dependencies are searched for ``go(room)`` actions; the last one
+    found wins. If that room differs from the source, the step is rewritten to
+    take from that room and the item's ``start_node_id`` is set to it. Mutates
+    steps and items in place.
+    """
+    quest_chain = payload.get("quest_chain")
+    items = payload.get("items")
+    nodes = payload.get("nodes")
+    if not (isinstance(quest_chain, list) and isinstance(items, list)):
+        return
+
+    kind_by_node_id: dict[str, str] = {}
+    if isinstance(nodes, list):
+        kind_by_node_id = {
+            node.get("id"): node.get("type")
+            for node in nodes
+            if isinstance(node, dict) and isinstance(node.get("id"), str) and isinstance(node.get("type"), str)
+        }
+    items_by_id = {
+        entry.get("id"): entry
+        for entry in items
+        if isinstance(entry, dict) and isinstance(entry.get("id"), str) and entry.get("id")
+    }
+    steps_by_id = {
+        entry.get("step_id"): entry
+        for entry in quest_chain
+        if isinstance(entry, dict) and isinstance(entry.get("step_id"), str) and entry.get("step_id")
+    }
+
+    for step in quest_chain:
+        if not isinstance(step, dict):
+            continue
+        parsed = _extract_action_arguments(step.get("action"), "take")
+        if parsed is None or len(parsed) != 2:
+            continue
+        item_id, source_id = parsed
+        if kind_by_node_id.get(source_id) == "container":
+            continue
+        dependencies = step.get("requires_step_ids")
+        if not isinstance(dependencies, list):
+            continue
+
+        # The last dependency that is a go(...) step decides the room.
+        entered_room_id: str | None = None
+        for dependency_id in dependencies:
+            if not isinstance(dependency_id, str):
+                continue
+            prerequisite = steps_by_id.get(dependency_id)
+            if not isinstance(prerequisite, dict):
+                continue
+            destination = _extract_single_action_argument(prerequisite.get("action"), "go")
+            if destination:
+                entered_room_id = destination
+
+        if entered_room_id is None or entered_room_id == source_id:
+            continue
+        step["action"] = f"take({item_id},{entered_room_id})"
+        taken_item = items_by_id.get(item_id)
+        if isinstance(taken_item, dict):
+            taken_item["start_node_id"] = entered_room_id
+
+
+def _repair_missing_take_steps(payload: dict[str, Any]) -> None:
+    """Insert ``take`` steps for items a quest step needs but never acquired.
+
+    The quest chain is walked in order while a simulated inventory is kept:
+    ``take`` adds its item, ``give`` removes the offered item and adds the
+    NPC's ``gives_item_id`` (if any), and ``combine`` removes both inputs and
+    adds the output of a two-input recipe (if one matches).
+
+    Before each step, every item returned by ``_quest_required_item_ids`` that
+    is neither in the inventory nor produced elsewhere (recipe output, NPC
+    reward, fixture reveal) gets a ``take(item, source)`` step inserted ahead
+    of it. ``source`` is the room inferred from the step's prerequisites, or
+    else the item's ``start_node_id``. The item's ``start_node_id`` is then
+    set to ``source``.
+
+    Mutates ``payload["quest_chain"]`` and item rows in place. Does nothing
+    unless ``quest_chain`` and ``items`` are lists.
+    """
+    quest_chain = payload.get("quest_chain")
+    items = payload.get("items")
+    nodes = payload.get("nodes")
+    recipes = payload.get("recipes")
+    if not isinstance(quest_chain, list) or not isinstance(items, list):
+        return
+
+    item_start_nodes = {
+        item.get("id"): item.get("start_node_id")
+        for item in items
+        if isinstance(item, dict) and isinstance(item.get("id"), str)
+    }
+    # First item row per id, so an insert does not rescan ``items`` each time.
+    first_item_by_id: dict[str, dict[str, Any]] = {}
+    for candidate in items:
+        if isinstance(candidate, dict) and isinstance(candidate.get("id"), str):
+            first_item_by_id.setdefault(candidate["id"], candidate)
+
+    # Items the world hands out by other means never need a synthetic take step.
+    produced_item_ids = set()
+    recipe_outputs: dict[frozenset[str], str] = {}
+    if isinstance(recipes, list):
+        for recipe in recipes:
+            if not isinstance(recipe, dict):
+                continue
+            output_item_id = recipe.get("output_item_id")
+            input_item_ids = recipe.get("input_item_ids")
+            if isinstance(output_item_id, str) and output_item_id:
+                produced_item_ids.add(output_item_id)
+            if isinstance(output_item_id, str) and isinstance(input_item_ids, list) and len(input_item_ids) == 2:
+                recipe_outputs[frozenset(str(item_id) for item_id in input_item_ids)] = output_item_id
+    npc_rewards: dict[str, str] = {}
+    if isinstance(nodes, list):
+        for node in nodes:
+            if not isinstance(node, dict) or node.get("type") != "npc":
+                continue
+            npc_id = node.get("id")
+            gives_item_id = node.get("gives_item_id")
+            if isinstance(npc_id, str) and npc_id and isinstance(gives_item_id, str) and gives_item_id:
+                produced_item_ids.add(gives_item_id)
+                npc_rewards[npc_id] = gives_item_id
+        for node in nodes:
+            if not isinstance(node, dict) or node.get("type") != "fixture":
+                continue
+            reveals_item_id = node.get("reveals_item_id")
+            if isinstance(reveals_item_id, str) and reveals_item_id:
+                produced_item_ids.add(reveals_item_id)
+
+    inventory: set[str] = set()
+    step_by_id = {
+        step.get("step_id"): step
+        for step in quest_chain
+        if isinstance(step, dict) and isinstance(step.get("step_id"), str) and step.get("step_id")
+    }
+    index = 0
+    while index < len(quest_chain):
+        step = quest_chain[index]
+        if not isinstance(step, dict):
+            index += 1
+            continue
+        required_item_ids = _quest_required_item_ids(step.get("action"))
+        inserted_step = False
+        for item_id in required_item_ids:
+            if item_id in inventory or item_id in produced_item_ids:
+                continue
+            source_node_id = _infer_room_prereq_for_step(step, step_by_id) or item_start_nodes.get(item_id)
+            if not isinstance(source_node_id, str) or not source_node_id:
+                continue
+            new_step_id = _insert_quest_step_before_index(
+                quest_chain,
+                index=index,
+                step_id_base=f"take_{item_id}",
+                description=f"Take the {_humanize_identifier(item_id).lower()}.",
+                action=f"take({item_id},{source_node_id})",
+                allow_existing_action=True,
+            )
+            if new_step_id is not None:
+                # Register the inserted step (now at ``index``). The current step
+                # now requires it, so without this entry later room inference
+                # through the dependency chain would dead-end at the unknown id.
+                step_by_id[new_step_id] = quest_chain[index]
+                inventory.add(item_id)
+                item = first_item_by_id.get(item_id)
+                if isinstance(item, dict):
+                    item["start_node_id"] = source_node_id
+                inserted_step = True
+                # The original step shifted one slot to the right.
+                index += 1
+        if inserted_step:
+            step = quest_chain[index]
+            if not isinstance(step, dict):
+                index += 1
+                continue
+
+        # Apply the step's own effect to the simulated inventory.
+        arguments = _extract_action_arguments(step.get("action"), "take")
+        if arguments is not None and len(arguments) == 2:
+            inventory.add(arguments[0])
+            index += 1
+            continue
+
+        arguments = _extract_action_arguments(step.get("action"), "give")
+        if arguments is not None and len(arguments) == 2:
+            inventory.discard(arguments[0])
+            rewarded_item_id = npc_rewards.get(arguments[1])
+            if rewarded_item_id:
+                inventory.add(rewarded_item_id)
+            index += 1
+            continue
+
+        arguments = _extract_action_arguments(step.get("action"), "combine")
+        if arguments is not None and len(arguments) == 2:
+            inventory.discard(arguments[0])
+            inventory.discard(arguments[1])
+            output_item_id = recipe_outputs.get(frozenset(arguments))
+            if output_item_id:
+                inventory.add(output_item_id)
+            index += 1
+            continue
+        index += 1
+
+
+def _repair_guardian_ending(payload: dict[str, Any], *, guardian_id: Any, answer_string: Any) -> None:
+    """Make the quest end with ``talk(guardian)`` directly before the last ``submit``.
+
+    The last step whose action parses as ``submit(...)`` is located. When
+    ``answer_string`` is a non-empty string, that step's action is rewritten
+    to ``submit("<normalized answer>")``. Then:
+
+    * if ``submit`` is the final step and the step before it already talks to
+      the guardian, nothing more is done;
+    * if no step talks to the guardian, a new ``talk(guardian)`` step is
+      inserted directly before ``submit``;
+    * if the first guardian talk step is not directly before ``submit``, a
+      copy of it is inserted there. The earlier talk step is left in place.
+
+    In both insert cases ``submit`` ends up requiring only the inserted step.
+    Mutates ``payload["quest_chain"]`` in place. Does nothing when the chain
+    is missing or empty, or when it contains no ``submit`` step.
+    """
+    quest_chain = payload.get("quest_chain")
+    if not isinstance(quest_chain, list) or not quest_chain:
+        return
+
+    submit_index: int | None = None
+    for index in range(len(quest_chain) - 1, -1, -1):
+        step = quest_chain[index]
+        if isinstance(step, dict) and _extract_single_action_argument(step.get("action"), "submit") is not None:
+            submit_index = index
+            break
+    if submit_index is None:
+        return
+
+    submit_step = quest_chain[submit_index]
+    if not isinstance(submit_step, dict):
+        return
+    if isinstance(answer_string, str) and answer_string:
+        submit_step["action"] = f'submit("{normalize_answer_text(answer_string)}")'
+
+    # Without a usable guardian id there is nothing valid to talk to. Bail out
+    # rather than synthesising a bogus step such as ``talk(None)``.
+    if not isinstance(guardian_id, str) or not guardian_id:
+        return
+
+    talk_index = _guardian_talk_step_index(quest_chain, guardian_id=guardian_id)
+    if submit_index == len(quest_chain) - 1 and submit_index > 0:
+        penultimate = quest_chain[submit_index - 1]
+        if isinstance(penultimate, dict) and _extract_single_action_argument(penultimate.get("action"), "talk") == guardian_id:
+            return
+
+    if talk_index is None:
+        new_step_id = _insert_quest_step_before_index(
+            quest_chain,
+            index=submit_index,
+            step_id_base=f"talk_{guardian_id}",
+            description=f"Speak to the {_humanize_identifier(str(guardian_id)).lower()}.",
+            action=f"talk({guardian_id})",
+            allow_existing_action=True,
+        )
+        if new_step_id is not None:
+            submit_step["requires_step_ids"] = [new_step_id]
+        return
+
+    talk_step = quest_chain[talk_index]
+    if not isinstance(talk_step, dict):
+        return
+    if talk_index != submit_index - 1:
+        # Duplicate the existing talk step right before submit; the id is made
+        # unique by the insert helper.
+        new_step_id = _insert_quest_step_before_index(
+            quest_chain,
+            index=submit_index,
+            step_id_base=talk_step.get("step_id") or f"talk_{guardian_id}",
+            description=talk_step.get("description") or f"Speak to the {_humanize_identifier(str(guardian_id)).lower()}.",
+            action=talk_step.get("action") or f"talk({guardian_id})",
+            allow_existing_action=True,
+        )
+        if new_step_id is not None:
+            submit_step["requires_step_ids"] = [new_step_id]
+
+
+def _repair_locked_room_entry_steps(payload: dict[str, Any]) -> None:
+    """Insert missing ``unlock``/``open`` steps ahead of ``go`` steps through doors.
+
+    For each ``go(target)`` step, the room the player is in beforehand is
+    taken from the step's prerequisites: each id in ``requires_step_ids`` is
+    resolved with ``_infer_step_terminal_room`` and the last room found wins.
+    If none is found, ``meta["start_node_id"]`` is used. If the edge from that
+    room to ``target`` has a string ``door_node_id``, then:
+
+    * ``unlock(door, key)`` is inserted before the ``go`` step when the edge
+      has a string ``required_item_id`` and no identical action occurs earlier;
+    * ``open(door)`` is inserted before the ``go`` step when no identical
+      action occurs earlier.
+
+    Mutates ``payload["quest_chain"]`` in place. Does nothing unless
+    ``quest_chain`` and ``edges`` are lists and ``meta`` is a dict.
+    """
+    quest_chain = payload.get("quest_chain")
+    edges = payload.get("edges")
+    meta = payload.get("meta")
+    if not isinstance(quest_chain, list) or not isinstance(edges, list) or not isinstance(meta, dict):
+        return
+
+    # Only string endpoints are indexed. Lookups below always use string room
+    # ids, and unhashable ids (e.g. a list) would raise TypeError as dict keys.
+    edge_by_rooms = {
+        (edge.get("from_node_id"), edge.get("to_node_id")): edge
+        for edge in edges
+        if isinstance(edge, dict)
+        and isinstance(edge.get("from_node_id"), str)
+        and isinstance(edge.get("to_node_id"), str)
+    }
+    start_node_id = meta.get("start_node_id")
+    step_by_id = {
+        step.get("step_id"): step
+        for step in quest_chain
+        if isinstance(step, dict) and isinstance(step.get("step_id"), str) and step.get("step_id")
+    }
+    index = 0
+    while index < len(quest_chain):
+        step = quest_chain[index]
+        if not isinstance(step, dict):
+            index += 1
+            continue
+        target_room_id = _extract_single_action_argument(step.get("action"), "go")
+        if target_room_id is None:
+            index += 1
+            continue
+
+        # Resolve the room from the prerequisites, not from the step itself:
+        # a ``go`` step's own terminal room is its target, which would make
+        # the edge lookup below (target, target).
+        current_room_id: str | None = None
+        requires_step_ids = step.get("requires_step_ids")
+        if isinstance(requires_step_ids, list):
+            for dependency in requires_step_ids:
+                if not isinstance(dependency, str):
+                    continue
+                dependency_room = _infer_step_terminal_room(dependency, step_by_id, set())
+                if dependency_room:
+                    current_room_id = dependency_room
+        if current_room_id is None and isinstance(start_node_id, str):
+            current_room_id = start_node_id
+        if current_room_id is None:
+            index += 1
+            continue
+
+        edge = edge_by_rooms.get((current_room_id, target_room_id))
+        if not isinstance(edge, dict) or not isinstance(edge.get("door_node_id"), str):
+            index += 1
+            continue
+        door_id = edge["door_node_id"]
+        key_id = edge.get("required_item_id")
+        door_label = _humanize_identifier(door_id).lower()
+
+        # Unlock (when a key is required) must come before open.
+        planned_steps: list[tuple[str, str, str]] = []
+        if isinstance(key_id, str):
+            planned_steps.append((f"unlock_{door_id}", f"Unlock the {door_label}.", f"unlock({door_id},{key_id})"))
+        planned_steps.append((f"open_{door_id}", f"Open the {door_label}.", f"open({door_id})"))
+
+        inserted = False
+        for step_id_base, description, action in planned_steps:
+            if _action_exists_before_index(quest_chain, action, index):
+                continue
+            if _insert_quest_step_before_index(
+                quest_chain,
+                index=index,
+                step_id_base=step_id_base,
+                description=description,
+                action=action,
+                allow_existing_action=True,
+            ):
+                inserted = True
+                # The ``go`` step shifted one slot to the right.
+                index += 1
+        if inserted:
+            # Re-index so later steps can be resolved through the new steps.
+            step_by_id = {
+                candidate.get("step_id"): candidate
+                for candidate in quest_chain
+                if isinstance(candidate, dict)
+                and isinstance(candidate.get("step_id"), str)
+                and candidate.get("step_id")
+            }
+        index += 1
+
+
+def _select_synthetic_clue_gate_item_id(items: Any, quest_chain: Any) -> str | None:
+    """Pick the best-ranked item id among puzzle and key items.
+
+    Ranking (lower is better, ties broken by item id order):
+
+    * 0 - ``puzzle`` items taken somewhere in the quest chain, or that have a
+      ``start_node_id`` that is not ``None``;
+    * 1 - any other ``puzzle`` item;
+    * 2 - ``key`` items.
+
+    Items of other subtypes, non-dict rows and rows without a non-empty string
+    ``id`` are ignored. Returns ``None`` when ``items`` is not a list or no
+    item qualifies.
+    """
+    if not isinstance(items, list):
+        return None
+
+    taken_item_ids = _quest_taken_item_ids(quest_chain)
+    ranked: list[tuple[int, str]] = []
+    for item in items:
+        if not isinstance(item, dict):
+            continue
+        item_id = item.get("id")
+        subtype = item.get("subtype")
+        if not isinstance(item_id, str) or not item_id:
+            continue
+        if subtype == "puzzle":
+            already_placed = item_id in taken_item_ids or item.get("start_node_id") is not None
+            ranked.append((0 if already_placed else 1, item_id))
+        elif subtype == "key":
+            ranked.append((2, item_id))
+    return min(ranked)[1] if ranked else None
+
+
+def _humanize_identifier(identifier: str) -> str:
+    """Turn ``snake_case`` into space-separated capitalized words.
+
+    Empty segments (from leading, trailing or doubled underscores) are dropped.
+    If no segment remains, ``identifier`` is returned unchanged.
+    """
+    words = [segment.capitalize() for segment in identifier.split("_") if segment]
+    if not words:
+        return identifier
+    return " ".join(words)
+
+
+def _node_room_start_node_id(node: dict[str, Any], default_start_node_id: str | None) -> str | None:
+    """Return the node's ``parent_id`` when it is a non-empty string, else the default."""
+    parent_id = node.get("parent_id")
+    has_parent = isinstance(parent_id, str) and bool(parent_id)
+    return parent_id if has_parent else default_start_node_id
+
+
+def _unique_world_id(base_id: str, existing_ids: set[str]) -> str:
+    """Return ``base_id``, or ``base_id_<n>`` with the smallest free ``n >= 2``.
+
+    The chosen id is added to ``existing_ids`` before returning, so repeated
+    calls with the same set never hand out the same id twice.
+    """
+    chosen = base_id
+    counter = 1
+    while chosen in existing_ids:
+        counter += 1
+        chosen = f"{base_id}_{counter}"
+    existing_ids.add(chosen)
+    return chosen
+
+
+def _unique_world_label(base_label: str, existing_safe_labels: set[str]) -> str:
+    """Return ``base_label``, or ``"<base_label> <n>"`` with the smallest free ``n >= 2``.
+
+    Uniqueness is judged on ``parser_safe_text(label)``, and that form of the
+    chosen label is added to ``existing_safe_labels`` before returning.
+    """
+    chosen = base_label
+    counter = 1
+    while True:
+        if parser_safe_text(chosen) not in existing_safe_labels:
+            break
+        counter += 1
+        chosen = f"{base_label} {counter}"
+    existing_safe_labels.add(parser_safe_text(chosen))
+    return chosen
+
+
+def _insert_quest_step_before_guardian_talk(
+    quest_chain: Any,
+    *,
+    guardian_id: Any,
+    step_id_base: str,
+    description: str,
+    action: str,
+) -> str | None:
+    """Insert a new step directly before the first ``talk(guardian)`` step.
+
+    Returns the id of the inserted step, or ``None`` when nothing was
+    inserted: ``quest_chain`` is not a list, some step already has exactly
+    this ``action``, no step talks to ``guardian_id``, or the insert helper
+    declined.
+    """
+    if not isinstance(quest_chain, list):
+        return None
+    if any(isinstance(step, dict) and step.get("action") == action for step in quest_chain):
+        return None
+
+    talk_index = _guardian_talk_step_index(quest_chain, guardian_id=guardian_id)
+    if talk_index is None:
+        return None
+
+    return _insert_quest_step_before_index(
+        quest_chain,
+        index=talk_index,
+        step_id_base=step_id_base,
+        description=description,
+        action=action,
+    )
+
+
+def _quest_taken_item_ids(quest_chain: Any) -> set[str]:
+    """Collect the first argument of every ``take(...)`` action in the chain.
+
+    Returns an empty set when ``quest_chain`` is not a list. Non-dict steps and
+    actions that do not parse as ``take`` are skipped.
+    """
+    taken: set[str] = set()
+    if not isinstance(quest_chain, list):
+        return taken
+    for step in quest_chain:
+        parsed = _extract_action_arguments(step.get("action"), "take") if isinstance(step, dict) else None
+        if parsed and parsed[0]:
+            taken.add(parsed[0])
+    return taken
+
+
+def _infer_item_start_node_from_quest(quest_chain: Any, item_id: str) -> str | None:
+    """Return the source of the first two-argument ``take(item_id, source)`` step.
+
+    Returns ``None`` when ``quest_chain`` is not a list or no such step exists.
+    """
+    if not isinstance(quest_chain, list):
+        return None
+    for step in quest_chain:
+        if isinstance(step, dict):
+            parsed = _extract_action_arguments(step.get("action"), "take")
+            if parsed is not None and len(parsed) == 2 and parsed[0] == item_id:
+                return parsed[1]
+    return None
+
+
+def _infer_guardian_talk_room_from_quest(quest_chain: Any, *, guardian_id: str) -> str | None:
+    """Return the room of the nearest ``go(room)`` step before the guardian talk step.
+
+    Steps are scanned backwards from the first ``talk(guardian_id)`` step.
+    Returns ``None`` when there is no such talk step or no earlier ``go`` step
+    with a non-empty room.
+    """
+    talk_index = _guardian_talk_step_index(quest_chain, guardian_id=guardian_id)
+    if talk_index is None or not isinstance(quest_chain, list):
+        return None
+    for earlier_step in reversed(quest_chain[:talk_index]):
+        if isinstance(earlier_step, dict):
+            room_id = _extract_single_action_argument(earlier_step.get("action"), "go")
+            if room_id:
+                return room_id
+    return None
+
+
+def _guardian_talk_step_index(quest_chain: Any, *, guardian_id: Any) -> int | None:
+    """Return the index of the first ``talk(guardian_id)`` step, or ``None``.
+
+    ``None`` is also returned when ``quest_chain`` is not a list or
+    ``guardian_id`` is not a non-empty string.
+    """
+    usable_guardian = isinstance(guardian_id, str) and bool(guardian_id)
+    if not (isinstance(quest_chain, list) and usable_guardian):
+        return None
+    return next(
+        (
+            position
+            for position, step in enumerate(quest_chain)
+            if isinstance(step, dict)
+            and _extract_single_action_argument(step.get("action"), "talk") == guardian_id
+        ),
+        None,
+    )
+
+
+def _extract_action_arguments(action: Any, name: str) -> list[str] | None:
+    """Parse ``name(arg1,arg2,...)`` into its list of arguments.
+
+    Each argument is stripped of surrounding whitespace, then of double
+    quotes, then of single quotes. Returns ``None`` when ``action`` is not a
+    string, is not of the form ``name(...)``, or any argument ends up empty.
+    """
+    if not isinstance(action, str):
+        return None
+    opener = f"{name}("
+    if not (action.startswith(opener) and action.endswith(")")):
+        return None
+    parsed: list[str] = []
+    for piece in action[len(opener) : -1].split(","):
+        token = piece.strip().strip('"').strip("'")
+        if not token:
+            return None
+        parsed.append(token)
+    return parsed
+
+
+def _insert_quest_step_before_index(
+    quest_chain: Any,
+    *,
+    index: int,
+    step_id_base: str,
+    description: str,
+    action: str,
+    allow_existing_action: bool = False,
+) -> str | None:
+    """Insert a new step at ``index`` and rewire the step that used to be there.
+
+    The new step inherits the displaced step's string ``requires_step_ids``,
+    and the displaced step is changed to require only the new step. The new
+    step id is ``step_id_base`` made unique among existing step ids.
+
+    Returns the new step id, or ``None`` when nothing was inserted:
+    ``quest_chain`` is not a list, ``index`` is out of range, the step at
+    ``index`` is not a dict, or (unless ``allow_existing_action``) some step
+    already has exactly this ``action``.
+    """
+    if not isinstance(quest_chain, list) or not 0 <= index < len(quest_chain):
+        return None
+    displaced_step = quest_chain[index]
+    if not isinstance(displaced_step, dict):
+        return None
+    if not allow_existing_action:
+        for step in quest_chain:
+            if isinstance(step, dict) and step.get("action") == action:
+                return None
+
+    taken_step_ids: set[str] = set()
+    for step in quest_chain:
+        if isinstance(step, dict) and isinstance(step.get("step_id"), str) and step.get("step_id"):
+            taken_step_ids.add(step.get("step_id"))
+
+    inherited_requires: list[str] = []
+    previous_requires = displaced_step.get("requires_step_ids")
+    if isinstance(previous_requires, list):
+        inherited_requires = [entry for entry in previous_requires if isinstance(entry, str) and entry]
+
+    new_step_id = _unique_world_id(step_id_base, taken_step_ids)
+    new_step = {
+        "step_id": new_step_id,
+        "description": description,
+        "requires_step_ids": inherited_requires,
+        "action": action,
+    }
+    quest_chain.insert(index, new_step)
+    displaced_step["requires_step_ids"] = [new_step_id]
+    return new_step_id
+
+
+def _action_exists_before_index(quest_chain: Any, action: str, index: int) -> bool:
+    """Report whether any dict step in ``quest_chain[:index]`` has exactly this ``action``."""
+    if not isinstance(quest_chain, list):
+        return False
+    return any(
+        isinstance(earlier_step, dict) and earlier_step.get("action") == action
+        for earlier_step in quest_chain[:index]
+    )
+
+
+def _quest_required_item_ids(action: Any) -> list[str]:
+    """Return the item ids a two-argument action needs in the inventory.
+
+    * ``use(a, b)`` and ``give(a, b)`` need the first argument;
+    * ``unlock(a, b)`` needs the second argument;
+    * ``combine(a, b)`` needs both arguments.
+
+    Any other action, or one with a different argument count, yields ``[]``.
+    """
+    # Verb -> positions of the arguments that are items, checked in this order.
+    item_positions = {"use": (0,), "unlock": (1,), "give": (0,), "combine": (0, 1)}
+    for verb, positions in item_positions.items():
+        parsed = _extract_action_arguments(action, verb)
+        if parsed is not None and len(parsed) == 2:
+            return [parsed[position] for position in positions]
+    return []
+
+
+def _infer_room_prereq_for_step(step: Any, step_by_id: dict[str, Any]) -> str | None:
+    """Infer the room associated with ``step`` from the quest dependency graph.
+
+    First tries ``_infer_step_terminal_room`` starting from the step's own
+    ``step_id`` (for a ``go`` step this yields that step's own target).
+    If that gives nothing, falls back to the direct prerequisites in
+    ``requires_step_ids``: the room of the last one whose action is
+    ``go(room)``. Returns ``None`` when no room can be found.
+    """
+    if not isinstance(step, dict):
+        return None
+
+    own_step_id = step.get("step_id")
+    if isinstance(own_step_id, str) and own_step_id:
+        room_via_chain = _infer_step_terminal_room(own_step_id, step_by_id, set())
+        if room_via_chain is not None:
+            return room_via_chain
+
+    prerequisites = step.get("requires_step_ids")
+    if not isinstance(prerequisites, list):
+        return None
+    latest_room: str | None = None
+    for prerequisite_id in prerequisites:
+        prerequisite = step_by_id.get(prerequisite_id) if isinstance(prerequisite_id, str) else None
+        if not isinstance(prerequisite, dict):
+            continue
+        candidate_room = _extract_single_action_argument(prerequisite.get("action"), "go")
+        if candidate_room:
+            latest_room = candidate_room
+    return latest_room
+
+
+def _infer_step_terminal_room(step_id: str, step_by_id: dict[str, Any], seen: set[str]) -> str | None:
+    """Find the room reached by ``step_id`` or, recursively, by its prerequisites.
+
+    A ``go(room)`` step resolves to its own room. Any other step resolves to
+    the room of the last prerequisite (in ``requires_step_ids`` order) that
+    resolves to one. ``seen`` holds the step ids on the current path and
+    guards against dependency cycles; the caller's set is not mutated.
+    """
+    if step_id in seen:
+        return None
+    step = step_by_id.get(step_id)
+    if not isinstance(step, dict):
+        return None
+
+    own_target = _extract_single_action_argument(step.get("action"), "go")
+    if own_target:
+        return own_target
+
+    prerequisites = step.get("requires_step_ids")
+    if not isinstance(prerequisites, list):
+        return None
+
+    # Fresh set per level so sibling branches do not see each other's path.
+    path = seen | {step_id}
+    resolved_room: str | None = None
+    for prerequisite_id in prerequisites:
+        if isinstance(prerequisite_id, str):
+            branch_room = _infer_step_terminal_room(prerequisite_id, step_by_id, path)
+            if branch_room:
+                resolved_room = branch_room
+    return resolved_room
+
+
+def _reachable_passage_room_ids(payload: dict[str, Any], *, start_node_id: Any) -> set[str]:
+    """Return every node id reachable from ``start_node_id`` via ``passage`` edges.
+
+    Edges are followed in their ``from_node_id`` -> ``to_node_id`` direction
+    only, and edges of any other ``type`` are ignored. The start node is
+    always part of the result. Returns an empty set when ``start_node_id`` is
+    not a non-empty string, and just the start node when ``payload["edges"]``
+    is not a list.
+    """
+    if not isinstance(start_node_id, str) or not start_node_id:
+        return set()
+    edges = payload.get("edges")
+    if not isinstance(edges, list):
+        return {start_node_id}
+
+    neighbours: dict[str, set[str]] = {}
+    for edge in edges:
+        if not isinstance(edge, dict) or edge.get("type") != "passage":
+            continue
+        origin = edge.get("from_node_id")
+        destination = edge.get("to_node_id")
+        if isinstance(origin, str) and isinstance(destination, str):
+            neighbours.setdefault(origin, set()).add(destination)
+
+    visited = {start_node_id}
+    pending = [start_node_id]
+    while pending:
+        newly_found = neighbours.get(pending.pop(), set()) - visited
+        visited |= newly_found
+        pending.extend(newly_found)
+    return visited
+
+
+def _extract_single_action_argument(action: Any, name: str) -> str | None:
+    """Return the text between the parentheses of ``name(...)`` as one argument.
+
+    Surrounding whitespace is stripped, one pair of matching single or double
+    quotes is removed, and the result is stripped again. Returns ``None`` when
+    ``action`` is not a string, is not of the form ``name(...)``, or the
+    parentheses contain only whitespace. A quoted empty argument yields ``""``.
+    """
+    if not isinstance(action, str):
+        return None
+    opener = f"{name}("
+    if not (action.startswith(opener) and action.endswith(")")):
+        return None
+    inner = action[len(opener) : -1].strip()
+    if not inner:
+        return None
+    first_char = inner[0]
+    if first_char in {'"', "'"} and inner[-1] == first_char:
+        inner = inner[1:-1]
+    return inner.strip()
+
+
+def _load_dm_world_definition(text: str, *, allow_repair: bool) -> WorldDefinition:
+    """Parse a completion into a validated ``WorldDefinition``.
+
+    The JSON payload is extracted with ``_try_parse_completion_json``, passed
+    through ``_repair_dm_world_payload`` when ``allow_repair`` is true, and
+    then validated.
+
+    Raises:
+        ValueError: if the completion does not contain a JSON object.
+        Exception: whatever ``WorldDefinition.model_validate`` raises for an
+            invalid payload propagates unchanged.
+    """
+    payload = _try_parse_completion_json(text)
+    if not isinstance(payload, dict):
+        raise ValueError("Completion did not contain a JSON object.")
+
+    if allow_repair:
+        payload = _repair_dm_world_payload(payload)
+    return WorldDefinition.model_validate(payload)
+
+
+def _find_json_object_span(text: str) -> tuple[int, int] | None:
+    """Locate the first balanced ``{...}`` span in ``text``.
+
+    Braces inside double-quoted strings are ignored, and backslash escapes
+    inside strings are honoured. Closing braces seen before any opening brace
+    are skipped. Returns ``(start, end)`` such that ``text[start:end]`` is the
+    span, or ``None`` when no balanced object is found.
+    """
+    opened_at: int | None = None
+    nesting = 0
+    inside_string = False
+    skip_next = False
+    for position, char in enumerate(text):
+        if inside_string:
+            if skip_next:
+                # Character after a backslash: never terminates the string.
+                skip_next = False
+            elif char == "\\":
+                skip_next = True
+            elif char == '"':
+                inside_string = False
+        elif char == '"':
+            inside_string = True
+        elif char == "{":
+            if opened_at is None:
+                opened_at = position
+            nesting += 1
+        elif char == "}" and nesting > 0:
+            nesting -= 1
+            if nesting == 0 and opened_at is not None:
+                return opened_at, position + 1
+    return None
+
+
+def _strip_code_fences(text: str) -> str:
+    """Strip a surrounding Markdown code fence from ``text``.
+
+    The text is trimmed first. If it then starts with three backticks, the
+    opening fence line is dropped, and so is the last line when it consists
+    only of three backticks; the remainder is trimmed again. Text without a
+    leading fence is returned trimmed.
+    """
+    body = text.strip()
+    if not body.startswith("```"):
+        return body
+    # The first line is the opening fence (possibly with a language tag).
+    rows = body.splitlines()[1:]
+    if rows and rows[-1].strip() == "```":
+        rows.pop()
+    return "\n".join(rows).strip()
+
+
+def _normalize_outer_completion_text(text: str) -> str:
+    """Remove tool-call matches, empty think blocks and code fences, then trim.
+
+    Applies ``_TOOL_CALL_RE`` and ``_EMPTY_THINK_RE`` substitutions (in that
+    order) before stripping code fences.
+    """
+    stripped_of_tools = _TOOL_CALL_RE.sub("", text)
+    stripped_of_thinks = _EMPTY_THINK_RE.sub("", stripped_of_tools)
+    return _strip_code_fences(stripped_of_thinks).strip()
+
+
+def _string_key_coverage(value: Any, keys: tuple[str, ...]) -> float:
+    """Return the fraction of ``keys`` that are present in ``value``.
+
+    Returns ``0.0`` when ``value`` is not a dict, and also when ``keys`` is
+    empty (there is nothing to cover, and dividing by zero must be avoided).
+    """
+    if not isinstance(value, dict) or not keys:
+        return 0.0
+    return sum(1 for key in keys if key in value) / len(keys)
+
+
+def _range_score(value: int, lower: int, upper: int) -> float:
+    """Score how well ``value`` fits the inclusive range ``[lower, upper]``.
+
+    Inside the range the score is ``1.0``. Below it, the score is
+    ``value / max(1, lower)``; above it, the score drops by the overshoot
+    relative to ``max(1, upper)``. The result is never negative.
+    """
+    if value < lower:
+        return max(0.0, value / max(1, lower))
+    if value > upper:
+        overshoot = value - upper
+        return max(0.0, 1.0 - (overshoot / max(1, upper)))
+    return 1.0
+
+
+def _compactness_score(length: int, target_max: int) -> float:
+    """Return ``1.0`` up to ``target_max``, then fall off linearly with the overflow.
+
+    The overflow is measured relative to ``max(1, target_max)`` and the score
+    is never negative.
+    """
+    excess = length - target_max
+    if excess <= 0:
+        return 1.0
+    return max(0.0, 1.0 - (excess / max(1, target_max)))
+
+
+def _dm_structural_prior_score(world: dict[str, Any], requested_ratio: float | None) -> float:
+    """Compute a weighted structural score in ``[0, 1]`` for a raw world payload.
+
+    Combines field coverage, win-condition shape, collection sizes, type
+    validity, text compactness and guardian presence. When ``requested_ratio``
+    is given, a difficulty-match component is added. The weighted sum is
+    divided by the total weight and clamped to ``[0, 1]``. Top-level fields
+    that are not lists are treated as empty.
+    """
+
+    def _list_field(field: str) -> list[Any]:
+        rows = world.get(field)
+        return rows if isinstance(rows, list) else []
+
+    meta = world.get("meta")
+    nodes = _list_field("nodes")
+    edges = _list_field("edges")
+    items = _list_field("items")
+    clues = _list_field("clues")
+    recipes = _list_field("recipes")
+    quest_chain = _list_field("quest_chain")
+
+    # (weight, score) pairs; order is kept stable so the float sums are reproducible.
+    weighted_scores = [
+        (0.16, _string_key_coverage(world, _DM_REQUIRED_TOP_LEVEL_FIELDS)),
+        (0.08, _string_key_coverage(meta, ("title", "difficulty_target", "start_node_id", "win_condition"))),
+        (0.10, _dm_win_condition_score(meta)),
+        (0.10, _range_score(len(nodes), 10, 16)),
+        (0.07, _range_score(len(items), 5, 8)),
+        (0.09, _range_score(len(clues), 3, 5)),
+        (0.04, _range_score(len(recipes), 0, 1)),
+        (0.10, _range_score(len(quest_chain), 12, 20)),
+        (0.06, _valid_type_fraction(nodes, "type", _DM_ALLOWED_NODE_TYPES)),
+        (0.04, _valid_type_fraction(edges, "type", _DM_ALLOWED_EDGE_TYPES)),
+        (0.04, _valid_type_fraction(items, "subtype", _DM_ALLOWED_ITEM_TYPES)),
+        (0.06, _compact_world_text_score(nodes, items, clues, quest_chain)),
+        (0.06, _guardian_presence_score(meta, nodes)),
+    ]
+    if requested_ratio is not None:
+        weighted_scores.append((0.10, _difficulty_ratio_score(meta, requested_ratio)))
+
+    numerator = sum(weight * score for weight, score in weighted_scores)
+    denominator = sum(weight for weight, _ in weighted_scores)
+    return _clamp(numerator / max(1e-6, denominator), 0.0, 1.0)
+
+
+def _dm_win_condition_score(meta: Any) -> float:
+    """Score the ``win_condition`` entry of ``meta``, capped at ``1.0``.
+
+    Starts from the coverage of ``type``, ``target_npc_id`` and
+    ``answer_string``, then adds ``0.25`` when ``type`` is ``"deduce"`` and
+    another ``0.25`` when ``answer_string`` fully matches
+    ``_LOWERCASE_ANSWER_RE``. Returns ``0.0`` when ``meta`` or its
+    ``win_condition`` is not a dict.
+    """
+    win_condition = meta.get("win_condition") if isinstance(meta, dict) else None
+    if not isinstance(win_condition, dict):
+        return 0.0
+
+    score = _string_key_coverage(win_condition, ("type", "target_npc_id", "answer_string"))
+    if win_condition.get("type") == "deduce":
+        score += 0.25
+    answer = win_condition.get("answer_string")
+    answer_is_well_formed = isinstance(answer, str) and bool(_LOWERCASE_ANSWER_RE.fullmatch(answer))
+    if answer_is_well_formed:
+        score += 0.25
+    return min(1.0, score)
+
+
+def _guardian_presence_score(meta: Any, nodes: list[Any]) -> float:
+    """Return ``1.0`` when the win condition's target NPC exists among ``nodes``.
+
+    The target is ``meta["win_condition"]["target_npc_id"]`` and must be a
+    string; a matching node is a dict with ``type == "npc"`` and that ``id``.
+    Every other situation scores ``0.0``.
+    """
+    win_condition = meta.get("win_condition") if isinstance(meta, dict) else None
+    if not isinstance(win_condition, dict):
+        return 0.0
+    guardian_id = win_condition.get("target_npc_id")
+    if not isinstance(guardian_id, str):
+        return 0.0
+    for node in nodes:
+        if isinstance(node, dict) and node.get("type") == "npc" and node.get("id") == guardian_id:
+            return 1.0
+    return 0.0
+
+
+def _difficulty_ratio_score(meta: Any, requested_ratio: float) -> float:
+    """Score how close ``meta["difficulty_target"]`` is to ``requested_ratio``.
+
+    The score is ``1 - |actual - requested|``, floored at ``0.0``. Returns
+    ``0.0`` when ``meta`` is not a dict or the target cannot be converted with
+    ``float()``.
+    """
+    if not isinstance(meta, dict):
+        return 0.0
+    raw_target = meta.get("difficulty_target")
+    try:
+        actual_ratio = float(raw_target)
+    except Exception:
+        return 0.0
+    gap = abs(actual_ratio - requested_ratio)
+    return max(0.0, 1.0 - gap)
+
+
+def _valid_type_fraction(rows: list[Any], key: str, allowed_values: set[str]) -> float:
+    """Return the fraction of dict rows whose ``key`` value is an allowed string.
+
+    Non-dict rows are excluded from both numerator and denominator. Returns
+    ``0.0`` when there are no dict rows. Values that are not strings (for
+    example a list or dict) simply count as invalid; they are never tested
+    for set membership, which would raise ``TypeError`` for unhashable values.
+    """
+    typed_rows = [row for row in rows if isinstance(row, dict)]
+    if not typed_rows:
+        return 0.0
+    valid = 0
+    for row in typed_rows:
+        value = row.get(key)
+        if isinstance(value, str) and value in allowed_values:
+            valid += 1
+    return valid / len(typed_rows)
+
+
+def _compact_world_text_score(
+    nodes: list[Any],
+    items: list[Any],
+    clues: list[Any],
+    quest_chain: list[Any],
+) -> float:
+    """Score how compact the world's text fields are on average.
+
+    Collects the lengths of the string-valued ``label``/``description`` of
+    nodes and items, ``text`` of clues, and ``description``/``action`` of
+    quest steps. The truncated mean length is scored with
+    ``_compactness_score`` against a target of 80. Returns ``0.0`` when no
+    text is found.
+    """
+    text_sources = (
+        (nodes, ("label", "description")),
+        (items, ("label", "description")),
+        (clues, ("text",)),
+        (quest_chain, ("description", "action")),
+    )
+    lengths = [
+        len(row[field])
+        for rows, fields in text_sources
+        for row in rows
+        if isinstance(row, dict)
+        for field in fields
+        if isinstance(row.get(field), str)
+    ]
+    if not lengths:
+        return 0.0
+    mean_length = sum(lengths) / len(lengths)
+    return _compactness_score(int(mean_length), 80)
+
+
+def _validation_error_score(errors: list[dict[str, Any]]) -> float:
+    """Convert a list of validation errors into a score in ``[0, 1]``.
+
+    An empty list scores ``0.0``. Otherwise the score is ``1.0`` minus the sum
+    of per-error penalties, clamped to ``[0, 1]``:
+
+    * ``extra_forbidden``: 0.05
+    * ``missing*`` on ``label`` or ``description``: 0.02
+    * ``missing*`` on ``text_content``: 0.05
+    * any other ``missing*``: 0.06
+    * everything else: 0.08
+
+    The field name is the last element of the error's ``loc``.
+    """
+    if not errors:
+        return 0.0
+
+    def _penalty_for(error: dict[str, Any]) -> float:
+        kind = str(error.get("type", ""))
+        location = tuple(str(part) for part in error.get("loc", ()))
+        field_name = location[-1] if location else ""
+        if kind == "extra_forbidden":
+            return 0.05
+        if not kind.startswith("missing"):
+            return 0.08
+        if field_name in {"label", "description"}:
+            return 0.02
+        if field_name == "text_content":
+            return 0.05
+        return 0.06
+
+    total_penalty = 0.0
+    for error in errors:
+        total_penalty += _penalty_for(error)
+    return _clamp(1.0 - total_penalty, 0.0, 1.0)
+
+
+def _compile_error_penalty(error_message: str) -> float:
+    """Map a compile error message to a negative penalty by substring matching.
+
+    The message is lower-cased and checked against the rules below in order;
+    the first matching rule decides. An empty message yields ``-0.5`` and a
+    message matching no rule yields ``-0.75``.
+    """
+    message = error_message.lower()
+    if not message:
+        return -0.5
+
+    # Each rule: (penalty, alternatives). A rule matches when, for at least one
+    # alternative, every fragment of that alternative occurs in the message.
+    rules: tuple[tuple[float, tuple[tuple[str, ...], ...]], ...] = (
+        (-0.35, (("between 3 and 5 clues",),)),
+        (-0.45, (("duplicate world id",), ("duplicate ",))),
+        (-0.45, (("requires_step_id",), ("requires_step_with",))),
+        (-0.50, (("requires requires_item_id",),)),
+        (-0.55, (("must live in a location or junction",),)),
+        (-0.60, (("fixture", "requires unknown item"),)),
+        (-0.65, (("unknown item",), ("unknown clue",), ("unknown node",))),
+        (-0.65, (("must reveal exactly one item or readable",),)),
+        (-0.70, (("guardian npc cannot have trade fields",),)),
+        (-0.75, (("unused decorative items",), ("clue '",))),
+        (-0.80, (("final quest step",), ("penultimate quest step",))),
+        (-0.85, (("unreachable",), ("guardian room",))),
+        (-0.85, (("closed door",), ("locked door",), ("does not match key",))),
+        (-0.90, (("quest ",), ("unsupported quest action",))),
+    )
+    for penalty, alternatives in rules:
+        if any(all(fragment in message for fragment in fragments) for fragments in alternatives):
+            return penalty
+    return -0.75
+
+
+def _completion_tool_calls(completion: Any) -> list[dict[str, Any]]:  # Extract normalized tool calls from a completion's text.
+    return _extract_tool_calls_from_text(_completion_text(completion))  # _completion_text is defined elsewhere in this module
+
+
+def _extract_tool_calls_from_text(text: str) -> list[dict[str, Any]]:  # Return normalized tool calls found in text, or [] when none can be parsed.
+    tool_calls: list[dict[str, Any]] = []
+    for raw_payload in _TOOL_CALL_RE.findall(text):  # first pass: every match of the module-level tool-call regex
+        try:
+            payload = json.loads(raw_payload)
+        except Exception:
+            continue  # a match that cannot be parsed as JSON is skipped rather than failing the whole extraction
+        normalized = _normalize_tool_call(payload, source="tool_call")
+        if normalized is not None:
+            tool_calls.append(normalized)
+
+    if tool_calls:
+        return tool_calls
+
+    payload = _try_parse_completion_json(text)  # fallback: treat the whole completion as one JSON action
+    normalized = _normalize_tool_call(payload, source="json_action")  # non-dict payloads normalize to None
+    if normalized is None:
+        return []
+    return [normalized]
+
+
+def _normalize_tool_call(payload: Any, *, source: str) -> dict[str, Any] | None:  # Coerce a parsed payload into {"name", "arguments", "source"}, or None if unrecognized.
+    if not isinstance(payload, dict):
+        return None
+
+    if payload.get("type") == "function" and isinstance(payload.get("function"), dict):
+        payload = payload["function"]  # unwrap {"type": "function", "function": {...}} envelopes
+
+    if isinstance(payload.get("name"), str):  # shape 1: {"name": ..., "arguments": {...}}
+        arguments = payload.get("arguments", {})  # a missing "arguments" key counts as no arguments
+        if not isinstance(arguments, dict):
+            return None  # non-dict arguments (including JSON-encoded strings) are rejected
+        return {"name": payload["name"], "arguments": arguments, "source": source}
+
+    action = payload.get("action")  # shape 2: {"action": {"tool": ..., <argument keys>}}
+    if isinstance(action, dict) and isinstance(action.get("tool"), str):
+        arguments = {key: value for key, value in action.items() if key != "tool"}  # every key except "tool" becomes an argument
+        return {"name": action["tool"], "arguments": arguments, "source": source}
+    return None
+
+
+def _hero_act_semantics_reward(arguments: Any) -> float:  # Score the "command" argument of an act call: 1.0, 0.85, 0.40 or 0.0.
+    if not isinstance(arguments, dict):
+        return 0.0
+    command = arguments.get("command")
+    if not isinstance(command, str) or not command.strip():
+        return 0.0  # missing, non-string or blank command
+    normalized_command = command.strip().lower()
+    parsed = parse_cli_command(command)  # parse_cli_command comes from elsewhere in the project; only its .valid flag is read here
+    if not parsed.valid:
+        recovered = parse_cli_command(normalized_command)
+        return 0.40 if recovered.valid else 0.0  # partial credit when only the stripped, lower-cased form parses
+    return 1.0 if command == normalized_command else 0.85  # full credit only when the command was already in normalized form
+
+
+def _hero_scratchpad_write_reward(arguments: Any) -> float:  # Score a scratchpad write from its "mode" and "content" arguments; result is at most 1.0.
+    if not isinstance(arguments, dict):
+        return 0.0
+    mode = arguments.get("mode")
+    content = arguments.get("content")
+    score = 0.0
+    if mode in {"append", "replace"}:
+        score += 0.45  # recognized write mode
+    if isinstance(content, str) and content.strip():
+        score += 0.35  # non-blank content
+        score += 0.20 * _compactness_score(len(content), 240)  # weighted compactness term; uses the unstripped length with 240 as the second argument
+    return min(1.0, score)
+
+
+def _clamp(value: float, lower: float, upper: float) -> float:  # Limit value to the range [lower, upper].
+    return max(lower, min(upper, value))  # the upper bound is applied first, then the lower bound
+
+
+def _require_training_dependencies() -> None:  # Raise RuntimeError if the module-level TRAINING_IMPORT_ERROR was recorded.
+    if TRAINING_IMPORT_ERROR is not None:
+        raise RuntimeError(
+            "Training dependencies are unavailable. Install the project with the training extras before using GRPO."
+        ) from TRAINING_IMPORT_ERROR  # chain the stored error so the original failure stays visible in the traceback
+
+
+def _require_vllm_if_requested(config: GRPOLaunchConfig) -> None:  # Raise RuntimeError when config.use_vllm is set but the vllm package cannot be found.
+    if not config.use_vllm:
+        return
+    if importlib.util.find_spec("vllm") is None:  # looks the package up without importing it
+        raise RuntimeError(
+            "vLLM is not installed but --use-vllm was requested. Install vllm in the training environment first."
+        )
diff --git a/agents/train/joint.py b/agents/train/joint.py
new file mode 100644
index 0000000000000000000000000000000000000000..3082c8143788cbe63adb9e7ff7e8a89fbd88198d
--- /dev/null
+++ b/agents/train/joint.py
@@ -0,0 +1,278 @@
+from __future__ import annotations
+
+import json
+import os
+from contextlib import contextmanager
+from dataclasses import asdict, dataclass, replace
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any, Callable, Iterator
+
+from .grpo import DMClosedLoopConfig, GRPOLaunchConfig, run_dm_grpo, run_hero_grpo
+
+
+@dataclass(frozen=True)  # immutable settings for one joint hero/DM training run
+class JointTrainingConfig:
+    root_dir: Path  # receives joint_state.json and one cycle_NN/ directory per cycle
+    cycles: int  # number of hero+DM cycles; run_joint_training_loop rejects values below 1
+    hero_config: GRPOLaunchConfig  # base config for hero phases; output_dir, run_name and resume_adapter_path are overridden per phase
+    dm_config: GRPOLaunchConfig  # base config for DM phases; the same three fields are overridden per phase
+    target_ratios: list[float] | None = None  # forwarded unchanged to run_dm_grpo
+    hero_world_path: Path | None = None  # forwarded to run_hero_grpo as world_path
+    interface_provider: str | None = None  # the interface_* fields are forwarded to both phases
+    interface_model: str | None = None
+    interface_narrate: bool = False
+    interface_translation_mode: str | None = None
+    hero_max_game_steps: int = 40  # forwarded to the hero phase and to the DM closed-loop config
+    hero_max_tool_calls: int = 80  # forwarded to the hero phase and to the DM closed-loop config
+    hero_max_tool_calling_iterations: int = 32  # forwarded to the hero phase only
+
+
+def run_joint_training_loop(config: JointTrainingConfig) -> Path:  # Run config.cycles rounds of hero-then-DM training; returns config.root_dir.
+    if config.cycles < 1:
+        raise ValueError("cycles must be at least 1.")
+
+    config.root_dir.mkdir(parents=True, exist_ok=True)
+    latest_hero_adapter = _initial_adapter_path(config.hero_config.resume_adapter_path)  # None when unset or when the path does not exist
+    latest_dm_adapter = _initial_adapter_path(config.dm_config.resume_adapter_path)
+    phases: list[dict[str, Any]] = []  # phase-state dicts in execution order; written into the manifest
+    _write_manifest(config, phases, status="running")
+
+    try:
+        for cycle_index in range(config.cycles):
+            cycle_number = cycle_index + 1  # cycles are numbered from 1 in directory and run names
+            cycle_dir = config.root_dir / f"cycle_{cycle_number:02d}"
+            hero_dir = cycle_dir / "hero"
+            dm_dir = cycle_dir / "dm"
+
+            hero_result = _run_or_resume_hero_phase(
+                config=config,
+                cycle_number=cycle_number,
+                output_dir=hero_dir,
+                resume_adapter_path=latest_hero_adapter,
+                phases=phases,
+                on_phase_state_change=lambda: _write_manifest(config, phases, status="running"),  # refresh the manifest as soon as the phase is registered
+            )
+            latest_hero_adapter = hero_result  # the phase output directory serves as the adapter path from here on
+            _write_manifest(config, phases, status="running")
+
+            dm_result = _run_or_resume_dm_phase(
+                config=config,
+                cycle_number=cycle_number,
+                output_dir=dm_dir,
+                resume_adapter_path=latest_dm_adapter,
+                hero_adapter_path=latest_hero_adapter,  # the DM phase runs against the hero adapter just produced
+                phases=phases,
+                on_phase_state_change=lambda: _write_manifest(config, phases, status="running"),
+            )
+            latest_dm_adapter = dm_result
+            _write_manifest(config, phases, status="running")
+    except Exception as exc:
+        _write_manifest(config, phases, status="failed", error=str(exc))  # record the failure, then re-raise; the failing phase entry keeps status "running"
+        raise
+
+    _write_manifest(
+        config,
+        phases,
+        status="completed",
+        latest_hero_adapter_path=str(latest_hero_adapter) if latest_hero_adapter is not None else None,  # adapter paths are passed only to this final write
+        latest_dm_adapter_path=str(latest_dm_adapter) if latest_dm_adapter is not None else None,
+    )
+    return config.root_dir
+
+
+def _run_or_resume_hero_phase(  # Run one hero GRPO phase, or skip it if its phase_state.json is already "completed"; returns output_dir.
+    *,
+    config: JointTrainingConfig,
+    cycle_number: int,
+    output_dir: Path,
+    resume_adapter_path: Path | None,
+    phases: list[dict[str, Any]],  # mutated: this phase's state dict is appended
+    on_phase_state_change: Callable[[], None] | None = None,  # called once, after the "running" state is written
+) -> Path:
+    state_path = output_dir / "phase_state.json"
+    existing_state = _load_phase_state(state_path)
+    if existing_state is not None and existing_state.get("status") == "completed":
+        phases.append(existing_state)  # resume: reuse the recorded state and do not retrain
+        return output_dir
+
+    output_dir.mkdir(parents=True, exist_ok=True)
+    run_name = config.hero_config.run_name or f"{config.root_dir.name}-hero-cycle-{cycle_number:02d}"  # an explicit run_name wins over the generated one
+    phase_state = {
+        "phase": "hero",
+        "cycle": cycle_number,
+        "status": "running",
+        "run_name": run_name,
+        "output_dir": str(output_dir),
+        "resume_adapter_path": None if resume_adapter_path is None else str(resume_adapter_path),
+        "started_at": _utc_now(),
+    }
+    phases.append(phase_state)  # same dict object, so later status updates also show up in the manifest list
+    _write_json(state_path, phase_state)
+    if on_phase_state_change is not None:
+        on_phase_state_change()
+
+    phase_config = replace(  # copy of the hero config with per-phase overrides
+        config.hero_config,
+        output_dir=output_dir,
+        run_name=run_name,
+        resume_adapter_path=None if resume_adapter_path is None else str(resume_adapter_path),
+    )
+    with _wandb_phase_env(group=config.root_dir.name, job_type="hero"):  # WANDB_RUN_GROUP / WANDB_JOB_TYPE are set only for the duration of training
+        run_hero_grpo(
+            phase_config,
+            world_path=config.hero_world_path,
+            artifacts_root=output_dir / "artifacts",
+            interface_provider=config.interface_provider,
+            interface_model=config.interface_model,
+            interface_narrate=config.interface_narrate,
+            interface_translation_mode=config.interface_translation_mode,
+            max_game_steps=config.hero_max_game_steps,
+            max_tool_calls=config.hero_max_tool_calls,
+            max_tool_calling_iterations=config.hero_max_tool_calling_iterations,
+        )
+
+    phase_state["status"] = "completed"  # reached only when training returns; an exception leaves the state file at "running"
+    phase_state["completed_at"] = _utc_now()
+    _write_json(state_path, phase_state)
+    return output_dir
+
+
+def _run_or_resume_dm_phase(  # Run one DM GRPO phase against a hero adapter, or skip it if already "completed"; returns output_dir.
+    *,
+    config: JointTrainingConfig,
+    cycle_number: int,
+    output_dir: Path,
+    resume_adapter_path: Path | None,
+    hero_adapter_path: Path | None,  # required; None raises RuntimeError
+    phases: list[dict[str, Any]],  # mutated: this phase's state dict is appended
+    on_phase_state_change: Callable[[], None] | None = None,  # called once, after the "running" state is written
+) -> Path:
+    if hero_adapter_path is None:
+        raise RuntimeError("DM phase requires a hero adapter path from a completed hero phase.")
+
+    state_path = output_dir / "phase_state.json"
+    existing_state = _load_phase_state(state_path)
+    if existing_state is not None and existing_state.get("status") == "completed":
+        phases.append(existing_state)  # resume: reuse the recorded state and do not retrain
+        return output_dir
+
+    output_dir.mkdir(parents=True, exist_ok=True)
+    run_name = config.dm_config.run_name or f"{config.root_dir.name}-dm-cycle-{cycle_number:02d}"  # an explicit run_name wins over the generated one
+    phase_state = {
+        "phase": "dm",
+        "cycle": cycle_number,
+        "status": "running",
+        "run_name": run_name,
+        "output_dir": str(output_dir),
+        "resume_adapter_path": None if resume_adapter_path is None else str(resume_adapter_path),
+        "hero_adapter_path": str(hero_adapter_path),
+        "started_at": _utc_now(),
+    }
+    phases.append(phase_state)  # same dict object, so later status updates also show up in the manifest list
+    _write_json(state_path, phase_state)
+    if on_phase_state_change is not None:
+        on_phase_state_change()
+
+    phase_config = replace(  # copy of the DM config with per-phase overrides
+        config.dm_config,
+        output_dir=output_dir,
+        run_name=run_name,
+        resume_adapter_path=None if resume_adapter_path is None else str(resume_adapter_path),
+    )
+    closed_loop = DMClosedLoopConfig(  # describes the hero the DM is trained against
+        hero_provider="hf_local",
+        hero_model=config.hero_config.model_name,  # hero base model name plus the adapter path given to this phase
+        hero_adapter_path=str(hero_adapter_path),
+        interface_provider=config.interface_provider,
+        interface_model=config.interface_model,
+        interface_narrate=config.interface_narrate,
+        interface_translation_mode=config.interface_translation_mode,
+        hero_max_game_steps=config.hero_max_game_steps,
+        hero_max_tool_calls=config.hero_max_tool_calls,
+    )
+    with _wandb_phase_env(group=config.root_dir.name, job_type="dm"):  # WANDB_RUN_GROUP / WANDB_JOB_TYPE are set only for the duration of training
+        run_dm_grpo(
+            phase_config,
+            target_ratios=config.target_ratios,
+            artifacts_root=output_dir / "artifacts",
+            closed_loop=closed_loop,
+        )
+
+    phase_state["status"] = "completed"  # reached only when training returns; an exception leaves the state file at "running"
+    phase_state["completed_at"] = _utc_now()
+    _write_json(state_path, phase_state)
+    return output_dir
+
+
+def _write_manifest(  # Write the run-level manifest to <root_dir>/joint_state.json, replacing any previous content.
+    config: JointTrainingConfig,
+    phases: list[dict[str, Any]],
+    *,
+    status: str,  # callers in this module pass "running", "failed" or "completed"
+    error: str | None = None,
+    latest_hero_adapter_path: str | None = None,
+    latest_dm_adapter_path: str | None = None,
+) -> None:
+    payload = {
+        "status": status,
+        "updated_at": _utc_now(),
+        "error": error,
+        "latest_hero_adapter_path": latest_hero_adapter_path,
+        "latest_dm_adapter_path": latest_dm_adapter_path,
+        "config": _to_jsonable(asdict(config)),  # full config snapshot, nested dataclasses included
+        "phases": phases,
+    }
+    _write_json(config.root_dir / "joint_state.json", payload)
+
+
+@contextmanager  # temporarily sets WANDB_RUN_GROUP and WANDB_JOB_TYPE for the enclosed block
+def _wandb_phase_env(*, group: str, job_type: str) -> Iterator[None]:
+    previous_group = os.getenv("WANDB_RUN_GROUP")  # remember prior values (None when unset) for restoration
+    previous_job_type = os.getenv("WANDB_JOB_TYPE")
+    os.environ["WANDB_RUN_GROUP"] = group
+    os.environ["WANDB_JOB_TYPE"] = job_type
+    try:
+        yield
+    finally:
+        _restore_env("WANDB_RUN_GROUP", previous_group)  # restored even when the enclosed block raises
+        _restore_env("WANDB_JOB_TYPE", previous_job_type)
+
+
+def _restore_env(name: str, value: str | None) -> None:  # Set environment variable name back to value; None means remove it.
+    if value is None:
+        os.environ.pop(name, None)  # no error if the variable is already absent
+    else:
+        os.environ[name] = value
+
+
+def _load_phase_state(path: Path) -> dict[str, Any] | None:  # Return the parsed JSON at path, or None when the file does not exist.
+    if not path.exists():
+        return None
+    return json.loads(path.read_text(encoding="utf-8"))  # NOTE(review): malformed JSON raises here, and a non-dict payload is returned as-is
+
+
+def _write_json(path: Path, payload: dict[str, Any]) -> None:  # Serialize payload as indented, key-sorted JSON with a trailing newline.
+    path.parent.mkdir(parents=True, exist_ok=True)  # create missing parent directories
+    path.write_text(json.dumps(_to_jsonable(payload), indent=2, sort_keys=True) + "\n", encoding="utf-8")  # writes straight to the target path, with no temp file
+
+
+def _to_jsonable(value: Any) -> Any:  # Recursively convert Path objects to str and containers to JSON-friendly dicts/lists.
+    if isinstance(value, Path):
+        return str(value)
+    if isinstance(value, dict):
+        return {str(key): _to_jsonable(item) for key, item in value.items()}  # keys are stringified, values converted recursively
+    if isinstance(value, (list, tuple)):  # tuples too: dataclasses.asdict keeps them, and a Path inside one would break json.dumps
+        return [_to_jsonable(item) for item in value]
+    return value  # every other value is passed through unchanged
+
+
+def _initial_adapter_path(raw_path: str | None) -> Path | None:  # Turn a configured adapter path into a Path, or None when unusable.
+    if raw_path is None:
+        return None
+    path = Path(raw_path)
+    return path if path.exists() else None  # a configured but missing path is silently treated as "no adapter"
+
+
+def _utc_now() -> str:  # Current UTC time as an ISO-8601 string with second precision and a "Z" suffix.
+    return datetime.now(UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")  # drop microseconds, then swap the "+00:00" offset for "Z"
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000000000000000000000000000000000000..0085e57cc98a2fa486243f72aa64d1a6f9fdcdc1
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,63 @@
+[build-system]
+requires = ["setuptools>=69", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "dnd-agents"
+version = "0.1.0"
+description = "Dungeon master and hero agent environments built on TextWorld and OpenEnv."
+readme = "SPEC.md"
+requires-python = ">=3.11,<3.12"  # restricts installs to Python 3.11.x
+dependencies = [
+  "openenv-core==0.2.1",
+  "textworld==1.7.0",
+  "fastapi>=0.115,<1",
+  "uvicorn>=0.30,<1",
+  "pydantic>=2.12,<3",
+  "python-dotenv>=1.0,<2",
+  "python-multipart>=0.0.9,<1",
+  "google-genai>=1.0,<2",
+  "huggingface-hub>=1.6,<2",
+  "pytest>=8.0,<9",  # NOTE(review): test runner listed as a runtime dependency; consider moving it to an optional extra
+]
+
+[project.scripts]  # console entry points installed with the package
+dnd-master = "agents.master.main:main"
+dnd-hero = "agents.hero.__main__:main"
+dnd-loop = "agents.loop.__main__:main"
+dnd-train = "agents.train.__main__:main"
+dnd-openenv = "agents.openenv_server.__main__:main"
+
+[project.optional-dependencies]
+local-llm = [
+  "accelerate==1.13.0",
+  "bitsandbytes==0.49.2",
+  "huggingface-hub>=1.6,<2",
+  "peft==0.18.1",
+  "transformers==5.3.0",
+  "vllm==0.12.0; platform_system == 'Linux'",  # environment marker: installed on Linux only
+]
+train = [
+  "accelerate==1.13.0",
+  "bitsandbytes==0.49.2",
+  "datasets==4.6.1",
+  "huggingface-hub>=1.6,<2",
+  "jmespath>=1.0,<2",
+  "peft==0.18.1",
+  "transformers==5.3.0",
+  "trl==0.29.0",
+  "vllm==0.12.0; platform_system == 'Linux'",  # environment marker: installed on Linux only
+  "wandb==0.25.0",
+]
+
+[tool.setuptools.packages.find]
+include = ["agents*"]  # only the agents package tree is packaged
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+markers = [
+  "live: tests that call live external model APIs",
+]
+filterwarnings = [
+  "ignore:Game '.*' is not fully supported\\..*",
+]
diff --git a/textworld_data/dnd/logic/answer.twl b/textworld_data/dnd/logic/answer.twl
new file mode 100644
index 0000000000000000000000000000000000000000..7ee869530b3e4f7a2f6804b2d9a7c8d94eac293c
--- /dev/null
+++ b/textworld_data/dnd/logic/answer.twl
@@ -0,0 +1,18 @@
+type answer : t {  # answer entity, a subtype of t; it has predicates but no rules of its own
+    predicates {
+        correct(answer, npc);
+        solved(answer);
+    }
+
+    inform7 {
+        type {
+            kind :: "answer-like";
+            definition :: "answer-like is a kind of thing. answer-like is privately-named. A answer-like can be solved. A answer-like is usually not solved.";
+        }
+
+        predicates {
+            correct(answer, npc) :: "";  # an empty string means no Inform 7 source text is emitted for this predicate
+            solved(answer) :: "The {answer} is solved";
+        }
+    }
+}
diff --git a/textworld_data/dnd/logic/clue.twl b/textworld_data/dnd/logic/clue.twl
new file mode 100644
index 0000000000000000000000000000000000000000..837181b01018fb0e44db6c87caae124257b3387b
--- /dev/null
+++ b/textworld_data/dnd/logic/clue.twl
@@ -0,0 +1,16 @@
+type clue : t {  # clue entity, a subtype of t; only the discovered(clue) predicate is declared, with no rules
+    predicates {
+        discovered(clue);
+    }
+
+    inform7 {
+        type {
+            kind :: "clue-like";
+            definition :: "clue-like is a kind of thing. clue-like is privately-named.";
+        }
+
+        predicates {
+            discovered(clue) :: "";  # an empty string means no Inform 7 source text is emitted for this predicate
+        }
+    }
+}
diff --git a/textworld_data/dnd/logic/fixture.twl b/textworld_data/dnd/logic/fixture.twl
new file mode 100644
index 0000000000000000000000000000000000000000..88d94ddb21316fafe98bc24506f7bb94ac35ba9c
--- /dev/null
+++ b/textworld_data/dnd/logic/fixture.twl
@@ -0,0 +1,53 @@
+type fixture : t {  # fixture entity: a sealed fixture reveals a stored item or hidden readable when the required item is used on it
+    predicates {
+        fixture_requires(fixture, o);
+        reveals_item(fixture, o);
+        reveals_readable(fixture, readable);
+        hidden_readable(readable);
+        sealed(fixture);
+        fixture_keeps_use(fixture);
+        fixture_consumes_use(fixture);
+    }
+
+    rules {  # "$" marks a precondition that is kept; unmarked ones are removed. keep uses $in(o, I), consume uses in(o, I), so the item leaves the inventory
+        use/fixture/item/keep :: $at(P, r) & $at(fixture, r) & sealed(fixture) & $in(o, I) & $fixture_requires(fixture, o) & $fixture_keeps_use(fixture) & stored_item(o') & $reveals_item(fixture, o') -> at(o', r);
+        use/fixture/item/consume :: $at(P, r) & $at(fixture, r) & sealed(fixture) & in(o, I) & $fixture_requires(fixture, o) & $fixture_consumes_use(fixture) & stored_item(o') & $reveals_item(fixture, o') -> at(o', r);
+        use/fixture/readable/keep :: $at(P, r) & $at(fixture, r) & sealed(fixture) & $in(o, I) & $fixture_requires(fixture, o) & $fixture_keeps_use(fixture) & hidden_readable(readable) & $reveals_readable(fixture, readable) -> at(readable, r);
+        use/fixture/readable/consume :: $at(P, r) & $at(fixture, r) & sealed(fixture) & in(o, I) & $fixture_requires(fixture, o) & $fixture_consumes_use(fixture) & hidden_readable(readable) & $reveals_readable(fixture, readable) -> at(readable, r);
+    }
+
+    reverse_rules {  # every rule is listed as its own reverse
+        use/fixture/item/keep :: use/fixture/item/keep;
+        use/fixture/item/consume :: use/fixture/item/consume;
+        use/fixture/readable/keep :: use/fixture/readable/keep;
+        use/fixture/readable/consume :: use/fixture/readable/consume;
+    }
+
+    inform7 {
+        type {
+            kind :: "fixture-like";
+            definition :: "fixture-like is a kind of thing. fixture-like is fixed in place.";
+        }
+
+        predicates {
+            fixture_requires(fixture, o) :: "";
+            reveals_item(fixture, o) :: "";
+            reveals_readable(fixture, readable) :: "";
+            hidden_readable(readable) :: "";
+            sealed(fixture) :: "";
+            fixture_keeps_use(fixture) :: "";
+            fixture_consumes_use(fixture) :: "";
+        }
+
+        commands {  # all four rules share the "use {o} on {fixture}" command, mapped to the Inform 7 unlocking action
+            use/fixture/item/keep :: "use {o} on {fixture}" :: "unlocking the {fixture} with the {o}";
+            use/fixture/item/consume :: "use {o} on {fixture}" :: "unlocking the {fixture} with the {o}";
+            use/fixture/readable/keep :: "use {o} on {fixture}" :: "unlocking the {fixture} with the {o}";
+            use/fixture/readable/consume :: "use {o} on {fixture}" :: "unlocking the {fixture} with the {o}";
+        }
+
+        code :: """
+            Understand "use [something] on [something]" as unlocking it with.
+        """;
+    }
+}
diff --git a/textworld_data/dnd/logic/npc.twl b/textworld_data/dnd/logic/npc.twl
new file mode 100644
index 0000000000000000000000000000000000000000..fbf94a4112409348fe01dc9277dd95989b7beb7d
--- /dev/null
+++ b/textworld_data/dnd/logic/npc.twl
@@ -0,0 +1,57 @@
+type npc : t {  # non-player character: can be talked to and can take part in a one-time trade
+    predicates {
+        consulted(npc);
+        guardian(npc);
+        trade_pending(npc);
+        traded(npc);
+        trade_requires(npc, o);
+        trade_gives_item(npc, o);
+        trade_gives_clue(npc);
+    }
+
+    rules {  # "$" marks a precondition that is kept; both give rules remove trade_pending(npc) and in(o, I) and add traded(npc)
+        talk :: $at(P, r) & $at(npc, r) -> consulted(npc);
+
+        give/item :: $at(P, r) & $at(npc, r) & trade_pending(npc) & in(o, I) & $trade_requires(npc, o) & $trade_gives_item(npc, o') & stored_item(o') -> traded(npc) & in(o', I);
+        give/clue :: $at(P, r) & $at(npc, r) & trade_pending(npc) & in(o, I) & $trade_requires(npc, o) & $trade_gives_clue(npc) -> traded(npc);
+    }
+
+    reverse_rules {  # every rule is listed as its own reverse
+        talk :: talk;
+        give/item :: give/item;
+        give/clue :: give/clue;
+    }
+
+    inform7 {
+        type {
+            kind :: "person";
+            definition :: "A person can be consulted. A person is usually not consulted. A person can be a guardian. A person is usually not a guardian.";
+        }
+
+        predicates {
+            consulted(npc) :: "The {npc} is consulted";
+            guardian(npc) :: "The {npc} is guardian";
+            trade_pending(npc) :: "";
+            traded(npc) :: "";
+            trade_requires(npc, o) :: "";
+            trade_gives_item(npc, o) :: "";
+            trade_gives_clue(npc) :: "";
+        }
+
+        commands {  # talk maps to the Inform 7 examining action; both give rules share one command
+            talk :: "talk {npc}" :: "examining the {npc}";
+            give/item :: "give {o} to {npc}" :: "giving the {o} to the {npc}";
+            give/clue :: "give {o} to {npc}" :: "giving the {o} to the {npc}";
+        }
+
+        code :: """
+            Understand "talk [someone]" as examining.
+            Understand "talk to [someone]" as examining.
+            Understand "speak to [someone]" as examining.
+            Understand "give [something] to [someone]" as giving it to.
+
+            After examining a person (called target):
+                now target is consulted;
+        """;
+    }
+}
diff --git a/textworld_data/dnd/logic/object.twl b/textworld_data/dnd/logic/object.twl
new file mode 100644
index 0000000000000000000000000000000000000000..8d446961de9af2d97e395191150d0004f0b47d3a
--- /dev/null
+++ b/textworld_data/dnd/logic/object.twl
@@ -0,0 +1,49 @@
+# object
+type o : t {  # portable object: adds item combination plus constraints that keep each object in a single place
+    predicates {
+        combines_with(o, o, o);
+        fresh(o);
+        stored_item(o);
+    }
+
+    rules {  # combine removes both held inputs, the combines_with fact and fresh(o''), then puts o'' in the inventory
+        combine :: $at(P, r) & in(o, I) & in(o', I) & combines_with(o, o', o'') & fresh(o'') -> in(o'', I);
+    }
+
+    reverse_rules {
+        combine :: combine;
+    }
+
+    constraints {  # an object cannot be in two of inventory/container/supporter/room at once, nor in two of the same kind
+        obj1 :: in(o, I) & in(o, c) -> fail();
+        obj2 :: in(o, I) & on(o, s) -> fail();
+        obj3 :: in(o, I) & at(o, r) -> fail();
+        obj4 :: in(o, c) & on(o, s) -> fail();
+        obj5 :: in(o, c) & at(o, r) -> fail();
+        obj6 :: on(o, s) & at(o, r) -> fail();
+        obj7 :: at(o, r) & at(o, r') -> fail();
+        obj8 :: in(o, c) & in(o, c') -> fail();
+        obj9 :: on(o, s) & on(o, s') -> fail();
+    }
+
+    inform7 {
+        type {
+            kind :: "object-like";
+            definition :: "object-like is portable.";
+        }
+
+        predicates {
+            combines_with(o, o', o'') :: "";
+            fresh(o) :: "";
+            stored_item(o) :: "";
+        }
+
+        commands {  # combine is mapped to the Inform 7 unlocking action
+            combine :: "combine {o} with {o'}" :: "unlocking the {o'} with the {o}";
+        }
+
+        code :: """
+            Understand "combine [something] with [something]" as unlocking it with.
+        """;
+    }
+}
diff --git a/textworld_data/dnd/logic/readable.twl b/textworld_data/dnd/logic/readable.twl
new file mode 100644
index 0000000000000000000000000000000000000000..924de8a9f377ba8686a19dd3669540aa97f5028c
--- /dev/null
+++ b/textworld_data/dnd/logic/readable.twl
@@ -0,0 +1,51 @@
+type readable : t {  # readable entity: readable when free_read or prepared; becomes prepared by using the required item on it
+    predicates {
+        free_read(readable);
+        read_requires(readable, o);
+        prepared(readable);
+        read_keeps_use(readable);
+        read_consumes_use(readable);
+    }
+
+    rules {  # "$" marks a precondition that is kept; the read rules only restate at(P, r), so they add no new facts
+        read/free :: $at(P, r) & $at(readable, r) & $free_read(readable) -> at(P, r);
+        read/prepared :: $at(P, r) & $at(readable, r) & $prepared(readable) -> at(P, r);
+
+        use/readable/keep :: $at(P, r) & $at(readable, r) & $in(o, I) & $read_requires(readable, o) & $read_keeps_use(readable) -> prepared(readable);
+        use/readable/consume :: $at(P, r) & $at(readable, r) & in(o, I) & $read_requires(readable, o) & $read_consumes_use(readable) -> prepared(readable);
+    }
+
+    reverse_rules {  # every rule is listed as its own reverse
+        read/free :: read/free;
+        read/prepared :: read/prepared;
+        use/readable/keep :: use/readable/keep;
+        use/readable/consume :: use/readable/consume;
+    }
+
+    inform7 {
+        type {
+            kind :: "readable-like";
+            definition :: "readable-like is a kind of thing. readable-like is fixed in place.";
+        }
+
+        predicates {
+            free_read(readable) :: "";
+            read_requires(readable, o) :: "";
+            prepared(readable) :: "";
+            read_keeps_use(readable) :: "";
+            read_consumes_use(readable) :: "";
+        }
+
+        commands {  # read maps to the Inform 7 examining action; use maps to unlocking
+            read/free :: "read {readable}" :: "examining the {readable}";
+            read/prepared :: "read {readable}" :: "examining the {readable}";
+            use/readable/keep :: "use {o} on {readable}" :: "unlocking the {readable} with the {o}";
+            use/readable/consume :: "use {o} on {readable}" :: "unlocking the {readable} with the {o}";
+        }
+
+        code :: """
+            Understand "read [something]" as examining.
+            Understand "use [something] on [something]" as unlocking it with.
+        """;
+    }
+}
diff --git a/textworld_data/dnd/logic/room.twl b/textworld_data/dnd/logic/room.twl
new file mode 100644
index 0000000000000000000000000000000000000000..6c067ee57565c8a9059894c03737134fc79b56c0
--- /dev/null
+++ b/textworld_data/dnd/logic/room.twl
@@ -0,0 +1,81 @@
+# room
+type r {  # room type: location predicates, eight directional links, and movement rules
+    predicates {
+        at(P, r);
+        at(t, r);
+
+        north_of(r, r);
+        south_of(r, r);
+        east_of(r, r);
+        west_of(r, r);
+        up_of(r, r);
+        down_of(r, r);
+        in_of(r, r);
+        out_of(r, r);
+
+        free(r, r);
+    }
+
+    rules {  # each move needs the directional link plus free(r, r') and free(r', r), and replaces at(P, r) with at(P, r')
+        go/north :: at(P, r) & $north_of(r', r) & $free(r, r') & $free(r', r) -> at(P, r');
+        go/south :: at(P, r) & $south_of(r', r) & $free(r, r') & $free(r', r) -> at(P, r');
+        go/east  :: at(P, r) & $east_of(r', r) & $free(r, r') & $free(r', r) -> at(P, r');
+        go/west  :: at(P, r) & $west_of(r', r) & $free(r, r') & $free(r', r) -> at(P, r');
+        go/up    :: at(P, r) & $up_of(r', r) & $free(r, r') & $free(r', r) -> at(P, r');
+        go/down  :: at(P, r) & $down_of(r', r) & $free(r, r') & $free(r', r) -> at(P, r');
+        go/in    :: at(P, r) & $in_of(r', r) & $free(r, r') & $free(r', r) -> at(P, r');
+        go/out   :: at(P, r) & $out_of(r', r) & $free(r, r') & $free(r', r) -> at(P, r');
+    }
+
+    reverse_rules {  # opposite directions are paired
+        go/north :: go/south;
+        go/east :: go/west;
+        go/up :: go/down;
+        go/in :: go/out;
+    }
+
+    constraints {  # the player and every thing are in at most one room; the nav_rr constraints forbid two rooms sharing a direction from one room
+        player_unique :: at(P, r) & at(P, r') -> fail();
+        room_object_unique :: at(t, r) & at(t, r') -> fail();
+
+        nav_rr1 :: north_of(r, r') & north_of(r'', r') -> fail();
+        nav_rr2 :: south_of(r, r') & south_of(r'', r') -> fail();
+        nav_rr3 :: east_of(r, r') & east_of(r'', r') -> fail();
+        nav_rr4 :: west_of(r, r') & west_of(r'', r') -> fail();
+        nav_rr5 :: up_of(r, r') & up_of(r'', r') -> fail();
+        nav_rr6 :: down_of(r, r') & down_of(r'', r') -> fail();
+        nav_rr7 :: in_of(r, r') & in_of(r'', r') -> fail();
+        nav_rr8 :: out_of(r, r') & out_of(r'', r') -> fail();
+    }
+
+    inform7 {
+        type {
+            kind :: "room";
+        }
+
+        predicates {  # NOTE(review): free(r, r) has no Inform 7 mapping in this block - confirm that is intended
+            at(P, r) :: "The player is in {r}";
+            at(t, r) :: "The {t} is in {r}";
+
+            north_of(r, r') :: "The {r} is mapped north of {r'}";
+            south_of(r, r') :: "The {r} is mapped south of {r'}";
+            east_of(r, r') :: "The {r} is mapped east of {r'}";
+            west_of(r, r') :: "The {r} is mapped west of {r'}";
+            up_of(r, r') :: "The {r} is mapped above {r'}";
+            down_of(r, r') :: "The {r} is mapped below {r'}";
+            in_of(r, r') :: "Inside from {r'} is {r}";
+            out_of(r, r') :: "Outside from {r'} is {r}";
+        }
+
+        commands {
+            go/north :: "go north" :: "going north";
+            go/south :: "go south" :: "going south";
+            go/east :: "go east" :: "going east";
+            go/west :: "go west" :: "going west";
+            go/up :: "go up" :: "going up";
+            go/down :: "go down" :: "going down";
+            go/in :: "go in" :: "going inside";
+            go/out :: "go out" :: "going outside";
+        }
+    }
+}
diff --git a/textworld_data/dnd/text_grammars/house_dnd_instruction.twg b/textworld_data/dnd/text_grammars/house_dnd_instruction.twg
new file mode 100644
index 0000000000000000000000000000000000000000..1d5147a14d2c0fabc6f7573df2430519688baca1
--- /dev/null
+++ b/textworld_data/dnd/text_grammars/house_dnd_instruction.twg
@@ -0,0 +1,7 @@
+#-------------------------
+#Dungeon DM Instruction Grammar
+#-------------------------
+go:go to the next room.
+read:read (readable).
+talk:talk to (npc).
+submit/final:submit the final answer to (npc).