Buckets:

hf-doc-build/doc-dev / openenv /pr_749 /en /tutorials /mcp-environment.html
download
raw
62.6 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;MCP Tools in OpenEnv Environments&quot;,&quot;local&quot;:&quot;mcp-tools-in-openenv-environments&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Why MCP?&quot;,&quot;local&quot;:&quot;why-mcp&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;The dual API boundary&quot;,&quot;local&quot;:&quot;the-dual-api-boundary&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Using MCP Tools in a Training Loop&quot;,&quot;local&quot;:&quot;using-mcp-tools-in-a-training-loop&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Framework-agnostic rollout loop&quot;,&quot;local&quot;:&quot;framework-agnostic-rollout-loop&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;TRL environment_factory&quot;,&quot;local&quot;:&quot;trl-environmentfactory&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Under the Hood: CallToolAction and ListToolsAction&quot;,&quot;local&quot;:&quot;under-the-hood-calltoolaction-and-listtoolsaction&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Discovering tools&quot;,&quot;local&quot;:&quot;discovering-tools&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Calling a tool&quot;,&quot;local&quot;:&quot;calling-a-tool&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Error handling&quot;,&quot;local&quot;:&quot;error-handling&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;step(CallToolAction(...)) vs call_tool()&quot;,&quot;local&quot;:&quot;stepcalltoolaction-vs-calltool&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Using MCP Tools for Evaluation&quot;,&quot;local&quot;:&quot;using-mcp-tools-for-evaluation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Building an MCP 
Environment&quot;,&quot;local&quot;:&quot;building-an-mcp-environment&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Running the Demo End-to-End&quot;,&quot;local&quot;:&quot;running-the-demo-end-to-end&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Next Steps&quot;,&quot;local&quot;:&quot;next-steps&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/openenv/pr_749/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/entry/start.85477f45.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/scheduler.2b22cead.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/singletons.63566282.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/paths.dd876c7b.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/entry/app.51835dc5.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/preload-helper.0820fbc7.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/index.1a0e8013.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/nodes/0.167255c0.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/nodes/61.8d50551a.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/Heading.c0d3f116.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.21bcf336.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/CodeBlock.c8d73295.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;MCP Tools in OpenEnv Environments&quot;,&quot;local&quot;:&quot;mcp-tools-in-openenv-environments&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Why MCP?&quot;,&quot;local&quot;:&quot;why-mcp&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;The dual API boundary&quot;,&quot;local&quot;:&quot;the-dual-api-boundary&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Using MCP Tools in a Training Loop&quot;,&quot;local&quot;:&quot;using-mcp-tools-in-a-training-loop&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Framework-agnostic rollout loop&quot;,&quot;local&quot;:&quot;framework-agnostic-rollout-loop&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;TRL environment_factory&quot;,&quot;local&quot;:&quot;trl-environmentfactory&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Under the Hood: CallToolAction and ListToolsAction&quot;,&quot;local&quot;:&quot;under-the-hood-calltoolaction-and-listtoolsaction&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Discovering tools&quot;,&quot;local&quot;:&quot;discovering-tools&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Calling a tool&quot;,&quot;local&quot;:&quot;calling-a-tool&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Error handling&quot;,&quot;local&quot;:&quot;error-handling&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;step(CallToolAction(...)) vs call_tool()&quot;,&quot;local&quot;:&quot;stepcalltoolaction-vs-calltool&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Using MCP Tools for 
Evaluation&quot;,&quot;local&quot;:&quot;using-mcp-tools-for-evaluation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Building an MCP Environment&quot;,&quot;local&quot;:&quot;building-an-mcp-environment&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Running the Demo End-to-End&quot;,&quot;local&quot;:&quot;running-the-demo-end-to-end&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Next Steps&quot;,&quot;local&quot;:&quot;next-steps&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 
dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="mcp-tools-in-openenv-environments" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#mcp-tools-in-openenv-environments"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>MCP Tools in OpenEnv Environments</span></h1> <p data-svelte-h="svelte-10botz"><a href="https://colab.research.google.com/github/huggingface/OpenEnv/blob/main/examples/mcp_environment.ipynb" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a></p> <p data-svelte-h="svelte-1n4razi">Most agentic work ends up needing the same thing: a way for 
the model to <strong>call tools</strong> and receive structured feedback, whether that is during RL training or offline evaluation. OpenEnv standardises that surface with <strong><a href="https://modelcontextprotocol.io" rel="nofollow">MCP</a></strong> (Model Context Protocol), so the same tool interface works during training, eval, inference, and external serving. This tutorial covers the four paths you will walk in practice — wiring an MCP-backed environment into a training loop, using the same env for offline eval, inspecting the API underneath both of those, and building your own MCP environment when no existing one fits.</p> <h2 class="relative group"><a id="why-mcp" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#why-mcp"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Why MCP?</span></h2> <p data-svelte-h="svelte-quer6q">If your tools are just local Python functions, you do not need MCP — pass them to your trainer directly (TRL, torchforge, whatever) and you are done. 
MCP earns its complexity when the tool surface has to exist as a <strong>process boundary</strong>, not a function call:</p> <ul data-svelte-h="svelte-bm0v14"><li><strong>The env runs elsewhere</strong> — in a Docker container, a Hugging Face Space, a remote server. MCP is the transport that crosses that boundary.</li> <li><strong>You want to reuse someone else’s env</strong> — the OpenEnv catalog, third-party envs, and community hubs all expose their tools over MCP, so the same env works in your training run without rewriting its interface.</li> <li><strong>You want the env to be callable by other agents</strong> — Claude Desktop, Cursor, inference servers, and any MCP-compatible client can plug into an MCP server. A private Python function doesn’t get that for free.</li> <li><strong>You need tool discovery and schemas</strong> — <code>list_tools()</code> + auto-generated JSON schemas are part of the protocol; models see them the same way they see any MCP server’s tools.</li></ul> <p data-svelte-h="svelte-17drqpy">In short: MCP is the answer when your env is more than a helper function in your training script — when the same tools have to be usable from training, inference, and external clients without maintaining three interfaces.</p> <h3 class="relative group"><a id="the-dual-api-boundary" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#the-dual-api-boundary"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 
79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>The dual API boundary</span></h3> <p data-svelte-h="svelte-1x41ulf">Inside OpenEnv, MCP plays a specific role in a two-surface split:</p> <ul data-svelte-h="svelte-aayabq"><li><strong>Training / orchestration infrastructure</strong> uses the Gym-style control plane — <code>reset()</code>, <code>step()</code>, <code>state()</code> — over WebSocket (<code>/ws</code>). This is what the trainer needs to roll out episodes, compute rewards, and enforce termination.</li> <li><strong>Agents</strong> use MCP tools over the <code>/mcp</code> JSON-RPC endpoint. Tools are what the model calls to act on the world.</li></ul> <blockquote class="note" data-svelte-h="svelte-5bkwv7"><p>In simulation mode, MCP tool calls flow <strong>through</strong> <code>step()</code>. The trainer stays in control of timing, rewards, and termination; the MCP action types are just a standardised action schema. The <a href="../guides/mcp-environment-lifecycle">MCP environment lifecycle guide</a> covers the split in depth.</p></blockquote> <blockquote class="note" data-svelte-h="svelte-mj5sew"><p><strong>MCP adoption in OpenEnv is still in flight.</strong> <a href="https://github.com/huggingface/OpenEnv/blob/main/rfcs/003-mcp-support.md" rel="nofollow">RFC 003</a> proposes MCP as the standard interface for <em>all</em> agent-facing actions, but it is still <strong>In Review</strong>. Today only a handful of envs are MCP-backed: <code>echo_env</code> and <code>finqa_env</code> inherit from the canonical <code>openenv.core.env_server.mcp_environment.MCPEnvironment</code>; <code>calendar_env</code> uses a local wrapper with the same shape. 
The majority (<code>textarena_env</code> / Wordle, <code>openspiel_env</code>, <code>chess_env</code>, <code>browsergym_env</code>, and most others) still use custom action types that you pass through <code>env.step(CustomAction(...))</code> without MCP plumbing. Before using the patterns in this tutorial against a specific env, check whether it inherits from an <code>MCPEnvironment</code> base; if not, the env’s own action schema applies instead.</p></blockquote> <h2 class="relative group"><a id="using-mcp-tools-in-a-training-loop" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-mcp-tools-in-a-training-loop"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using MCP Tools in a Training Loop</span></h2> <p data-svelte-h="svelte-1hqq1e8">An MCP-backed env is consumed like any other OpenEnv env from the trainer’s side. 
At the atomic level, each agent action is:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START -->obs = env.step(CallToolAction(tool_name=..., arguments=...))
<span class="hljs-comment"># obs.result — runtime tool result object, or None on error</span>
<span class="hljs-comment"># obs.reward — env&#x27;s reward for this turn (may be None)</span>
<span class="hljs-comment"># obs.done — episode terminated</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1y18e54">That is the only MCP-specific piece. Everything around it — how the trainer generates actions, how tool schemas are surfaced to the model, how rewards are collected — belongs to your training framework, not to MCP.</p> <h3 class="relative group"><a id="framework-agnostic-rollout-loop" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#framework-agnostic-rollout-loop"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Framework-agnostic rollout loop</span></h3> <p data-svelte-h="svelte-1fffovk">If you drive the rollout yourself (a custom loop, <a href="https://github.com/pytorch-labs/torchforge" rel="nofollow">torchforge</a>, an external agent server), you own the full generation path and call <code>env.step()</code> directly:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" 
type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START -->obs = env.reset()
total_reward = <span class="hljs-number">0.0</span>
<span class="hljs-keyword">for</span> turn <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(max_turns):
    tool_call = model.decide(obs) <span class="hljs-comment"># your agent picks a tool + args from the latest observation</span>
    obs = env.step(
        CallToolAction(tool_name=tool_call.name, arguments=tool_call.arguments)
    )
    total_reward += obs.reward <span class="hljs-keyword">or</span> <span class="hljs-number">0.0</span>
    <span class="hljs-keyword">if</span> obs.done:
        <span class="hljs-keyword">break</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-4f1vp6">Whatever policy / generation code you use, <code>env.step(CallToolAction(...))</code> is the only line that talks to the MCP env.</p> <h3 class="relative group"><a id="trl-environmentfactory" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl-environmentfactory"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>TRL environment_factory</span></h3> <p data-svelte-h="svelte-lqu02z"><a href="https://huggingface.co/docs/trl" rel="nofollow">TRL</a>’s <code>GRPOTrainer</code> takes an <code>environment_factory</code> class whose public methods auto-register as discoverable tools — the trainer then handles the multi-turn generation loop for you. The <a href="wordle-grpo">Wordle GRPO tutorial</a> shows the full recipe (wrapper class, reward function, <code>GRPOTrainer</code> construction) with a non-MCP env. 
For an MCP-backed env, only the tool method bodies change; they call through to <code>env.step(CallToolAction(...))</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">echo</span>(<span class="hljs-params">self, message: <span class="hljs-built_in">str</span></span>) -&gt; <span class="hljs-built_in">str</span>:
    <span class="hljs-string">&quot;&quot;&quot;Echo back a message.
    Args:
        message: The message to echo.
    &quot;&quot;&quot;</span>
    step_result = self.env.step(
        CallToolAction(tool_name=<span class="hljs-string">&quot;echo_message&quot;</span>, arguments={<span class="hljs-string">&quot;message&quot;</span>: message})
    )
    obs = step_result.observation
    self.reward = step_result.reward <span class="hljs-keyword">or</span> obs.reward <span class="hljs-keyword">or</span> <span class="hljs-number">0.0</span>
    result = obs.result
    <span class="hljs-keyword">return</span> result.data <span class="hljs-keyword">if</span> <span class="hljs-built_in">hasattr</span>(result, <span class="hljs-string">&quot;data&quot;</span>) <span class="hljs-keyword">else</span> result<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1mn0mt2"><code>environment_factory</code> is a TRL API, not an MCP API. It works equally well with non-MCP envs (Wordle uses it with <code>TextArenaAction</code>), and MCP envs work equally well without it (the rollout-loop path above). They compose, but they are orthogonal.</p> <p data-svelte-h="svelte-1moqhug">The rest of this tutorial is for the other paths: the API <strong>underneath</strong> <code>env.step(CallToolAction(...))</code> (useful when you need the full observation or want to debug), <strong>using the same env for offline eval</strong>, and <strong>building your own MCP environment</strong> from scratch.</p> <h2 class="relative group"><a id="under-the-hood-calltoolaction-and-listtoolsaction" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#under-the-hood-calltoolaction-and-listtoolsaction"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" 
fill="currentColor"></path></svg></span></a> <span>Under the Hood: CallToolAction and ListToolsAction</span></h2> <p data-svelte-h="svelte-udspd8">The two MCP action types are <code>ListToolsAction</code> (discover what’s available) and <code>CallToolAction</code> (invoke one). They behave like any other Gym action — pass them to <code>step()</code> and inspect the returned observation.</p> <h3 class="relative group"><a id="discovering-tools" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#discovering-tools"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Discovering tools</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> echo_env.server.echo_environment <span class="hljs-keyword">import</span> EchoEnvironment
<span class="hljs-keyword">from</span> openenv.core.env_server.mcp_types <span class="hljs-keyword">import</span> ListToolsAction, ListToolsObservation
env = EchoEnvironment()
env.reset()
obs = env.step(ListToolsAction())
<span class="hljs-keyword">assert</span> <span class="hljs-built_in">isinstance</span>(obs, ListToolsObservation)
<span class="hljs-keyword">for</span> tool <span class="hljs-keyword">in</span> obs.tools:
    <span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;<span class="hljs-subst">{tool.name}</span>: <span class="hljs-subst">{tool.description}</span>&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1dfwx1i">Each <code>Tool</code> carries a <code>name</code>, a <code>description</code>, and an <code>input_schema</code> (JSON Schema) describing the accepted arguments. The schema is what lets a language-model agent know which parameters to fill in when it emits a tool call.</p> <h3 class="relative group"><a id="calling-a-tool" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#calling-a-tool"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Calling a tool</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> openenv.core.env_server.mcp_types <span class="hljs-keyword">import</span> CallToolAction, CallToolObservation
obs = env.step(
    CallToolAction(
        tool_name=<span class="hljs-string">&quot;echo_message&quot;</span>,
        arguments={<span class="hljs-string">&quot;message&quot;</span>: <span class="hljs-string">&quot;Hello from MCP!&quot;</span>},
    )
)
<span class="hljs-keyword">assert</span> <span class="hljs-built_in">isinstance</span>(obs, CallToolObservation)
<span class="hljs-built_in">print</span>(obs.tool_name) <span class="hljs-comment"># &quot;echo_message&quot;</span>
<span class="hljs-built_in">print</span>(obs.error) <span class="hljs-comment"># None</span>
result = obs.result
<span class="hljs-built_in">print</span>(result.data <span class="hljs-keyword">if</span> <span class="hljs-built_in">hasattr</span>(result, <span class="hljs-string">&quot;data&quot;</span>) <span class="hljs-keyword">else</span> result) <span class="hljs-comment"># &quot;Hello from MCP!&quot;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19prv4s"><code>CallToolObservation.result</code> is typed as <code>Any</code> in OpenEnv. At runtime, FastMCP commonly returns a <code>fastmcp.client.client.CallToolResult</code> object with <code>.data</code>, <code>.structured_content</code>, and <code>.content</code> attributes, but JSON round-trips or custom environments can surface a plain dict or value instead. Treat <code>.data</code> as a convenience when it exists, not as an OpenEnv-defined wrapper type. <code>obs.error</code> carries <strong>every</strong> failure mode — transport errors, unknown tool names, malformed arguments, <strong>and</strong> exceptions raised from inside the tool function itself (as <code>ToolErrorType.EXECUTION_ERROR</code>). On an error, <code>obs.result</code> is <code>None</code>. 
Always branch on <code>obs.error is None</code> before reading a runtime result.</p> <h3 class="relative group"><a id="error-handling" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#error-handling"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Error handling</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded 
font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START -->obs = env.step(
    CallToolAction(tool_name=<span class="hljs-string">&quot;does_not_exist&quot;</span>, arguments={}),
)
<span class="hljs-keyword">assert</span> <span class="hljs-built_in">isinstance</span>(obs, CallToolObservation)
<span class="hljs-built_in">print</span>(obs.error.error_type) <span class="hljs-comment"># ToolErrorType.TOOL_NOT_FOUND</span>
<span class="hljs-built_in">print</span>(obs.error.message) <span class="hljs-comment"># human-readable message from FastMCP, e.g. &quot;Unknown tool: &#x27;does_not_exist&#x27;&quot;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1k0s96f">The <code>ToolError.error_type</code> enum (<code>TOOL_NOT_FOUND</code>, <code>INVALID_ARGS</code>, <code>EXECUTION_ERROR</code>, <code>TRANSPORT_ERROR</code>, <code>TIMEOUT</code>) lets training code distinguish between bugs in the agent, bugs in the environment, and transient infrastructure issues — which often warrant different reward signals.</p> <h3 class="relative group"><a id="stepcalltoolaction-vs-calltool" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#stepcalltoolaction-vs-calltool"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>step(CallToolAction(...)) vs call_tool()</span></h3> <p data-svelte-h="svelte-13kkf3a">Environment clients that inherit from <code>MCPToolClient</code> (such as <code>EchoEnv</code> and <code>FinQAEnv</code>) expose a shorter <strong>async</strong> <code>await env.call_tool(&quot;name&quot;, arg=value)</code> helper for a running 
environment server. It returns the tool’s raw return value directly instead of a <code>CallToolObservation</code> — and it <strong>raises <code>RuntimeError</code></strong> on any tool error (transport failure, unknown tool, invalid arguments, or a tool exception), so you cannot branch on <code>error_type</code> without a <code>try/except</code>. Use <code>step(CallToolAction(...))</code> when you need the whole observation (reward, done, metadata, or graceful error classification); reach for <code>call_tool()</code> in async production scripts where the raw result is all you care about and a failure is allowed to propagate. The <a href="../guides/mcp-environment-lifecycle">lifecycle guide</a> covers the exact trade-offs.</p> <blockquote class="note" data-svelte-h="svelte-nm8t9w"><p><code>MCPToolClient</code> and its base <code>MCPClientBase</code> only support <code>mode=&quot;production&quot;</code>; construction raises <code>ValueError</code> for other modes. For direct in-process training or eval snippets like the ones above, call <code>env.step(CallToolAction(...))</code> on the environment class itself.</p></blockquote> <h2 class="relative group"><a id="using-mcp-tools-for-evaluation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-mcp-tools-for-evaluation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 
0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using MCP Tools for Evaluation</span></h2> <p data-svelte-h="svelte-fx4olr">The same mechanics work outside a training loop. For an offline eval — benchmarking a model’s tool use on a static dataset, regression-testing a deployed agent, or scoring a policy — drop the trainer and drive the step loop yourself:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> echo_env.server.echo_environment <span class="hljs-keyword">import</span> EchoEnvironment
<span class="hljs-keyword">from</span> openenv.core.env_server.mcp_types <span class="hljs-keyword">import</span> CallToolAction
env = EchoEnvironment()
env.reset()
results = []
<span class="hljs-keyword">for</span> sample <span class="hljs-keyword">in</span> eval_dataset:
    tool_call = model.decide(sample) <span class="hljs-comment"># your agent picks a tool + arguments</span>
    obs = env.step(
        CallToolAction(tool_name=tool_call.name, arguments=tool_call.arguments),
    )
    results.append({
        <span class="hljs-string">&quot;prompt&quot;</span>: sample.prompt,
        <span class="hljs-string">&quot;reply&quot;</span>: (
            obs.result.data <span class="hljs-keyword">if</span> obs.error <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span> <span class="hljs-keyword">and</span> <span class="hljs-built_in">hasattr</span>(obs.result, <span class="hljs-string">&quot;data&quot;</span>)
            <span class="hljs-keyword">else</span> obs.result <span class="hljs-keyword">if</span> obs.error <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span>
            <span class="hljs-keyword">else</span> <span class="hljs-literal">None</span>
        ),
        <span class="hljs-string">&quot;reward&quot;</span>: obs.reward <span class="hljs-keyword">or</span> <span class="hljs-number">0.0</span>,
        <span class="hljs-string">&quot;error&quot;</span>: obs.error,
    })
env.reset()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-11upk12">Pair the loop with a scoring function of your choice — the <a href="../guides/rewards">Reward Design</a> guide covers common patterns (test-pass rate, LLM-as-judge quality, compliance gates) — and aggregate across the dataset. The eval harness integration in <code>src/openenv/core/evals/</code> is still evolving; until that bridge lands, this plain-Python loop is the canonical pattern.</p> <h2 class="relative group"><a id="building-an-mcp-environment" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#building-an-mcp-environment"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Building an MCP Environment</span></h2> <p data-svelte-h="svelte-1aps0tn">Reach for this path when no existing environment covers the tools your agent needs — e.g. a new coding sandbox, a game, a proprietary API wrapper. The provider side is small: subclass <code>MCPEnvironment</code>, create a <code>FastMCP</code> server, register tools with the <code>@mcp.tool</code> decorator, and pass the server to <code>super().__init__</code>. 
Here is the echo environment, trimmed from <a href="https://github.com/huggingface/OpenEnv/blob/main/envs/echo_env/server/echo_environment.py" rel="nofollow"><code>envs/echo_env/server/echo_environment.py</code></a> down to the parts this tutorial covers:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> uuid <span class="hljs-keyword">import</span> uuid4
<span class="hljs-keyword">from</span> fastmcp <span class="hljs-keyword">import</span> FastMCP
<span class="hljs-keyword">from</span> openenv.core.env_server.mcp_environment <span class="hljs-keyword">import</span> MCPEnvironment
<span class="hljs-keyword">from</span> openenv.core.env_server.types <span class="hljs-keyword">import</span> Action, Observation, State
<span class="hljs-keyword">class</span> <span class="hljs-title class_">EchoEnvironment</span>(<span class="hljs-title class_ inherited__">MCPEnvironment</span>):
    SUPPORTS_CONCURRENT_SESSIONS = <span class="hljs-literal">True</span>
    <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>):
        mcp = FastMCP(<span class="hljs-string">&quot;echo_env&quot;</span>)
<span class="hljs-meta">        @mcp.tool</span>
        <span class="hljs-keyword">def</span> <span class="hljs-title function_">echo_message</span>(<span class="hljs-params">message: <span class="hljs-built_in">str</span></span>) -&gt; <span class="hljs-built_in">str</span>:
            <span class="hljs-string">&quot;&quot;&quot;Echo back the provided message.
            Args:
                message: The message to echo back
            Returns:
                The same message that was provided
            &quot;&quot;&quot;</span>
            <span class="hljs-keyword">return</span> message
<span class="hljs-meta">        @mcp.tool</span>
        <span class="hljs-keyword">def</span> <span class="hljs-title function_">echo_with_length</span>(<span class="hljs-params">message: <span class="hljs-built_in">str</span></span>) -&gt; <span class="hljs-built_in">dict</span>:
            <span class="hljs-string">&quot;&quot;&quot;Echo back the message with its length.
            Args:
                message: The message to echo back
            Returns:
                Dictionary with the message and its length
            &quot;&quot;&quot;</span>
            <span class="hljs-keyword">return</span> {<span class="hljs-string">&quot;message&quot;</span>: message, <span class="hljs-string">&quot;length&quot;</span>: <span class="hljs-built_in">len</span>(message)}
        <span class="hljs-built_in">super</span>().__init__(mcp)
        self._state = State(episode_id=<span class="hljs-built_in">str</span>(uuid4()), step_count=<span class="hljs-number">0</span>)
    <span class="hljs-keyword">def</span> <span class="hljs-title function_">reset</span>(<span class="hljs-params">self, seed=<span class="hljs-literal">None</span>, episode_id=<span class="hljs-literal">None</span>, **kwargs</span>) -&gt; Observation:
        self._state = State(episode_id=episode_id <span class="hljs-keyword">or</span> <span class="hljs-built_in">str</span>(uuid4()), step_count=<span class="hljs-number">0</span>)
        <span class="hljs-keyword">return</span> Observation(done=<span class="hljs-literal">False</span>, reward=<span class="hljs-number">0.0</span>, metadata={<span class="hljs-string">&quot;status&quot;</span>: <span class="hljs-string">&quot;ready&quot;</span>})
    <span class="hljs-keyword">def</span> <span class="hljs-title function_">_step_impl</span>(<span class="hljs-params">self, action: Action, timeout_s=<span class="hljs-literal">None</span>, **kwargs</span>) -&gt; Observation:
        <span class="hljs-comment"># Called for non-MCP actions. Echo exposes MCP tools only,</span>
        <span class="hljs-comment"># so anything that isn&#x27;t ListToolsAction / CallToolAction is an error.</span>
        <span class="hljs-keyword">return</span> Observation(
            done=<span class="hljs-literal">False</span>,
            reward=<span class="hljs-number">0.0</span>,
            metadata={<span class="hljs-string">&quot;error&quot;</span>: <span class="hljs-string">f&quot;Unknown action type: <span class="hljs-subst">{<span class="hljs-built_in">type</span>(action).__name__}</span>&quot;</span>},
        )
<span class="hljs-meta">    @property</span>
    <span class="hljs-keyword">def</span> <span class="hljs-title function_">state</span>(<span class="hljs-params">self</span>) -&gt; State:
        <span class="hljs-keyword">return</span> self._state<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1tds2eg">A few things worth calling out:</p> <ul data-svelte-h="svelte-1q8h9ql"><li><strong>Docstring → schema.</strong> <code>FastMCP</code> inspects each tool’s signature and Google-style docstring to build the <code>input_schema</code> automatically. The <code>Args:</code> block becomes parameter descriptions, and type hints become JSON types. No hand-written schemas.</li> <li><strong>Reserved names.</strong> <code>reset</code>, <code>step</code>, <code>state</code>, and <code>close</code> are reserved and cannot be tool names — they belong to the infrastructure boundary. Trying to register a tool with one of those names raises at construction time.</li> <li><strong><code>_step_impl</code> is required, <code>step</code> is not.</strong> <code>MCPEnvironment.step</code> already routes <code>ListToolsAction</code> and <code>CallToolAction</code> through the FastMCP server for you. Your subclass only has to implement <code>_step_impl</code>, which the base class calls for any <strong>non-MCP</strong> action. In pure-MCP environments like Echo it just returns an error observation; in environments that mix tool calls with other action types (e.g. 
a terminal “submit” action) it’s where that extra dispatch lives.</li> <li><strong>Rewards and <code>done</code> still work.</strong> Because MCP actions flow through <code>step()</code>, you can compute rewards, flip <code>done</code>, and emit metadata just like in any other OpenEnv environment.</li></ul> <h2 class="relative group"><a id="running-the-demo-end-to-end" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#running-the-demo-end-to-end"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Running the Demo End-to-End</span></h2> <p data-svelte-h="svelte-1o3lzfy">The repo ships a self-contained walkthrough at <a href="https://github.com/huggingface/OpenEnv/blob/main/examples/echo_mcp_demo.py" rel="nofollow"><code>examples/echo_mcp_demo.py</code></a>. 
Run it directly from the repo root:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-bash "><!-- HTML_TAG_START -->PYTHONPATH=src:envs uv run python examples/echo_mcp_demo.py<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-qv1npw">You will see the discovery call, two tool invocations, and an error case printed in sequence — the same four steps the “Under the hood” section covers, end-to-end against the real <code>EchoEnvironment</code>.</p> <h2 class="relative group"><a id="next-steps" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#next-steps"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Next Steps</span></h2> <ul data-svelte-h="svelte-52azt9"><li><strong>End-to-end training recipe</strong> — the <a href="wordle-grpo">Wordle GRPO tutorial</a> walks through a full GRPO training run with <code>environment_factory</code>. The wrapper-class shape is the same for an MCP-backed env; inside each tool method, build a <code>CallToolAction(tool_name=..., arguments={...})</code> instead of Wordle’s single-field <code>TextArenaAction(message=guess)</code>.</li> <li><strong>MCP lifecycle details</strong> — the <a href="../guides/mcp-environment-lifecycle">MCP Environment Lifecycle guide</a> covers <code>step()</code> vs <code>step_async()</code>, the <code>call_tool()</code> convenience path, and common debugging questions.</li> <li><strong>A richer MCP environment</strong> — <a href="https://github.com/huggingface/OpenEnv/tree/main/envs/finqa_env" rel="nofollow"><code>envs/finqa_env/</code></a> shows tool calls participating in episode progression, rewards, and terminal submission — not just a stateless echo.</li> <li><strong>Design rationale</strong> — <a href="https://github.com/huggingface/OpenEnv/blob/main/rfcs/003-mcp-support.md" rel="nofollow">RFC 003</a> explains why OpenEnv picked MCP as the agent boundary and how tool-calling and CodeAct styles share the same plumbing.</li> <li><strong>Serving tools to an external agent</strong> — the 
<code>/mcp</code> JSON-RPC endpoint is available alongside <code>/ws</code> on any MCP environment server. Point an MCP-compatible client at it for production inference without going through the step loop. This direct path bypasses reward computation, step counts, and episode termination, and it exposes only registered MCP tools — not <code>reset</code>, <code>step</code>, or <code>state</code>.</li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/openenv/blob/main/docs/source/tutorials/mcp-environment.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
// Inline bootstrap for this page: publishes the build's path configuration,
// loads the two client entry bundles, and starts the app on this script's
// parent element. The bare block scopes the `const` bindings below to this
// script instead of leaking them into the page's global lexical scope.
{
// Assigned without a declaration, so in a classic (non-module) script this
// creates a global. It carries the assets/base path prefix for this docs
// build and an empty `env` map.
// NOTE(review): presumably the imported bundles look this object up by this
// exact generated name — confirm before renaming or declaring it.
__sveltekit_1qwoa43 = {
assets: "/docs/openenv/pr_749/en",
base: "/docs/openenv/pr_749/en",
env: {}
};
// Must be read synchronously: `document.currentScript` is only set while this
// script is executing, not later inside the `.then` callback.
const element = document.currentScript.parentElement;
// Passed through to `kit.start` as `data`.
// NOTE(review): looks like one slot per entry in `node_ids` — confirm.
const data = [null,null];
// Fetch both entry modules in parallel; nothing starts until both resolve.
// No rejection handler is attached, so a failed import surfaces as an
// unhandled promise rejection.
Promise.all([
import("/docs/openenv/pr_749/en/_app/immutable/entry/start.85477f45.js"),
import("/docs/openenv/pr_749/en/_app/immutable/entry/app.51835dc5.js")
]).then(([kit, app]) => {
// Hand the app module, the target element, and the initial page state to the
// start entry point.
kit.start(app, element, {
// NOTE(review): presumably indices of the layout/page nodes in the build
// manifest — verify against the generated app bundle.
node_ids: [0, 61],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
62.6 kB
·
Xet hash:
696bbe14d296842c6d0c94fab467d1fd86f3043645088d0be6087d88db31e75e

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.