Buckets:
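| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;NeMo Gym Integration&quot;,&quot;local&quot;:&quot;nemo-gym-integration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Why NeMo Gym&quot;,&quot;local&quot;:&quot;why-nemo-gym&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Available Environments&quot;,&quot;local&quot;:&quot;available-environments&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Before You Start&quot;,&quot;local&quot;:&quot;before-you-start&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Install TRL and NeMo Gym&quot;,&quot;local&quot;:&quot;install-trl-and-nemo-gym&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Prepare a Dataset&quot;,&quot;local&quot;:&quot;prepare-a-dataset&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Dataset Format&quot;,&quot;local&quot;:&quot;dataset-format&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Interactive Training&quot;,&quot;local&quot;:&quot;interactive-training&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Set Up&quot;,&quot;local&quot;:&quot;set-up&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Run Training&quot;,&quot;local&quot;:&quot;run-training&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Multi-Node Training with Slurm&quot;,&quot;local&quot;:&quot;multi-node-training-with-slurm&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Multi-Environment Training&quot;,&quot;local&quot;:&quot;multi-environment-training&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Resources&quot;,&quot;local&quot;:&quot;resources&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"> | |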
| <link href="/docs/trl/pr_5607/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/entry/start.151d81bd.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/scheduler.7b731bd4.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/singletons.2cf51804.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/index.ac28c20f.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/paths.ba01f37d.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/entry/app.3d9a91c0.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/preload-helper.e1689b3a.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/index.cc268345.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/nodes/0.cd288160.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/nodes/35.9fed32cd.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.f0d99f98.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/CodeBlock.169a125f.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;NeMo Gym Integration&quot;,&quot;local&quot;:&quot;nemo-gym-integration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Why NeMo Gym&quot;,&quot;local&quot;:&quot;why-nemo-gym&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Available Environments&quot;,&quot;local&quot;:&quot;available-environments&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Before You Start&quot;,&quot;local&quot;:&quot;before-you-start&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Install TRL and NeMo Gym&quot;,&quot;local&quot;:&quot;install-trl-and-nemo-gym&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Prepare a Dataset&quot;,&quot;local&quot;:&quot;prepare-a-dataset&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Dataset Format&quot;,&quot;local&quot;:&quot;dataset-format&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Interactive Training&quot;,&quot;local&quot;:&quot;interactive-training&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Set Up&quot;,&quot;local&quot;:&quot;set-up&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Run Training&quot;,&quot;local&quot;:&quot;run-training&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Multi-Node Training with Slurm&quot;,&quot;local&quot;:&quot;multi-node-training-with-slurm&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Multi-Environment Training&quot;,&quot;local&quot;:&quot;multi-environment-training&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Resources&quot;,&quot;local&quot;:&quot;resources&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" 
xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="nemo-gym-integration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#nemo-gym-integration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 
0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>NeMo Gym Integration</span></h1> <p data-svelte-h="svelte-7af6pp">NVIDIA NeMo Gym is a library for building RL environments for large language models. This integration enables training models in NeMo Gym environments using TRL’s GRPOTrainer with vLLM server mode.</p> <p data-svelte-h="svelte-iz4kji">The integration supports multi-step and multi-turn rollouts, multi-environment training, and any NeMo Gym environment (thoroughly tested: workplace assistant, reasoning gym, MCQA, and math with judge).</p> <h2 class="relative group"><a id="why-nemo-gym" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#why-nemo-gym"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Why NeMo Gym</span></h2> <ul data-svelte-h="svelte-1yst2fh"><li><strong>Production-Ready Scale</strong>: Tested for frontier model training with diverse environments running in parallel across math, coding, tool use, reasoning, and more.</li> 
<li><strong>Multi-Verifier Training</strong>: Supports algorithmic verification, LLM-as-a-judge, and custom verification logic in a single training run.</li> <li><strong>Decoupled Architecture</strong>: Build agents and environments independently from the training loop—no RL framework expertise required.</li> <li><strong>OpenAI-Compatible API</strong>: All environments use the standardized OpenAI Responses API for seamless integration with vLLM, OpenAI models, and other endpoints.</li></ul> <h2 class="relative group"><a id="available-environments" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#available-environments"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Available Environments</span></h2> <p data-svelte-h="svelte-yszfde">NeMo Gym provides training-ready environments across multiple domains, including but not limited to:</p> <table data-svelte-h="svelte-13a2t9r"><thead><tr><th>Environment</th> <th>Domain</th> <th>Description</th></tr></thead> <tbody><tr><td>Workplace Assistant</td> <td>Agent</td> <td>Multi-step tool calling in common office scenarios (calendar, email, and more)</td></tr> <tr><td>Math with 
Judge</td> <td>Math</td> <td>Math problems with algorithmic or judge-based verification</td></tr> <tr><td>Code Gen</td> <td>Coding</td> <td>Competitive programming problems with code execution</td></tr> <tr><td>MCQA</td> <td>Knowledge</td> <td>Multiple-choice question answering</td></tr> <tr><td>Instruction Following</td> <td>Instruction Following</td> <td>IFEval/IFBench style tasks</td></tr> <tr><td>Reasoning Gym</td> <td>Multiple</td> <td>Single-step procedurally generated verifiable tasks across domains</td></tr></tbody></table> <p data-svelte-h="svelte-1ah28g6">For a complete list of available training environments, refer to the <a href="https://github.com/NVIDIA-NeMo/Gym#-available-resource-servers" rel="nofollow">NeMo Gym repository</a>.</p> <h2 class="relative group"><a id="before-you-start" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#before-you-start"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Before You Start</span></h2> <p data-svelte-h="svelte-j2hll8">Complete these one-time setup steps before running training.</p> <h3 class="relative group"><a id="install-trl-and-nemo-gym" class="header-link block pr-1.5 
text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#install-trl-and-nemo-gym"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Install TRL and NeMo Gym</span></h3> <ol><li><p data-svelte-h="svelte-1fllfb8"><strong>Install TRL with vLLM extras</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform 
-translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">cd</span> trl/ | |
| uv venv | |
| <span class="hljs-built_in">source</span> .venv/bin/activate | |
| uv <span class="hljs-built_in">sync</span> --extra vllm<!-- HTML_TAG_END --></pre></div></li> <li><p data-svelte-h="svelte-wroi5s"><strong>Install NeMo Gym</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># deactivate trl venv</span> | |
| deactivate | |
| git <span class="hljs-built_in">clone</span> https://github.com/NVIDIA-NeMo/Gym.git | |
| <span class="hljs-built_in">cd</span> Gym | |
| uv venv --python 3.12 | |
| <span class="hljs-built_in">source</span> .venv/bin/activate | |
| uv <span class="hljs-built_in">sync</span><!-- HTML_TAG_END --></pre></div></li></ol> <h3 class="relative group"><a id="prepare-a-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#prepare-a-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Prepare a Dataset</span></h3> <p data-svelte-h="svelte-xwoeod">Many NeMo Gym datasets used to train Nemotron models are available on Hugging Face. Use <code>ng_prepare_data</code> to download and prepare datasets. 
This command:</p> <ul data-svelte-h="svelte-1pj586x"><li>Downloads the dataset from Hugging Face</li> <li>Validates the data format</li> <li>Adds an <code>agent_ref</code> field to each example that tells NeMo Gym which agent server should handle that example</li></ul> <blockquote data-svelte-h="svelte-prptho"><p><strong>Note</strong>: <code>train_multi_environment.py</code> adds the <code>agent_ref</code> field when loading datasets, so this step is optional if datasets are created another way.</p></blockquote> <ol><li><p data-svelte-h="svelte-1ej3gnm"><strong>Set Hugging Face Token</strong></p> <p data-svelte-h="svelte-dolnl9">Create <code>env.yaml</code> in <code>Gym/</code> with your HF token:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-attr">hf_token:</span> <span 
class="hljs-string">&lt;your_hf_token&gt;</span><!-- HTML_TAG_END --></pre></div></li> <li><p data-svelte-h="svelte-1t97quy"><strong>Prepare Dataset</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Enter Gym and activate the venv</span> | |
| <span class="hljs-built_in">cd</span> Gym | |
| <span class="hljs-built_in">source</span> .venv/bin/activate | |
| <span class="hljs-comment"># Set config paths</span> | |
| config_paths=<span class="hljs-string">"responses_api_models/vllm_model/configs/vllm_model.yaml,\ | |
| resources_servers/workplace_assistant/configs/workplace_assistant.yaml"</span> | |
| <span class="hljs-comment"># Download data and prep for training</span> | |
| ng_prepare_data <span class="hljs-string">"+config_paths=[<span class="hljs-variable">${config_paths}</span>]"</span> \ | |
| +output_dirpath=data/workplace_assistant \ | |
| +mode=train_preparation \ | |
| +should_download=<span class="hljs-literal">true</span> \ | |
| +data_source=huggingface<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1a2biir">This creates <code>train.jsonl</code> and <code>validation.jsonl</code> files in <code>data/workplace_assistant/</code>.</p></li></ol> <p data-svelte-h="svelte-fceadt">To create a new environment, refer to the <a href="https://docs.nvidia.com/nemo/gym/latest/contribute/environments/new-environment.html" rel="nofollow">environment creation guide</a>. We suggest running an existing one first!</p> <h4 class="relative group"><a id="dataset-format" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dataset-format"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dataset Format</span></h4> <p data-svelte-h="svelte-1f9su50">NeMo Gym datasets are stored as JSONL. Each line contains a task with input messages, tool definitions, metadata such as ground truth for verification, and an agent server reference. The following example shows the workplace dataset structure. 
Metadata fields can differ between datasets, as long as the corresponding resources server uses the fields appropriately.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"responses_create_params"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"input"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span> | |
| <span class="hljs-punctuation">{</span><span class="hljs-attr">"role"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"system"</span><span class="hljs-punctuation">,</span> <span class="hljs-attr">"content"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"..."</span><span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-punctuation">{</span><span class="hljs-attr">"role"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"user"</span><span class="hljs-punctuation">,</span> <span class="hljs-attr">"content"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"Move any of jinsoo's tasks that are in review to completed"</span><span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"tools"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span>...<span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"parallel_tool_calls"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">false</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"temperature"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"ground_truth"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span> | |
| <span class="hljs-punctuation">{</span><span class="hljs-attr">"name"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"project_management_update_task"</span><span class="hljs-punctuation">,</span> <span class="hljs-attr">"arguments"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"{...}"</span><span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| ... | |
| <span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"category"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"workbench_project_management"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"environment_name"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"workbench"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"agent_ref"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"responses_api_agents"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"name"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"workplace_assistant_simple_agent"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="interactive-training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#interactive-training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Interactive Training</span></h2> <p data-svelte-h="svelte-doprer">For development and testing on a single node.</p> <h3 class="relative group"><a id="set-up" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#set-up"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 
0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Set Up</span></h3> <ol><li><p data-svelte-h="svelte-1jza2zv"><strong>Update Environment Config</strong></p> <p data-svelte-h="svelte-5yzoe6">Update <code>env.yaml</code> in <code>Gym/</code> to include model information:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-attr">policy_base_url:</span> <span class="hljs-string">http://127.0.0.1:8000/v1</span> | |
| <span class="hljs-attr">policy_api_key:</span> <span class="hljs-string">EMPTY</span> | |
| <span class="hljs-attr">policy_model_name:</span> <span class="hljs-string">Qwen/Qwen2.5-1.5B-Instruct</span> | |
| <span class="hljs-attr">hf_token:</span> <span class="hljs-string">...</span><!-- HTML_TAG_END --></pre></div></li> <li data-svelte-h="svelte-18xbdkv"><p><strong>Update Training Config</strong></p> <p>Update <code>examples/scripts/nemo_gym/config.yaml</code> to point to the dataset generated above, and any other optional modifications.</p></li></ol> <h3 class="relative group"><a id="run-training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#run-training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Run Training</span></h3> <p data-svelte-h="svelte-1ji7mz">The following steps run in 3 terminals. 
They can also be run as background processes or inside tmux.</p> <ol><li><p data-svelte-h="svelte-1i1pb51"><strong>Start NeMo Gym Servers</strong> (Terminal 1)</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">cd</span> Gym/ | |
| <span class="hljs-built_in">source</span> .venv/bin/activate | |
| config_paths=<span class="hljs-string">"resources_servers/workplace_assistant/configs/workplace_assistant.yaml,\ | |
| responses_api_models/vllm_model/configs/vllm_model_for_training.yaml"</span> | |
| ng_run <span class="hljs-string">"+config_paths=[<span class="hljs-variable">${config_paths}</span>]"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1nv1hcv">This starts:</p> <ul data-svelte-h="svelte-1dlzpa1"><li><strong>Agent server</strong>: Orchestrates rollouts using resource servers and model servers</li> <li><strong>Resources server</strong>: Supports environment logic such as state-management, tool implementations, and task verification</li> <li><strong>Model server</strong>: Adapts vLLM server requests to support NeMo Gym agents and on-policy RL training while ensuring OpenAI API compatibility</li> <li><strong>Head server</strong>: Manages servers used in training enabling their discovery</li></ul></li> <li><p data-svelte-h="svelte-ntp91"><strong>Start TRL vLLM Server on GPU 0</strong> (Terminal 2)</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> 
Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">cd</span> trl/ | |
| <span class="hljs-built_in">source</span> .venv/bin/activate | |
| CUDA_VISIBLE_DEVICES=0 trl vllm-serve \ | |
| --model Qwen/Qwen2.5-1.5B-Instruct \ | |
| --max-model-len 16384 \ | |
| --host 0.0.0.0 \ | |
| --port 8000<!-- HTML_TAG_END --></pre></div></li> <li><p data-svelte-h="svelte-1d1ejwe"><strong>Run Training on GPU 1</strong> (Terminal 3)</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">source</span> trl/.venv/bin/activate | |
| <span class="hljs-built_in">cd</span> trl/examples/scripts/nemo_gym | |
| <span class="hljs-built_in">export</span> WANDB_API_KEY=... | |
| uv add omegaconf | |
| CUDA_VISIBLE_DEVICES=1 python train_multi_environment.py --config config.yaml<!-- HTML_TAG_END --></pre></div></li></ol> <h2 class="relative group"><a id="multi-node-training-with-slurm" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#multi-node-training-with-slurm"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Multi-Node Training with Slurm</span></h2> <p data-svelte-h="svelte-h26aax">An example five-node training script is provided in <code>submit.sh</code>. 
Nodes one through four run the training algorithm, while node five runs vLLM inference for NeMo Gym agent rollouts.</p> <ol><li data-svelte-h="svelte-skqx2a"><p><strong>Configure the Script</strong></p> <p>Update <code>submit.sh</code> with your Slurm account, partition, paths to your project directory, and updated training configs.</p></li> <li><p data-svelte-h="svelte-13q6911"><strong>Submit the Job</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->sbatch submit.sh<!-- HTML_TAG_END --></pre></div></li> <li><p data-svelte-h="svelte-i5tzjh"><strong>Monitor Training</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 
text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">tail</span> -f logs/&lt;job_id&gt;/*<!-- HTML_TAG_END --></pre></div></li></ol> <blockquote data-svelte-h="svelte-14ze16n"><p><strong>Tip</strong>: Set up wandb logging for detailed training metrics. 
For more details on TRL’s vLLM integration, refer to the vLLM integration page.</p></blockquote> <h2 class="relative group"><a id="multi-environment-training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#multi-environment-training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Multi-Environment Training</span></h2> <p data-svelte-h="svelte-b0bbwm">Train on multiple NeMo Gym environments simultaneously. This allows learning diverse capabilities, such as tool calling and math reasoning, in a single training run.</p> <ol><li><p data-svelte-h="svelte-1upr22m"><strong>Prepare Individual Datasets</strong></p> <p data-svelte-h="svelte-14v8vct">Prepare datasets for each environment. The workplace assistant dataset was prepared above. 
Now let's create a dataset for the mini sudoku environment implemented by the reasoning gym resources server in NeMo Gym:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">cd</span> Gym | |
| <span class="hljs-built_in">source</span> .venv/bin/activate | |
| uv add reasoning-gym | |
| <span class="hljs-built_in">cd</span> resources_servers/reasoning_gym | |
| python scripts/create_dataset.py \ | |
| --task mini_sudoku \ | |
| --size 2000 \ | |
| --seed 42 \ | |
| --output data/reasoning_gym/train_mini_sudoku.jsonl | |
| python scripts/create_dataset.py \ | |
| --task mini_sudoku \ | |
| --size 50 \ | |
| --seed 24 \ | |
| --output data/reasoning_gym/val_mini_sudoku.jsonl<!-- HTML_TAG_END --></pre></div></li> <li><p data-svelte-h="svelte-1w7geiq"><strong>Create Combined Dataset</strong></p> <p data-svelte-h="svelte-1atlcqa">Combine datasets into a single file with tasks from both environments:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">cat</span> data/workplace_assistant/train_workplace.jsonl data/reasoning_gym/train_mini_sudoku.jsonl | <span class="hljs-built_in">shuf</span> > train_multi_env.jsonl<!-- HTML_TAG_END --></pre></div> <blockquote data-svelte-h="svelte-3gj9yl"><p><strong>Tip</strong>: Ensure datasets are the same size before shuffling for an even blend of tasks. 
Repeat for the validation dataset.</p></blockquote></li> <li><p data-svelte-h="svelte-1r6uj0q"><strong>Update Training Config</strong></p> <p data-svelte-h="svelte-9zp78s">Update the config to point to the combined dataset:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-attr">model_name:</span> <span class="hljs-string">"Qwen/Qwen3-4B-Instruct-2507"</span> | |
| <span class="hljs-attr">dataset_path:</span> <span class="hljs-string">"/path/to/data/train_multi_env.jsonl"</span> | |
| <span class="hljs-attr">eval_dataset_path:</span> <span class="hljs-string">"/path/to/data/val_multi_env.jsonl"</span> | |
| <span class="hljs-attr">task:</span> <span class="hljs-string">"workplace-sudoku"</span> <span class="hljs-comment"># used in wandb run name</span> | |
| <span class="hljs-attr">output_dir:</span> <span class="hljs-string">"outputs/nemo_gym_multi_env"</span> | |
| <span class="hljs-comment"># ... rest of config same</span><!-- HTML_TAG_END --></pre></div></li> <li><p data-svelte-h="svelte-1jr36l7"><strong>Update ng_run</strong></p> <p data-svelte-h="svelte-1l0rsb0">Whether training interactively or via Slurm, update the <code>ng_run</code> command to include config files from each resources server:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">cd</span> Gym | |
| <span class="hljs-built_in">source</span> .venv/bin/activate | |
| config_paths=<span class="hljs-string">"responses_api_models/vllm_model/configs/vllm_model.yaml,\ | |
| resources_servers/workplace_assistant/configs/workplace_assistant.yaml,\ | |
| resources_servers/reasoning_gym/configs/reasoning_gym.yaml"</span> | |
| ng_run <span class="hljs-string">"+config_paths=[<span class="hljs-variable">${config_paths}</span>]"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1e41fqn">This starts servers for both environments. The training script automatically routes each example to the correct agent server based on its <code>agent_ref</code> field.</p></li> <li data-svelte-h="svelte-21uz4m"><p><strong>Run Training</strong></p> <p>Update the Slurm submission script to use the new training config and both <code>ng_run</code> resources server configs, then submit the job as before.</p> <p>The training script reads <code>agent_ref</code> from each example’s metadata, routes requests to the correct NeMo Gym agent server, and handles different agents and environments in the same batch.</p></li></ol> <h2 class="relative group"><a id="resources" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#resources"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Resources</span></h2> <ul data-svelte-h="svelte-3m8o0j"><li><a href="https://github.com/NVIDIA-NeMo/Gym" rel="nofollow">NeMo Gym GitHub</a></li> <li><a 
href="https://docs.nvidia.com/nemo/gym/latest/" rel="nofollow">NeMo Gym Documentation</a></li> <li><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/nemo_gym/train_multi_environment.py" rel="nofollow">Training Script</a></li> <li><a href="grpo_trainer">TRL GRPO Trainer</a></li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/trl/blob/main/docs/source/nemo_gym.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_1hqaf25 = { | |
| assets: "/docs/trl/pr_5607/en", | |
| base: "/docs/trl/pr_5607/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/trl/pr_5607/en/_app/immutable/entry/start.151d81bd.js"), | |
| import("/docs/trl/pr_5607/en/_app/immutable/entry/app.3d9a91c0.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 35], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 58.5 kB
- Xet hash:
- cc583d29f37bfcdacec590ee8e311da7aff8e616f8d69bd59fd4212521139687
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.