Buckets:
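| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Rubrics: Composable Reward Computation&quot;,&quot;local&quot;:&quot;rubrics-composable-reward-computation&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Why Rubrics?&quot;,&quot;local&quot;:&quot;why-rubrics&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Your First Rubric&quot;,&quot;local&quot;:&quot;your-first-rubric&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Optional hooks for observability&quot;,&quot;local&quot;:&quot;optional-hooks-for-observability&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;State dict&quot;,&quot;local&quot;:&quot;state-dict&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Composing Rubrics&quot;,&quot;local&quot;:&quot;composing-rubrics&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;WeightedSum — multi-criteria averaging&quot;,&quot;local&quot;:&quot;weightedsum--multi-criteria-averaging&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Gate — hard constraints&quot;,&quot;local&quot;:&quot;gate--hard-constraints&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Sequential — fail-fast pipeline&quot;,&quot;local&quot;:&quot;sequential--fail-fast-pipeline&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;RubricList and RubricDict — dynamic dispatch&quot;,&quot;local&quot;:&quot;rubriclist-and-rubricdict--dynamic-dispatch&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Introspection: named_rubrics()&quot;,&quot;local&quot;:&quot;introspection-namedrubrics&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;LLM-as-judge: LLMJudge&quot;,&quot;local&quot;:&quot;llm-as-judge-llmjudge&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Delayed Rewards: TrajectoryRubric&quot;,&quot;local&quot;:&quot;delayed-rewards-trajectoryrubric&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Wiring a Rubric into an Environment&quot;,&quot;local&quot;:&quot;wiring-a-rubric-into-an-environment&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Inspecting rewards from training code&quot;,&quot;local&quot;:&quot;inspecting-rewards-from-training-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Where the reward ends up during training&quot;,&quot;local&quot;:&quot;where-the-reward-ends-up-during-training&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Using Rubrics for Evaluation&quot;,&quot;local&quot;:&quot;using-rubrics-for-evaluation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Next Steps&quot;,&quot;local&quot;:&quot;next-steps&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"> | |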
| <link href="/docs/openenv/pr_749/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet"> | |
| <link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/entry/start.85477f45.js"> | |
| <link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/scheduler.2b22cead.js"> | |
| <link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/singletons.63566282.js"> | |
| <link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/paths.dd876c7b.js"> | |
| <link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/entry/app.51835dc5.js"> | |
| <link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/preload-helper.0820fbc7.js"> | |
| <link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/index.1a0e8013.js"> | |
| <link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/nodes/0.167255c0.js"> | |
| <link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/nodes/64.34c0d928.js"> | |
| <link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/Heading.c0d3f116.js"> | |
| <link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.21bcf336.js"> | |
| <link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/CodeBlock.c8d73295.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Rubrics: Composable Reward Computation&quot;,&quot;local&quot;:&quot;rubrics-composable-reward-computation&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Why Rubrics?&quot;,&quot;local&quot;:&quot;why-rubrics&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Your First Rubric&quot;,&quot;local&quot;:&quot;your-first-rubric&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Optional hooks for observability&quot;,&quot;local&quot;:&quot;optional-hooks-for-observability&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;State dict&quot;,&quot;local&quot;:&quot;state-dict&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Composing Rubrics&quot;,&quot;local&quot;:&quot;composing-rubrics&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;WeightedSum — multi-criteria averaging&quot;,&quot;local&quot;:&quot;weightedsum--multi-criteria-averaging&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Gate — hard constraints&quot;,&quot;local&quot;:&quot;gate--hard-constraints&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Sequential — fail-fast pipeline&quot;,&quot;local&quot;:&quot;sequential--fail-fast-pipeline&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;RubricList and RubricDict — dynamic dispatch&quot;,&quot;local&quot;:&quot;rubriclist-and-rubricdict--dynamic-dispatch&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Introspection: named_rubrics()&quot;,&quot;local&quot;:&quot;introspection-namedrubrics&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;LLM-as-judge: LLMJudge&quot;,&quot;local&quot;:&quot;llm-as-judge-llmjudge&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Delayed Rewards: TrajectoryRubric&quot;,&quot;local&quot;:&quot;delayed-rewards-trajectoryrubric&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Wiring a Rubric into an Environment&quot;,&quot;local&quot;:&quot;wiring-a-rubric-into-an-environment&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Inspecting rewards from training code&quot;,&quot;local&quot;:&quot;inspecting-rewards-from-training-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Where the reward ends up during training&quot;,&quot;local&quot;:&quot;where-the-reward-ends-up-during-training&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Using Rubrics for Evaluation&quot;,&quot;local&quot;:&quot;using-rubrics-for-evaluation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Next Steps&quot;,&quot;local&quot;:&quot;next-steps&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END -->
<p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> 
</div> <h1 class="relative group"><a id="rubrics-composable-reward-computation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#rubrics-composable-reward-computation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Rubrics: Composable Reward Computation</span></h1> <p data-svelte-h="svelte-1laso4n"><a href="https://colab.research.google.com/github/huggingface/OpenEnv/blob/main/examples/rubrics.ipynb" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a></p> <p data-svelte-h="svelte-1xlkudf">Rubrics are OpenEnv’s first-class abstraction for computing rewards. They let you build multi-criteria reward functions from small reusable pieces. 
This tutorial walks through the API end-to-end, from a one-line rubric to a full environment that introspects its reward signal at training time.</p> <h2 class="relative group"><a id="why-rubrics" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#why-rubrics"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Why Rubrics?</span></h2> <p data-svelte-h="svelte-f9hwlt">Before rubrics, each environment rolled its own reward logic. Three pain points surfaced repeatedly:</p> <ol data-svelte-h="svelte-1qwlix3"><li><strong>No standard interface</strong>. Every environment author invented their own <code>compute_reward(...)</code> shape, so reusing a reward component across environments meant copy-pasting.</li> <li><strong>Multi-criteria evaluation was ad-hoc</strong>. “Code must compile, tests must pass, style matters a bit” becomes a tangle of nested <code>if</code>/<code>else</code> and hand-rolled weighted averages. There was no consistent way to ask <em>which</em> criterion caused a low reward.</li> <li><strong>LLM judges and sandboxed checks are slow</strong>. 
Without a framework-level concept of “reward component”, batch evaluation couldn’t parallelise the I/O-bound pieces.</li></ol> <p data-svelte-h="svelte-xru7q2">The Rubric API is small: you subclass, implement <code>forward</code>, and the framework gives you composition, introspection, and parallel evaluation for free.</p> <h2 class="relative group"><a id="your-first-rubric" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#your-first-rubric"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Your First Rubric</span></h2> <p data-svelte-h="svelte-olozwl">A rubric is a callable with a <code>forward(action, observation) -> float</code> method.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> openenv.core.rubrics <span class="hljs-keyword">import</span> Rubric | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">MessageLengthRubric</span>(<span class="hljs-title class_ inherited__">Rubric</span>): | |
| <span class="hljs-string">"""Reward 1.0 if the message is 5–20 characters long, else 0.0."""</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, action, observation</span>) -> <span class="hljs-built_in">float</span>: | |
| length = <span class="hljs-built_in">len</span>(action.message) | |
| <span class="hljs-keyword">return</span> <span class="hljs-number">1.0</span> <span class="hljs-keyword">if</span> <span class="hljs-number">5</span> &lt;= length &lt;= <span class="hljs-number">20</span> <span class="hljs-keyword">else</span> <span class="hljs-number">0.0</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-aunoh0">That’s the whole contract. Instantiate it and call it:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START -->rubric = MessageLengthRubric() | |
| score = rubric(action, observation) <span class="hljs-comment"># runs forward + hooks</span> | |
| <span class="hljs-built_in">print</span>(rubric.last_score) <span class="hljs-comment"># latest score is cached on the rubric</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1jq8fpu"><code>Rubric.__call__</code> runs pre- and post-hooks around your <code>forward</code>, caches the result on <code>self.last_score</code>, and supports async <code>forward</code> implementations transparently. (If you’ve used <code>torch.nn.Module</code>, the subclass-and-implement-<code>forward</code> pattern will feel familiar — children assigned as instance attributes auto-register with the parent.)</p> <h3 class="relative group"><a id="optional-hooks-for-observability" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#optional-hooks-for-observability"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Optional hooks for observability</span></h3> <p data-svelte-h="svelte-17klfxh">You can attach hooks without subclassing — useful for logging every component’s score without polluting <code>forward</code>. 
Post-hooks run after <code>forward</code> completes and see the returned score; pre-hooks run before <code>forward</code> and are handy for input validation or instrumentation. When a rubric is async, hooks are awaited transparently.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">log_score</span>(<span class="hljs-params">rubric, action, obs, result</span>): | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"<span class="hljs-subst">{<span class="hljs-built_in">type</span>(rubric).__name__}</span>: <span class="hljs-subst">{result:<span class="hljs-number">.2</span>f}</span>"</span>) | |
| rubric.register_forward_hook(log_score) <span class="hljs-comment"># fires after forward()</span> | |
| rubric.register_forward_pre_hook(<span class="hljs-keyword">lambda</span> r, a, o: <span class="hljs-literal">None</span>) <span class="hljs-comment"># fires before forward()</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="state-dict" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#state-dict"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>State dict</span></h3> <p data-svelte-h="svelte-1bw6joe">Rubrics implement <code>state_dict()</code> / <code>load_state_dict(state)</code> so their configuration (thresholds, prompt templates, etc.) can be serialised alongside model checkpoints. 
The default implementations return an empty dict — override them when your rubric has tunable parameters.</p> <h2 class="relative group"><a id="composing-rubrics" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#composing-rubrics"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Composing Rubrics</span></h2> <p data-svelte-h="svelte-klg2vy">The real power shows up when you stack rubrics. 
<code>openenv.core.rubrics</code> ships with four containers.</p> <h3 class="relative group"><a id="weightedsum--multi-criteria-averaging" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#weightedsum--multi-criteria-averaging"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>WeightedSum — multi-criteria averaging</span></h3> <p data-svelte-h="svelte-lobvd6">Use when several independent criteria each contribute to the final score.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect 
fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> openenv.core.rubrics <span class="hljs-keyword">import</span> WeightedSum | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">TestsPassRubric</span>(<span class="hljs-title class_ inherited__">Rubric</span>): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, action, observation</span>) -> <span class="hljs-built_in">float</span>: | |
| <span class="hljs-keyword">return</span> observation.tests_passed / <span class="hljs-built_in">max</span>(observation.tests_total, <span class="hljs-number">1</span>) | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">StyleRubric</span>(<span class="hljs-title class_ inherited__">Rubric</span>): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, action, observation</span>) -> <span class="hljs-built_in">float</span>: | |
| <span class="hljs-keyword">return</span> <span class="hljs-number">1.0</span> <span class="hljs-keyword">if</span> action.code.count(<span class="hljs-string">"\n\n\n"</span>) == <span class="hljs-number">0</span> <span class="hljs-keyword">else</span> <span class="hljs-number">0.6</span> | |
| reward = WeightedSum( | |
| [TestsPassRubric(), StyleRubric()], | |
| weights=[<span class="hljs-number">0.7</span>, <span class="hljs-number">0.3</span>], | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-mhec84">Weights must sum to <code>1.0</code>. <code>WeightedSum</code> evaluates its children with <code>asyncio.gather</code> when any of them is async, so an LLM-backed child does not block the synchronous ones.</p> <h3 class="relative group"><a id="gate--hard-constraints" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#gate--hard-constraints"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Gate — hard constraints</span></h3> <p data-svelte-h="svelte-bdmrwx">Use when a child score below a threshold should short-circuit the reward to zero.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> openenv.core.rubrics <span class="hljs-keyword">import</span> Gate | |
| reward = Gate(TestsPassRubric(), threshold=<span class="hljs-number">0.5</span>) <span class="hljs-comment"># 0.0 if fewer than half the tests pass</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1yo4yv7"><code>Gate</code> returns <code>0.0</code> when the child score is below the threshold, and passes the child score through unchanged otherwise.</p> <h3 class="relative group"><a id="sequential--fail-fast-pipeline" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#sequential--fail-fast-pipeline"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Sequential — fail-fast pipeline</span></h3> <p data-svelte-h="svelte-1cdt1bs">Use when criteria are ordered: a later criterion only matters if the earlier ones passed. 
<code>Sequential</code> returns <code>0.0</code> the moment any child returns <code>0.0</code> and does not evaluate the remaining children — great for gating expensive checks like sandboxed test runs or LLM calls.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> openenv.core.rubrics <span class="hljs-keyword">import</span> Sequential | |
| reward = Sequential( | |
| Gate(CompilesRubric(), threshold=<span class="hljs-number">1.0</span>), <span class="hljs-comment"># skip everything if it doesn't compile</span> | |
| Gate(TestsPassRubric(), threshold=<span class="hljs-number">0.5</span>), <span class="hljs-comment"># and skip style if tests are failing</span> | |
| WeightedSum([TestsPassRubric(), StyleRubric()], [<span class="hljs-number">0.7</span>, <span class="hljs-number">0.3</span>]), | |
| )<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="rubriclist-and-rubricdict--dynamic-dispatch" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#rubriclist-and-rubricdict--dynamic-dispatch"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>RubricList and RubricDict — dynamic dispatch</span></h3> <p data-svelte-h="svelte-5pycgi">When the right rubric depends on the current observation (e.g. 
one rubric per game in a multi-game environment), wrap the options in a <code>RubricList</code> or <code>RubricDict</code> and dispatch in your parent rubric’s <code>forward</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> openenv.core.rubrics <span class="hljs-keyword">import</span> Rubric, RubricDict | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">MultiGameRubric</span>(<span class="hljs-title class_ inherited__">Rubric</span>): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>): | |
| <span class="hljs-built_in">super</span>().__init__() | |
| self.games = RubricDict({ | |
| <span class="hljs-string">"pong"</span>: PongRubric(), | |
| <span class="hljs-string">"breakout"</span>: BreakoutRubric(), | |
| }) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, action, observation</span>) -> <span class="hljs-built_in">float</span>: | |
| <span class="hljs-keyword">return</span> self.games[observation.game_id](action, observation)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-i67p6v"><code>RubricList</code> and <code>RubricDict</code> do not aggregate on their own — calling them directly raises. Their job is auto-registration (so their children show up in <code>named_rubrics()</code>) and indexed access. Reach for them when the parent rubric needs to pick a child <em>at runtime</em> based on the observation — if the set of children is fixed, plain attributes are simpler.</p> <h3 class="relative group"><a id="introspection-namedrubrics" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#introspection-namedrubrics"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Introspection: named_rubrics()</span></h3> <p data-svelte-h="svelte-id7406">Assigning a child rubric as an attribute auto-registers it with the parent. 
Training code can then walk the tree:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START -->composite = WeightedSum( | |
| [Gate(CompilesRubric(), <span class="hljs-number">1.0</span>), TestsPassRubric(), StyleRubric()], | |
| [<span class="hljs-number">0.2</span>, <span class="hljs-number">0.5</span>, <span class="hljs-number">0.3</span>], | |
| ) | |
| <span class="hljs-keyword">for</span> name, child <span class="hljs-keyword">in</span> composite.named_rubrics(): | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"<span class="hljs-subst">{name:30s}</span> last_score=<span class="hljs-subst">{child.last_score}</span>"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1lhf4ru">After running the composite once, every component’s most recent score is cached on <code>last_score</code> — no manual bookkeeping.</p> <h2 class="relative group"><a id="llm-as-judge-llmjudge" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#llm-as-judge-llmjudge"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>LLM-as-judge: LLMJudge</span></h2> <p data-svelte-h="svelte-jvs3uj">When a criterion is too subjective for a handwritten heuristic (“is this argument persuasive?”, “is this explanation clear?”), use an LLM as the judge. <code>LLMJudge</code> wraps an <code>LLMClient</code> with a prompt template and a score extractor.</p> <p data-svelte-h="svelte-156gl6o">Any OpenAI-compatible endpoint works: hosted OpenAI / Anthropic, or open-weight models served through vLLM, Ollama, Hugging Face Inference Providers, etc. 
Pick a client and hand it to <code>LLMJudge</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> os | |
| <span class="hljs-keyword">from</span> openenv.core.llm_client <span class="hljs-keyword">import</span> OpenAIClient, create_llm_client | |
| <span class="hljs-keyword">from</span> openenv.core.rubrics <span class="hljs-keyword">import</span> LLMJudge | |
| <span class="hljs-comment"># Option 1 — hosted OpenAI (the factory also supports "anthropic").</span> | |
| client = create_llm_client( | |
| <span class="hljs-string">"openai"</span>, | |
| model=<span class="hljs-string">"gpt-4.1-mini"</span>, | |
| api_key=os.environ[<span class="hljs-string">"OPENAI_API_KEY"</span>], | |
| ) | |
| <span class="hljs-comment"># Option 2 — open-weight model served via an OpenAI-compatible endpoint</span> | |
| <span class="hljs-comment"># (vLLM, Ollama, Hugging Face Inference Providers, …). Point OpenAIClient</span> | |
| <span class="hljs-comment"># at the base URL and the model id the server exposes. `api_key` is optional</span> | |
| <span class="hljs-comment"># and defaults to "not-needed" for local endpoints.</span> | |
| client = OpenAIClient( | |
| endpoint=<span class="hljs-string">"http://localhost"</span>, | |
| port=<span class="hljs-number">8000</span>, | |
| model=<span class="hljs-string">"Qwen/Qwen3-1.7B"</span>, | |
| ) | |
| clarity_judge = LLMJudge( | |
| client=client, | |
| prompt_template=( | |
| <span class="hljs-string">"Rate the clarity of this explanation on a scale from 0 to 1. "</span> | |
| <span class="hljs-string">"Reply with the number only.\n\n"</span> | |
| <span class="hljs-string">"Explanation:\n{action}\n"</span> | |
| ), | |
| score_pattern=<span class="hljs-string">r"(\d+(?:\.\d+)?)"</span>, | |
| normalize=<span class="hljs-literal">True</span>, <span class="hljs-comment"># clamps extracted score to [0, 1]</span> | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1656tou"><code>LLMJudge.forward</code> is async. When you put it inside <code>WeightedSum</code> or <code>Sequential</code>, the container awaits it transparently. A few caveats worth stating up front:</p> <ul data-svelte-h="svelte-1hepnsr"><li><strong>Cost and latency</strong> scale with the number of episodes and the number of rubric calls per step. <code>Sequential</code> + <code>Gate</code> earlier in the pipeline is the usual answer.</li> <li><strong>Determinism</strong> is not free. Cache scores when you can, and consider temperature 0 for repeatable eval runs.</li> <li><strong>API keys</strong> belong in environment variables (<code>OPENAI_API_KEY</code>, <code>ANTHROPIC_API_KEY</code>, …), not in code that ships to the Hub.</li></ul> <h2 class="relative group"><a id="delayed-rewards-trajectoryrubric" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#delayed-rewards-trajectoryrubric"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Delayed Rewards: TrajectoryRubric</span></h2> <p data-svelte-h="svelte-etza88">Some signals only materialise at the end of an 
episode — chess win/loss, unit-test suite success, a goal reached after many steps. <code>TrajectoryRubric</code> accumulates <code>(action, observation)</code> pairs internally and only invokes your scoring logic on the terminal observation.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> openenv.core.rubrics <span class="hljs-keyword">import</span> TrajectoryRubric | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">WinLossRubric</span>(<span class="hljs-title class_ inherited__">TrajectoryRubric</span>): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">score_trajectory</span>(<span class="hljs-params">self, trajectory</span>) -> <span class="hljs-built_in">float</span>: | |
| _, final_obs = trajectory[-<span class="hljs-number">1</span>] | |
| <span class="hljs-keyword">return</span> final_obs.reward <span class="hljs-comment"># +1 win, -1 loss, 0 draw</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">compute_step_rewards</span>(<span class="hljs-params">self</span>): | |
| <span class="hljs-comment"># Credit assignment: distribute the final score across steps however you like.</span> | |
| final = self.score_trajectory(self._trajectory) | |
| <span class="hljs-keyword">return</span> [final] * <span class="hljs-built_in">len</span>(self._trajectory)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-at62ia"><code>forward(action, obs)</code> returns <code>intermediate_reward</code> (default <code>0.0</code>) until <code>observation.done</code> is <code>True</code>, then calls <code>score_trajectory</code>. After the episode ends, call <code>rubric.compute_step_rewards()</code> to get one reward per step — same length as the trajectory. This is the hook for credit assignment: training code feeds these per-step rewards back into advantage estimation, return-to-go, or whatever your optimizer expects. <code>ExponentialDiscountingTrajectoryRubric</code> precomputes <code>gamma^(T-1-t) * final_score</code> for you; override <code>compute_step_rewards</code> in your subclass if you want a different strategy (all-to-last, equal split, task-specific shaping).</p> <blockquote class="caution" data-svelte-h="svelte-1dmzgc1"><p>If <code>observation.done</code> never becomes <code>True</code>, <code>score_trajectory</code> is never called and the trajectory grows unbounded in memory. 
Make sure <code>step</code> flips <code>done</code> on every terminal transition, and call <code>self._reset_rubric()</code> in <code>Environment.reset</code> so trajectories do not leak across episodes.</p></blockquote> <p data-svelte-h="svelte-yf2if4">For the common exponentially-discounted case, subclass <code>ExponentialDiscountingTrajectoryRubric</code> instead and only implement <code>score_trajectory</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> openenv.core.rubrics <span class="hljs-keyword">import</span> ExponentialDiscountingTrajectoryRubric | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">ChessOutcomeRubric</span>(<span class="hljs-title class_ inherited__">ExponentialDiscountingTrajectoryRubric</span>): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">score_trajectory</span>(<span class="hljs-params">self, trajectory</span>) -> <span class="hljs-built_in">float</span>: | |
| _, final_obs = trajectory[-<span class="hljs-number">1</span>] | |
| <span class="hljs-keyword">return</span> final_obs.reward <span class="hljs-comment"># already +1 / 0 / -1 from the engine</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1egxi4w">This is exactly the pattern the built-in <code>envs/chess_env/</code> uses — see <code>envs/chess_env/server/rubrics.py</code> for the complete real-world example.</p> <blockquote class="caution" data-svelte-h="svelte-14pt5q7"><p>The <code>TrajectoryRubric</code> keeps the trajectory in CPU memory. If your observation carries GPU tensors (images, embeddings), detach and move them to CPU before returning from <code>step()</code> — otherwise the trajectory holds onto GPU memory across the whole episode.</p></blockquote> <h2 class="relative group"><a id="wiring-a-rubric-into-an-environment" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#wiring-a-rubric-into-an-environment"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Wiring a Rubric into an Environment</span></h2> <p data-svelte-h="svelte-1wdgjtw">Rubrics are <strong>server-side</strong>. 
Each environment declares its rubric in <code>__init__</code>, and <code>step</code> runs it via the <code>_apply_rubric</code> helper. The base <code>Environment</code> class accepts the rubric through its constructor and stores it as <code>self.rubric</code>.</p> <p data-svelte-h="svelte-d7nldd">Here is a complete minimal environment that composes a <code>Sequential</code> gate-then-<code>WeightedSum</code> pipeline and exposes the reward through its observation:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> openenv.core.env_server.interfaces <span class="hljs-keyword">import</span> Environment | |
| <span class="hljs-keyword">from</span> openenv.core.env_server.types <span class="hljs-keyword">import</span> Action, Observation, State | |
| <span class="hljs-keyword">from</span> openenv.core.rubrics <span class="hljs-keyword">import</span> Gate, Rubric, Sequential, WeightedSum | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">CodeAction</span>(<span class="hljs-title class_ inherited__">Action</span>): | |
| code: <span class="hljs-built_in">str</span> | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">CodeObservation</span>(<span class="hljs-title class_ inherited__">Observation</span>): | |
| compiles: <span class="hljs-built_in">bool</span> = <span class="hljs-literal">False</span> | |
| tests_passed: <span class="hljs-built_in">int</span> = <span class="hljs-number">0</span> | |
| tests_total: <span class="hljs-built_in">int</span> = <span class="hljs-number">0</span> | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">CodeState</span>(<span class="hljs-title class_ inherited__">State</span>): | |
| attempts: <span class="hljs-built_in">int</span> = <span class="hljs-number">0</span> | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">CompilesRubric</span>(<span class="hljs-title class_ inherited__">Rubric</span>): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, action, observation</span>) -> <span class="hljs-built_in">float</span>: | |
| <span class="hljs-keyword">return</span> <span class="hljs-number">1.0</span> <span class="hljs-keyword">if</span> observation.compiles <span class="hljs-keyword">else</span> <span class="hljs-number">0.0</span> | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">TestsPassRubric</span>(<span class="hljs-title class_ inherited__">Rubric</span>): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, action, observation</span>) -> <span class="hljs-built_in">float</span>: | |
| <span class="hljs-keyword">if</span> observation.tests_total == <span class="hljs-number">0</span>: | |
| <span class="hljs-keyword">return</span> <span class="hljs-number">0.0</span> | |
| <span class="hljs-keyword">return</span> observation.tests_passed / observation.tests_total | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">StyleRubric</span>(<span class="hljs-title class_ inherited__">Rubric</span>): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, action, observation</span>) -> <span class="hljs-built_in">float</span>: | |
| <span class="hljs-keyword">return</span> <span class="hljs-number">1.0</span> <span class="hljs-keyword">if</span> action.code.count(<span class="hljs-string">"\n\n\n"</span>) == <span class="hljs-number">0</span> <span class="hljs-keyword">else</span> <span class="hljs-number">0.6</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">build_code_rubric</span>() -> Rubric: | |
| <span class="hljs-keyword">return</span> Sequential( | |
| Gate(CompilesRubric(), threshold=<span class="hljs-number">1.0</span>), <span class="hljs-comment"># gate everything on compilation</span> | |
| WeightedSum( | |
| [ | |
| TestsPassRubric(), | |
| StyleRubric(), | |
| ], | |
| weights=[<span class="hljs-number">0.7</span>, <span class="hljs-number">0.3</span>], | |
| ), | |
| ) | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">CodeEnvironment</span>(Environment[CodeAction, CodeObservation, CodeState]): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>): | |
| <span class="hljs-built_in">super</span>().__init__(rubric=build_code_rubric()) | |
| self._state = CodeState() | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">reset</span>(<span class="hljs-params">self, seed=<span class="hljs-literal">None</span>, episode_id=<span class="hljs-literal">None</span>, **kwargs</span>) -> CodeObservation: | |
| self._reset_rubric() <span class="hljs-comment"># clear any trajectory / cached last_score</span> | |
| self._state = CodeState() | |
| <span class="hljs-keyword">return</span> CodeObservation() | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">step</span>(<span class="hljs-params">self, action: CodeAction, timeout_s=<span class="hljs-literal">None</span>, **kwargs</span>) -> CodeObservation: | |
| self._state.attempts += <span class="hljs-number">1</span> | |
| obs = self._run_code(action) <span class="hljs-comment"># your domain-specific execution</span> | |
| obs.reward = self._apply_rubric(action, obs) | |
| <span class="hljs-keyword">return</span> obs | |
| <span class="hljs-meta"> @property</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">state</span>(<span class="hljs-params">self</span>) -> CodeState: | |
| <span class="hljs-keyword">return</span> self._state | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">_run_code</span>(<span class="hljs-params">self, action: CodeAction</span>) -> CodeObservation: | |
| <span class="hljs-comment"># Placeholder for whatever your environment actually does.</span> | |
| compiles = <span class="hljs-string">"def "</span> <span class="hljs-keyword">in</span> action.code | |
| <span class="hljs-keyword">return</span> CodeObservation( | |
| compiles=compiles, | |
| tests_passed=<span class="hljs-number">3</span> <span class="hljs-keyword">if</span> compiles <span class="hljs-keyword">else</span> <span class="hljs-number">0</span>, | |
| tests_total=<span class="hljs-number">3</span>, | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-gy5svv">The three pieces the base class expects from you:</p> <ol data-svelte-h="svelte-1r3ijbt"><li><strong>Pass the rubric to <code>super().__init__(rubric=...)</code></strong> so <code>self.rubric</code> is set.</li> <li><strong>Call <code>self._reset_rubric()</code> from <code>reset</code></strong> so trajectory state does not leak between episodes.</li> <li><strong>Call <code>self._apply_rubric(action, obs)</code> from <code>step</code></strong> and attach the result to <code>obs.reward</code>. There is also <code>_apply_rubric_async</code> for <code>step_async</code>.</li></ol> <blockquote class="note" data-svelte-h="svelte-9vcx4d"><p>Some environments already compute <code>obs.reward</code> from game mechanics or a handcrafted multi-component signal (see <code>envs/chess_env/</code> and <code>envs/carla_env/</code>). In that case, call <code>self._apply_rubric(action, obs)</code> without assigning its return value — the rubric still accumulates the trajectory for <code>compute_step_rewards()</code> and still exposes per-component scores via <code>named_rubrics()</code>, but <code>obs.reward</code> stays authoritative.</p></blockquote> <h3 class="relative group"><a id="inspecting-rewards-from-training-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inspecting-rewards-from-training-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 
79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inspecting rewards from training code</span></h3> <p data-svelte-h="svelte-1rkkkyv">Because children are auto-registered, the training loop can walk the rubric tree and log component-level diagnostics without the environment exposing a custom API:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START -->env = CodeEnvironment() | |
| obs = env.reset() | |
| obs = env.step(CodeAction(code=<span class="hljs-string">"def solution(): return 42"</span>)) | |
| <span class="hljs-keyword">for</span> name, component <span class="hljs-keyword">in</span> env.rubric.named_rubrics(): | |
|     <span class="hljs-built_in">print</span>(<span class="hljs-string">f"<span class="hljs-subst">{name:30s}</span> last_score=<span class="hljs-subst">{component.last_score:<span class="hljs-number">.2</span>f}</span>"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-kpezmh">That snippet works for <em>any</em> OpenEnv environment that sets <code>self.rubric</code>, regardless of whether the rubric is a single scalar or a deeply nested composition.</p> <h3 class="relative group"><a id="where-the-reward-ends-up-during-training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#where-the-reward-ends-up-during-training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Where the reward ends up during training</span></h3> <p data-svelte-h="svelte-1e8hy2q">Training frameworks consume the reward through the same channel as any other OpenEnv observation field: <code>step()</code> returns an <code>Observation</code> whose <code>reward</code> is the rubric’s output, and the client delivers it via <code>result.reward</code>.</p> <p data-svelte-h="svelte-orqibu">With <a href="https://huggingface.co/docs/trl/main/en/openenv" 
rel="nofollow">TRL</a>, the recommended path is <code>GRPOTrainer</code>’s <code>environment_factory</code>: you define a thin wrapper class whose tool methods call the OpenEnv client and store <code>self.reward = result.observation.reward</code> after each step, and a plain reward function then reads that value off the <code>environments</code> parameter. The <a href="https://huggingface.co/docs/trl/main/en/openenv" rel="nofollow">TRL OpenEnv integration guide</a> has the full recipe, and <a href="https://github.com/huggingface/trl/tree/main/examples/scripts/openenv" rel="nofollow"><code>examples/scripts/openenv/</code></a> ships ready-to-run scripts. The same observation shape works with <a href="https://github.com/pytorch-labs/torchforge" rel="nofollow">torchforge</a> and other OpenEnv-compatible training stacks.</p> <p data-svelte-h="svelte-rubsu0"><code>named_rubrics()</code> is orthogonal: use it to <strong>log per-component scores</strong> (to Weights &amp; Biases, TensorBoard, trackio, …) while training, without changing the reward the optimiser sees.</p> <h2 class="relative group"><a id="using-rubrics-for-evaluation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-rubrics-for-evaluation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 
0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using Rubrics for Evaluation</span></h2> <p data-svelte-h="svelte-blel7i">A rubric is just a callable — nothing forces you to run it inside a training loop. Drop it into a for-loop over a static dataset and you have a multi-criteria scoring function for offline eval:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START -->rubric = build_code_rubric() | |
| scores = [] | |
| <span class="hljs-keyword">for</span> action, obs <span class="hljs-keyword">in</span> eval_dataset: | |
|     scores.append(rubric(action, obs)) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"mean reward: <span class="hljs-subst">{<span class="hljs-built_in">sum</span>(scores) / <span class="hljs-built_in">len</span>(scores):<span class="hljs-number">.3</span>f}</span>"</span>) | |
| <span class="hljs-keyword">for</span> name, component <span class="hljs-keyword">in</span> rubric.named_rubrics(): | |
|     <span class="hljs-built_in">print</span>(<span class="hljs-string">f" <span class="hljs-subst">{name:30s}</span> last_score=<span class="hljs-subst">{component.last_score:<span class="hljs-number">.3</span>f}</span>"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-7mo41g">The same rubric object used to compute training rewards doubles as the eval metric — one source of truth for “what is a good response”. Per-component <code>last_score</code> gives you a per-criterion breakdown for free (useful for regression dashboards and failure analysis). When a component like <code>LLMJudge</code> is async, put the loop inside a coroutine started with <code>asyncio.run(...)</code> and <code>await rubric(action, obs)</code> for each sample. Awaiting the calls one at a time still runs them sequentially; to let the judge calls overlap, schedule them together, for example with <code>asyncio.gather(...)</code>.</p> <h2 class="relative group"><a id="next-steps" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#next-steps"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Next Steps</span></h2> <ul data-svelte-h="svelte-wcm2j7"><li><strong>Real-world trajectory example</strong> — walk through <code>envs/chess_env/server/rubrics.py</code> and <code>chess_environment.py</code> to see 
<code>ExponentialDiscountingTrajectoryRubric</code> wired into a game environment.</li> <li><strong>Design details</strong> — <a href="https://github.com/huggingface/OpenEnv/blob/main/rfcs/004-rubrics.md" rel="nofollow">RFC 004</a> covers the rationale for the composable API and the “rewards inside the environment” invariant.</li> <li><strong>Reward design basics</strong> — the <a href="../guides/rewards">Reward Design</a> guide covers sparse-vs-dense signals and common pitfalls that still apply on top of any rubric composition.</li> <li><strong>Training loop integration</strong> — see the <a href="../guides/rl-integration">RL Framework Integration</a> guide and the <a href="https://huggingface.co/docs/trl/main/en/openenv" rel="nofollow">TRL OpenEnv integration guide</a> for the recommended <code>environment_factory</code> pattern.</li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/openenv/blob/main/docs/source/tutorials/rubrics.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_1qwoa43 = { | |
| assets: "/docs/openenv/pr_749/en", | |
| base: "/docs/openenv/pr_749/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/openenv/pr_749/en/_app/immutable/entry/start.85477f45.js"), | |
| import("/docs/openenv/pr_749/en/_app/immutable/entry/app.51835dc5.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 64], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 79.1 kB
- Xet hash:
- 576c48154214ff3f235a9792c74c26a7165f35b006145efa6211e12260d26cbd
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.