Buckets:
| import{s as Xs,o as Bs,n as fs}from"../chunks/scheduler.7b731bd4.js";import{S as Vs,i as Fs,e as g,s as r,c as d,q as Zs,H as Hs,h as Ys,a as u,d as e,b as i,f as S,g as y,j as x,r as Ws,u as Ss,k as E,l as h,m as p,n as w,t as M,o as f,p as b}from"../chunks/index.cc268345.js";import{C as Ds,H as ns,E as Ps}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.f0d99f98.js";import{D as ws}from"../chunks/Docstring.03f7b462.js";import{C as bs}from"../chunks/CodeBlock.169a125f.js";import{E as Ms}from"../chunks/ExampleCodeBlock.415f9452.js";function Os(I){let t,v="Example:",o,l,m;return l=new bs({props:{code:"ZnJvbSUyMHRybC5yZXdhcmRzJTIwaW1wb3J0JTIwYWNjdXJhY3lfcmV3YXJkJTBBJTBBc29sdXRpb25zJTIwJTNEJTIwJTVCciUyMiU1Q2ZyYWMlN0IxJTdEJTdCMyU3RCUyMiUyQyUyMHIlMjIlNUNmcmFjJTdCMSU3RCU3QjMlN0QlMjIlNUQlMEFjb21wbGV0aW9ucyUyMCUzRCUyMCU1QiUwQSUyMCUyMCUyMCUyMCU1QiU3QiUyMnJvbGUlMjIlM0ElMjAlMjJhc3Npc3RhbnQlMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwciUyMk15JTIwYW5zd2VyJTIwaXMlMjAlNUNib3hlZCU3QiU1Q2ZyYWMlN0IxJTdEJTdCMyU3RCU3RCUyMiU3RCU1RCUyQyUwQSUyMCUyMCUyMCUyMCU1QiU3QiUyMnJvbGUlMjIlM0ElMjAlMjJhc3Npc3RhbnQlMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwciUyMk15JTIwYW5zd2VyJTIwaXMlMjAlNUNib3hlZCU3QiU1Q2ZyYWMlN0IxJTdEJTdCMiU3RCU3RCUyMiU3RCU1RCUyQyUwQSU1RCUwQWFjY3VyYWN5X3Jld2FyZChjb21wbGV0aW9ucyUyQyUyMHNvbHV0aW9ucyk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> trl.rewards <span class="hljs-keyword">import</span> accuracy_reward | |
| <span class="hljs-meta">>>> </span>solutions = [<span class="hljs-string">r"\\frac{1}{3}"</span>, <span class="hljs-string">r"\\frac{1}{3}"</span>] | |
| <span class="hljs-meta">>>> </span>completions = [ | |
| <span class="hljs-meta">... </span> [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">r"My answer is \\boxed{\\frac{1}{3}}"</span>}], | |
| <span class="hljs-meta">... </span> [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">r"My answer is \\boxed{\\frac{1}{2}}"</span>}], | |
| <span class="hljs-meta">... </span>] | |
| <span class="hljs-meta">>>> </span>accuracy_reward(completions, solutions) | |
| [<span class="hljs-number">1.0</span>, <span class="hljs-number">0.0</span>]`,wrap:!1}}),{c(){t=g("p"),t.textContent=v,o=r(),d(l.$$.fragment)},l(a){t=u(a,"P",{"data-svelte-h":!0}),x(t)!=="svelte-11lpom8"&&(t.textContent=v),o=i(a),y(l.$$.fragment,a)},m(a,c){p(a,t,c),p(a,o,c),w(l,a,c),m=!0},p:fs,i(a){m||(M(l.$$.fragment,a),m=!0)},o(a){f(l.$$.fragment,a),m=!1},d(a){a&&(e(t),e(o)),b(l,a)}}}function Ks(I){let t,v="Example:",o,l,m;return l=new bs({props:{code:"ZnJvbSUyMHRybC5yZXdhcmRzJTIwaW1wb3J0JTIwcmVhc29uaW5nX2FjY3VyYWN5X3Jld2FyZCUwQSUwQXJlYXNvbmluZ19kZWxpbWl0ZXJzJTIwJTNEJTIwJTVCJTIyJTNDJTJGdGhpbmslM0UlMjIlNUQlMEFzb2x1dGlvbnMlMjAlM0QlMjAlNUJyJTIyJTVDZnJhYyU3QjElN0QlN0IzJTdEJTIyJTJDJTIwciUyMiU1Q2ZyYWMlN0IxJTdEJTdCMyU3RCUyMiUyQyUyMHIlMjIlNUNmcmFjJTdCMSU3RCU3QjMlN0QlMjIlNUQlMEFjb21wbGV0aW9ucyUyMCUzRCUyMCU1QiUwQSUyMCUyMCUyMCUyMCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnJvbGUlMjIlM0ElMjAlMjJhc3Npc3RhbnQlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJjb250ZW50JTIyJTNBJTIwciUyMiUzQ3RoaW5rJTNFJTIwUmVhc29uaW5nJTIwY29udGVudCUyMCUzQyUyRnRoaW5rJTNFJTIwVGhlJTIwZmluYWwlMjBhbnN3ZXIlMjBpcyUyMCU1Q2JveGVkJTdCJTVDZnJhYyU3QjElN0QlN0IzJTdEJTdEJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTBBJTIwJTIwJTIwJTIwJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIycm9sZSUyMiUzQSUyMCUyMmFzc2lzdGFudCUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmNvbnRlbnQlMjIlM0ElMjByJTIyJTNDdGhpbmslM0UlMjBSZWFzb25pbmclMjBjb250ZW50JTIwJTNDJTJGdGhpbmslM0UlMjBUaGUlMjBmaW5hbCUyMGFuc3dlciUyMGlzJTIwJTVDYm94ZWQlN0IlNUNmcmFjJTdCMSU3RCU3QjIlN0QlN0QlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMEElMjAlMjAlMjAlMjAlNUQlMkMlMEElMjAlMjAlMjAlMjAlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJyb2xlJTIyJTNBJTIwJTIyYXNzaXN0YW50JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyY29udGVudCUyMiUzQSU
yMHIlMjIlM0N0aGluayUzRSUyMFJlYXNvbmluZyUyMGNvbnRlbnQlMjB3aXRoJTIwcGFydGlhbCUyMGFuc3dlcnMlMjAlNUNib3hlZCU3QiU1Q2ZyYWMlN0IxJTdEJTdCMyU3RCU3RCUyMGJ1dCUyMG5vJTIwZmluYWwlMjBhbnN3ZXIlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMEElMjAlMjAlMjAlMjAlNUQlMkMlMEElNUQlMEFyZWFzb25pbmdfYWNjdXJhY3lfcmV3YXJkKGNvbXBsZXRpb25zJTJDJTIwc29sdXRpb25zJTJDJTIwcmVhc29uaW5nX2RlbGltaXRlcnMlM0RyZWFzb25pbmdfZGVsaW1pdGVycyk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> trl.rewards <span class="hljs-keyword">import</span> reasoning_accuracy_reward | |
| <span class="hljs-meta">>>> </span>reasoning_delimiters = [<span class="hljs-string">"</think>"</span>] | |
| <span class="hljs-meta">>>> </span>solutions = [<span class="hljs-string">r"\\frac{1}{3}"</span>, <span class="hljs-string">r"\\frac{1}{3}"</span>, <span class="hljs-string">r"\\frac{1}{3}"</span>] | |
| <span class="hljs-meta">>>> </span>completions = [ | |
| <span class="hljs-meta">... </span> [ | |
| <span class="hljs-meta">... </span> { | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"content"</span>: <span class="hljs-string">r"<think> Reasoning content </think> The final answer is \\boxed{\\frac{1}{3}}"</span>, | |
| <span class="hljs-meta">... </span> } | |
| <span class="hljs-meta">... </span> ], | |
| <span class="hljs-meta">... </span> [ | |
| <span class="hljs-meta">... </span> { | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"content"</span>: <span class="hljs-string">r"<think> Reasoning content </think> The final answer is \\boxed{\\frac{1}{2}}"</span>, | |
| <span class="hljs-meta">... </span> } | |
| <span class="hljs-meta">... </span> ], | |
| <span class="hljs-meta">... </span> [ | |
| <span class="hljs-meta">... </span> { | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"content"</span>: <span class="hljs-string">r"<think> Reasoning content with partial answers \\boxed{\\frac{1}{3}} but no final answer"</span>, | |
| <span class="hljs-meta">... </span> } | |
| <span class="hljs-meta">... </span> ], | |
| <span class="hljs-meta">... </span>] | |
| <span class="hljs-meta">>>> </span>reasoning_accuracy_reward(completions, solutions, reasoning_delimiters=reasoning_delimiters) | |
| [<span class="hljs-number">1.0</span>, <span class="hljs-number">0.0</span>, <span class="hljs-number">0.0</span>]`,wrap:!1}}),{c(){t=g("p"),t.textContent=v,o=r(),d(l.$$.fragment)},l(a){t=u(a,"P",{"data-svelte-h":!0}),x(t)!=="svelte-11lpom8"&&(t.textContent=v),o=i(a),y(l.$$.fragment,a)},m(a,c){p(a,t,c),p(a,o,c),w(l,a,c),m=!0},p:fs,i(a){m||(M(l.$$.fragment,a),m=!0)},o(a){f(l.$$.fragment,a),m=!1},d(a){a&&(e(t),e(o)),b(l,a)}}}function sa(I){let t,v="Example:",o,l,m;return l=new bs({props:{code:"ZnJvbSUyMHRybC5yZXdhcmRzJTIwaW1wb3J0JTIwdGhpbmtfZm9ybWF0X3Jld2FyZCUwQSUwQWNvbXBsZXRpb25zJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTVCJTdCJTIyY29udGVudCUyMiUzQSUyMCUyMiUzQ3RoaW5rJTNFJTVDblRoaXMlMjBpcyUyMG15JTIwcmVhc29uaW5nLiU1Q24lM0MlMkZ0aGluayUzRSU1Q25UaGlzJTIwaXMlMjBteSUyMGFuc3dlci4lMjIlN0QlNUQlMkMlMEElMjAlMjAlMjAlMjAlNUIlN0IlMjJjb250ZW50JTIyJTNBJTIwJTIyJTNDdGhpbmslM0UlNUNuVGhpcyUyMGlzJTIwbXklMjByZWFzb25pbmcuJTVDblRoaXMlMjBpcyUyMG15JTIwYW5zd2VyLiUyMiU3RCU1RCUyQyUwQSU1RCUwQXRoaW5rX2Zvcm1hdF9yZXdhcmQoY29tcGxldGlvbnMp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> trl.rewards <span class="hljs-keyword">import</span> think_format_reward | |
| <span class="hljs-meta">>>> </span>completions = [ | |
| <span class="hljs-meta">... </span> [{<span class="hljs-string">"content"</span>: <span class="hljs-string">"<think>\\nThis is my reasoning.\\n</think>\\nThis is my answer."</span>}], | |
| <span class="hljs-meta">... </span> [{<span class="hljs-string">"content"</span>: <span class="hljs-string">"<think>\\nThis is my reasoning.\\nThis is my answer."</span>}], | |
| <span class="hljs-meta">... </span>] | |
| <span class="hljs-meta">>>> </span>think_format_reward(completions) | |
| [<span class="hljs-number">1.0</span>, <span class="hljs-number">0.0</span>]`,wrap:!1}}),{c(){t=g("p"),t.textContent=v,o=r(),d(l.$$.fragment)},l(a){t=u(a,"P",{"data-svelte-h":!0}),x(t)!=="svelte-11lpom8"&&(t.textContent=v),o=i(a),y(l.$$.fragment,a)},m(a,c){p(a,t,c),p(a,o,c),w(l,a,c),m=!0},p:fs,i(a){m||(M(l.$$.fragment,a),m=!0)},o(a){f(l.$$.fragment,a),m=!1},d(a){a&&(e(t),e(o)),b(l,a)}}}function aa(I){let t,v="Example:",o,l,m;return l=new bs({props:{code:"ZnJvbSUyMHRybC5yZXdhcmRzJTIwaW1wb3J0JTIwZ2V0X3NvZnRfb3ZlcmxvbmdfcHVuaXNobWVudCUwQSUwQXNvZnRfb3ZlcmxvbmdfcHVuaXNobWVudCUyMCUzRCUyMGdldF9zb2Z0X292ZXJsb25nX3B1bmlzaG1lbnQobWF4X2NvbXBsZXRpb25fbGVuJTNEMTAwJTJDJTIwc29mdF9wdW5pc2hfY2FjaGUlM0QyMCklMEFjb21wbGV0aW9uX2lkcyUyMCUzRCUyMCU1QiU1QjElNUQlMjAqJTIwOTAlNUQlMjAlMjAlMjMlMjBzaW11bGF0aW5nJTIwYSUyMGNvbXBsZXRpb24lMjB3aXRoJTIwOTAlMjB0b2tlbnMuJTIwOTAlMjBpcyUyMGJldHdlZW4lMjA4MCUyMGFuZCUyMDEwMC4lMEFyZXdhcmRzJTIwJTNEJTIwc29mdF9vdmVybG9uZ19wdW5pc2htZW50KGNvbXBsZXRpb25faWRzKSUwQXByaW50KHJld2FyZHMpJTIwJTIwJTIzJTIwJTVCLTAuNSU1RA==",highlighted:`<span class="hljs-keyword">from</span> trl.rewards <span class="hljs-keyword">import</span> get_soft_overlong_punishment | |
| soft_overlong_punishment = get_soft_overlong_punishment(max_completion_len=<span class="hljs-number">100</span>, soft_punish_cache=<span class="hljs-number">20</span>) | |
| completion_ids = [[<span class="hljs-number">1</span>] * <span class="hljs-number">90</span>] <span class="hljs-comment"># simulating a completion with 90 tokens. 90 is between 80 and 100.</span> | |
| rewards = soft_overlong_punishment(completion_ids) | |
| <span class="hljs-built_in">print</span>(rewards) <span class="hljs-comment"># [-0.5]</span>`,wrap:!1}}),{c(){t=g("p"),t.textContent=v,o=r(),d(l.$$.fragment)},l(a){t=u(a,"P",{"data-svelte-h":!0}),x(t)!=="svelte-11lpom8"&&(t.textContent=v),o=i(a),y(l.$$.fragment,a)},m(a,c){p(a,t,c),p(a,o,c),w(l,a,c),m=!0},p:fs,i(a){m||(M(l.$$.fragment,a),m=!0)},o(a){f(l.$$.fragment,a),m=!1},d(a){a&&(e(t),e(o)),b(l,a)}}}function ta(I){let t,v,o,l,m,a,c,es,G,As='This module contains some useful reward functions, primarily intended for use with the <a href="/docs/trl/pr_5607/en/gspo_token#trl.GRPOTrainer">GRPOTrainer</a> and <a href="/docs/trl/pr_5607/en/rloo_trainer#trl.RLOOTrainer">RLOOTrainer</a>.',ls,Z,ps,j,W,vs,D,zs="Reward function that checks if the completion matches the ground truth.",js,P,Rs="<li>If both gold and prediction are parseable → use math verification.</li> <li>If gold is not parseable → return <code>None</code> to skip the example.</li>",Js,N,rs,L,is,J,X,Ts,O,Qs="Reward function that removes the reasoning content and checks if the final answer matches the ground truth.",Us,K,qs="<li>If both gold and prediction are parseable → use math verification.</li> <li>If gold is not parseable → return <code>None</code> to skip the example.</li>",_s,A,ms,B,os,T,V,$s,ss,Es=`Reward function that checks if the reasoning process is enclosed within <code>"<think>"</code> and <code>"</think>"</code> tags. The | |
| function returns a reward of 1.0 if the format is correct, otherwise 0.0.`,xs,z,cs,F,hs,U,H,Is,R,Cs,Q,Gs="https://huggingface.co/papers/2503.14476",ks,gs,Ls=`<span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><msub><mi>R</mi><mtext>length</mtext></msub><mo stretchy="false">(</mo><mi>y</mi><mo stretchy="false">)</mo><mo>=</mo><mrow><mo fence="true">{</mo><mtable rowspacing="0.36em" columnalign="left left" columnspacing="1em"><mtr><mtd><mstyle scriptlevel="0" displaystyle="false"><mrow><mn>0</mn><mo separator="true">,</mo></mrow></mstyle></mtd><mtd><mstyle scriptlevel="0" displaystyle="false"><mrow><mi mathvariant="normal">∣</mi><mi>y</mi><mi mathvariant="normal">∣</mi><mo>≤</mo><msub><mi>L</mi><mi>max</mi><mo></mo></msub><mo>−</mo><msub><mi>L</mi><mtext>cache</mtext></msub></mrow></mstyle></mtd></mtr><mtr><mtd><mstyle scriptlevel="0" displaystyle="false"><mrow><mstyle displaystyle="true" scriptlevel="0"><mfrac><mrow><mo stretchy="false">(</mo><msub><mi>L</mi><mi>max</mi><mo></mo></msub><mo>−</mo><msub><mi>L</mi><mtext>cache</mtext></msub><mo stretchy="false">)</mo><mo>−</mo><mi mathvariant="normal">∣</mi><mi>y</mi><mi mathvariant="normal">∣</mi></mrow><msub><mi>L</mi><mtext>cache</mtext></msub></mfrac></mstyle><mo separator="true">,</mo></mrow></mstyle></mtd><mtd><mstyle scriptlevel="0" displaystyle="false"><mrow><msub><mi>L</mi><mi>max</mi><mo></mo></msub><mo>−</mo><msub><mi>L</mi><mtext>cache</mtext></msub><mo><</mo><mi mathvariant="normal">∣</mi><mi>y</mi><mi mathvariant="normal">∣</mi><mo>≤</mo><msub><mi>L</mi><mi>max</mi><mo></mo></msub></mrow></mstyle></mtd></mtr><mtr><mtd><mstyle scriptlevel="0" displaystyle="false"><mrow><mo>−</mo><mn>1</mn><mo separator="true">,</mo></mrow></mstyle></mtd><mtd><mstyle scriptlevel="0" displaystyle="false"><mrow><msub><mi>L</mi><mi>max</mi><mo></mo></msub><mo><</mo><mi mathvariant="normal">∣</mi><mi>y</mi><mi 
mathvariant="normal">∣</mi></mrow></mstyle></mtd></mtr></mtable></mrow></mrow><annotation encoding="application/x-tex"> | |
| R_{\\text{length}}(y) = \\begin{cases} | |
| 0, & |y| \\le L_{\\max} - L_{\\text{cache}} \\\\ | |
| \\dfrac{(L_{\\max} - L_{\\text{cache}}) - |y|}{L_{\\text{cache}}}, & L_{\\max} - L_{\\text{cache}} < |y| \\le L_{\\max} \\\\ | |
| -1, & L_{\\max} < |y| | |
| \\end{cases} | |
| </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1.0361em;vertical-align:-0.2861em;"></span><span class="mord"><span class="mord mathnormal" style="margin-right:0.00773em;">R</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.3361em;"><span style="top:-2.55em;margin-left:-0.0077em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord text mtight"><span class="mord mtight">length</span></span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.2861em;"><span></span></span></span></span></span></span><span class="mopen">(</span><span class="mord mathnormal" style="margin-right:0.03588em;">y</span><span class="mclose">)</span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mrel">=</span><span class="mspace" style="margin-right:0.2778em;"></span></span><span class="base"><span class="strut" style="height:5.143em;vertical-align:-2.3215em;"></span><span class="minner"><span class="mopen"><span class="delimsizing mult"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:2.65em;"><span style="top:-1.9em;"><span class="pstrut" style="height:3.15em;"></span><span class="delimsizinginner delim-size4"><span>⎩</span></span></span><span style="top:-1.892em;"><span class="pstrut" style="height:3.15em;"></span><span style="height:0.616em;width:0.8889em;"><svg xmlns="http://www.w3.org/2000/svg" width="0.8889em" height="0.616em" style="width:0.8889em" viewBox="0 0 888.89 616" preserveAspectRatio="xMinYMin"><path d="M384 0 H504 V616 H384z M384 0 H504 V616 H384z"/></svg></span></span><span style="top:-3.15em;"><span class="pstrut" style="height:3.15em;"></span><span class="delimsizinginner 
delim-size4"><span>⎨</span></span></span><span style="top:-4.292em;"><span class="pstrut" style="height:3.15em;"></span><span style="height:0.616em;width:0.8889em;"><svg xmlns="http://www.w3.org/2000/svg" width="0.8889em" height="0.616em" style="width:0.8889em" viewBox="0 0 888.89 616" preserveAspectRatio="xMinYMin"><path d="M384 0 H504 V616 H384z M384 0 H504 V616 H384z"/></svg></span></span><span style="top:-4.9em;"><span class="pstrut" style="height:3.15em;"></span><span class="delimsizinginner delim-size4"><span>⎧</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:2.15em;"><span></span></span></span></span></span></span><span class="mord"><span class="mtable"><span class="col-align-l"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:2.8215em;"><span style="top:-5.2405em;"><span class="pstrut" style="height:3.427em;"></span><span class="mord"><span class="mord">0</span><span class="mpunct">,</span></span></span><span style="top:-3.3815em;"><span class="pstrut" style="height:3.427em;"></span><span class="mord"><span class="mord"><span class="mopen nulldelimiter"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.427em;"><span style="top:-2.314em;"><span class="pstrut" style="height:3em;"></span><span class="mord"><span class="mord"><span class="mord mathnormal">L</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.3361em;"><span style="top:-2.55em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord text mtight"><span class="mord mtight">cache</span></span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" 
style="height:0.15em;"><span></span></span></span></span></span></span></span></span><span style="top:-3.23em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line" style="border-bottom-width:0.04em;"></span></span><span style="top:-3.677em;"><span class="pstrut" style="height:3em;"></span><span class="mord"><span class="mopen">(</span><span class="mord"><span class="mord mathnormal">L</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.1514em;"><span style="top:-2.55em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mop mtight"><span class="mtight">m</span><span class="mtight">a</span><span class="mtight">x</span></span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mbin">−</span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mord"><span class="mord mathnormal">L</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.3361em;"><span style="top:-2.55em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord text mtight"><span class="mord mtight">cache</span></span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mclose">)</span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mbin">−</span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mord">∣</span><span class="mord 
mathnormal" style="margin-right:0.03588em;">y</span><span class="mord">∣</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.836em;"><span></span></span></span></span></span><span class="mclose nulldelimiter"></span></span><span class="mpunct">,</span></span></span><span style="top:-1.5375em;"><span class="pstrut" style="height:3.427em;"></span><span class="mord"><span class="mord">−</span><span class="mord">1</span><span class="mpunct">,</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:2.3215em;"><span></span></span></span></span></span><span class="arraycolsep" style="width:1em;"></span><span class="col-align-l"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:2.8215em;"><span style="top:-5.2405em;"><span class="pstrut" style="height:3.427em;"></span><span class="mord"><span class="mord">∣</span><span class="mord mathnormal" style="margin-right:0.03588em;">y</span><span class="mord">∣</span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mrel">≤</span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mord"><span class="mord mathnormal">L</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.1514em;"><span style="top:-2.55em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mop mtight"><span class="mtight">m</span><span class="mtight">a</span><span class="mtight">x</span></span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mbin">−</span><span class="mspace" 
style="margin-right:0.2222em;"></span><span class="mord"><span class="mord mathnormal">L</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.3361em;"><span style="top:-2.55em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord text mtight"><span class="mord mtight">cache</span></span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span><span style="top:-3.3815em;"><span class="pstrut" style="height:3.427em;"></span><span class="mord"><span class="mord"><span class="mord mathnormal">L</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.1514em;"><span style="top:-2.55em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mop mtight"><span class="mtight">m</span><span class="mtight">a</span><span class="mtight">x</span></span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mbin">−</span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mord"><span class="mord mathnormal">L</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.3361em;"><span style="top:-2.55em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord text mtight"><span class="mord 
mtight">cache</span></span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mrel"><</span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mord">∣</span><span class="mord mathnormal" style="margin-right:0.03588em;">y</span><span class="mord">∣</span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mrel">≤</span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mord"><span class="mord mathnormal">L</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.1514em;"><span style="top:-2.55em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mop mtight"><span class="mtight">m</span><span class="mtight">a</span><span class="mtight">x</span></span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span><span style="top:-1.5375em;"><span class="pstrut" style="height:3.427em;"></span><span class="mord"><span class="mord"><span class="mord mathnormal">L</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.1514em;"><span style="top:-2.55em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mop mtight"><span class="mtight">m</span><span class="mtight">a</span><span class="mtight">x</span></span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" 
style="height:0.15em;"><span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mrel"><</span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mord">∣</span><span class="mord mathnormal" style="margin-right:0.03588em;">y</span><span class="mord">∣</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:2.3215em;"><span></span></span></span></span></span></span></span><span class="mclose nulldelimiter"></span></span></span></span></span></span>`,Ns,q,ds,Y,us,as,ys;return m=new Ds({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),c=new ns({props:{title:"Reward Functions",local:"reward-functions",headingTag:"h1"}}),Z=new ns({props:{title:"accuracy_reward",local:"trl.rewards.accuracy_reward",headingTag:"h2"}}),W=new ws({props:{name:"trl.rewards.accuracy_reward",anchor:"trl.rewards.accuracy_reward",parameters:[{name:"completions",val:": list"},{name:"solution",val:": list"},{name:"log_extra",val:": collections.abc.Callable[[str, list], None] | None = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"trl.rewards.accuracy_reward.completions",description:`<strong>completions</strong> (<code>list[list[dict[str, str]]]</code>) — | |
| List of completions to be evaluated. Each completion must be a list of one message, i.e. a dictionary | |
| containing the key <code>"content"</code> with the value being the text of the completion.`,name:"completions"},{anchor:"trl.rewards.accuracy_reward.solution",description:`<strong>solution</strong> — (<code>list[str]</code>): | |
| List of the raw-text solutions to the questions/problems/prompts.`,name:"solution"},{anchor:"trl.rewards.accuracy_reward.log_extra",description:`<strong>log_extra</strong> (<code>callable</code>, <em>optional</em>) — | |
| Callable to log extra columns to the completions table, provided automatically by the trainer. Defaults to | |
| <code>None</code> to allow calling the function directly outside of a trainer (e.g., for testing).`,name:"log_extra"},{anchor:"trl.rewards.accuracy_reward.*kwargs",description:`*<strong>*kwargs</strong> — | |
| Additional keyword arguments. This function does not use them, but they are required in the function | |
| signature to ensure compatibility with trainers like <a href="/docs/trl/pr_5607/en/gspo_token#trl.GRPOTrainer">GRPOTrainer</a>.`,name:"*kwargs"}],source:"https://github.com/huggingface/trl/blob/vr_5607/trl/rewards/accuracy_rewards.py#L27"}}),N=new Ms({props:{anchor:"trl.rewards.accuracy_reward.example",$$slots:{default:[Os]},$$scope:{ctx:I}}}),L=new ns({props:{title:"reasoning_accuracy_reward",local:"trl.rewards.reasoning_accuracy_reward",headingTag:"h2"}}),X=new ws({props:{name:"trl.rewards.reasoning_accuracy_reward",anchor:"trl.rewards.reasoning_accuracy_reward",parameters:[{name:"completions",val:": list"},{name:"solution",val:": list"},{name:"reasoning_delimiters",val:": list[str] | None = None"},{name:"log_extra",val:": collections.abc.Callable[[str, list], None] | None = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"trl.rewards.reasoning_accuracy_reward.completions",description:`<strong>completions</strong> (<code>list[list[dict[str, str]]]</code>) — | |
| List of completions to be evaluated. Each completion must be a list of one message, i.e. a dictionary | |
| containing the key <code>"content"</code> with the value being the text of the completion.`,name:"completions"},{anchor:"trl.rewards.reasoning_accuracy_reward.solution",description:`<strong>solution</strong> — (<code>list[str]</code>): | |
| List of the raw-text solutions to the questions/problems/prompts.`,name:"solution"},{anchor:"trl.rewards.reasoning_accuracy_reward.reasoning_delimiters",description:`<strong>reasoning_delimiters</strong> (<code>list[str]]</code>, <em>optional</em>) — | |
| List of strings indicating where the reasoning content ends. The final answer is assumed to be after the | |
| last occurrence of any of these delimiters. If <code>None</code>, defaults to <code>["</think>"]</code>.`,name:"reasoning_delimiters"},{anchor:"trl.rewards.reasoning_accuracy_reward.log_extra",description:`<strong>log_extra</strong> (<code>callable</code>, <em>optional</em>) — | |
| Callable to log extra columns to the completions table, provided automatically by the trainer. Defaults to | |
| <code>None</code> to allow calling the function directly outside of a trainer (e.g., for testing).`,name:"log_extra"},{anchor:"trl.rewards.reasoning_accuracy_reward.*kwargs",description:`*<strong>*kwargs</strong> — | |
| Additional keyword arguments. This function does not use them, but they are required in the function | |
| signature to ensure compatibility with trainers like <a href="/docs/trl/pr_5607/en/gspo_token#trl.GRPOTrainer">GRPOTrainer</a>.`,name:"*kwargs"}],source:"https://github.com/huggingface/trl/blob/vr_5607/trl/rewards/accuracy_rewards.py#L117"}}),A=new Ms({props:{anchor:"trl.rewards.reasoning_accuracy_reward.example",$$slots:{default:[Ks]},$$scope:{ctx:I}}}),B=new ns({props:{title:"think_format_reward",local:"trl.rewards.think_format_reward",headingTag:"h2"}}),V=new ws({props:{name:"trl.rewards.think_format_reward",anchor:"trl.rewards.think_format_reward",parameters:[{name:"completions",val:": list"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"trl.rewards.think_format_reward.completions",description:`<strong>completions</strong> (<code>list[list[dict[str, str]]]</code>) — | |
| List of completions to be evaluated. Each completion must be a list of one message, i.e. a dictionary | |
| containing the key <code>"content"</code> with the value being the text of the completion.`,name:"completions"},{anchor:"trl.rewards.think_format_reward.*kwargs",description:`*<strong>*kwargs</strong> — | |
| Additional keyword arguments. This function does not use them, but they are required in the function | |
| signature to ensure compatibility with trainers like <a href="/docs/trl/pr_5607/en/gspo_token#trl.GRPOTrainer">GRPOTrainer</a>.`,name:"*kwargs"}],source:"https://github.com/huggingface/trl/blob/vr_5607/trl/rewards/format_rewards.py#L18",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A list of rewards, where each reward is 1.0 if the completion matches the expected format, otherwise 0.0.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>list[float]</code></p> | |
| `}}),z=new Ms({props:{anchor:"trl.rewards.think_format_reward.example",$$slots:{default:[sa]},$$scope:{ctx:I}}}),F=new ns({props:{title:"get_soft_overlong_punishment",local:"trl.rewards.get_soft_overlong_punishment",headingTag:"h2"}}),H=new ws({props:{name:"trl.rewards.get_soft_overlong_punishment",anchor:"trl.rewards.get_soft_overlong_punishment",parameters:[{name:"max_completion_len",val:": int"},{name:"soft_punish_cache",val:": int"}],parametersDescription:[{anchor:"trl.rewards.get_soft_overlong_punishment.max_completion_len",description:`<strong>max_completion_len</strong> (<code>int</code>) — | |
| Maximum length of the completion, ( L_{\\max} ).`,name:"max_completion_len"},{anchor:"trl.rewards.get_soft_overlong_punishment.soft_punish_cache",description:`<strong>soft_punish_cache</strong> (<code>int</code>) — | |
| Minimum length of the completion, ( L_{\\text{cache}‌} ). If set to <code>0</code>, no minimum length is applied.`,name:"soft_punish_cache"}],source:"https://github.com/huggingface/trl/blob/vr_5607/trl/rewards/other_rewards.py#L18"}}),q=new Ms({props:{anchor:"trl.rewards.get_soft_overlong_punishment.example",$$slots:{default:[aa]},$$scope:{ctx:I}}}),Y=new Ps({props:{source:"https://github.com/huggingface/trl/blob/main/docs/source/rewards.md"}}),{c(){t=g("meta"),v=r(),o=g("p"),l=r(),d(m.$$.fragment),a=r(),d(c.$$.fragment),es=r(),G=g("p"),G.innerHTML=As,ls=r(),d(Z.$$.fragment),ps=r(),j=g("div"),d(W.$$.fragment),vs=r(),D=g("p"),D.textContent=zs,js=r(),P=g("ul"),P.innerHTML=Rs,Js=r(),d(N.$$.fragment),rs=r(),d(L.$$.fragment),is=r(),J=g("div"),d(X.$$.fragment),Ts=r(),O=g("p"),O.textContent=Qs,Us=r(),K=g("ul"),K.innerHTML=qs,_s=r(),d(A.$$.fragment),ms=r(),d(B.$$.fragment),os=r(),T=g("div"),d(V.$$.fragment),$s=r(),ss=g("p"),ss.innerHTML=Es,xs=r(),d(z.$$.fragment),cs=r(),d(F.$$.fragment),hs=r(),U=g("div"),d(H.$$.fragment),Is=r(),R=g("p"),Cs=Zs(`Reward function that penalizes overlong completions. It is used to penalize overlong completions, but not to reward | |
| shorter completions. Reference: Eq. (13) from the DAPO paper (`),Q=g("a"),Q.textContent=Gs,ks=Zs(`) | |
| `),gs=new Hs(!1),Ns=r(),d(q.$$.fragment),ds=r(),d(Y.$$.fragment),us=r(),as=g("p"),this.h()},l(s){const n=Ys("svelte-u9bgzb",document.head);t=u(n,"META",{name:!0,content:!0}),n.forEach(e),v=i(s),o=u(s,"P",{}),S(o).forEach(e),l=i(s),y(m.$$.fragment,s),a=i(s),y(c.$$.fragment,s),es=i(s),G=u(s,"P",{"data-svelte-h":!0}),x(G)!=="svelte-34mox4"&&(G.innerHTML=As),ls=i(s),y(Z.$$.fragment,s),ps=i(s),j=u(s,"DIV",{class:!0});var _=S(j);y(W.$$.fragment,_),vs=i(_),D=u(_,"P",{"data-svelte-h":!0}),x(D)!=="svelte-xtr7df"&&(D.textContent=zs),js=i(_),P=u(_,"UL",{"data-svelte-h":!0}),x(P)!=="svelte-14gkls"&&(P.innerHTML=Rs),Js=i(_),y(N.$$.fragment,_),_.forEach(e),rs=i(s),y(L.$$.fragment,s),is=i(s),J=u(s,"DIV",{class:!0});var $=S(J);y(X.$$.fragment,$),Ts=i($),O=u($,"P",{"data-svelte-h":!0}),x(O)!=="svelte-1j1sigd"&&(O.textContent=Qs),Us=i($),K=u($,"UL",{"data-svelte-h":!0}),x(K)!=="svelte-14gkls"&&(K.innerHTML=qs),_s=i($),y(A.$$.fragment,$),$.forEach(e),ms=i(s),y(B.$$.fragment,s),os=i(s),T=u(s,"DIV",{class:!0});var C=S(T);y(V.$$.fragment,C),$s=i(C),ss=u(C,"P",{"data-svelte-h":!0}),x(ss)!=="svelte-nu0n1u"&&(ss.innerHTML=Es),xs=i(C),y(z.$$.fragment,C),C.forEach(e),cs=i(s),y(F.$$.fragment,s),hs=i(s),U=u(s,"DIV",{class:!0});var k=S(U);y(H.$$.fragment,k),Is=i(k),R=u(k,"P",{});var ts=S(R);Cs=Ws(ts,`Reward function that penalizes overlong completions. It is used to penalize overlong completions, but not to reward | |
| shorter completions. Reference: Eq. (13) from the DAPO paper (`),Q=u(ts,"A",{href:!0,rel:!0,"data-svelte-h":!0}),x(Q)!=="svelte-d6e55a"&&(Q.textContent=Gs),ks=Ws(ts,`) | |
| `),gs=Ss(ts,!1),ts.forEach(e),Ns=i(k),y(q.$$.fragment,k),k.forEach(e),ds=i(s),y(Y.$$.fragment,s),us=i(s),as=u(s,"P",{}),S(as).forEach(e),this.h()},h(){E(t,"name","hf:doc:metadata"),E(t,"content",na),E(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),E(J,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),E(T,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),E(Q,"href","https://huggingface.co/papers/2503.14476"),E(Q,"rel","nofollow"),gs.a=null,E(U,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(s,n){h(document.head,t),p(s,v,n),p(s,o,n),p(s,l,n),w(m,s,n),p(s,a,n),w(c,s,n),p(s,es,n),p(s,G,n),p(s,ls,n),w(Z,s,n),p(s,ps,n),p(s,j,n),w(W,j,null),h(j,vs),h(j,D),h(j,js),h(j,P),h(j,Js),w(N,j,null),p(s,rs,n),w(L,s,n),p(s,is,n),p(s,J,n),w(X,J,null),h(J,Ts),h(J,O),h(J,Us),h(J,K),h(J,_s),w(A,J,null),p(s,ms,n),w(B,s,n),p(s,os,n),p(s,T,n),w(V,T,null),h(T,$s),h(T,ss),h(T,xs),w(z,T,null),p(s,cs,n),w(F,s,n),p(s,hs,n),p(s,U,n),w(H,U,null),h(U,Is),h(U,R),h(R,Cs),h(R,Q),h(R,ks),gs.m(Ls,R),h(U,Ns),w(q,U,null),p(s,ds,n),w(Y,s,n),p(s,us,n),p(s,as,n),ys=!0},p(s,[n]){const _={};n&2&&(_.$$scope={dirty:n,ctx:s}),N.$set(_);const $={};n&2&&($.$$scope={dirty:n,ctx:s}),A.$set($);const C={};n&2&&(C.$$scope={dirty:n,ctx:s}),z.$set(C);const 
k={};n&2&&(k.$$scope={dirty:n,ctx:s}),q.$set(k)},
/* i: unless the `ys` flag is already set, calls M on every child component fragment and then sets `ys` to true.
   NOTE(review): M is imported as `t` from the index chunk, presumably Svelte's transition_in; confirm. */
i(s){ys||(M(m.$$.fragment,s),M(c.$$.fragment,s),M(Z.$$.fragment,s),M(W.$$.fragment,s),M(N.$$.fragment,s),M(L.$$.fragment,s),M(X.$$.fragment,s),M(A.$$.fragment,s),M(B.$$.fragment,s),M(V.$$.fragment,s),M(z.$$.fragment,s),M(F.$$.fragment,s),M(H.$$.fragment,s),M(q.$$.fragment,s),M(Y.$$.fragment,s),ys=!0)},
/* o: calls f on every child component fragment and resets the `ys` flag to false.
   NOTE(review): f is imported as `o` from the index chunk, presumably Svelte's transition_out; confirm. */
o(s){f(m.$$.fragment,s),f(c.$$.fragment,s),f(Z.$$.fragment,s),f(W.$$.fragment,s),f(N.$$.fragment,s),f(L.$$.fragment,s),f(X.$$.fragment,s),f(A.$$.fragment,s),f(B.$$.fragment,s),f(V.$$.fragment,s),f(z.$$.fragment,s),f(F.$$.fragment,s),f(H.$$.fragment,s),f(q.$$.fragment,s),f(Y.$$.fragment,s),ys=!1},
/* d: teardown. When `s` is truthy, calls e on each top-level node/anchor created by this fragment.
   Regardless of `s`, calls e on `t` (the meta element that was appended to document.head) and calls b on every child component.
   NOTE(review): e and b are presumably Svelte's detach and destroy_component; confirm against the index chunk. */
d(s){s&&(e(v),e(o),e(l),e(a),e(es),e(G),e(ls),e(ps),e(j),e(rs),e(is),e(J),e(ms),e(os),e(T),e(cs),e(hs),e(U),e(ds),e(us),e(as)),e(t),b(m,s),b(c,s),b(Z,s),b(W),b(N),b(L,s),b(X),b(A),b(B,s),b(V),b(z),b(F,s),b(H),b(q),b(Y,s)}}}
/* na: JSON-encoded page metadata (page title, local anchor, and one depth-2 section entry per documented reward function).
   It is written into the `content` attribute of the `hf:doc:metadata` meta element by the fragment above. */
const na='{"title":"Reward Functions","local":"reward-functions","sections":[{"title":"accuracy_reward","local":"trl.rewards.accuracy_reward","sections":[],"depth":2},{"title":"reasoning_accuracy_reward","local":"trl.rewards.reasoning_accuracy_reward","sections":[],"depth":2},{"title":"think_format_reward","local":"trl.rewards.think_format_reward","sections":[],"depth":2},{"title":"get_soft_overlong_punishment","local":"trl.rewards.get_soft_overlong_punishment","sections":[],"depth":2}],"depth":1}';
/**
 * Instance function handed to the component initializer in `ca`.
 * Registers a callback through Bs that reads the `fw` query parameter from
 * window.location.search; the value read is discarded (the callback has no return or assignment).
 * NOTE(review): Bs is imported as `o` from the scheduler chunk, presumably Svelte's onMount; confirm.
 * @param {*} I - received from the initializer; not used in the body.
 * @returns {Array} always an empty array.
 */
function ea(I){return Bs(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
/**
 * Page component exported from this chunk as `component`.
 * The constructor calls super() and then Fs with this instance, the options `t`,
 * the instance function `ea`, `ta`, `Xs`, and an empty object.
 * NOTE(review): `ta` is defined elsewhere in this file (not visible here), presumably the fragment factory above.
 * Vs/Fs/Xs are presumably Svelte's SvelteComponent/init/safe_not_equal; confirm against the imported chunks.
 */
class ca extends Vs{constructor(t){super(),Fs(this,t,ea,ta,Xs,{})}}
export{ca as component}; | |
Xet Storage Details
- Size:
- 39.7 kB
- Xet hash:
- ef808f18099f5c6b1826f8aaae3ed6d77e511c8696365a0ffcb25d1c4a9728b8
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.