Buckets:
| import{s as Za,o as Ga,n as ba}from"../chunks/scheduler.7b731bd4.js";import{S as Xa,i as Ra,e as r,s as p,c as b,q as E,H as z,h as Fa,a as o,d as e,b as m,f as J,g as y,j as T,r as j,u as Y,k as as,l as t,m as c,n as w,t as x,o as M,p as _}from"../chunks/index.cc268345.js";import{C as Ha,H as ds,E as Va}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.bdb7db7f.js";import{D as fs}from"../chunks/Docstring.cf11c414.js";import{C as ya}from"../chunks/CodeBlock.472c8bc2.js";import{E as va}from"../chunks/ExampleCodeBlock.650468ee.js";/**
 * Da(N): fragment factory for the Usage: snippet of LogCompletionsCallback.
 *
 * Further down this file Da is handed to the ExampleCodeBlock component (va)
 * as its default slot, under the anchor trl.LogCompletionsCallback.example.
 *
 * Locals:
 *   l - a p element whose text is C (the label Usage:)
 *   u - the node returned by p() (presumably a whitespace text node - TODO confirm)
 *   i - an instance of ya, the C export of the CodeBlock chunk, built with the props
 *         code        : Base64 text; it appears to decode to the percent-encoded snippet
 *         highlighted : pre-rendered markup for the same snippet
 *         wrap        : !1, that is false
 *   h - boolean flag toggled by m, i and o below
 *
 * Returns an object with the methods c, l, m, p, i, o and d.
 * NOTE(review): all helper names are minified aliases imported from the index chunk;
 * the seven methods presumably correspond to the Svelte create, claim, mount, update,
 * intro, outro and destroy hooks - confirm against the Svelte internals.
 *   c(a)   : builds l and u, then calls b on the fragment of i.
 *   l(a)   : obtains l through o(a, P, ...), rewrites its text only when T(l) differs
 *            from the marker svelte-5wyjqd, obtains u through m(a), calls y on i.
 *   m(a,d) : passes l, u and i to c(...) and w(...) with target a and anchor d, sets h.
 *   p      : ba, the n export of the scheduler chunk (presumably a no-op).
 *   i(a)   : calls x on the fragment of i only while h is falsy, then sets h.
 *   o(a)   : calls M on the fragment of i and clears h.
 *   d(a)   : when a is truthy passes l and u to e(...); always calls _(i, a).
 * The parameter N is not read inside this function.
 */function Da(N){let l,C="Usage:",u,i,h;return i=new ya({props:{code:"dHJhaW5lciUyMCUzRCUyMERQT1RyYWluZXIoLi4uKSUwQWNvbXBsZXRpb25zX2NhbGxiYWNrJTIwJTNEJTIwTG9nQ29tcGxldGlvbnNDYWxsYmFjayh0cmFpbmVyJTNEdHJhaW5lciklMEF0cmFpbmVyLmFkZF9jYWxsYmFjayhjb21wbGV0aW9uc19jYWxsYmFjayk=",highlighted:`trainer = DPOTrainer(...) | |
| completions_callback = LogCompletionsCallback(trainer=trainer) | |
| trainer.add_callback(completions_callback)`,wrap:!1}}),{c(){l=r("p"),l.textContent=C,u=p(),b(i.$$.fragment)},l(a){l=o(a,"P",{"data-svelte-h":!0}),T(l)!=="svelte-5wyjqd"&&(l.textContent=C),u=m(a),y(i.$$.fragment,a)},m(a,d){c(a,l,d),c(a,u,d),w(i,a,d),h=!0},p:ba,i(a){h||(x(i.$$.fragment,a),h=!0)},o(a){M(i.$$.fragment,a),h=!1},d(a){a&&(e(l),e(u)),_(i,a)}}}/**
 * Sa(N): fragment factory for the Example: snippet that shows BEMACallback being
 * passed to a Trainer through the callbacks argument.
 *
 * Same shape as the other snippet factories in this file: a p element l whose text
 * is C (the label Example:), a node u from p(), and a ya (CodeBlock chunk) instance i
 * built with the props code (Base64 text that appears to decode to the
 * percent-encoded snippet), highlighted (pre-rendered markup) and wrap set to false.
 *
 * Returns an object with the methods c, l, m, p, i, o and d.
 * NOTE(review): presumably the Svelte create, claim, mount, update, intro, outro and
 * destroy hooks - the helper names are minified, confirm against the Svelte internals.
 *   l(a) rewrites the label only when T(l) differs from the marker svelte-11lpom8.
 *   p is ba, the n export of the scheduler chunk (presumably a no-op).
 *   i(a) calls x only while the flag h is falsy; o(a) calls M and clears h.
 * The parameter N is not read inside this function. The place where Sa is wired
 * into the page is not visible in this part of the file.
 */function Sa(N){let l,C="Example:",u,i,h;return i=new ya({props:{code:"ZnJvbSUyMHRybCUyMGltcG9ydCUyMEJFTUFDYWxsYmFjayUwQSUwQXRyYWluZXIlMjAlM0QlMjBUcmFpbmVyKC4uLiUyQyUyMGNhbGxiYWNrcyUzRCU1QkJFTUFDYWxsYmFjaygpJTVEKQ==",highlighted:`<span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> BEMACallback | |
| trainer = Trainer(..., callbacks=[BEMACallback()])`,wrap:!1}}),{c(){l=r("p"),l.textContent=C,u=p(),b(i.$$.fragment)},l(a){l=o(a,"P",{"data-svelte-h":!0}),T(l)!=="svelte-11lpom8"&&(l.textContent=C),u=m(a),y(i.$$.fragment,a)},m(a,d){c(a,l,d),c(a,u,d),w(i,a,d),h=!0},p:ba,i(a){h||(x(i.$$.fragment,a),h=!0)},o(a){M(i.$$.fragment,a),h=!1},d(a){a&&(e(l),e(u)),_(i,a)}}}/**
 * Pa(N): fragment factory for the Usage: snippet of WeaveTraceCallback. The snippet
 * text covers a tracing-mode call, a call with project_name, and an evaluation-mode
 * call that passes a scorers mapping.
 *
 * Same shape as the other snippet factories in this file: a p element l whose text
 * is C (the label Usage:), a node u from p(), and a ya (CodeBlock chunk) instance i
 * built with the props code (Base64 text that appears to decode to the
 * percent-encoded snippet), highlighted (pre-rendered markup) and wrap set to false.
 *
 * Returns an object with the methods c, l, m, p, i, o and d.
 * NOTE(review): presumably the Svelte create, claim, mount, update, intro, outro and
 * destroy hooks - the helper names are minified, confirm against the Svelte internals.
 *   l(a) rewrites the label only when T(l) differs from the marker svelte-5wyjqd,
 *        the same marker value the LogCompletionsCallback snippet factory uses.
 *   p is ba, the n export of the scheduler chunk (presumably a no-op).
 *   i(a) calls x only while the flag h is falsy; o(a) calls M and clears h.
 * The parameter N is not read inside this function. The place where Pa is wired
 * into the page is not visible in this part of the file.
 */function Pa(N){let l,C="Usage:",u,i,h;return i=new ya({props:{code:"JTIzJTIwVHJhY2luZyUyMG1vZGUlMjAoanVzdCUyMGxvZyUyMHByZWRpY3Rpb25zKSUwQXRyYWluZXIlMjAlM0QlMjBEUE9UcmFpbmVyKC4uLiklMEF3ZWF2ZV9jYWxsYmFjayUyMCUzRCUyMFdlYXZlVHJhY2VDYWxsYmFjayh0cmFpbmVyJTNEdHJhaW5lciklMjAlMjAlMjMlMjBwcm9qZWN0X25hbWUlMjBvcHRpb25hbCUwQXRyYWluZXIuYWRkX2NhbGxiYWNrKHdlYXZlX2NhbGxiYWNrKSUwQSUwQSUyMyUyME9yJTIwc3BlY2lmeSUyMGElMjBwcm9qZWN0JTIwbmFtZSUwQXdlYXZlX2NhbGxiYWNrJTIwJTNEJTIwV2VhdmVUcmFjZUNhbGxiYWNrKHRyYWluZXIlM0R0cmFpbmVyJTJDJTIwcHJvamVjdF9uYW1lJTNEJTIybXktbGxtLXRyYWluaW5nJTIyKSUwQXRyYWluZXIuYWRkX2NhbGxiYWNrKHdlYXZlX2NhbGxiYWNrKSUwQSUwQSUwQSUyMyUyMEV2YWx1YXRpb24lMjBtb2RlJTIwKGxvZyUyMHByZWRpY3Rpb25zJTIwJTJCJTIwc2NvcmVzJTIwJTJCJTIwc3VtbWFyeSklMEFkZWYlMjBhY2N1cmFjeV9zY29yZXIocHJvbXB0JTNBJTIwc3RyJTJDJTIwY29tcGxldGlvbiUzQSUyMHN0ciklMjAtJTNFJTIwZmxvYXQlM0ElMEElMjAlMjAlMjAlMjAlMjMlMjBZb3VyJTIwc2NvcmluZyUyMGxvZ2ljJTIwaGVyZSUyMChtZXRhZGF0YSUyMGF2YWlsYWJsZSUyMHZpYSUyMGV2YWxfYXR0cmlidXRlcyklMEElMjAlMjAlMjAlMjByZXR1cm4lMjBzY29yZSUwQSUwQSUwQXdlYXZlX2NhbGxiYWNrJTIwJTNEJTIwV2VhdmVUcmFjZUNhbGxiYWNrKCUwQSUyMCUyMCUyMCUyMHRyYWluZXIlM0R0cmFpbmVyJTJDJTBBJTIwJTIwJTIwJTIwcHJvamVjdF9uYW1lJTNEJTIybXktbGxtLXRyYWluaW5nJTIyJTJDJTIwJTIwJTIzJTIwb3B0aW9uYWwlMjBhbmQlMjBuZWVkZWQlMjBvbmx5JTIwaWYlMjB3ZWF2ZSUyMGNsaWVudCUyMGlzJTIwbm90JTIwaW5pdGlhbGl6ZWQlMEElMjAlMjAlMjAlMjBzY29yZXJzJTNEJTdCJTIyYWNjdXJhY3klMjIlM0ElMjBhY2N1cmFjeV9zY29yZXIlN0QlMkMlMEEpJTBBdHJhaW5lci5hZGRfY2FsbGJhY2sod2VhdmVfY2FsbGJhY2sp",highlighted:`<span class="hljs-comment"># Tracing mode (just log predictions)</span> | |
| trainer = DPOTrainer(...) | |
| weave_callback = WeaveTraceCallback(trainer=trainer) <span class="hljs-comment"># project_name optional</span> | |
| trainer.add_callback(weave_callback) | |
| <span class="hljs-comment"># Or specify a project name</span> | |
| weave_callback = WeaveTraceCallback(trainer=trainer, project_name=<span class="hljs-string">"my-llm-training"</span>) | |
| trainer.add_callback(weave_callback) | |
| <span class="hljs-comment"># Evaluation mode (log predictions + scores + summary)</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">accuracy_scorer</span>(<span class="hljs-params">prompt: <span class="hljs-built_in">str</span>, completion: <span class="hljs-built_in">str</span></span>) -> <span class="hljs-built_in">float</span>: | |
| <span class="hljs-comment"># Your scoring logic here (metadata available via eval_attributes)</span> | |
| <span class="hljs-keyword">return</span> score | |
| weave_callback = WeaveTraceCallback( | |
| trainer=trainer, | |
| project_name=<span class="hljs-string">"my-llm-training"</span>, <span class="hljs-comment"># optional and needed only if weave client is not initialized</span> | |
| scorers={<span class="hljs-string">"accuracy"</span>: accuracy_scorer}, | |
| ) | |
| trainer.add_callback(weave_callback)`,wrap:!1}}),{c(){l=r("p"),l.textContent=C,u=p(),b(i.$$.fragment)},l(a){l=o(a,"P",{"data-svelte-h":!0}),T(l)!=="svelte-5wyjqd"&&(l.textContent=C),u=m(a),y(i.$$.fragment,a)},m(a,d){c(a,l,d),c(a,u,d),w(i,a,d),h=!0},p:ba,i(a){h||(x(i.$$.fragment,a),h=!0)},o(a){M(i.$$.fragment,a),h=!1},d(a){a&&(e(l),e(u)),_(i,a)}}}function Qa(N){let l,C,u,i,h,a,d,vs,X,bs,U,R,Vs,ts,wa="A <code>TrainerCallback</code> that displays the progress of training or evaluation using Rich.",ys,F,ws,W,H,Ds,es,xa='A <a href="https://huggingface.co/docs/transformers/main/en/main_classes/callback#transformers.TrainerCallback" rel="nofollow">TrainerCallback</a> that logs completions to Weights & Biases and/or Comet.',Ss,I,xs,V,Ms,f,D,Ps,ns,Ma=`A <a href="https://huggingface.co/docs/transformers/main/en/main_classes/callback#transformers.TrainerCallback" rel="nofollow">TrainerCallback</a> that implements <a href="https://huggingface.co/papers/2508.00180" rel="nofollow">BEMA</a> | |
| (Bias-Corrected Exponential Moving Average) by <a href="https://huggingface.co/abblock" rel="nofollow">Adam Block</a> and <a href="https://huggingface.co/cyrilzhang" rel="nofollow">Cyril | |
| Zhang</a>. Code from <a href="https://github.com/abblock/bema" rel="nofollow">https://github.com/abblock/bema</a> under MIT license.`,Qs,ls,Ks,_s,ja=`<span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><msubsup><mi>θ</mi><mi>t</mi><mo mathvariant="normal" lspace="0em" rspace="0em">′</mo></msubsup><mo>=</mo><msub><mi>α</mi><mi>t</mi></msub><mo>⋅</mo><mo stretchy="false">(</mo><msub><mi>θ</mi><mi>t</mi></msub><mo>−</mo><msub><mi>θ</mi><mn>0</mn></msub><mo stretchy="false">)</mo><mo>+</mo><msub><mtext>EMA</mtext><mi>t</mi></msub></mrow><annotation encoding="application/x-tex"> | |
| \\theta_t' = \\alpha_t \\cdot (\\theta_t - \\theta_0) + \\text{EMA}_t | |
| </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1.0489em;vertical-align:-0.247em;"></span><span class="mord"><span class="mord mathnormal" style="margin-right:0.02778em;">θ</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.8019em;"><span style="top:-2.453em;margin-left:-0.0278em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathnormal mtight">t</span></span></span><span style="top:-3.113em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mtight">′</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.247em;"><span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mrel">=</span><span class="mspace" style="margin-right:0.2778em;"></span></span><span class="base"><span class="strut" style="height:0.5945em;vertical-align:-0.15em;"></span><span class="mord"><span class="mord mathnormal" style="margin-right:0.0037em;">α</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.2806em;"><span style="top:-2.55em;margin-left:-0.0037em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathnormal mtight">t</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mbin">⋅</span><span class="mspace" style="margin-right:0.2222em;"></span></span><span 
class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mopen">(</span><span class="mord"><span class="mord mathnormal" style="margin-right:0.02778em;">θ</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.2806em;"><span style="top:-2.55em;margin-left:-0.0278em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathnormal mtight">t</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mbin">−</span><span class="mspace" style="margin-right:0.2222em;"></span></span><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord"><span class="mord mathnormal" style="margin-right:0.02778em;">θ</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.3011em;"><span style="top:-2.55em;margin-left:-0.0278em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight">0</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mclose">)</span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mbin">+</span><span class="mspace" style="margin-right:0.2222em;"></span></span><span class="base"><span class="strut" style="height:0.8333em;vertical-align:-0.15em;"></span><span class="mord"><span class="mord text"><span class="mord">EMA</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.2806em;"><span 
style="top:-2.55em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathnormal mtight">t</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span></span>`,Os,v,sa,ks,Wa='<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><msub><mi>θ</mi><mi>t</mi></msub></mrow><annotation encoding="application/x-tex"> \\theta_t </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8444em;vertical-align:-0.15em;"></span><span class="mord"><span class="mord mathnormal" style="margin-right:0.02778em;">θ</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.2806em;"><span style="top:-2.55em;margin-left:-0.0278em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathnormal mtight">t</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span>',$s,qs,za='<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><msub><mi>θ</mi><mn>0</mn></msub></mrow><annotation encoding="application/x-tex"> \\theta_0 </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8444em;vertical-align:-0.15em;"></span><span class="mord"><span class="mord mathnormal" style="margin-right:0.02778em;">θ</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.3011em;"><span 
style="top:-2.55em;margin-left:-0.0278em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight">0</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span>',Cs,ps,_a="update_after",aa,Ts,Ja='<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><msub><mtext>EMA</mtext><mi>t</mi></msub></mrow><annotation encoding="application/x-tex"> \\text{EMA}_t </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8333em;vertical-align:-0.15em;"></span><span class="mord"><span class="mord text"><span class="mord">EMA</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.2806em;"><span style="top:-2.55em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathnormal mtight">t</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span>',Es,js,Ya='<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><msub><mi>α</mi><mi>t</mi></msub></mrow><annotation encoding="application/x-tex"> \\alpha_t </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.5806em;vertical-align:-0.15em;"></span><span class="mord"><span class="mord mathnormal" style="margin-right:0.0037em;">α</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.2806em;"><span 
style="top:-2.55em;margin-left:-0.0037em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathnormal mtight">t</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span>',Ws,zs,Aa='<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>t</mi></mrow><annotation encoding="application/x-tex"> t </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.6151em;"></span><span class="mord mathnormal">t</span></span></span></span>',Js,Ys,Ua=`<span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><msub><mi>α</mi><mi>t</mi></msub><mo>=</mo><mo stretchy="false">(</mo><mi>ρ</mi><mo>+</mo><mi>γ</mi><mo>⋅</mo><mi>t</mi><msup><mo stretchy="false">)</mo><mrow><mo>−</mo><mi>η</mi></mrow></msup><mi mathvariant="normal">.</mi></mrow><annotation encoding="application/x-tex"> | |
| \\alpha_t = (\\rho + \\gamma \\cdot t)^{-\\eta}. | |
| </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.5806em;vertical-align:-0.15em;"></span><span class="mord"><span class="mord mathnormal" style="margin-right:0.0037em;">α</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.2806em;"><span style="top:-2.55em;margin-left:-0.0037em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathnormal mtight">t</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mrel">=</span><span class="mspace" style="margin-right:0.2778em;"></span></span><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mopen">(</span><span class="mord mathnormal">ρ</span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mbin">+</span><span class="mspace" style="margin-right:0.2222em;"></span></span><span class="base"><span class="strut" style="height:0.6389em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.05556em;">γ</span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mbin">⋅</span><span class="mspace" style="margin-right:0.2222em;"></span></span><span class="base"><span class="strut" style="height:1.0713em;vertical-align:-0.25em;"></span><span class="mord mathnormal">t</span><span class="mclose"><span class="mclose">)</span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:0.8213em;"><span style="top:-3.113em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord 
mtight"><span class="mord mtight">−</span><span class="mord mathnormal mtight" style="margin-right:0.03588em;">η</span></span></span></span></span></span></span></span></span><span class="mord">.</span></span></span></span></span>`,ta,ms,ea,As,Ba=`<span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><msub><mtext>EMA</mtext><mi>t</mi></msub><mo>=</mo><mo stretchy="false">(</mo><mn>1</mn><mo>−</mo><msub><mi>β</mi><mi>t</mi></msub><mo stretchy="false">)</mo><mo>⋅</mo><msub><mtext>EMA</mtext><mrow><mi>t</mi><mo>−</mo><mn>1</mn></mrow></msub><mo>+</mo><msub><mi>β</mi><mi>t</mi></msub><mo>⋅</mo><msub><mi>θ</mi><mi>t</mi></msub></mrow><annotation encoding="application/x-tex"> | |
| \\text{EMA}_t = (1 - \\beta_t) \\cdot \\text{EMA}_{t-1} + \\beta_t \\cdot \\theta_t | |
| </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8333em;vertical-align:-0.15em;"></span><span class="mord"><span class="mord text"><span class="mord">EMA</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.2806em;"><span style="top:-2.55em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathnormal mtight">t</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mrel">=</span><span class="mspace" style="margin-right:0.2778em;"></span></span><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mopen">(</span><span class="mord">1</span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mbin">−</span><span class="mspace" style="margin-right:0.2222em;"></span></span><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord"><span class="mord mathnormal" style="margin-right:0.05278em;">β</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.2806em;"><span style="top:-2.55em;margin-left:-0.0528em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathnormal mtight">t</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mclose">)</span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mbin">⋅</span><span class="mspace" 
style="margin-right:0.2222em;"></span></span><span class="base"><span class="strut" style="height:0.8917em;vertical-align:-0.2083em;"></span><span class="mord"><span class="mord text"><span class="mord">EMA</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.3011em;"><span style="top:-2.55em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mathnormal mtight">t</span><span class="mbin mtight">−</span><span class="mord mtight">1</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.2083em;"><span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mbin">+</span><span class="mspace" style="margin-right:0.2222em;"></span></span><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord"><span class="mord mathnormal" style="margin-right:0.05278em;">β</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.2806em;"><span style="top:-2.55em;margin-left:-0.0528em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathnormal mtight">t</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mbin">⋅</span><span class="mspace" style="margin-right:0.2222em;"></span></span><span class="base"><span class="strut" style="height:0.8444em;vertical-align:-0.15em;"></span><span class="mord"><span class="mord mathnormal" style="margin-right:0.02778em;">θ</span><span 
class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.2806em;"><span style="top:-2.55em;margin-left:-0.0278em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathnormal mtight">t</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span></span>`,na,A,la,Us,Na='<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><msub><mi>β</mi><mi>t</mi></msub></mrow><annotation encoding="application/x-tex"> \\beta_t </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord"><span class="mord mathnormal" style="margin-right:0.05278em;">β</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.2806em;"><span style="top:-2.55em;margin-left:-0.0528em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathnormal mtight">t</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span>',Bs,Ns,Ia='<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>t</mi></mrow><annotation encoding="application/x-tex"> t </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.6151em;"></span><span class="mord mathnormal">t</span></span></span></span>',Is,Ls,La=`<span class="katex-display"><span class="katex"><span 
class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><msub><mi>β</mi><mi>t</mi></msub><mo>=</mo><mo stretchy="false">(</mo><mi>ρ</mi><mo>+</mo><mi>γ</mi><mo>⋅</mo><mi>t</mi><msup><mo stretchy="false">)</mo><mrow><mo>−</mo><mi>κ</mi></mrow></msup><mi mathvariant="normal">.</mi></mrow><annotation encoding="application/x-tex"> | |
| \\beta_t = (\\rho + \\gamma \\cdot t)^{-\\kappa}. | |
| </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord"><span class="mord mathnormal" style="margin-right:0.05278em;">β</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.2806em;"><span style="top:-2.55em;margin-left:-0.0528em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathnormal mtight">t</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mrel">=</span><span class="mspace" style="margin-right:0.2778em;"></span></span><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mopen">(</span><span class="mord mathnormal">ρ</span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mbin">+</span><span class="mspace" style="margin-right:0.2222em;"></span></span><span class="base"><span class="strut" style="height:0.6389em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.05556em;">γ</span><span class="mspace" style="margin-right:0.2222em;"></span><span class="mbin">⋅</span><span class="mspace" style="margin-right:0.2222em;"></span></span><span class="base"><span class="strut" style="height:1.0713em;vertical-align:-0.25em;"></span><span class="mord mathnormal">t</span><span class="mclose"><span class="mclose">)</span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:0.8213em;"><span style="top:-3.113em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span 
class="mord mtight"><span class="mord mtight">−</span><span class="mord mathnormal mtight">κ</span></span></span></span></span></span></span></span></span><span class="mord">.</span></span></span></span></span>`,pa,L,Zs,S,Gs,g,P,ma,is,ka=`A <a href="https://huggingface.co/docs/transformers/main/en/main_classes/callback#transformers.TrainerCallback" rel="nofollow">TrainerCallback</a> that logs traces and evaluations to W&B Weave. The callback uses | |
| <a href="https://weave-docs.wandb.ai/guides/evaluation/evaluation_logger/" rel="nofollow">https://weave-docs.wandb.ai/guides/evaluation/evaluation_logger/</a> to log traces and evaluations at each evaluation | |
| step.`,ia,rs,$a="Supports two modes based on the <code>scorers</code> parameter:",ra,os,qa="<li><strong>Tracing Mode</strong> (when scorers=None): Logs predictions for data exploration and analysis</li> <li><strong>Evaluation Mode</strong> (when scorers provided): Logs predictions with scoring and summary metrics</li>",oa,cs,Ca="Both modes use Weave’s EvaluationLogger for structured, consistent data logging.",ca,hs,Ta=`The callback logs data during evaluation phases (<code>on_evaluate</code>) rather than training steps, making it more | |
| efficient and semantically correct. It gracefully handles missing weave installation by logging warnings and | |
| skipping weave-specific functionality. It also checks for existing weave clients before initializing new ones.`,ha,Z,ga,G,Q,ua,gs,Ea="Initialize Weave when training begins.",Xs,K,Rs,us,Fs;return h=new Ha({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),d=new ds({props:{title:"Callbacks",local:"callbacks",headingTag:"h1"}}),X=new ds({props:{title:"RichProgressCallback",local:"trl.RichProgressCallback",headingTag:"h2"}}),R=new fs({props:{name:"class trl.RichProgressCallback",anchor:"trl.RichProgressCallback",parameters:[],source:"https://github.com/huggingface/trl/blob/vr_5618/trl/trainer/callbacks.py#L143"}}),F=new ds({props:{title:"LogCompletionsCallback",local:"trl.LogCompletionsCallback",headingTag:"h2"}}),H=new fs({props:{name:"class trl.LogCompletionsCallback",anchor:"trl.LogCompletionsCallback",parameters:[{name:"trainer",val:": Trainer"},{name:"generation_config",val:": transformers.generation.configuration_utils.GenerationConfig | None = None"},{name:"num_prompts",val:": int | None = None"},{name:"freq",val:": int | None = None"}],parametersDescription:[{anchor:"trl.LogCompletionsCallback.trainer",description:`<strong>trainer</strong> (<code>Trainer</code>) — | |
| Trainer to which the callback will be attached. The trainer’s evaluation dataset must include a <code>"prompt"</code> | |
| column containing the prompts for generating completions.`,name:"trainer"},{anchor:"trl.LogCompletionsCallback.generation_config",description:`<strong>generation_config</strong> (<a href="https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationConfig" rel="nofollow">GenerationConfig</a>, <em>optional</em>) — | |
| The generation config to use for generating completions.`,name:"generation_config"},{anchor:"trl.LogCompletionsCallback.num_prompts",description:`<strong>num_prompts</strong> (<code>int</code>, <em>optional</em>) — | |
| The number of prompts to generate completions for. If not provided, defaults to the number of examples in | |
| the evaluation dataset.`,name:"num_prompts"},{anchor:"trl.LogCompletionsCallback.freq",description:`<strong>freq</strong> (<code>int</code>, <em>optional</em>) — | |
| The frequency at which to log completions. If not provided, defaults to the trainer’s <code>eval_steps</code>.`,name:"freq"}],source:"https://github.com/huggingface/trl/blob/vr_5618/trl/trainer/callbacks.py#L254"}}),I=new va({props:{anchor:"trl.LogCompletionsCallback.example",$$slots:{default:[Da]},$$scope:{ctx:N}}}),V=new ds({props:{title:"BEMACallback",local:"trl.BEMACallback",headingTag:"h2"}}),D=new fs({props:{name:"class trl.BEMACallback",anchor:"trl.BEMACallback",parameters:[{name:"update_freq",val:": int = 400"},{name:"ema_power",val:": float = 0.5"},{name:"bias_power",val:": float = 0.2"},{name:"lag",val:": int = 10"},{name:"update_after",val:": int = 0"},{name:"multiplier",val:": float = 1.0"},{name:"min_ema_multiplier",val:": float = 0.0"},{name:"device",val:": str = 'cpu'"}],parametersDescription:[{anchor:"trl.BEMACallback.update_freq",description:`<strong>update_freq</strong> (<code>int</code>, <em>optional</em>, defaults to <code>400</code>) — | |
| Update the BEMA weights every X steps. Denoted this as {@html "<span class="\\"katex\\""><span class="\\"katex-mathml\\""><math xmlns="\\"http://www.w3.org/1998/Math/MathML\\""><semantics><mrow><mi>ϕ</mi></mrow><annotation encoding="\\"application/x-tex\\""> \\\\phi </annotation></semantics></math></span><span class="\\"katex-html\\"" aria-hidden="\\"true\\""><span class="\\"base\\""><span class="\\"strut\\"" style="\\"height:0.8889em;vertical-align:-0.1944em;\\""></span><span class="\\"mord" mathnormal\\">ϕ</span></span></span></span>"} in the paper.`,name:"update_freq"},{anchor:"trl.BEMACallback.ema_power",description:`<strong>ema_power</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.5</code>) — | |
| Power for the EMA decay factor. Denoted {@html "<span class="\\"katex\\""><span class="\\"katex-mathml\\""><math xmlns="\\"http://www.w3.org/1998/Math/MathML\\""><semantics><mrow><mi>κ</mi></mrow><annotation encoding="\\"application/x-tex\\""> \\\\kappa </annotation></semantics></math></span><span class="\\"katex-html\\"" aria-hidden="\\"true\\""><span class="\\"base\\""><span class="\\"strut\\"" style="\\"height:0.4306em;\\""></span><span class="\\"mord" mathnormal\\">κ</span></span></span></span>"} in the paper. To disable EMA, set this to <code>0.0</code>.`,name:"ema_power"},{anchor:"trl.BEMACallback.bias_power",description:`<strong>bias_power</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.2</code>) — | |
| Power for the BEMA scaling factor. Denoted {@html "<span class="\\"katex\\""><span class="\\"katex-mathml\\""><math xmlns="\\"http://www.w3.org/1998/Math/MathML\\""><semantics><mrow><mi>η</mi></mrow><annotation encoding="\\"application/x-tex\\""> \\\\eta </annotation></semantics></math></span><span class="\\"katex-html\\"" aria-hidden="\\"true\\""><span class="\\"base\\""><span class="\\"strut\\"" style="\\"height:0.625em;vertical-align:-0.1944em;\\""></span><span class="\\"mord" mathnormal\\" style="\\"margin-right:0.03588em;\\"">η</span></span></span></span>"} in the paper. To disable BEMA, set this to <code>0.0</code>.`,name:"bias_power"},{anchor:"trl.BEMACallback.lag",description:`<strong>lag</strong> (<code>int</code>, <em>optional</em>, defaults to <code>10</code>) — | |
| Initial offset in the weight decay schedule that controls early-stage smoothness by acting as a virtual | |
| starting age for the updates. Denoted as {@html "<span class="\\"katex\\""><span class="\\"katex-mathml\\""><math xmlns="\\"http://www.w3.org/1998/Math/MathML\\""><semantics><mrow><mi>ρ</mi></mrow><annotation encoding="\\"application/x-tex\\""> \\\\rho </annotation></semantics></math></span><span class="\\"katex-html\\"" aria-hidden="\\"true\\""><span class="\\"base\\""><span class="\\"strut\\"" style="\\"height:0.625em;vertical-align:-0.1944em;\\""></span><span class="\\"mord" mathnormal\\">ρ</span></span></span></span>"} in the paper.`,name:"lag"},{anchor:"trl.BEMACallback.update_after",description:`<strong>update_after</strong> (<code>int</code>, <em>optional</em>, defaults to <code>0</code>) — | |
| Burn-in time before starting to update the BEMA weights. Denoted {@html "<span class="\\"katex\\""><span class="\\"katex-mathml\\""><math xmlns="\\"http://www.w3.org/1998/Math/MathML\\""><semantics><mrow><mi>τ</mi></mrow><annotation encoding="\\"application/x-tex\\""> \\\\tau </annotation></semantics></math></span><span class="\\"katex-html\\"" aria-hidden="\\"true\\""><span class="\\"base\\""><span class="\\"strut\\"" style="\\"height:0.4306em;\\""></span><span class="\\"mord" mathnormal\\" style="\\"margin-right:0.1132em;\\"">τ</span></span></span></span>"} in the paper.`,name:"update_after"},{anchor:"trl.BEMACallback.multiplier",description:`<strong>multiplier</strong> (<code>float</code>, <em>optional</em>, defaults to <code>1.0</code>) — | |
| Initial value for the EMA decay factor. Denoted as {@html "<span class="\\"katex\\""><span class="\\"katex-mathml\\""><math xmlns="\\"http://www.w3.org/1998/Math/MathML\\""><semantics><mrow><mi>γ</mi></mrow><annotation encoding="\\"application/x-tex\\""> \\\\gamma </annotation></semantics></math></span><span class="\\"katex-html\\"" aria-hidden="\\"true\\""><span class="\\"base\\""><span class="\\"strut\\"" style="\\"height:0.625em;vertical-align:-0.1944em;\\""></span><span class="\\"mord" mathnormal\\" style="\\"margin-right:0.05556em;\\"">γ</span></span></span></span>"} in the paper.`,name:"multiplier"},{anchor:"trl.BEMACallback.min_ema_multiplier",description:`<strong>min_ema_multiplier</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.0</code>) — | |
| Minimum value for the EMA decay factor.`,name:"min_ema_multiplier"},{anchor:"trl.BEMACallback.device",description:`<strong>device</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"cpu"</code>) — | |
| Device to use for the BEMA buffers, e.g. <code>"cpu"</code> or <code>"cuda"</code>. Note that in most cases, this device SHOULD | |
| BE DIFFERENT from the device used for training in order to avoid OOM.`,name:"device"}],source:"https://github.com/huggingface/trl/blob/vr_5618/trl/trainer/callbacks.py#L575"}}),L=new va({props:{anchor:"trl.BEMACallback.example",$$slots:{default:[Sa]},$$scope:{ctx:N}}}),S=new ds({props:{title:"WeaveCallback",local:"trl.WeaveCallback",headingTag:"h2"}}),P=new fs({props:{name:"class trl.WeaveCallback",anchor:"trl.WeaveCallback",parameters:[{name:"trainer",val:": Trainer"},{name:"project_name",val:": str | None = None"},{name:"scorers",val:": dict[str, callable] | None = None"},{name:"generation_config",val:": transformers.generation.configuration_utils.GenerationConfig | None = None"},{name:"num_prompts",val:": int | None = None"},{name:"dataset_name",val:": str = 'eval_dataset'"},{name:"model_name",val:": str | None = None"}],parametersDescription:[{anchor:"trl.WeaveCallback.trainer",description:`<strong>trainer</strong> (<code>Trainer</code>) — | |
| Trainer to which the callback will be attached. The trainer’s evaluation dataset must include a <code>"prompt"</code> | |
| column containing the prompts for generating completions.`,name:"trainer"},{anchor:"trl.WeaveCallback.project_name",description:`<strong>project_name</strong> (<code>str</code>, <em>optional</em>) — | |
| Name of the Weave project where data will be logged. If not provided, will try to use existing weave client | |
| or fall back to the active wandb run’s project name. Raises an error if none of these are available.`,name:"project_name"},{anchor:"trl.WeaveCallback.scorers",description:`<strong>scorers</strong> (<code>dict[str, Callable]</code>, <em>optional</em>) — | |
| Dictionary mapping scorer names to scorer functions. If <code>None</code>, operates in tracing mode (predictions | |
| only). If provided, operates in evaluation mode (predictions + scores + summary). Scorer functions should | |
| have signature: <code>scorer(prompt: str, completion: str) -> float | int</code>`,name:"scorers"},{anchor:"trl.WeaveCallback.generation_config",description:`<strong>generation_config</strong> (<a href="https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationConfig" rel="nofollow">GenerationConfig</a>, <em>optional</em>) — | |
| Generation config to use for generating completions.`,name:"generation_config"},{anchor:"trl.WeaveCallback.num_prompts",description:`<strong>num_prompts</strong> (<code>int</code> or <code>None</code>, <em>optional</em>) — | |
| Number of prompts to generate completions for. If not provided, defaults to the number of examples in the | |
| evaluation dataset.`,name:"num_prompts"},{anchor:"trl.WeaveCallback.dataset_name",description:`<strong>dataset_name</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"eval_dataset"</code>) — | |
| Name for the dataset metadata in Weave.`,name:"dataset_name"},{anchor:"trl.WeaveCallback.model_name",description:`<strong>model_name</strong> (<code>str</code>, <em>optional</em>) — | |
| Name for the model metadata in Weave. If not provided, attempts to extract from model config.`,name:"model_name"}],source:"https://github.com/huggingface/trl/blob/vr_5618/trl/trainer/callbacks.py#L346"}}),Z=new va({props:{anchor:"trl.WeaveCallback.example",$$slots:{default:[Pa]},$$scope:{ctx:N}}}),Q=new fs({props:{name:"on_train_begin",anchor:"trl.WeaveCallback.on_train_begin",parameters:[{name:"args",val:""},{name:"state",val:""},{name:"control",val:""},{name:"**kwargs",val:""}],source:"https://github.com/huggingface/trl/blob/vr_5618/trl/trainer/callbacks.py#L477"}}),K=new Va({props:{source:"https://github.com/huggingface/trl/blob/main/docs/source/callbacks.md"}}),{c(){l=r("meta"),C=p(),u=r("p"),i=p(),b(h.$$.fragment),a=p(),b(d.$$.fragment),vs=p(),b(X.$$.fragment),bs=p(),U=r("div"),b(R.$$.fragment),Vs=p(),ts=r("p"),ts.innerHTML=wa,ys=p(),b(F.$$.fragment),ws=p(),W=r("div"),b(H.$$.fragment),Ds=p(),es=r("p"),es.innerHTML=xa,Ss=p(),b(I.$$.fragment),xs=p(),b(V.$$.fragment),Ms=p(),f=r("div"),b(D.$$.fragment),Ps=p(),ns=r("p"),ns.innerHTML=Ma,Qs=p(),ls=r("p"),Ks=E(`BEMA computes model weights that scale like: | |
| `),_s=new z(!1),Os=p(),v=r("p"),sa=E("where "),ks=new z(!1),$s=E(" is the current model weights, "),qs=new z(!1),Cs=E(` is a snapshot of the model weights at the | |
| first `),ps=r("code"),ps.textContent=_a,aa=E(" step, "),Ts=new z(!1),Es=E(" is the exponential moving average of the model weights, and"),js=new z(!1),Ws=E(" is a scaling factor that decays with the number of steps "),zs=new z(!1),Js=E(` as | |
| `),Ys=new z(!1),ta=p(),ms=r("p"),ea=E(`The EMA is computed as: | |
| `),As=new z(!1),na=p(),A=r("p"),la=E("where "),Us=new z(!1),Bs=E(" is a decay factor that decays with the number of steps "),Ns=new z(!1),Is=E(` as | |
| `),Ls=new z(!1),pa=p(),b(L.$$.fragment),Zs=p(),b(S.$$.fragment),Gs=p(),g=r("div"),b(P.$$.fragment),ma=p(),is=r("p"),is.innerHTML=ka,ia=p(),rs=r("p"),rs.innerHTML=$a,ra=p(),os=r("ul"),os.innerHTML=qa,oa=p(),cs=r("p"),cs.textContent=Ca,ca=p(),hs=r("p"),hs.innerHTML=Ta,ha=p(),b(Z.$$.fragment),ga=p(),G=r("div"),b(Q.$$.fragment),ua=p(),gs=r("p"),gs.textContent=Ea,Xs=p(),b(K.$$.fragment),Rs=p(),us=r("p"),this.h()},l(s){const n=Fa("svelte-u9bgzb",document.head);l=o(n,"META",{name:!0,content:!0}),n.forEach(e),C=m(s),u=o(s,"P",{}),J(u).forEach(e),i=m(s),y(h.$$.fragment,s),a=m(s),y(d.$$.fragment,s),vs=m(s),y(X.$$.fragment,s),bs=m(s),U=o(s,"DIV",{class:!0});var O=J(U);y(R.$$.fragment,O),Vs=m(O),ts=o(O,"P",{"data-svelte-h":!0}),T(ts)!=="svelte-4ypj8u"&&(ts.innerHTML=wa),O.forEach(e),ys=m(s),y(F.$$.fragment,s),ws=m(s),W=o(s,"DIV",{class:!0});var B=J(W);y(H.$$.fragment,B),Ds=m(B),es=o(B,"P",{"data-svelte-h":!0}),T(es)!=="svelte-1rlpx8d"&&(es.innerHTML=xa),Ss=m(B),y(I.$$.fragment,B),B.forEach(e),xs=m(s),y(V.$$.fragment,s),Ms=m(s),f=o(s,"DIV",{class:!0});var k=J(f);y(D.$$.fragment,k),Ps=m(k),ns=o(k,"P",{"data-svelte-h":!0}),T(ns)!=="svelte-1rpbmy"&&(ns.innerHTML=Ma),Qs=m(k),ls=o(k,"P",{});var da=J(ls);Ks=j(da,`BEMA computes model weights that scale like: | |
| `),_s=Y(da,!1),da.forEach(e),Os=m(k),v=o(k,"P",{});var q=J(v);sa=j(q,"where "),ks=Y(q,!1),$s=j(q," is the current model weights, "),qs=Y(q,!1),Cs=j(q,` is a snapshot of the model weights at the | |
| first `),ps=o(q,"CODE",{"data-svelte-h":!0}),T(ps)!=="svelte-1il68i0"&&(ps.textContent=_a),aa=j(q," step, "),Ts=Y(q,!1),Es=j(q," is the exponential moving average of the model weights, and"),js=Y(q,!1),Ws=j(q," is a scaling factor that decays with the number of steps "),zs=Y(q,!1),Js=j(q,` as | |
| `),Ys=Y(q,!1),q.forEach(e),ta=m(k),ms=o(k,"P",{});var fa=J(ms);ea=j(fa,`The EMA is computed as: | |
| `),As=Y(fa,!1),fa.forEach(e),na=m(k),A=o(k,"P",{});var ss=J(A);la=j(ss,"where "),Us=Y(ss,!1),Bs=j(ss," is a decay factor that decays with the number of steps "),Ns=Y(ss,!1),Is=j(ss,` as | |
| `),Ls=Y(ss,!1),ss.forEach(e),pa=m(k),y(L.$$.fragment,k),k.forEach(e),Zs=m(s),y(S.$$.fragment,s),Gs=m(s),g=o(s,"DIV",{class:!0});var $=J(g);y(P.$$.fragment,$),ma=m($),is=o($,"P",{"data-svelte-h":!0}),T(is)!=="svelte-r3tcia"&&(is.innerHTML=ka),ia=m($),rs=o($,"P",{"data-svelte-h":!0}),T(rs)!=="svelte-1vbdt3u"&&(rs.innerHTML=$a),ra=m($),os=o($,"UL",{"data-svelte-h":!0}),T(os)!=="svelte-15k626o"&&(os.innerHTML=qa),oa=m($),cs=o($,"P",{"data-svelte-h":!0}),T(cs)!=="svelte-15er84l"&&(cs.textContent=Ca),ca=m($),hs=o($,"P",{"data-svelte-h":!0}),T(hs)!=="svelte-1e96no5"&&(hs.innerHTML=Ta),ha=m($),y(Z.$$.fragment,$),ga=m($),G=o($,"DIV",{class:!0});var Hs=J(G);y(Q.$$.fragment,Hs),ua=m(Hs),gs=o(Hs,"P",{"data-svelte-h":!0}),T(gs)!=="svelte-nfuhma"&&(gs.textContent=Ea),Hs.forEach(e),$.forEach(e),Xs=m(s),y(K.$$.fragment,s),Rs=m(s),us=o(s,"P",{}),J(us).forEach(e),this.h()},h(){as(l,"name","hf:doc:metadata"),as(l,"content",Ka),as(U,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),as(W,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),_s.a=null,ks.a=$s,qs.a=Cs,Ts.a=Es,js.a=Ws,zs.a=Js,Ys.a=null,As.a=null,Us.a=Bs,Ns.a=Is,Ls.a=null,as(f,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),as(G,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),as(g,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8")},m(s,n){t(document.head,l),c(s,C,n),c(s,u,n),c(s,i,n),w(h,s,n),c(s,a,n),w(d,s,n),c(s,vs,n),w(X,s,n),c(s,bs,n),c(s,U,n),w(R,U,null),t(U,Vs),t(U,ts),c(s,ys,n),w(F,s,n),c(s,ws,n),c(s,W,n),w(H,W,null),t(W,Ds),t(W,es),t(W,Ss),w(I,W,null),c(s,xs,n),w(V,s,n),c(s,Ms,n),c(s,f,n),w(D,f,null),t(f,Ps),t(f,ns),t(f,Qs),t(f,ls),t(ls,Ks),_s.m(ja,ls),t(f,Os),t(f,v),t(v,sa),ks.m(Wa,v),t(v,$s),qs.m(za,v),t(v,Cs),t(v,ps),t(v,aa),Ts.m(Ja,v),t(v,Es),js.m(Ya,v),t(v,Ws),zs.m(Aa,v),t(v,Js),Ys.m(Ua,v),t(f,ta),t(f,ms),t(ms,ea),As.m(Ba,ms),t(f,na),t(f,A),t(A,la),Us.m(Na,A),t(A,Bs),Ns.m(Ia,A),t(A,Is),Ls.m(La,A),t(f,pa),w(L,f,null),c(s,Zs,n),w(S,s,n),c(s,Gs,n),c(s,g,n),w(P,g,null),t(g,ma),t(g,is),t(g,ia),t(g,rs),t(g,ra),t(g,os),t(g,oa),t(g,cs),t(g,ca),t(g,hs),t(g,ha),w(Z,g,null),t(g,ga),t(g,G),w(Q,G,null),t(G,ua),t(G,gs),c(s,Xs,n),w(K,s,n),c(s,Rs,n),c(s,us,n),Fs=!0},p(s,[n]){const O={};n&2&&(O.$$scope={dirty:n,ctx:s}),I.$set(O);const B={};n&2&&(B.$$scope={dirty:n,ctx:s}),L.$set(B);const k={};n&2&&(k.$$scope={dirty:n,ctx:s}),Z.$set(k)},i(s){Fs||(x(h.$$.fragment,s),x(d.$$.fragment,s),x(X.$$.fragment,s),x(R.$$.fragment,s),x(F.$$.fragment,s),x(H.$$.fragment,s),x(I.$$.fragment,s),x(V.$$.fragment,s),x(D.$$.fragment,s),x(L.$$.fragment,s),x(S.$$.fragment,s),x(P.$$.fragment,s),x(Z.$$.fragment,s),x(Q.$$.fragment,s),x(K.$$.fragment,s),Fs=!0)},o(s){M(h.$$.fragment,s),M(d.$$.fragment,s),M(X.$$.fragment,s),M(R.$$.fragment,s),M(F.$$.fragment,s),M(H.$$.fragment,s),M(I.$$.fragment,s),M(V.$$.fragment,s),M(D.$$.fragment,s),M(L.$$.fragment,s),M(S.$$.fragment,s),M(P.$$.fragment,s),M(Z.$$.fragment,s),M(Q.$$.fragment,s),M(K.$$.fragment,s),Fs=!1},d(s){s&&(e(C),e(u),e(i),e(a),e(vs),e(bs),e(U),e(ys),e(ws),e(W),e(xs),e(Ms),e(f),e(Zs),e(Gs),e(g),e(Xs),e(Rs),e(us)),e(l),_(h,s),_(d,s),_(X,s),_(R),_(F,s),_(H),_(I),_(V,s),_(D),_(L),_(S,s),_(P),_(Z),_(Q),_(K,s)}}}const 
Ka='{"title":"Callbacks","local":"callbacks","sections":[{"title":"RichProgressCallback","local":"trl.RichProgressCallback","sections":[],"depth":2},{"title":"LogCompletionsCallback","local":"trl.LogCompletionsCallback","sections":[],"depth":2},{"title":"BEMACallback","local":"trl.BEMACallback","sections":[],"depth":2},{"title":"WeaveCallback","local":"trl.WeaveCallback","sections":[],"depth":2}],"depth":1}';
// `Ka` (declared by the `const` that closes the previous line) is the page's
// serialized metadata: the page title plus a flat outline of its sections.
// The fragment's `h()` step sets it as the `content` attribute of the element
// whose `name` attribute is "hf:doc:metadata".

/**
 * Instance function handed to the component initializer.
 *
 * Registers a callback through `Ga` (the scheduler chunk's `o` export;
 * presumably Svelte's `onMount` — confirm against that chunk). The callback
 * reads the `fw` query parameter from the current URL and discards the
 * value, so it has no observable effect beyond the registration itself.
 *
 * @param {*} N - Supplied by the initializer; not read here.
 * @returns {Array} Always a new empty array.
 */
function Oa(N) {
  Ga(() => {
    // Result intentionally unused in the compiled output.
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/**
 * Page component for the "Callbacks" documentation page.
 *
 * Extends `Xa` (the index chunk's `S` export) and wires itself up through
 * `Ra` with the instance function `Oa`, the fragment factory `Qa`, the
 * comparison helper `Za`, and an empty props map.
 */
class pt extends Xa {
  /**
   * @param {*} l - Options object forwarded unchanged to `Ra`.
   */
  constructor(l) {
    super();
    Ra(this, l, Oa, Qa, Za, {});
  }
}

export { pt as component };
Xet Storage Details
- Size: 48.4 kB
- Xet hash: 194c45976b58a8b9bdfeeb79e5bae1acd92ab859f872d218e7e7cf4f1c98891c
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.