Buckets:

HuggingFaceDocBuilder's picture
download
raw
10.5 kB
/**
 * Compiled Svelte page component for the "Liger Kernel Integration" document.
 *
 * This looks like build output (content-hashed chunk imports, single-letter
 * aliases). The import aliases are kept exactly as generated; the role noted
 * next to a helper below is inferred from how it is called in this file and
 * should be confirmed against the chunk sources before relying on it.
 *
 * The module's only export is the page class, exported as `component`.
 */
import { s as he, o as ye, n as Q } from "../chunks/scheduler.7b731bd4.js";
import {
  S as we,
  i as _e,
  e as T, // T(tagName) -> new element
  s as p, // p() -> separator node placed between siblings
  c as $, // $(fragment) -> run a child component's create step
  h as be, // be(hash, document.head) -> existing head nodes to claim
  a as M, // M(nodes, TAG, attrs) -> claim an existing element
  d as s, // s(node) -> detach node
  b as f, // f(nodes) -> claim a separator node
  f as oe, // oe(element) -> claimable child nodes
  g, // g(fragment, nodes) -> run a child component's claim step
  j as J, // J(element) -> value compared against the "svelte-…" content hashes
  k as de, // de(element, name, value) -> set attribute
  l as X, // X(parent, node) -> append
  m as a, // a(target, node, anchor) -> insert
  n as u, // u(component, target, anchor) -> mount child component
  t as c, // c(fragment, local) -> intro transition
  o as d, // d(fragment, local) -> outro transition
  p as h, // h(component, detaching) -> destroy child component
} from "../chunks/index.cc268345.js";
import { C as Te, H as fe, E as Me } from "../chunks/MermaidChart.svelte_svelte_type_style_lang.f0d99f98.js";
import { C as I } from "../chunks/CodeBlock.169a125f.js";
import { H as ke, a as B } from "../chunks/HfOption.9f04abd1.js";

/** Value written to the `content` attribute of the `hf:doc:metadata` meta tag. */
const DOC_METADATA =
  '{"title":"Liger Kernel Integration","local":"liger-kernel-integration","sections":[{"title":"Supported Trainers","local":"supported-trainers","sections":[],"depth":2},{"title":"Usage","local":"usage","sections":[],"depth":2}],"depth":1}';

// Static markup / text for the page body. The line break inside INTRO_HTML and
// CONFIG_STEP_HTML is written as "\n": a raw line break is not allowed inside a
// quoted string literal.
const INTRO_HTML =
  '<a href="https://github.com/linkedin/Liger-Kernel" rel="nofollow">Liger Kernel</a> is a collection of Triton kernels designed specifically for LLM training. It can effectively increase multi-GPU training throughput by 20% and reduce memory usage by 60%. That way, we can <strong>4x</strong> our context length, as described in the benchmark below. \nThey have implemented Hugging Face compatible <code>RMSNorm</code>, <code>RoPE</code>, <code>SwiGLU</code>, <code>CrossEntropy</code>, <code>FusedLinearCrossEntropy</code>, with more to come. The kernel works out of the box with <a href="https://github.com/Dao-AILab/flash-attention" rel="nofollow">FlashAttention</a>, <a href="https://pytorch.org/tutorials/intermediate/FSDP_tutorial.html" rel="nofollow">PyTorch FSDP</a>, and <a href="https://github.com/microsoft/DeepSpeed" rel="nofollow">Microsoft DeepSpeed</a>.';
const OFFLOAD_NOTE_HTML =
  "With this memory reduction, you can potentially turn off <code>cpu_offloading</code> or gradient checkpointing to further boost the performance.";
const BENCHMARK_TABLE_HTML =
  '<thead><tr><th>Speed Up</th> <th>Memory Reduction</th></tr></thead> <tbody><tr><td><img src="https://raw.githubusercontent.com/linkedin/Liger-Kernel/main/docs/images/e2e-tps.png" alt="Speed up"/></td> <td><img src="https://raw.githubusercontent.com/linkedin/Liger-Kernel/main/docs/images/e2e-memory.png" alt="Memory"/></td></tr></tbody>';
const TRAINERS_INTRO_TEXT = "Liger Kernel is supported in the following TRL trainers:";
const TRAINERS_LIST_HTML =
  "<li><strong>SFT</strong> (Supervised Fine-Tuning)</li> <li><strong>DPO</strong> (Direct Preference Optimization)</li> <li><strong>GRPO</strong> (Group Relative Policy Optimization)</li> <li><strong>KTO</strong> (Kahneman-Tversky Optimization)</li> <li><strong>GKD</strong> (Generalized Knowledge Distillation)</li>";
const INSTALL_STEP_TEXT = "First, install Liger Kernel:";
const CONFIG_STEP_HTML =
  "<p>Once installed, set <code>use_liger_kernel=True</code> in your trainer config. \nNo other changes are needed!</p>";
const LEARN_MORE_HTML =
  'To learn more about Liger-Kernel, visit their <a href="https://github.com/linkedin/Liger-Kernel/" rel="nofollow">official repository</a>.';

/**
 * Builds a slot-fragment factory that renders one `CodeBlock` (`I`).
 *
 * @param {string} code - Encoded snippet handed to the code block's `code` prop.
 * @param {string} highlighted - Pre-highlighted HTML for the same snippet.
 * @returns {Function} Factory returning a fragment with the usual
 *   c/l/m/p/i/o/d lifecycle methods for that single code block.
 */
function createCodeSlot(code, highlighted) {
  return function createCodeSlotFragment() {
    const codeBlock = new I({ props: { code, highlighted, wrap: false } });
    let current;

    return {
      c() {
        $(codeBlock.$$.fragment);
      },
      l(nodes) {
        g(codeBlock.$$.fragment, nodes);
      },
      m(target, anchor) {
        u(codeBlock, target, anchor);
        current = true;
      },
      p: Q,
      i(local) {
        if (current) {
          return;
        }
        c(codeBlock.$$.fragment, local);
        current = true;
      },
      o(local) {
        d(codeBlock.$$.fragment, local);
        current = false;
      },
      d(detaching) {
        h(codeBlock, detaching);
      },
    };
  };
}

/**
 * One entry per trainer tab, in display order.
 *
 * Each `highlighted` snippet keeps a blank line between the import and the
 * assignment so the rendered code matches the `%0A%0A` in its encoded `code`
 * payload (the text that the code block receives for the same snippet).
 */
const TRAINER_SNIPPETS = [
  {
    option: "SFT",
    createSlot: createCodeSlot(
      "ZnJvbSUyMHRybCUyMGltcG9ydCUyMFNGVENvbmZpZyUwQSUwQXRyYWluaW5nX2FyZ3MlMjAlM0QlMjBTRlRDb25maWcoLi4uJTJDJTIwdXNlX2xpZ2VyX2tlcm5lbCUzRFRydWUp",
      `<span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> SFTConfig

training_args = SFTConfig(..., use_liger_kernel=<span class="hljs-literal">True</span>)`,
    ),
  },
  {
    option: "DPO",
    createSlot: createCodeSlot(
      "ZnJvbSUyMHRybCUyMGltcG9ydCUyMERQT0NvbmZpZyUwQSUwQXRyYWluaW5nX2FyZ3MlMjAlM0QlMjBEUE9Db25maWcoLi4uJTJDJTIwdXNlX2xpZ2VyX2tlcm5lbCUzRFRydWUp",
      `<span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> DPOConfig

training_args = DPOConfig(..., use_liger_kernel=<span class="hljs-literal">True</span>)`,
    ),
  },
  {
    option: "GRPO",
    createSlot: createCodeSlot(
      "ZnJvbSUyMHRybCUyMGltcG9ydCUyMEdSUE9Db25maWclMEElMEF0cmFpbmluZ19hcmdzJTIwJTNEJTIwR1JQT0NvbmZpZyguLi4lMkMlMjB1c2VfbGlnZXJfa2VybmVsJTNEVHJ1ZSk=",
      `<span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> GRPOConfig

training_args = GRPOConfig(..., use_liger_kernel=<span class="hljs-literal">True</span>)`,
    ),
  },
  {
    option: "KTO",
    createSlot: createCodeSlot(
      "ZnJvbSUyMHRybCUyMGltcG9ydCUyMEtUT0NvbmZpZyUwQSUwQXRyYWluaW5nX2FyZ3MlMjAlM0QlMjBLVE9Db25maWcoLi4uJTJDJTIwdXNlX2xpZ2VyX2tlcm5lbCUzRFRydWUp",
      `<span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> KTOConfig

training_args = KTOConfig(..., use_liger_kernel=<span class="hljs-literal">True</span>)`,
    ),
  },
  {
    option: "GKD",
    createSlot: createCodeSlot(
      "ZnJvbSUyMHRybC5leHBlcmltZW50YWwuZ2tkJTIwaW1wb3J0JTIwR0tEQ29uZmlnJTBBJTBBdHJhaW5pbmdfYXJncyUyMCUzRCUyMEdLRENvbmZpZyguLi4lMkMlMjB1c2VfbGlnZXJfa2VybmVsJTNEVHJ1ZSk=",
      `<span class="hljs-keyword">from</span> trl.experimental.gkd <span class="hljs-keyword">import</span> GKDConfig

training_args = GKDConfig(..., use_liger_kernel=<span class="hljs-literal">True</span>)`,
    ),
  },
];

/**
 * Default-slot fragment for the trainer option group: one `B` component per
 * entry of TRAINER_SNIPPETS, with a separator node between consecutive tabs.
 *
 * @param {*} ctx - Component context forwarded to each tab's `$$scope`.
 * @returns {object} Fragment with c/l/m/p/i/o/d lifecycle methods.
 */
function createTrainerTabsSlot(ctx) {
  const tabs = TRAINER_SNIPPETS.map(
    ({ option, createSlot }) =>
      new B({
        props: {
          id: "liger",
          option,
          $$slots: { default: [createSlot] },
          $$scope: { ctx },
        },
      }),
  );
  // gaps[k] is the separator that sits between tabs[k] and tabs[k + 1].
  const gaps = [];
  let current;

  return {
    c() {
      tabs.forEach((tab, idx) => {
        if (idx > 0) {
          gaps[idx - 1] = p();
        }
        $(tab.$$.fragment);
      });
    },
    l(nodes) {
      tabs.forEach((tab, idx) => {
        if (idx > 0) {
          gaps[idx - 1] = f(nodes);
        }
        g(tab.$$.fragment, nodes);
      });
    },
    m(target, anchor) {
      tabs.forEach((tab, idx) => {
        if (idx > 0) {
          a(target, gaps[idx - 1], anchor);
        }
        u(tab, target, anchor);
      });
      current = true;
    },
    p(nextCtx, dirty) {
      for (const tab of tabs) {
        const changes = {};
        if (dirty & 2) {
          changes.$$scope = { dirty, ctx: nextCtx };
        }
        tab.$set(changes);
      }
    },
    i(local) {
      if (current) {
        return;
      }
      for (const tab of tabs) {
        c(tab.$$.fragment, local);
      }
      current = true;
    },
    o(local) {
      for (const tab of tabs) {
        d(tab.$$.fragment, local);
      }
      current = false;
    },
    d(detaching) {
      if (detaching) {
        gaps.forEach((gap) => s(gap));
      }
      for (const tab of tabs) {
        h(tab, detaching);
      }
    },
  };
}

/**
 * Root fragment of the page: metadata tag, headings, intro copy, benchmark
 * table, supported-trainer list, usage steps, trainer tabs and source link.
 *
 * The order of calls inside `c`, `l` and `m` is the document order of the
 * page; `l` consumes existing nodes sequentially, so do not reorder them.
 *
 * @param {*} ctx - Component context forwarded to the trainer tabs' `$$scope`.
 * @returns {object} Fragment with c/l/h/m/p/i/o/d lifecycle methods.
 */
function createFragment(ctx) {
  let metaTag;
  let gapBeforeLead;
  let leadParagraph;
  let gapBeforeWidget;
  let gapBeforeTitle;
  let gapBeforeIntro;
  let introParagraph;
  let gapBeforeOffloadNote;
  let offloadParagraph;
  let gapBeforeTable;
  let benchmarkTable;
  let gapBeforeTrainersHeading;
  let gapBeforeTrainersIntro;
  let trainersParagraph;
  let gapBeforeTrainersList;
  let trainersList;
  let gapBeforeUsageHeading;
  let gapBeforeSteps;
  let usageSteps;
  let installStep;
  let installParagraph;
  let gapBeforeInstallCode;
  let gapBeforeConfigStep;
  let configStep;
  let gapBeforeTabs;
  let gapBeforeLearnMore;
  let learnMoreParagraph;
  let gapBeforeSourceLink;
  let gapBeforeTrailer;
  let trailingParagraph;
  let current;

  const floatingWidget = new Te({
    props: {
      containerStyle:
        "float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;",
    },
  });
  const pageHeading = new fe({
    props: {
      title: "Liger Kernel Integration",
      local: "liger-kernel-integration",
      headingTag: "h1",
    },
  });
  const trainersHeading = new fe({
    props: {
      title: "Supported Trainers",
      local: "supported-trainers",
      headingTag: "h2",
    },
  });
  const usageHeading = new fe({
    props: { title: "Usage", local: "usage", headingTag: "h2" },
  });
  const installCode = new I({
    props: {
      code: "cGlwJTIwaW5zdGFsbCUyMGxpZ2VyLWtlcm5lbA==",
      highlighted: "pip install liger-kernel",
      wrap: false,
    },
  });
  const trainerTabs = new ke({
    props: {
      id: "liger",
      options: TRAINER_SNIPPETS.map(({ option }) => option),
      $$slots: { default: [createTrainerTabsSlot] },
      $$scope: { ctx },
    },
  });
  const sourceLink = new Me({
    props: {
      source:
        "https://github.com/huggingface/trl/blob/main/docs/source/liger_kernel_integration.md",
    },
  });

  return {
    // Create every node from scratch.
    c() {
      metaTag = T("meta");
      gapBeforeLead = p();
      leadParagraph = T("p");
      gapBeforeWidget = p();
      $(floatingWidget.$$.fragment);
      gapBeforeTitle = p();
      $(pageHeading.$$.fragment);
      gapBeforeIntro = p();
      introParagraph = T("p");
      introParagraph.innerHTML = INTRO_HTML;
      gapBeforeOffloadNote = p();
      offloadParagraph = T("p");
      offloadParagraph.innerHTML = OFFLOAD_NOTE_HTML;
      gapBeforeTable = p();
      benchmarkTable = T("table");
      benchmarkTable.innerHTML = BENCHMARK_TABLE_HTML;
      gapBeforeTrainersHeading = p();
      $(trainersHeading.$$.fragment);
      gapBeforeTrainersIntro = p();
      trainersParagraph = T("p");
      trainersParagraph.textContent = TRAINERS_INTRO_TEXT;
      gapBeforeTrainersList = p();
      trainersList = T("ul");
      trainersList.innerHTML = TRAINERS_LIST_HTML;
      gapBeforeUsageHeading = p();
      $(usageHeading.$$.fragment);
      gapBeforeSteps = p();
      usageSteps = T("ol");
      installStep = T("li");
      installParagraph = T("p");
      installParagraph.textContent = INSTALL_STEP_TEXT;
      gapBeforeInstallCode = p();
      $(installCode.$$.fragment);
      gapBeforeConfigStep = p();
      configStep = T("li");
      configStep.innerHTML = CONFIG_STEP_HTML;
      gapBeforeTabs = p();
      $(trainerTabs.$$.fragment);
      gapBeforeLearnMore = p();
      learnMoreParagraph = T("p");
      learnMoreParagraph.innerHTML = LEARN_MORE_HTML;
      gapBeforeSourceLink = p();
      $(sourceLink.$$.fragment);
      gapBeforeTrailer = p();
      trailingParagraph = T("p");
      this.h();
    },

    // Claim nodes that already exist under `nodes`; static content is only
    // rewritten when the element's hash differs from the expected one.
    l(nodes) {
      const headNodes = be("svelte-u9bgzb", document.head);
      metaTag = M(headNodes, "META", { name: true, content: true });
      headNodes.forEach(s);
      gapBeforeLead = f(nodes);
      leadParagraph = M(nodes, "P", {});
      oe(leadParagraph).forEach(s);
      gapBeforeWidget = f(nodes);
      g(floatingWidget.$$.fragment, nodes);
      gapBeforeTitle = f(nodes);
      g(pageHeading.$$.fragment, nodes);
      gapBeforeIntro = f(nodes);
      introParagraph = M(nodes, "P", { "data-svelte-h": true });
      if (J(introParagraph) !== "svelte-1xp5liz") {
        introParagraph.innerHTML = INTRO_HTML;
      }
      gapBeforeOffloadNote = f(nodes);
      offloadParagraph = M(nodes, "P", { "data-svelte-h": true });
      if (J(offloadParagraph) !== "svelte-m20qmf") {
        offloadParagraph.innerHTML = OFFLOAD_NOTE_HTML;
      }
      gapBeforeTable = f(nodes);
      benchmarkTable = M(nodes, "TABLE", { "data-svelte-h": true });
      if (J(benchmarkTable) !== "svelte-1jpb79") {
        benchmarkTable.innerHTML = BENCHMARK_TABLE_HTML;
      }
      gapBeforeTrainersHeading = f(nodes);
      g(trainersHeading.$$.fragment, nodes);
      gapBeforeTrainersIntro = f(nodes);
      trainersParagraph = M(nodes, "P", { "data-svelte-h": true });
      if (J(trainersParagraph) !== "svelte-1cfm1a1") {
        trainersParagraph.textContent = TRAINERS_INTRO_TEXT;
      }
      gapBeforeTrainersList = f(nodes);
      trainersList = M(nodes, "UL", { "data-svelte-h": true });
      if (J(trainersList) !== "svelte-1plmcr1") {
        trainersList.innerHTML = TRAINERS_LIST_HTML;
      }
      gapBeforeUsageHeading = f(nodes);
      g(usageHeading.$$.fragment, nodes);
      gapBeforeSteps = f(nodes);
      usageSteps = M(nodes, "OL", {});
      const stepNodes = oe(usageSteps);
      installStep = M(stepNodes, "LI", {});
      const installNodes = oe(installStep);
      installParagraph = M(installNodes, "P", { "data-svelte-h": true });
      if (J(installParagraph) !== "svelte-5sqvp9") {
        installParagraph.textContent = INSTALL_STEP_TEXT;
      }
      gapBeforeInstallCode = f(installNodes);
      g(installCode.$$.fragment, installNodes);
      installNodes.forEach(s);
      gapBeforeConfigStep = f(stepNodes);
      configStep = M(stepNodes, "LI", { "data-svelte-h": true });
      if (J(configStep) !== "svelte-1vdnfpm") {
        configStep.innerHTML = CONFIG_STEP_HTML;
      }
      stepNodes.forEach(s);
      gapBeforeTabs = f(nodes);
      g(trainerTabs.$$.fragment, nodes);
      gapBeforeLearnMore = f(nodes);
      learnMoreParagraph = M(nodes, "P", { "data-svelte-h": true });
      if (J(learnMoreParagraph) !== "svelte-1h5mxii") {
        learnMoreParagraph.innerHTML = LEARN_MORE_HTML;
      }
      gapBeforeSourceLink = f(nodes);
      g(sourceLink.$$.fragment, nodes);
      gapBeforeTrailer = f(nodes);
      trailingParagraph = M(nodes, "P", {});
      oe(trailingParagraph).forEach(s);
      this.h();
    },

    // Shared tail of `c` and `l`: fill in the metadata tag's attributes.
    h() {
      de(metaTag, "name", "hf:doc:metadata");
      de(metaTag, "content", DOC_METADATA);
    },

    // Attach everything: the meta tag goes into <head>, the rest into `target`.
    m(target, anchor) {
      X(document.head, metaTag);
      a(target, gapBeforeLead, anchor);
      a(target, leadParagraph, anchor);
      a(target, gapBeforeWidget, anchor);
      u(floatingWidget, target, anchor);
      a(target, gapBeforeTitle, anchor);
      u(pageHeading, target, anchor);
      a(target, gapBeforeIntro, anchor);
      a(target, introParagraph, anchor);
      a(target, gapBeforeOffloadNote, anchor);
      a(target, offloadParagraph, anchor);
      a(target, gapBeforeTable, anchor);
      a(target, benchmarkTable, anchor);
      a(target, gapBeforeTrainersHeading, anchor);
      u(trainersHeading, target, anchor);
      a(target, gapBeforeTrainersIntro, anchor);
      a(target, trainersParagraph, anchor);
      a(target, gapBeforeTrainersList, anchor);
      a(target, trainersList, anchor);
      a(target, gapBeforeUsageHeading, anchor);
      u(usageHeading, target, anchor);
      a(target, gapBeforeSteps, anchor);
      a(target, usageSteps, anchor);
      X(usageSteps, installStep);
      X(installStep, installParagraph);
      X(installStep, gapBeforeInstallCode);
      u(installCode, installStep, null);
      X(usageSteps, gapBeforeConfigStep);
      X(usageSteps, configStep);
      a(target, gapBeforeTabs, anchor);
      u(trainerTabs, target, anchor);
      a(target, gapBeforeLearnMore, anchor);
      a(target, learnMoreParagraph, anchor);
      a(target, gapBeforeSourceLink, anchor);
      u(sourceLink, target, anchor);
      a(target, gapBeforeTrailer, anchor);
      a(target, trailingParagraph, anchor);
      current = true;
    },

    // Only the trainer tabs receive scope updates; everything else is static.
    p(nextCtx, [dirty]) {
      const changes = {};
      if (dirty & 2) {
        changes.$$scope = { dirty, ctx: nextCtx };
      }
      trainerTabs.$set(changes);
    },

    i(local) {
      if (current) {
        return;
      }
      c(floatingWidget.$$.fragment, local);
      c(pageHeading.$$.fragment, local);
      c(trainersHeading.$$.fragment, local);
      c(usageHeading.$$.fragment, local);
      c(installCode.$$.fragment, local);
      c(trainerTabs.$$.fragment, local);
      c(sourceLink.$$.fragment, local);
      current = true;
    },

    o(local) {
      d(floatingWidget.$$.fragment, local);
      d(pageHeading.$$.fragment, local);
      d(trainersHeading.$$.fragment, local);
      d(usageHeading.$$.fragment, local);
      d(installCode.$$.fragment, local);
      d(trainerTabs.$$.fragment, local);
      d(sourceLink.$$.fragment, local);
      current = false;
    },

    d(detaching) {
      if (detaching) {
        // Top-level nodes only; nodes nested inside `usageSteps` leave with it.
        const topLevelNodes = [
          gapBeforeLead,
          leadParagraph,
          gapBeforeWidget,
          gapBeforeTitle,
          gapBeforeIntro,
          introParagraph,
          gapBeforeOffloadNote,
          offloadParagraph,
          gapBeforeTable,
          benchmarkTable,
          gapBeforeTrainersHeading,
          gapBeforeTrainersIntro,
          trainersParagraph,
          gapBeforeTrainersList,
          trainersList,
          gapBeforeUsageHeading,
          gapBeforeSteps,
          usageSteps,
          gapBeforeTabs,
          gapBeforeLearnMore,
          learnMoreParagraph,
          gapBeforeSourceLink,
          gapBeforeTrailer,
          trailingParagraph,
        ];
        for (const node of topLevelNodes) {
          s(node);
        }
      }
      // The meta tag lives in <head>, so it is removed regardless of `detaching`.
      s(metaTag);
      h(floatingWidget, detaching);
      h(pageHeading, detaching);
      h(trainersHeading, detaching);
      h(usageHeading, detaching);
      // No `detaching` flag: the install code block sits inside `usageSteps`.
      h(installCode);
      h(trainerTabs, detaching);
      h(sourceLink, detaching);
    },
  };
}

/**
 * Component instance setup.
 *
 * Registers a callback through `ye` (presumably an on-mount hook — confirm in
 * the scheduler chunk) that reads the `fw` query parameter and discards the
 * value. Kept as generated so the registration itself still happens.
 *
 * @returns {Array} Empty context array.
 */
function instance() {
  ye(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component; wires `instance` and `createFragment` into the base class. */
class LigerKernelIntegration extends we {
  constructor(options) {
    super();
    _e(this, options, instance, createFragment, he, {});
  }
}

export { LigerKernelIntegration as component };

Xet Storage Details

Size:
10.5 kB
·
Xet hash:
38b6b61ef516a2ef1548b798fe89b129f72fa8a6383a270ab030527abcac0159

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.