Buckets:
| import{s as je,o as Ue,n as X}from"../chunks/scheduler.7b731bd4.js";import{S as _e,i as Ze,e as W,s as w,c as g,h as We,a as C,d as r,b as u,f as be,g as J,j as I,k as ye,l as Ce,m as f,n as h,t as d,o as M,p as T}from"../chunks/index.cc268345.js";import{C as Ie,H as Q,E as Be}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.f0d99f98.js";import{C as Z}from"../chunks/CodeBlock.169a125f.js";import{H as Mt,a as B}from"../chunks/HfOption.9f04abd1.js";function Xe(_){let s,m;return s=new Z({props:{code:"dHJsJTIwc2Z0JTIwJTVDJTBBJTIwJTIwLS1tb2RlbF9uYW1lX29yX3BhdGglMjBRd2VuJTJGUXdlbjIuNS0wLjVCJTIwJTVDJTBBJTIwJTIwLS1kYXRhc2V0X25hbWUlMjBzdGFuZm9yZG5scCUyRmltZGI=",highlighted:`trl sft \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name stanfordnlp/imdb`,wrap:!1}}),{c(){g(s.$$.fragment)},l(l){J(s.$$.fragment,l)},m(l,b){h(s,l,b),m=!0},p:X,i(l){m||(d(s.$$.fragment,l),m=!0)},o(l){M(s.$$.fragment,l),m=!1},d(l){T(s,l)}}}function Fe(_){let s,m;return s=new Z({props:{code:"dHJsJTIwZHBvJTIwJTVDJTBBJTIwJTIwLS1tb2RlbF9uYW1lX29yX3BhdGglMjBRd2VuJTJGUXdlbjIuNS0wLjVCJTIwJTVDJTBBJTIwJTIwLS1kYXRhc2V0X25hbWUlMjBhbnRocm9waWMlMkZoaC1ybGhm",highlighted:`trl dpo \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name anthropic/hh-rlhf`,wrap:!1}}),{c(){g(s.$$.fragment)},l(l){J(s.$$.fragment,l)},m(l,b){h(s,l,b),m=!0},p:X,i(l){m||(d(s.$$.fragment,l),m=!0)},o(l){M(s.$$.fragment,l),m=!1},d(l){T(s,l)}}}function Re(_){let s,m;return s=new Z({props:{code:"dHJsJTIwcmV3YXJkJTIwJTVDJTBBJTIwJTIwLS1tb2RlbF9uYW1lX29yX3BhdGglMjBRd2VuJTJGUXdlbjIuNS0wLjVCJTIwJTVDJTBBJTIwJTIwLS1kYXRhc2V0X25hbWUlMjB0cmwtbGliJTJGdWx0cmFmZWVkYmFja19iaW5hcml6ZWQ=",highlighted:`trl reward \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name trl-lib/ultrafeedback_binarized`,wrap:!1}}),{c(){g(s.$$.fragment)},l(l){J(s.$$.fragment,l)},m(l,b){h(s,l,b),m=!0},p:X,i(l){m||(d(s.$$.fragment,l),m=!0)},o(l){M(s.$$.fragment,l),m=!1},d(l){T(s,l)}}}function ve(_){let s,m;return s=new Z({props:{code:"dHJsJTIwZ3JwbyUyMCU1QyUwQSUyMCUyMC0tbW9kZWxfbmFtZV9vcl9wYXRoJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUyMCU1QyUwQSUyMCUyMC0tZGF0YXNldF9uYW1lJTIwSHVnZ2luZ0ZhY2VINCUyRlBvbGFyaXMtRGF0YXNldC01M0slMjAlNUMlMEElMjAlMjAtLXJld2FyZF9mdW5jcyUyMGFjY3VyYWN5X3Jld2FyZA==",highlighted:`trl grpo \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name HuggingFaceH4/Polaris-Dataset-53K \\ | |
| --reward_funcs accuracy_reward`,wrap:!1}}),{c(){g(s.$$.fragment)},l(l){J(s.$$.fragment,l)},m(l,b){h(s,l,b),m=!0},p:X,i(l){m||(d(s.$$.fragment,l),m=!0)},o(l){M(s.$$.fragment,l),m=!1},d(l){T(s,l)}}}function Ye(_){let s,m;return s=new Z({props:{code:"dHJsJTIwcmxvbyUyMCU1QyUwQSUyMCUyMC0tbW9kZWxfbmFtZV9vcl9wYXRoJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUyMCU1QyUwQSUyMCUyMC0tZGF0YXNldF9uYW1lJTIwSHVnZ2luZ0ZhY2VINCUyRlBvbGFyaXMtRGF0YXNldC01M0slMjAlNUMlMEElMjAlMjAtLXJld2FyZF9mdW5jcyUyMGFjY3VyYWN5X3Jld2FyZA==",highlighted:`trl rloo \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name HuggingFaceH4/Polaris-Dataset-53K \\ | |
| --reward_funcs accuracy_reward`,wrap:!1}}),{c(){g(s.$$.fragment)},l(l){J(s.$$.fragment,l)},m(l,b){h(s,l,b),m=!0},p:X,i(l){m||(d(s.$$.fragment,l),m=!0)},o(l){M(s.$$.fragment,l),m=!1},d(l){T(s,l)}}}function Le(_){let s,m;return s=new Z({props:{code:"dHJsJTIwa3RvJTIwJTVDJTBBJTIwJTIwLS1tb2RlbF9uYW1lX29yX3BhdGglMjBRd2VuJTJGUXdlbjIuNS0wLjVCJTIwJTVDJTBBJTIwJTIwLS1kYXRhc2V0X25hbWUlMjB0cmwtbGliJTJGa3RvLW1peC0xNGs=",highlighted:`trl kto \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name trl-lib/kto-mix-14k`,wrap:!1}}),{c(){g(s.$$.fragment)},l(l){J(s.$$.fragment,l)},m(l,b){h(s,l,b),m=!0},p:X,i(l){m||(d(s.$$.fragment,l),m=!0)},o(l){M(s.$$.fragment,l),m=!1},d(l){T(s,l)}}}function Ge(_){let s,m,l,b,i,p,$,e,o,U,c,j;return s=new B({props:{id:"trainer",option:"SFT",$$slots:{default:[Xe]},$$scope:{ctx:_}}}),l=new B({props:{id:"trainer",option:"DPO",$$slots:{default:[Fe]},$$scope:{ctx:_}}}),i=new B({props:{id:"trainer",option:"Reward",$$slots:{default:[Re]},$$scope:{ctx:_}}}),$=new B({props:{id:"trainer",option:"GRPO",$$slots:{default:[ve]},$$scope:{ctx:_}}}),o=new B({props:{id:"trainer",option:"RLOO",$$slots:{default:[Ye]},$$scope:{ctx:_}}}),c=new B({props:{id:"trainer",option:"KTO",$$slots:{default:[Le]},$$scope:{ctx:_}}}),{c(){g(s.$$.fragment),m=w(),g(l.$$.fragment),b=w(),g(i.$$.fragment),p=w(),g($.$$.fragment),e=w(),g(o.$$.fragment),U=w(),g(c.$$.fragment)},l(t){J(s.$$.fragment,t),m=u(t),J(l.$$.fragment,t),b=u(t),J(i.$$.fragment,t),p=u(t),J($.$$.fragment,t),e=u(t),J(o.$$.fragment,t),U=u(t),J(c.$$.fragment,t)},m(t,n){h(s,t,n),f(t,m,n),h(l,t,n),f(t,b,n),h(i,t,n),f(t,p,n),h($,t,n),f(t,e,n),h(o,t,n),f(t,U,n),h(c,t,n),j=!0},p(t,n){const L={};n&2&&(L.$$scope={dirty:n,ctx:t}),s.$set(L);const v={};n&2&&(v.$$scope={dirty:n,ctx:t}),l.$set(v);const F={};n&2&&(F.$$scope={dirty:n,ctx:t}),i.$set(F);const Y={};n&2&&(Y.$$scope={dirty:n,ctx:t}),$.$set(Y);const R={};n&2&&(R.$$scope={dirty:n,ctx:t}),o.$set(R);const G={};n&2&&(G.$$scope={dirty:n,ctx:t}),c.$set(G)},i(t){j||(d(s.$$.fragment,t),d(l.$$.fragment,t),d(i.$$.fragment,t),d($.$$.fragment,t),d(o.$$.fragment,t),d(c.$$.fragment,t),j=!0)},o(t){M(s.$$.fragment,t),M(l.$$.fragment,t),M(i.$$.fragment,t),M($.$$.fragment,t),M(o.$$.fragment,t),M(c.$$.fragment,t),j=!1},d(t){t&&(r(m),r(b),r(p),r(e),r(U)),T(s,t),T(l,t),T(i,t),T($,t),T(o,t),T(c,t)}}}function Qe(_){let s,m,l,b="Launch with:",i,p,$;return s=new Z({props:{code:"JTIzJTIwc2Z0X2NvbmZpZy55YW1sJTBBbW9kZWxfbmFtZV9vcl9wYXRoJTNBJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUwQWRhdGFzZXRfbmFtZSUzQSUyMHN0YW5mb3JkbmxwJTJGaW1kYg==",highlighted:`<span class="hljs-comment"># sft_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">stanfordnlp/imdb</span>`,wrap:!1}}),p=new Z({props:{code:"dHJsJTIwc2Z0JTIwLS1jb25maWclMjBzZnRfY29uZmlnLnlhbWw=",highlighted:"trl sft --config sft_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment)},l(e){J(s.$$.fragment,e),m=u(e),l=C(e,"P",{"data-svelte-h":!0}),I(l)!=="svelte-ge9qnz"&&(l.textContent=b),i=u(e),J(p.$$.fragment,e)},m(e,o){h(s,e,o),f(e,m,o),f(e,l,o),f(e,i,o),h(p,e,o),$=!0},p:X,i(e){$||(d(s.$$.fragment,e),d(p.$$.fragment,e),$=!0)},o(e){M(s.$$.fragment,e),M(p.$$.fragment,e),$=!1},d(e){e&&(r(m),r(l),r(i)),T(s,e),T(p,e)}}}function Ve(_){let s,m,l,b="Launch with:",i,p,$;return s=new Z({props:{code:"JTIzJTIwZHBvX2NvbmZpZy55YW1sJTBBbW9kZWxfbmFtZV9vcl9wYXRoJTNBJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUwQWRhdGFzZXRfbmFtZSUzQSUyMGFudGhyb3BpYyUyRmhoLXJsaGY=",highlighted:`<span class="hljs-comment"># dpo_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">anthropic/hh-rlhf</span>`,wrap:!1}}),p=new Z({props:{code:"dHJsJTIwZHBvJTIwLS1jb25maWclMjBkcG9fY29uZmlnLnlhbWw=",highlighted:"trl dpo --config dpo_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment)},l(e){J(s.$$.fragment,e),m=u(e),l=C(e,"P",{"data-svelte-h":!0}),I(l)!=="svelte-ge9qnz"&&(l.textContent=b),i=u(e),J(p.$$.fragment,e)},m(e,o){h(s,e,o),f(e,m,o),f(e,l,o),f(e,i,o),h(p,e,o),$=!0},p:X,i(e){$||(d(s.$$.fragment,e),d(p.$$.fragment,e),$=!0)},o(e){M(s.$$.fragment,e),M(p.$$.fragment,e),$=!1},d(e){e&&(r(m),r(l),r(i)),T(s,e),T(p,e)}}}function Se(_){let s,m,l,b="Launch with:",i,p,$;return s=new Z({props:{code:"JTIzJTIwcmV3YXJkX2NvbmZpZy55YW1sJTBBbW9kZWxfbmFtZV9vcl9wYXRoJTNBJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUwQWRhdGFzZXRfbmFtZSUzQSUyMHRybC1saWIlMkZ1bHRyYWZlZWRiYWNrX2JpbmFyaXplZA==",highlighted:`<span class="hljs-comment"># reward_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">trl-lib/ultrafeedback_binarized</span>`,wrap:!1}}),p=new Z({props:{code:"dHJsJTIwcmV3YXJkJTIwLS1jb25maWclMjByZXdhcmRfY29uZmlnLnlhbWw=",highlighted:"trl reward --config reward_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment)},l(e){J(s.$$.fragment,e),m=u(e),l=C(e,"P",{"data-svelte-h":!0}),I(l)!=="svelte-ge9qnz"&&(l.textContent=b),i=u(e),J(p.$$.fragment,e)},m(e,o){h(s,e,o),f(e,m,o),f(e,l,o),f(e,i,o),h(p,e,o),$=!0},p:X,i(e){$||(d(s.$$.fragment,e),d(p.$$.fragment,e),$=!0)},o(e){M(s.$$.fragment,e),M(p.$$.fragment,e),$=!1},d(e){e&&(r(m),r(l),r(i)),T(s,e),T(p,e)}}}function ke(_){let s,m,l,b="Launch with:",i,p,$;return s=new Z({props:{code:"JTIzJTIwZ3Jwb19jb25maWcueWFtbCUwQW1vZGVsX25hbWVfb3JfcGF0aCUzQSUyMFF3ZW4lMkZRd2VuMi41LTAuNUIlMEFkYXRhc2V0X25hbWUlM0ElMjBIdWdnaW5nRmFjZUg0JTJGUG9sYXJpcy1EYXRhc2V0LTUzSyUwQXJld2FyZF9mdW5jcyUzQSUwQSUyMCUyMC0lMjBhY2N1cmFjeV9yZXdhcmQ=",highlighted:`<span class="hljs-comment"># grpo_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">HuggingFaceH4/Polaris-Dataset-53K</span> | |
| <span class="hljs-attr">reward_funcs:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-string">accuracy_reward</span>`,wrap:!1}}),p=new Z({props:{code:"dHJsJTIwZ3JwbyUyMC0tY29uZmlnJTIwZ3Jwb19jb25maWcueWFtbA==",highlighted:"trl grpo --config grpo_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment)},l(e){J(s.$$.fragment,e),m=u(e),l=C(e,"P",{"data-svelte-h":!0}),I(l)!=="svelte-ge9qnz"&&(l.textContent=b),i=u(e),J(p.$$.fragment,e)},m(e,o){h(s,e,o),f(e,m,o),f(e,l,o),f(e,i,o),h(p,e,o),$=!0},p:X,i(e){$||(d(s.$$.fragment,e),d(p.$$.fragment,e),$=!0)},o(e){M(s.$$.fragment,e),M(p.$$.fragment,e),$=!1},d(e){e&&(r(m),r(l),r(i)),T(s,e),T(p,e)}}}function xe(_){let s,m,l,b="Launch with:",i,p,$;return s=new Z({props:{code:"JTIzJTIwcmxvb19jb25maWcueWFtbCUwQW1vZGVsX25hbWVfb3JfcGF0aCUzQSUyMFF3ZW4lMkZRd2VuMi41LTAuNUIlMEFkYXRhc2V0X25hbWUlM0ElMjBIdWdnaW5nRmFjZUg0JTJGUG9sYXJpcy1EYXRhc2V0LTUzSyUwQXJld2FyZF9mdW5jcyUzQSUwQSUyMCUyMC0lMjBhY2N1cmFjeV9yZXdhcmQ=",highlighted:`<span class="hljs-comment"># rloo_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">HuggingFaceH4/Polaris-Dataset-53K</span> | |
| <span class="hljs-attr">reward_funcs:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-string">accuracy_reward</span>`,wrap:!1}}),p=new Z({props:{code:"dHJsJTIwcmxvbyUyMC0tY29uZmlnJTIwcmxvb19jb25maWcueWFtbA==",highlighted:"trl rloo --config rloo_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment)},l(e){J(s.$$.fragment,e),m=u(e),l=C(e,"P",{"data-svelte-h":!0}),I(l)!=="svelte-ge9qnz"&&(l.textContent=b),i=u(e),J(p.$$.fragment,e)},m(e,o){h(s,e,o),f(e,m,o),f(e,l,o),f(e,i,o),h(p,e,o),$=!0},p:X,i(e){$||(d(s.$$.fragment,e),d(p.$$.fragment,e),$=!0)},o(e){M(s.$$.fragment,e),M(p.$$.fragment,e),$=!1},d(e){e&&(r(m),r(l),r(i)),T(s,e),T(p,e)}}}function Ne(_){let s,m,l,b="Launch with:",i,p,$;return s=new Z({props:{code:"JTIzJTIwa3RvX2NvbmZpZy55YW1sJTBBbW9kZWxfbmFtZV9vcl9wYXRoJTNBJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUwQWRhdGFzZXRfbmFtZSUzQSUyMHRybC1saWIlMkZrdG8tbWl4LTE0aw==",highlighted:`<span class="hljs-comment"># kto_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">trl-lib/kto-mix-14k</span>`,wrap:!1}}),p=new Z({props:{code:"dHJsJTIwa3RvJTIwLS1jb25maWclMjBrdG9fY29uZmlnLnlhbWw=",highlighted:"trl kto --config kto_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment)},l(e){J(s.$$.fragment,e),m=u(e),l=C(e,"P",{"data-svelte-h":!0}),I(l)!=="svelte-ge9qnz"&&(l.textContent=b),i=u(e),J(p.$$.fragment,e)},m(e,o){h(s,e,o),f(e,m,o),f(e,l,o),f(e,i,o),h(p,e,o),$=!0},p:X,i(e){$||(d(s.$$.fragment,e),d(p.$$.fragment,e),$=!0)},o(e){M(s.$$.fragment,e),M(p.$$.fragment,e),$=!1},d(e){e&&(r(m),r(l),r(i)),T(s,e),T(p,e)}}}function ze(_){let s,m,l,b,i,p,$,e,o,U,c,j;return s=new B({props:{id:"trainer",option:"SFT",$$slots:{default:[Qe]},$$scope:{ctx:_}}}),l=new B({props:{id:"trainer",option:"DPO",$$slots:{default:[Ve]},$$scope:{ctx:_}}}),i=new B({props:{id:"trainer",option:"Reward",$$slots:{default:[Se]},$$scope:{ctx:_}}}),$=new B({props:{id:"trainer",option:"GRPO",$$slots:{default:[ke]},$$scope:{ctx:_}}}),o=new B({props:{id:"trainer",option:"RLOO",$$slots:{default:[xe]},$$scope:{ctx:_}}}),c=new B({props:{id:"trainer",option:"KTO",$$slots:{default:[Ne]},$$scope:{ctx:_}}}),{c(){g(s.$$.fragment),m=w(),g(l.$$.fragment),b=w(),g(i.$$.fragment),p=w(),g($.$$.fragment),e=w(),g(o.$$.fragment),U=w(),g(c.$$.fragment)},l(t){J(s.$$.fragment,t),m=u(t),J(l.$$.fragment,t),b=u(t),J(i.$$.fragment,t),p=u(t),J($.$$.fragment,t),e=u(t),J(o.$$.fragment,t),U=u(t),J(c.$$.fragment,t)},m(t,n){h(s,t,n),f(t,m,n),h(l,t,n),f(t,b,n),h(i,t,n),f(t,p,n),h($,t,n),f(t,e,n),h(o,t,n),f(t,U,n),h(c,t,n),j=!0},p(t,n){const L={};n&2&&(L.$$scope={dirty:n,ctx:t}),s.$set(L);const v={};n&2&&(v.$$scope={dirty:n,ctx:t}),l.$set(v);const F={};n&2&&(F.$$scope={dirty:n,ctx:t}),i.$set(F);const Y={};n&2&&(Y.$$scope={dirty:n,ctx:t}),$.$set(Y);const R={};n&2&&(R.$$scope={dirty:n,ctx:t}),o.$set(R);const G={};n&2&&(G.$$scope={dirty:n,ctx:t}),c.$set(G)},i(t){j||(d(s.$$.fragment,t),d(l.$$.fragment,t),d(i.$$.fragment,t),d($.$$.fragment,t),d(o.$$.fragment,t),d(c.$$.fragment,t),j=!0)},o(t){M(s.$$.fragment,t),M(l.$$.fragment,t),M(i.$$.fragment,t),M($.$$.fragment,t),M(o.$$.fragment,t),M(c.$$.fragment,t),j=!1},d(t){t&&(r(m),r(b),r(p),r(e),r(U)),T(s,t),T(l,t),T(i,t),T($,t),T(o,t),T(c,t)}}}function He(_){let s,m,l,b="or, with a config file:",i,p,$,e,o="Launch with:",U,c,j;return s=new Z({props:{code:"dHJsJTIwc2Z0JTIwJTVDJTBBJTIwJTIwLS1tb2RlbF9uYW1lX29yX3BhdGglMjBRd2VuJTJGUXdlbjIuNS0wLjVCJTIwJTVDJTBBJTIwJTIwLS1kYXRhc2V0X25hbWUlMjBzdGFuZm9yZG5scCUyRmltZGIlMjAlNUMlMEElMjAlMjAtLW51bV9wcm9jZXNzZXMlMjA0",highlighted:`trl sft \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name stanfordnlp/imdb \\ | |
| --num_processes 4`,wrap:!1}}),p=new Z({props:{code:"JTIzJTIwc2Z0X2NvbmZpZy55YW1sJTBBbW9kZWxfbmFtZV9vcl9wYXRoJTNBJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUwQWRhdGFzZXRfbmFtZSUzQSUyMHN0YW5mb3JkbmxwJTJGaW1kYiUwQW51bV9wcm9jZXNzZXMlM0ElMjA0",highlighted:`<span class="hljs-comment"># sft_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">stanfordnlp/imdb</span> | |
| <span class="hljs-attr">num_processes:</span> <span class="hljs-number">4</span>`,wrap:!1}}),c=new Z({props:{code:"dHJsJTIwc2Z0JTIwLS1jb25maWclMjBzZnRfY29uZmlnLnlhbWw=",highlighted:"trl sft --config sft_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment),$=w(),e=W("p"),e.textContent=o,U=w(),g(c.$$.fragment)},l(t){J(s.$$.fragment,t),m=u(t),l=C(t,"P",{"data-svelte-h":!0}),I(l)!=="svelte-10bmfas"&&(l.textContent=b),i=u(t),J(p.$$.fragment,t),$=u(t),e=C(t,"P",{"data-svelte-h":!0}),I(e)!=="svelte-ge9qnz"&&(e.textContent=o),U=u(t),J(c.$$.fragment,t)},m(t,n){h(s,t,n),f(t,m,n),f(t,l,n),f(t,i,n),h(p,t,n),f(t,$,n),f(t,e,n),f(t,U,n),h(c,t,n),j=!0},p:X,i(t){j||(d(s.$$.fragment,t),d(p.$$.fragment,t),d(c.$$.fragment,t),j=!0)},o(t){M(s.$$.fragment,t),M(p.$$.fragment,t),M(c.$$.fragment,t),j=!1},d(t){t&&(r(m),r(l),r(i),r($),r(e),r(U)),T(s,t),T(p,t),T(c,t)}}}function Ee(_){let s,m,l,b="or, with a config file:",i,p,$,e,o="Launch with:",U,c,j;return s=new Z({props:{code:"dHJsJTIwZHBvJTIwJTVDJTBBJTIwJTIwLS1tb2RlbF9uYW1lX29yX3BhdGglMjBRd2VuJTJGUXdlbjIuNS0wLjVCJTIwJTVDJTBBJTIwJTIwLS1kYXRhc2V0X25hbWUlMjBhbnRocm9waWMlMkZoaC1ybGhmJTIwJTVDJTBBJTIwJTIwLS1udW1fcHJvY2Vzc2VzJTIwNA==",highlighted:`trl dpo \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name anthropic/hh-rlhf \\ | |
| --num_processes 4`,wrap:!1}}),p=new Z({props:{code:"JTIzJTIwZHBvX2NvbmZpZy55YW1sJTBBbW9kZWxfbmFtZV9vcl9wYXRoJTNBJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUwQWRhdGFzZXRfbmFtZSUzQSUyMGFudGhyb3BpYyUyRmhoLXJsaGYlMEFudW1fcHJvY2Vzc2VzJTNBJTIwNA==",highlighted:`<span class="hljs-comment"># dpo_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">anthropic/hh-rlhf</span> | |
| <span class="hljs-attr">num_processes:</span> <span class="hljs-number">4</span>`,wrap:!1}}),c=new Z({props:{code:"dHJsJTIwZHBvJTIwLS1jb25maWclMjBkcG9fY29uZmlnLnlhbWw=",highlighted:"trl dpo --config dpo_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment),$=w(),e=W("p"),e.textContent=o,U=w(),g(c.$$.fragment)},l(t){J(s.$$.fragment,t),m=u(t),l=C(t,"P",{"data-svelte-h":!0}),I(l)!=="svelte-10bmfas"&&(l.textContent=b),i=u(t),J(p.$$.fragment,t),$=u(t),e=C(t,"P",{"data-svelte-h":!0}),I(e)!=="svelte-ge9qnz"&&(e.textContent=o),U=u(t),J(c.$$.fragment,t)},m(t,n){h(s,t,n),f(t,m,n),f(t,l,n),f(t,i,n),h(p,t,n),f(t,$,n),f(t,e,n),f(t,U,n),h(c,t,n),j=!0},p:X,i(t){j||(d(s.$$.fragment,t),d(p.$$.fragment,t),d(c.$$.fragment,t),j=!0)},o(t){M(s.$$.fragment,t),M(p.$$.fragment,t),M(c.$$.fragment,t),j=!1},d(t){t&&(r(m),r(l),r(i),r($),r(e),r(U)),T(s,t),T(p,t),T(c,t)}}}function Ae(_){let s,m,l,b="or, with a config file:",i,p,$,e,o="Launch with:",U,c,j;return s=new Z({props:{code:"dHJsJTIwcmV3YXJkJTIwJTVDJTBBJTIwJTIwLS1tb2RlbF9uYW1lX29yX3BhdGglMjBRd2VuJTJGUXdlbjIuNS0wLjVCJTIwJTVDJTBBJTIwJTIwLS1kYXRhc2V0X25hbWUlMjB0cmwtbGliJTJGdWx0cmFmZWVkYmFja19iaW5hcml6ZWQlMjAlNUMlMEElMjAlMjAtLW51bV9wcm9jZXNzZXMlMjA0",highlighted:`trl reward \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name trl-lib/ultrafeedback_binarized \\ | |
| --num_processes 4`,wrap:!1}}),p=new Z({props:{code:"JTIzJTIwcmV3YXJkX2NvbmZpZy55YW1sJTBBbW9kZWxfbmFtZV9vcl9wYXRoJTNBJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUwQWRhdGFzZXRfbmFtZSUzQSUyMHRybC1saWIlMkZ1bHRyYWZlZWRiYWNrX2JpbmFyaXplZCUwQW51bV9wcm9jZXNzZXMlM0ElMjA0",highlighted:`<span class="hljs-comment"># reward_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">trl-lib/ultrafeedback_binarized</span> | |
| <span class="hljs-attr">num_processes:</span> <span class="hljs-number">4</span>`,wrap:!1}}),c=new Z({props:{code:"dHJsJTIwcmV3YXJkJTIwLS1jb25maWclMjByZXdhcmRfY29uZmlnLnlhbWw=",highlighted:"trl reward --config reward_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment),$=w(),e=W("p"),e.textContent=o,U=w(),g(c.$$.fragment)},l(t){J(s.$$.fragment,t),m=u(t),l=C(t,"P",{"data-svelte-h":!0}),I(l)!=="svelte-10bmfas"&&(l.textContent=b),i=u(t),J(p.$$.fragment,t),$=u(t),e=C(t,"P",{"data-svelte-h":!0}),I(e)!=="svelte-ge9qnz"&&(e.textContent=o),U=u(t),J(c.$$.fragment,t)},m(t,n){h(s,t,n),f(t,m,n),f(t,l,n),f(t,i,n),h(p,t,n),f(t,$,n),f(t,e,n),f(t,U,n),h(c,t,n),j=!0},p:X,i(t){j||(d(s.$$.fragment,t),d(p.$$.fragment,t),d(c.$$.fragment,t),j=!0)},o(t){M(s.$$.fragment,t),M(p.$$.fragment,t),M(c.$$.fragment,t),j=!1},d(t){t&&(r(m),r(l),r(i),r($),r(e),r(U)),T(s,t),T(p,t),T(c,t)}}}function Pe(_){let s,m,l,b="or, with a config file:",i,p,$,e,o="Launch with:",U,c,j;return s=new Z({props:{code:"dHJsJTIwZ3JwbyUyMCU1QyUwQSUyMCUyMC0tbW9kZWxfbmFtZV9vcl9wYXRoJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUyMCU1QyUwQSUyMCUyMC0tZGF0YXNldF9uYW1lJTIwSHVnZ2luZ0ZhY2VINCUyRlBvbGFyaXMtRGF0YXNldC01M0slMjAlNUMlMEElMjAlMjAtLXJld2FyZF9mdW5jcyUyMGFjY3VyYWN5X3Jld2FyZCUyMCU1QyUwQSUyMCUyMC0tbnVtX3Byb2Nlc3NlcyUyMDQ=",highlighted:`trl grpo \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name HuggingFaceH4/Polaris-Dataset-53K \\ | |
| --reward_funcs accuracy_reward \\ | |
| --num_processes 4`,wrap:!1}}),p=new Z({props:{code:"JTIzJTIwZ3Jwb19jb25maWcueWFtbCUwQW1vZGVsX25hbWVfb3JfcGF0aCUzQSUyMFF3ZW4lMkZRd2VuMi41LTAuNUIlMEFkYXRhc2V0X25hbWUlM0ElMjBIdWdnaW5nRmFjZUg0JTJGUG9sYXJpcy1EYXRhc2V0LTUzSyUwQXJld2FyZF9mdW5jcyUzQSUwQSUyMCUyMC0lMjBhY2N1cmFjeV9yZXdhcmQlMEFudW1fcHJvY2Vzc2VzJTNBJTIwNA==",highlighted:`<span class="hljs-comment"># grpo_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">HuggingFaceH4/Polaris-Dataset-53K</span> | |
| <span class="hljs-attr">reward_funcs:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-string">accuracy_reward</span> | |
| <span class="hljs-attr">num_processes:</span> <span class="hljs-number">4</span>`,wrap:!1}}),c=new Z({props:{code:"dHJsJTIwZ3JwbyUyMC0tY29uZmlnJTIwZ3Jwb19jb25maWcueWFtbA==",highlighted:"trl grpo --config grpo_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment),$=w(),e=W("p"),e.textContent=o,U=w(),g(c.$$.fragment)},l(t){J(s.$$.fragment,t),m=u(t),l=C(t,"P",{"data-svelte-h":!0}),I(l)!=="svelte-10bmfas"&&(l.textContent=b),i=u(t),J(p.$$.fragment,t),$=u(t),e=C(t,"P",{"data-svelte-h":!0}),I(e)!=="svelte-ge9qnz"&&(e.textContent=o),U=u(t),J(c.$$.fragment,t)},m(t,n){h(s,t,n),f(t,m,n),f(t,l,n),f(t,i,n),h(p,t,n),f(t,$,n),f(t,e,n),f(t,U,n),h(c,t,n),j=!0},p:X,i(t){j||(d(s.$$.fragment,t),d(p.$$.fragment,t),d(c.$$.fragment,t),j=!0)},o(t){M(s.$$.fragment,t),M(p.$$.fragment,t),M(c.$$.fragment,t),j=!1},d(t){t&&(r(m),r(l),r(i),r($),r(e),r(U)),T(s,t),T(p,t),T(c,t)}}}function De(_){let s,m,l,b="or, with a config file:",i,p,$,e,o="Launch with:",U,c,j;return s=new Z({props:{code:"dHJsJTIwcmxvbyUyMCU1QyUwQSUyMCUyMC0tbW9kZWxfbmFtZV9vcl9wYXRoJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUyMCU1QyUwQSUyMCUyMC0tZGF0YXNldF9uYW1lJTIwSHVnZ2luZ0ZhY2VINCUyRlBvbGFyaXMtRGF0YXNldC01M0slMjAlNUMlMEElMjAlMjAtLXJld2FyZF9mdW5jcyUyMGFjY3VyYWN5X3Jld2FyZCUyMCU1QyUwQSUyMCUyMC0tbnVtX3Byb2Nlc3NlcyUyMDQ=",highlighted:`trl rloo \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name HuggingFaceH4/Polaris-Dataset-53K \\ | |
| --reward_funcs accuracy_reward \\ | |
| --num_processes 4`,wrap:!1}}),p=new Z({props:{code:"JTIzJTIwcmxvb19jb25maWcueWFtbCUwQW1vZGVsX25hbWVfb3JfcGF0aCUzQSUyMFF3ZW4lMkZRd2VuMi41LTAuNUIlMEFkYXRhc2V0X25hbWUlM0ElMjBIdWdnaW5nRmFjZUg0JTJGUG9sYXJpcy1EYXRhc2V0LTUzSyUwQXJld2FyZF9mdW5jcyUzQSUwQSUyMCUyMC0lMjBhY2N1cmFjeV9yZXdhcmQlMEFudW1fcHJvY2Vzc2VzJTNBJTIwNA==",highlighted:`<span class="hljs-comment"># rloo_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">HuggingFaceH4/Polaris-Dataset-53K</span> | |
| <span class="hljs-attr">reward_funcs:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-string">accuracy_reward</span> | |
| <span class="hljs-attr">num_processes:</span> <span class="hljs-number">4</span>`,wrap:!1}}),c=new Z({props:{code:"dHJsJTIwcmxvbyUyMC0tY29uZmlnJTIwcmxvb19jb25maWcueWFtbA==",highlighted:"trl rloo --config rloo_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment),$=w(),e=W("p"),e.textContent=o,U=w(),g(c.$$.fragment)},l(t){J(s.$$.fragment,t),m=u(t),l=C(t,"P",{"data-svelte-h":!0}),I(l)!=="svelte-10bmfas"&&(l.textContent=b),i=u(t),J(p.$$.fragment,t),$=u(t),e=C(t,"P",{"data-svelte-h":!0}),I(e)!=="svelte-ge9qnz"&&(e.textContent=o),U=u(t),J(c.$$.fragment,t)},m(t,n){h(s,t,n),f(t,m,n),f(t,l,n),f(t,i,n),h(p,t,n),f(t,$,n),f(t,e,n),f(t,U,n),h(c,t,n),j=!0},p:X,i(t){j||(d(s.$$.fragment,t),d(p.$$.fragment,t),d(c.$$.fragment,t),j=!0)},o(t){M(s.$$.fragment,t),M(p.$$.fragment,t),M(c.$$.fragment,t),j=!1},d(t){t&&(r(m),r(l),r(i),r($),r(e),r(U)),T(s,t),T(p,t),T(c,t)}}}function Oe(_){let s,m,l,b="or, with a config file:",i,p,$,e,o="Launch with:",U,c,j;return s=new Z({props:{code:"dHJsJTIwa3RvJTIwJTVDJTBBJTIwJTIwLS1tb2RlbF9uYW1lX29yX3BhdGglMjBRd2VuJTJGUXdlbjIuNS0wLjVCJTIwJTVDJTBBJTIwJTIwLS1kYXRhc2V0X25hbWUlMjB0cmwtbGliJTJGa3RvLW1peC0xNGslMjAlNUMlMEElMjAlMjAtLW51bV9wcm9jZXNzZXMlMjA0",highlighted:`trl kto \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name trl-lib/kto-mix-14k \\ | |
| --num_processes 4`,wrap:!1}}),p=new Z({props:{code:"JTIzJTIwa3RvX2NvbmZpZy55YW1sJTBBbW9kZWxfbmFtZV9vcl9wYXRoJTNBJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUwQWRhdGFzZXRfbmFtZSUzQSUyMHRybC1saWIlMkZrdG8tbWl4LTE0ayUwQW51bV9wcm9jZXNzZXMlM0ElMjA0",highlighted:`<span class="hljs-comment"># kto_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">trl-lib/kto-mix-14k</span> | |
| <span class="hljs-attr">num_processes:</span> <span class="hljs-number">4</span>`,wrap:!1}}),c=new Z({props:{code:"dHJsJTIwa3RvJTIwLS1jb25maWclMjBrdG9fY29uZmlnLnlhbWw=",highlighted:"trl kto --config kto_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment),$=w(),e=W("p"),e.textContent=o,U=w(),g(c.$$.fragment)},l(t){J(s.$$.fragment,t),m=u(t),l=C(t,"P",{"data-svelte-h":!0}),I(l)!=="svelte-10bmfas"&&(l.textContent=b),i=u(t),J(p.$$.fragment,t),$=u(t),e=C(t,"P",{"data-svelte-h":!0}),I(e)!=="svelte-ge9qnz"&&(e.textContent=o),U=u(t),J(c.$$.fragment,t)},m(t,n){h(s,t,n),f(t,m,n),f(t,l,n),f(t,i,n),h(p,t,n),f(t,$,n),f(t,e,n),f(t,U,n),h(c,t,n),j=!0},p:X,i(t){j||(d(s.$$.fragment,t),d(p.$$.fragment,t),d(c.$$.fragment,t),j=!0)},o(t){M(s.$$.fragment,t),M(p.$$.fragment,t),M(c.$$.fragment,t),j=!1},d(t){t&&(r(m),r(l),r(i),r($),r(e),r(U)),T(s,t),T(p,t),T(c,t)}}}function qe(_){let s,m,l,b,i,p,$,e,o,U,c,j;return s=new B({props:{id:"trainer",option:"SFT",$$slots:{default:[He]},$$scope:{ctx:_}}}),l=new B({props:{id:"trainer",option:"DPO",$$slots:{default:[Ee]},$$scope:{ctx:_}}}),i=new B({props:{id:"trainer",option:"Reward",$$slots:{default:[Ae]},$$scope:{ctx:_}}}),$=new B({props:{id:"trainer",option:"GRPO",$$slots:{default:[Pe]},$$scope:{ctx:_}}}),o=new B({props:{id:"trainer",option:"RLOO",$$slots:{default:[De]},$$scope:{ctx:_}}}),c=new B({props:{id:"trainer",option:"KTO",$$slots:{default:[Oe]},$$scope:{ctx:_}}}),{c(){g(s.$$.fragment),m=w(),g(l.$$.fragment),b=w(),g(i.$$.fragment),p=w(),g($.$$.fragment),e=w(),g(o.$$.fragment),U=w(),g(c.$$.fragment)},l(t){J(s.$$.fragment,t),m=u(t),J(l.$$.fragment,t),b=u(t),J(i.$$.fragment,t),p=u(t),J($.$$.fragment,t),e=u(t),J(o.$$.fragment,t),U=u(t),J(c.$$.fragment,t)},m(t,n){h(s,t,n),f(t,m,n),h(l,t,n),f(t,b,n),h(i,t,n),f(t,p,n),h($,t,n),f(t,e,n),h(o,t,n),f(t,U,n),h(c,t,n),j=!0},p(t,n){const L={};n&2&&(L.$$scope={dirty:n,ctx:t}),s.$set(L);const v={};n&2&&(v.$$scope={dirty:n,ctx:t}),l.$set(v);const F={};n&2&&(F.$$scope={dirty:n,ctx:t}),i.$set(F);const Y={};n&2&&(Y.$$scope={dirty:n,ctx:t}),$.$set(Y);const R={};n&2&&(R.$$scope={dirty:n,ctx:t}),o.$set(R);const G={};n&2&&(G.$$scope={dirty:n,ctx:t}),c.$set(G)},i(t){j||(d(s.$$.fragment,t),d(l.$$.fragment,t),d(i.$$.fragment,t),d($.$$.fragment,t),d(o.$$.fragment,t),d(c.$$.fragment,t),j=!0)},o(t){M(s.$$.fragment,t),M(l.$$.fragment,t),M(i.$$.fragment,t),M($.$$.fragment,t),M(o.$$.fragment,t),M(c.$$.fragment,t),j=!1},d(t){t&&(r(m),r(b),r(p),r(e),r(U)),T(s,t),T(l,t),T(i,t),T($,t),T(o,t),T(c,t)}}}function Ke(_){let s,m,l,b="or, with a config file:",i,p,$,e,o="Launch with:",U,c,j;return s=new Z({props:{code:"dHJsJTIwc2Z0JTIwJTVDJTBBJTIwJTIwLS1tb2RlbF9uYW1lX29yX3BhdGglMjBRd2VuJTJGUXdlbjIuNS0wLjVCJTIwJTVDJTBBJTIwJTIwLS1kYXRhc2V0X25hbWUlMjBzdGFuZm9yZG5scCUyRmltZGIlMjAlNUMlMEElMjAlMjAtLWFjY2VsZXJhdGVfY29uZmlnJTIwemVybzIlMjAlMjAlMjMlMjBvciUyMHBhdGglMkZ0byUyRm15JTJGYWNjZWxlcmF0ZSUyRmNvbmZpZy55YW1s",highlighted:`trl sft \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name stanfordnlp/imdb \\ | |
| --accelerate_config zero2 <span class="hljs-comment"># or path/to/my/accelerate/config.yaml</span>`,wrap:!1}}),p=new Z({props:{code:"JTIzJTIwc2Z0X2NvbmZpZy55YW1sJTBBbW9kZWxfbmFtZV9vcl9wYXRoJTNBJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUwQWRhdGFzZXRfbmFtZSUzQSUyMHN0YW5mb3JkbmxwJTJGaW1kYiUwQWFjY2VsZXJhdGVfY29uZmlnJTNBJTIwemVybzIlMjAlMjAlMjMlMjBvciUyMHBhdGglMkZ0byUyRm15JTJGYWNjZWxlcmF0ZSUyRmNvbmZpZy55YW1s",highlighted:`<span class="hljs-comment"># sft_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">stanfordnlp/imdb</span> | |
| <span class="hljs-attr">accelerate_config:</span> <span class="hljs-string">zero2</span> <span class="hljs-comment"># or path/to/my/accelerate/config.yaml</span>`,wrap:!1}}),c=new Z({props:{code:"dHJsJTIwc2Z0JTIwLS1jb25maWclMjBzZnRfY29uZmlnLnlhbWw=",highlighted:"trl sft --config sft_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment),$=w(),e=W("p"),e.textContent=o,U=w(),g(c.$$.fragment)},l(t){J(s.$$.fragment,t),m=u(t),l=C(t,"P",{"data-svelte-h":!0}),I(l)!=="svelte-10bmfas"&&(l.textContent=b),i=u(t),J(p.$$.fragment,t),$=u(t),e=C(t,"P",{"data-svelte-h":!0}),I(e)!=="svelte-ge9qnz"&&(e.textContent=o),U=u(t),J(c.$$.fragment,t)},m(t,n){h(s,t,n),f(t,m,n),f(t,l,n),f(t,i,n),h(p,t,n),f(t,$,n),f(t,e,n),f(t,U,n),h(c,t,n),j=!0},p:X,i(t){j||(d(s.$$.fragment,t),d(p.$$.fragment,t),d(c.$$.fragment,t),j=!0)},o(t){M(s.$$.fragment,t),M(p.$$.fragment,t),M(c.$$.fragment,t),j=!1},d(t){t&&(r(m),r(l),r(i),r($),r(e),r(U)),T(s,t),T(p,t),T(c,t)}}}function tl(_){let s,m,l,b="or, with a config file:",i,p,$,e,o="Launch with:",U,c,j;return s=new Z({props:{code:"dHJsJTIwZHBvJTIwJTVDJTBBJTIwJTIwLS1tb2RlbF9uYW1lX29yX3BhdGglMjBRd2VuJTJGUXdlbjIuNS0wLjVCJTIwJTVDJTBBJTIwJTIwLS1kYXRhc2V0X25hbWUlMjBhbnRocm9waWMlMkZoaC1ybGhmJTIwJTVDJTBBJTIwJTIwLS1hY2NlbGVyYXRlX2NvbmZpZyUyMHplcm8yJTIwJTIwJTIzJTIwb3IlMjBwYXRoJTJGdG8lMkZteSUyRmFjY2VsZXJhdGUlMkZjb25maWcueWFtbA==",highlighted:`trl dpo \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name anthropic/hh-rlhf \\ | |
| --accelerate_config zero2 <span class="hljs-comment"># or path/to/my/accelerate/config.yaml</span>`,wrap:!1}}),p=new Z({props:{code:"JTIzJTIwZHBvX2NvbmZpZy55YW1sJTBBbW9kZWxfbmFtZV9vcl9wYXRoJTNBJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUwQWRhdGFzZXRfbmFtZSUzQSUyMGFudGhyb3BpYyUyRmhoLXJsaGYlMEFhY2NlbGVyYXRlX2NvbmZpZyUzQSUyMHplcm8yJTIwJTIwJTIzJTIwb3IlMjBwYXRoJTJGdG8lMkZteSUyRmFjY2VsZXJhdGUlMkZjb25maWcueWFtbA==",highlighted:`<span class="hljs-comment"># dpo_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">anthropic/hh-rlhf</span> | |
| <span class="hljs-attr">accelerate_config:</span> <span class="hljs-string">zero2</span> <span class="hljs-comment"># or path/to/my/accelerate/config.yaml</span>`,wrap:!1}}),c=new Z({props:{code:"dHJsJTIwZHBvJTIwLS1jb25maWclMjBkcG9fY29uZmlnLnlhbWw=",highlighted:"trl dpo --config dpo_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment),$=w(),e=W("p"),e.textContent=o,U=w(),g(c.$$.fragment)},l(t){J(s.$$.fragment,t),m=u(t),l=C(t,"P",{"data-svelte-h":!0}),I(l)!=="svelte-10bmfas"&&(l.textContent=b),i=u(t),J(p.$$.fragment,t),$=u(t),e=C(t,"P",{"data-svelte-h":!0}),I(e)!=="svelte-ge9qnz"&&(e.textContent=o),U=u(t),J(c.$$.fragment,t)},m(t,n){h(s,t,n),f(t,m,n),f(t,l,n),f(t,i,n),h(p,t,n),f(t,$,n),f(t,e,n),f(t,U,n),h(c,t,n),j=!0},p:X,i(t){j||(d(s.$$.fragment,t),d(p.$$.fragment,t),d(c.$$.fragment,t),j=!0)},o(t){M(s.$$.fragment,t),M(p.$$.fragment,t),M(c.$$.fragment,t),j=!1},d(t){t&&(r(m),r(l),r(i),r($),r(e),r(U)),T(s,t),T(p,t),T(c,t)}}}function el(_){let s,m,l,b="or, with a config file:",i,p,$,e,o="Launch with:",U,c,j;return s=new Z({props:{code:"dHJsJTIwcmV3YXJkJTIwJTVDJTBBJTIwJTIwLS1tb2RlbF9uYW1lX29yX3BhdGglMjBRd2VuJTJGUXdlbjIuNS0wLjVCJTIwJTVDJTBBJTIwJTIwLS1kYXRhc2V0X25hbWUlMjB0cmwtbGliJTJGdWx0cmFmZWVkYmFja19iaW5hcml6ZWQlMjAlNUMlMEElMjAlMjAtLWFjY2VsZXJhdGVfY29uZmlnJTIwemVybzIlMjAlMjAlMjMlMjBvciUyMHBhdGglMkZ0byUyRm15JTJGYWNjZWxlcmF0ZSUyRmNvbmZpZy55YW1s",highlighted:`trl reward \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name trl-lib/ultrafeedback_binarized \\ | |
| --accelerate_config zero2 <span class="hljs-comment"># or path/to/my/accelerate/config.yaml</span>`,wrap:!1}}),p=new Z({props:{code:"JTIzJTIwcmV3YXJkX2NvbmZpZy55YW1sJTBBbW9kZWxfbmFtZV9vcl9wYXRoJTNBJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUwQWRhdGFzZXRfbmFtZSUzQSUyMHRybC1saWIlMkZ1bHRyYWZlZWRiYWNrX2JpbmFyaXplZCUwQWFjY2VsZXJhdGVfY29uZmlnJTNBJTIwemVybzIlMjAlMjAlMjMlMjBvciUyMHBhdGglMkZ0byUyRm15JTJGYWNjZWxlcmF0ZSUyRmNvbmZpZy55YW1s",highlighted:`<span class="hljs-comment"># reward_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">trl-lib/ultrafeedback_binarized</span> | |
| <span class="hljs-attr">accelerate_config:</span> <span class="hljs-string">zero2</span> <span class="hljs-comment"># or path/to/my/accelerate/config.yaml</span>`,wrap:!1}}),c=new Z({props:{code:"dHJsJTIwcmV3YXJkJTIwLS1jb25maWclMjByZXdhcmRfY29uZmlnLnlhbWw=",highlighted:"trl reward --config reward_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment),$=w(),e=W("p"),e.textContent=o,U=w(),g(c.$$.fragment)},l(t){J(s.$$.fragment,t),m=u(t),l=C(t,"P",{"data-svelte-h":!0}),I(l)!=="svelte-10bmfas"&&(l.textContent=b),i=u(t),J(p.$$.fragment,t),$=u(t),e=C(t,"P",{"data-svelte-h":!0}),I(e)!=="svelte-ge9qnz"&&(e.textContent=o),U=u(t),J(c.$$.fragment,t)},m(t,n){h(s,t,n),f(t,m,n),f(t,l,n),f(t,i,n),h(p,t,n),f(t,$,n),f(t,e,n),f(t,U,n),h(c,t,n),j=!0},p:X,i(t){j||(d(s.$$.fragment,t),d(p.$$.fragment,t),d(c.$$.fragment,t),j=!0)},o(t){M(s.$$.fragment,t),M(p.$$.fragment,t),M(c.$$.fragment,t),j=!1},d(t){t&&(r(m),r(l),r(i),r($),r(e),r(U)),T(s,t),T(p,t),T(c,t)}}}function ll(_){let s,m,l,b="or, with a config file:",i,p,$,e,o="Launch with:",U,c,j;return s=new Z({props:{code:"dHJsJTIwZ3JwbyUyMCU1QyUwQSUyMCUyMC0tbW9kZWxfbmFtZV9vcl9wYXRoJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUyMCU1QyUwQSUyMCUyMC0tZGF0YXNldF9uYW1lJTIwSHVnZ2luZ0ZhY2VINCUyRlBvbGFyaXMtRGF0YXNldC01M0slMjAlNUMlMEElMjAlMjAtLXJld2FyZF9mdW5jcyUyMGFjY3VyYWN5X3Jld2FyZCUyMCU1QyUwQSUyMCUyMC0tYWNjZWxlcmF0ZV9jb25maWclMjB6ZXJvMiUyMCUyMCUyMyUyMG9yJTIwcGF0aCUyRnRvJTJGbXklMkZhY2NlbGVyYXRlJTJGY29uZmlnLnlhbWw=",highlighted:`trl grpo \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name HuggingFaceH4/Polaris-Dataset-53K \\ | |
| --reward_funcs accuracy_reward \\ | |
| --accelerate_config zero2 <span class="hljs-comment"># or path/to/my/accelerate/config.yaml</span>`,wrap:!1}}),p=new Z({props:{code:"JTIzJTIwZ3Jwb19jb25maWcueWFtbCUwQW1vZGVsX25hbWVfb3JfcGF0aCUzQSUyMFF3ZW4lMkZRd2VuMi41LTAuNUIlMEFkYXRhc2V0X25hbWUlM0ElMjBIdWdnaW5nRmFjZUg0JTJGUG9sYXJpcy1EYXRhc2V0LTUzSyUwQXJld2FyZF9mdW5jcyUzQSUwQSUyMCUyMC0lMjBhY2N1cmFjeV9yZXdhcmQlMEFhY2NlbGVyYXRlX2NvbmZpZyUzQSUyMHplcm8yJTIwJTIwJTIzJTIwb3IlMjBwYXRoJTJGdG8lMkZteSUyRmFjY2VsZXJhdGUlMkZjb25maWcueWFtbA==",highlighted:`<span class="hljs-comment"># grpo_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">HuggingFaceH4/Polaris-Dataset-53K</span> | |
| <span class="hljs-attr">reward_funcs:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-string">accuracy_reward</span> | |
| <span class="hljs-attr">accelerate_config:</span> <span class="hljs-string">zero2</span> <span class="hljs-comment"># or path/to/my/accelerate/config.yaml</span>`,wrap:!1}}),c=new Z({props:{code:"dHJsJTIwZ3JwbyUyMC0tY29uZmlnJTIwZ3Jwb19jb25maWcueWFtbA==",highlighted:"trl grpo --config grpo_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment),$=w(),e=W("p"),e.textContent=o,U=w(),g(c.$$.fragment)},l(t){J(s.$$.fragment,t),m=u(t),l=C(t,"P",{"data-svelte-h":!0}),I(l)!=="svelte-10bmfas"&&(l.textContent=b),i=u(t),J(p.$$.fragment,t),$=u(t),e=C(t,"P",{"data-svelte-h":!0}),I(e)!=="svelte-ge9qnz"&&(e.textContent=o),U=u(t),J(c.$$.fragment,t)},m(t,n){h(s,t,n),f(t,m,n),f(t,l,n),f(t,i,n),h(p,t,n),f(t,$,n),f(t,e,n),f(t,U,n),h(c,t,n),j=!0},p:X,i(t){j||(d(s.$$.fragment,t),d(p.$$.fragment,t),d(c.$$.fragment,t),j=!0)},o(t){M(s.$$.fragment,t),M(p.$$.fragment,t),M(c.$$.fragment,t),j=!1},d(t){t&&(r(m),r(l),r(i),r($),r(e),r(U)),T(s,t),T(p,t),T(c,t)}}}function sl(_){let s,m,l,b="or, with a config file:",i,p,$,e,o="Launch with:",U,c,j;return s=new Z({props:{code:"dHJsJTIwcmxvbyUyMCU1QyUwQSUyMCUyMC0tbW9kZWxfbmFtZV9vcl9wYXRoJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUyMCU1QyUwQSUyMCUyMC0tZGF0YXNldF9uYW1lJTIwSHVnZ2luZ0ZhY2VINCUyRlBvbGFyaXMtRGF0YXNldC01M0slMjAlNUMlMEElMjAlMjAtLXJld2FyZF9mdW5jcyUyMGFjY3VyYWN5X3Jld2FyZCUyMCU1QyUwQSUyMCUyMC0tYWNjZWxlcmF0ZV9jb25maWclMjB6ZXJvMiUyMCUyMCUyMyUyMG9yJTIwcGF0aCUyRnRvJTJGbXklMkZhY2NlbGVyYXRlJTJGY29uZmlnLnlhbWw=",highlighted:`trl rloo \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name HuggingFaceH4/Polaris-Dataset-53K \\ | |
| --reward_funcs accuracy_reward \\ | |
| --accelerate_config zero2 <span class="hljs-comment"># or path/to/my/accelerate/config.yaml</span>`,wrap:!1}}),p=new Z({props:{code:"JTIzJTIwcmxvb19jb25maWcueWFtbCUwQW1vZGVsX25hbWVfb3JfcGF0aCUzQSUyMFF3ZW4lMkZRd2VuMi41LTAuNUIlMEFkYXRhc2V0X25hbWUlM0ElMjBIdWdnaW5nRmFjZUg0JTJGUG9sYXJpcy1EYXRhc2V0LTUzSyUwQXJld2FyZF9mdW5jcyUzQSUwQSUyMCUyMC0lMjBhY2N1cmFjeV9yZXdhcmQlMEFhY2NlbGVyYXRlX2NvbmZpZyUzQSUyMHplcm8yJTIwJTIwJTIzJTIwb3IlMjBwYXRoJTJGdG8lMkZteSUyRmFjY2VsZXJhdGUlMkZjb25maWcueWFtbA==",highlighted:`<span class="hljs-comment"># rloo_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">HuggingFaceH4/Polaris-Dataset-53K</span> | |
| <span class="hljs-attr">reward_funcs:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-string">accuracy_reward</span> | |
| <span class="hljs-attr">accelerate_config:</span> <span class="hljs-string">zero2</span> <span class="hljs-comment"># or path/to/my/accelerate/config.yaml</span>`,wrap:!1}}),c=new Z({props:{code:"dHJsJTIwcmxvbyUyMC0tY29uZmlnJTIwcmxvb19jb25maWcueWFtbA==",highlighted:"trl rloo --config rloo_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment),$=w(),e=W("p"),e.textContent=o,U=w(),g(c.$$.fragment)},l(t){J(s.$$.fragment,t),m=u(t),l=C(t,"P",{"data-svelte-h":!0}),I(l)!=="svelte-10bmfas"&&(l.textContent=b),i=u(t),J(p.$$.fragment,t),$=u(t),e=C(t,"P",{"data-svelte-h":!0}),I(e)!=="svelte-ge9qnz"&&(e.textContent=o),U=u(t),J(c.$$.fragment,t)},m(t,n){h(s,t,n),f(t,m,n),f(t,l,n),f(t,i,n),h(p,t,n),f(t,$,n),f(t,e,n),f(t,U,n),h(c,t,n),j=!0},p:X,i(t){j||(d(s.$$.fragment,t),d(p.$$.fragment,t),d(c.$$.fragment,t),j=!0)},o(t){M(s.$$.fragment,t),M(p.$$.fragment,t),M(c.$$.fragment,t),j=!1},d(t){t&&(r(m),r(l),r(i),r($),r(e),r(U)),T(s,t),T(p,t),T(c,t)}}}function nl(_){let s,m,l,b="or, with a config file:",i,p,$,e,o="Launch with:",U,c,j;return s=new Z({props:{code:"dHJsJTIwa3RvJTIwJTVDJTBBJTIwJTIwLS1tb2RlbF9uYW1lX29yX3BhdGglMjBRd2VuJTJGUXdlbjIuNS0wLjVCJTIwJTVDJTBBJTIwJTIwLS1kYXRhc2V0X25hbWUlMjB0cmwtbGliJTJGa3RvLW1peC0xNGslMjAlNUMlMEElMjAlMjAtLWFjY2VsZXJhdGVfY29uZmlnJTIwemVybzIlMjAlMjAlMjMlMjBvciUyMHBhdGglMkZ0byUyRm15JTJGYWNjZWxlcmF0ZSUyRmNvbmZpZy55YW1s",highlighted:`trl kto \\ | |
| --model_name_or_path Qwen/Qwen2.5-0.5B \\ | |
| --dataset_name trl-lib/kto-mix-14k \\ | |
| --accelerate_config zero2 <span class="hljs-comment"># or path/to/my/accelerate/config.yaml</span>`,wrap:!1}}),p=new Z({props:{code:"JTIzJTIwa3RvX2NvbmZpZy55YW1sJTBBbW9kZWxfbmFtZV9vcl9wYXRoJTNBJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUwQWRhdGFzZXRfbmFtZSUzQSUyMHRybC1saWIlMkZrdG8tbWl4LTE0ayUwQWFjY2VsZXJhdGVfY29uZmlnJTNBJTIwemVybzIlMjAlMjAlMjMlMjBvciUyMHBhdGglMkZ0byUyRm15JTJGYWNjZWxlcmF0ZSUyRmNvbmZpZy55YW1s",highlighted:`<span class="hljs-comment"># kto_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">dataset_name:</span> <span class="hljs-string">trl-lib/kto-mix-14k</span> | |
| <span class="hljs-attr">accelerate_config:</span> <span class="hljs-string">zero2</span> <span class="hljs-comment"># or path/to/my/accelerate/config.yaml</span>`,wrap:!1}}),c=new Z({props:{code:"dHJsJTIwa3RvJTIwLS1jb25maWclMjBrdG9fY29uZmlnLnlhbWw=",highlighted:"trl kto --config kto_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment),$=w(),e=W("p"),e.textContent=o,U=w(),g(c.$$.fragment)},l(t){J(s.$$.fragment,t),m=u(t),l=C(t,"P",{"data-svelte-h":!0}),I(l)!=="svelte-10bmfas"&&(l.textContent=b),i=u(t),J(p.$$.fragment,t),$=u(t),e=C(t,"P",{"data-svelte-h":!0}),I(e)!=="svelte-ge9qnz"&&(e.textContent=o),U=u(t),J(c.$$.fragment,t)},m(t,n){h(s,t,n),f(t,m,n),f(t,l,n),f(t,i,n),h(p,t,n),f(t,$,n),f(t,e,n),f(t,U,n),h(c,t,n),j=!0},p:X,i(t){j||(d(s.$$.fragment,t),d(p.$$.fragment,t),d(c.$$.fragment,t),j=!0)},o(t){M(s.$$.fragment,t),M(p.$$.fragment,t),M(c.$$.fragment,t),j=!1},d(t){t&&(r(m),r(l),r(i),r($),r(e),r(U)),T(s,t),T(p,t),T(c,t)}}}function al(_){let s,m,l,b,i,p,$,e,o,U,c,j;return s=new B({props:{id:"trainer",option:"SFT",$$slots:{default:[Ke]},$$scope:{ctx:_}}}),l=new B({props:{id:"trainer",option:"DPO",$$slots:{default:[tl]},$$scope:{ctx:_}}}),i=new B({props:{id:"trainer",option:"Reward",$$slots:{default:[el]},$$scope:{ctx:_}}}),$=new B({props:{id:"trainer",option:"GRPO",$$slots:{default:[ll]},$$scope:{ctx:_}}}),o=new B({props:{id:"trainer",option:"RLOO",$$slots:{default:[sl]},$$scope:{ctx:_}}}),c=new B({props:{id:"trainer",option:"KTO",$$slots:{default:[nl]},$$scope:{ctx:_}}}),{c(){g(s.$$.fragment),m=w(),g(l.$$.fragment),b=w(),g(i.$$.fragment),p=w(),g($.$$.fragment),e=w(),g(o.$$.fragment),U=w(),g(c.$$.fragment)},l(t){J(s.$$.fragment,t),m=u(t),J(l.$$.fragment,t),b=u(t),J(i.$$.fragment,t),p=u(t),J($.$$.fragment,t),e=u(t),J(o.$$.fragment,t),U=u(t),J(c.$$.fragment,t)},m(t,n){h(s,t,n),f(t,m,n),h(l,t,n),f(t,b,n),h(i,t,n),f(t,p,n),h($,t,n),f(t,e,n),h(o,t,n),f(t,U,n),h(c,t,n),j=!0},p(t,n){const L={};n&2&&(L.$$scope={dirty:n,ctx:t}),s.$set(L);const v={};n&2&&(v.$$scope={dirty:n,ctx:t}),l.$set(v);const F={};n&2&&(F.$$scope={dirty:n,ctx:t}),i.$set(F);const Y={};n&2&&(Y.$$scope={dirty:n,ctx:t}),$.$set(Y);const R={};n&2&&(R.$$scope={dirty:n,ctx:t}),o.$set(R);const G={};n&2&&(G.$$scope={dirty:n,ctx:t}),c.$set(G)},i(t){j||(d(s.$$.fragment,t),d(l.$$.fragment,t),d(i.$$.fragment,t),d($.$$.fragment,t),d(o.$$.fragment,t),d(c.$$.fragment,t),j=!0)},o(t){M(s.$$.fragment,t),M(l.$$.fragment,t),M(i.$$.fragment,t),M($.$$.fragment,t),M(o.$$.fragment,t),M(c.$$.fragment,t),j=!1},d(t){t&&(r(m),r(b),r(p),r(e),r(U)),T(s,t),T(l,t),T(i,t),T($,t),T(o,t),T(c,t)}}}function pl(_){let s,m,l,b="Launch with:",i,p,$;return s=new Z({props:{code:"JTIzJTIwc2Z0X2NvbmZpZy55YW1sJTBBbW9kZWxfbmFtZV9vcl9wYXRoJTNBJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUwQWRhdGFzZXRzJTNBJTBBJTIwJTIwLSUyMHBhdGglM0ElMjBzdGFuZm9yZG5scCUyRmltZGIlMEElMjAlMjAtJTIwcGF0aCUzQSUyMHJvbmVuZWxkYW4lMkZUaW55U3Rvcmllcw==",highlighted:`<span class="hljs-comment"># sft_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">datasets:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">path:</span> <span class="hljs-string">stanfordnlp/imdb</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">path:</span> <span class="hljs-string">roneneldan/TinyStories</span>`,wrap:!1}}),p=new Z({props:{code:"dHJsJTIwc2Z0JTIwLS1jb25maWclMjBzZnRfY29uZmlnLnlhbWw=",highlighted:"trl sft --config sft_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment)},l(e){J(s.$$.fragment,e),m=u(e),l=C(e,"P",{"data-svelte-h":!0}),I(l)!=="svelte-ge9qnz"&&(l.textContent=b),i=u(e),J(p.$$.fragment,e)},m(e,o){h(s,e,o),f(e,m,o),f(e,l,o),f(e,i,o),h(p,e,o),$=!0},p:X,i(e){$||(d(s.$$.fragment,e),d(p.$$.fragment,e),$=!0)},o(e){M(s.$$.fragment,e),M(p.$$.fragment,e),$=!1},d(e){e&&(r(m),r(l),r(i)),T(s,e),T(p,e)}}}function rl(_){let s,m,l,b="Launch with:",i,p,$;return s=new Z({props:{code:"JTIzJTIwZHBvX2NvbmZpZy55YW1sJTBBbW9kZWxfbmFtZV9vcl9wYXRoJTNBJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUwQWRhdGFzZXRzJTNBJTBBJTIwJTIwLSUyMHBhdGglM0ElMjBCQUFJJTJGSW5maW5pdHktUHJlZmVyZW5jZSUwQSUyMCUyMC0lMjBwYXRoJTNBJTIwYXJnaWxsYSUyRkNhcHliYXJhLVByZWZlcmVuY2Vz",highlighted:`<span class="hljs-comment"># dpo_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">datasets:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">path:</span> <span class="hljs-string">BAAI/Infinity-Preference</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">path:</span> <span class="hljs-string">argilla/Capybara-Preferences</span>`,wrap:!1}}),p=new Z({props:{code:"dHJsJTIwZHBvJTIwLS1jb25maWclMjBkcG9fY29uZmlnLnlhbWw=",highlighted:"trl dpo --config dpo_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment)},l(e){J(s.$$.fragment,e),m=u(e),l=C(e,"P",{"data-svelte-h":!0}),I(l)!=="svelte-ge9qnz"&&(l.textContent=b),i=u(e),J(p.$$.fragment,e)},m(e,o){h(s,e,o),f(e,m,o),f(e,l,o),f(e,i,o),h(p,e,o),$=!0},p:X,i(e){$||(d(s.$$.fragment,e),d(p.$$.fragment,e),$=!0)},o(e){M(s.$$.fragment,e),M(p.$$.fragment,e),$=!1},d(e){e&&(r(m),r(l),r(i)),T(s,e),T(p,e)}}}function fl(_){let s,m,l,b="Launch with:",i,p,$;return s=new Z({props:{code:"JTIzJTIwcmV3YXJkX2NvbmZpZy55YW1sJTBBbW9kZWxfbmFtZV9vcl9wYXRoJTNBJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUwQWRhdGFzZXRzJTNBJTBBJTIwJTIwLSUyMHBhdGglM0ElMjB0cmwtbGliJTJGdGxkci1wcmVmZXJlbmNlJTBBJTIwJTIwLSUyMHBhdGglM0ElMjB0cmwtbGliJTJGbG0taHVtYW4tcHJlZmVyZW5jZXMtc2VudGltZW50",highlighted:`<span class="hljs-comment"># reward_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">datasets:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">path:</span> <span class="hljs-string">trl-lib/tldr-preference</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">path:</span> <span class="hljs-string">trl-lib/lm-human-preferences-sentiment</span>`,wrap:!1}}),p=new Z({props:{code:"dHJsJTIwcmV3YXJkJTIwLS1jb25maWclMjByZXdhcmRfY29uZmlnLnlhbWw=",highlighted:"trl reward --config reward_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment)},l(e){J(s.$$.fragment,e),m=u(e),l=C(e,"P",{"data-svelte-h":!0}),I(l)!=="svelte-ge9qnz"&&(l.textContent=b),i=u(e),J(p.$$.fragment,e)},m(e,o){h(s,e,o),f(e,m,o),f(e,l,o),f(e,i,o),h(p,e,o),$=!0},p:X,i(e){$||(d(s.$$.fragment,e),d(p.$$.fragment,e),$=!0)},o(e){M(s.$$.fragment,e),M(p.$$.fragment,e),$=!1},d(e){e&&(r(m),r(l),r(i)),T(s,e),T(p,e)}}}function ml(_){let s,m,l,b="Launch with:",i,p,$;return s=new Z({props:{code:"JTIzJTIwZ3Jwb19jb25maWcueWFtbCUwQW1vZGVsX25hbWVfb3JfcGF0aCUzQSUyMFF3ZW4lMkZRd2VuMi41LTAuNUIlMEFkYXRhc2V0cyUzQSUwQSUyMCUyMC0lMjBwYXRoJTNBJTIwSHVnZ2luZ0ZhY2VINCUyRlBvbGFyaXMtRGF0YXNldC01M0slMEElMjAlMjAtJTIwcGF0aCUzQSUyMHRybC1saWIlMkZEZWVwTWF0aC0xMDNLJTBBcmV3YXJkX2Z1bmNzJTNBJTBBJTIwJTIwLSUyMGFjY3VyYWN5X3Jld2FyZA==",highlighted:`<span class="hljs-comment"># grpo_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">datasets:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">path:</span> <span class="hljs-string">HuggingFaceH4/Polaris-Dataset-53K</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">path:</span> <span class="hljs-string">trl-lib/DeepMath-103K</span> | |
| <span class="hljs-attr">reward_funcs:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-string">accuracy_reward</span>`,wrap:!1}}),p=new Z({props:{code:"dHJsJTIwZ3JwbyUyMC0tY29uZmlnJTIwZ3Jwb19jb25maWcueWFtbA==",highlighted:"trl grpo --config grpo_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment)},l(e){J(s.$$.fragment,e),m=u(e),l=C(e,"P",{"data-svelte-h":!0}),I(l)!=="svelte-ge9qnz"&&(l.textContent=b),i=u(e),J(p.$$.fragment,e)},m(e,o){h(s,e,o),f(e,m,o),f(e,l,o),f(e,i,o),h(p,e,o),$=!0},p:X,i(e){$||(d(s.$$.fragment,e),d(p.$$.fragment,e),$=!0)},o(e){M(s.$$.fragment,e),M(p.$$.fragment,e),$=!1},d(e){e&&(r(m),r(l),r(i)),T(s,e),T(p,e)}}}function il(_){let s,m,l,b="Launch with:",i,p,$;return s=new Z({props:{code:"JTIzJTIwcmxvb19jb25maWcueWFtbCUwQW1vZGVsX25hbWVfb3JfcGF0aCUzQSUyMFF3ZW4lMkZRd2VuMi41LTAuNUIlMEFkYXRhc2V0cyUzQSUwQSUyMCUyMC0lMjBwYXRoJTNBJTIwSHVnZ2luZ0ZhY2VINCUyRlBvbGFyaXMtRGF0YXNldC01M0slMEElMjAlMjAtJTIwcGF0aCUzQSUyMHRybC1saWIlMkZEZWVwTWF0aC0xMDNLJTBBcmV3YXJkX2Z1bmNzJTNBJTBBJTIwJTIwLSUyMGFjY3VyYWN5X3Jld2FyZA==",highlighted:`<span class="hljs-comment"># rloo_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">datasets:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">path:</span> <span class="hljs-string">HuggingFaceH4/Polaris-Dataset-53K</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">path:</span> <span class="hljs-string">trl-lib/DeepMath-103K</span> | |
| <span class="hljs-attr">reward_funcs:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-string">accuracy_reward</span>`,wrap:!1}}),p=new Z({props:{code:"dHJsJTIwcmxvbyUyMC0tY29uZmlnJTIwcmxvb19jb25maWcueWFtbA==",highlighted:"trl rloo --config rloo_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment)},l(e){J(s.$$.fragment,e),m=u(e),l=C(e,"P",{"data-svelte-h":!0}),I(l)!=="svelte-ge9qnz"&&(l.textContent=b),i=u(e),J(p.$$.fragment,e)},m(e,o){h(s,e,o),f(e,m,o),f(e,l,o),f(e,i,o),h(p,e,o),$=!0},p:X,i(e){$||(d(s.$$.fragment,e),d(p.$$.fragment,e),$=!0)},o(e){M(s.$$.fragment,e),M(p.$$.fragment,e),$=!1},d(e){e&&(r(m),r(l),r(i)),T(s,e),T(p,e)}}}function $l(_){let s,m,l,b="Launch with:",i,p,$;return s=new Z({props:{code:"JTIzJTIwa3RvX2NvbmZpZy55YW1sJTBBbW9kZWxfbmFtZV9vcl9wYXRoJTNBJTIwUXdlbiUyRlF3ZW4yLjUtMC41QiUwQWRhdGFzZXRzJTNBJTBBJTIwJTIwLSUyMHBhdGglM0ElMjB0cmwtbGliJTJGa3RvLW1peC0xNGslMEElMjAlMjAtJTIwcGF0aCUzQSUyMGFyZ2lsbGElMkZ1bHRyYWZlZWRiYWNrLWJpbmFyaXplZC1wcmVmZXJlbmNlcy1jbGVhbmVk",highlighted:`<span class="hljs-comment"># kto_config.yaml</span> | |
| <span class="hljs-attr">model_name_or_path:</span> <span class="hljs-string">Qwen/Qwen2.5-0.5B</span> | |
| <span class="hljs-attr">datasets:</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">path:</span> <span class="hljs-string">trl-lib/kto-mix-14k</span> | |
| <span class="hljs-bullet">-</span> <span class="hljs-attr">path:</span> <span class="hljs-string">argilla/ultrafeedback-binarized-preferences-cleaned</span>`,wrap:!1}}),p=new Z({props:{code:"dHJsJTIwa3RvJTIwLS1jb25maWclMjBrdG9fY29uZmlnLnlhbWw=",highlighted:"trl kto --config kto_config.yaml",wrap:!1}}),{c(){g(s.$$.fragment),m=w(),l=W("p"),l.textContent=b,i=w(),g(p.$$.fragment)},l(e){J(s.$$.fragment,e),m=u(e),l=C(e,"P",{"data-svelte-h":!0}),I(l)!=="svelte-ge9qnz"&&(l.textContent=b),i=u(e),J(p.$$.fragment,e)},m(e,o){h(s,e,o),f(e,m,o),f(e,l,o),f(e,i,o),h(p,e,o),$=!0},p:X,i(e){$||(d(s.$$.fragment,e),d(p.$$.fragment,e),$=!0)},o(e){M(s.$$.fragment,e),M(p.$$.fragment,e),$=!1},d(e){e&&(r(m),r(l),r(i)),T(s,e),T(p,e)}}}function ol(_){let s,m,l,b,i,p,$,e,o,U,c,j;return s=new B({props:{id:"trainer",option:"SFT",$$slots:{default:[pl]},$$scope:{ctx:_}}}),l=new B({props:{id:"trainer",option:"DPO",$$slots:{default:[rl]},$$scope:{ctx:_}}}),i=new B({props:{id:"trainer",option:"Reward",$$slots:{default:[fl]},$$scope:{ctx:_}}}),$=new B({props:{id:"trainer",option:"GRPO",$$slots:{default:[ml]},$$scope:{ctx:_}}}),o=new B({props:{id:"trainer",option:"RLOO",$$slots:{default:[il]},$$scope:{ctx:_}}}),c=new B({props:{id:"trainer",option:"KTO",$$slots:{default:[$l]},$$scope:{ctx:_}}}),{c(){g(s.$$.fragment),m=w(),g(l.$$.fragment),b=w(),g(i.$$.fragment),p=w(),g($.$$.fragment),e=w(),g(o.$$.fragment),U=w(),g(c.$$.fragment)},l(t){J(s.$$.fragment,t),m=u(t),J(l.$$.fragment,t),b=u(t),J(i.$$.fragment,t),p=u(t),J($.$$.fragment,t),e=u(t),J(o.$$.fragment,t),U=u(t),J(c.$$.fragment,t)},m(t,n){h(s,t,n),f(t,m,n),h(l,t,n),f(t,b,n),h(i,t,n),f(t,p,n),h($,t,n),f(t,e,n),h(o,t,n),f(t,U,n),h(c,t,n),j=!0},p(t,n){const L={};n&2&&(L.$$scope={dirty:n,ctx:t}),s.$set(L);const v={};n&2&&(v.$$scope={dirty:n,ctx:t}),l.$set(v);const F={};n&2&&(F.$$scope={dirty:n,ctx:t}),i.$set(F);const Y={};n&2&&(Y.$$scope={dirty:n,ctx:t}),$.$set(Y);const R={};n&2&&(R.$$scope={dirty:n,ctx:t}),o.$set(R);const G={};n&2&&(G.$$scope={dirty:n,ctx:t}),c.$set(G)},i(t){j||(d(s.$$.fragment,t),d(l.$$.fragment,t),d(i.$$.fragment,t),d($.$$.fragment,t),d(o.$$.fragment,t),d(c.$$.fragment,t),j=!0)},o(t){M(s.$$.fragment,t),M(l.$$.fragment,t),M(i.$$.fragment,t),M($.$$.fragment,t),M(o.$$.fragment,t),M(c.$$.fragment,t),j=!1},d(t){t&&(r(m),r(b),r(p),r(e),r(U)),T(s,t),T(l,t),T(i,t),T($,t),T(o,t),T(c,t)}}}function cl(_){let s,m,l,b,i,p,$,e,o,U="TRL provides a powerful command-line interface (CLI) to fine-tune large language models (LLMs) using methods like Supervised Fine-Tuning (SFT), Direct Preference Optimization (DPO), and more. The CLI abstracts away much of the boilerplate, letting you launch training jobs quickly and reproducibly.",c,j,t,n,L="Currently supported commands are:",v,F,Y,R,G="<li><code>trl dpo</code>: fine-tune a LLM with DPO</li> <li><code>trl grpo</code>: fine-tune a LLM with GRPO</li> <li><code>trl kto</code>: fine-tune a LLM with KTO</li> <li><code>trl reward</code>: train a Reward Model</li> <li><code>trl rloo</code>: fine-tune a LLM with RLOO</li> <li><code>trl sft</code>: fine-tune a LLM with SFT</li>",Tt,z,bt,H,le="<li><code>trl env</code>: get the system information</li> <li><code>trl vllm-serve</code>: serve a model with vLLM</li>",yt,E,jt,A,Ut,P,se="You can launch training directly from the CLI by specifying required arguments like the model and dataset:",_t,V,Zt,D,Wt,O,ne="To keep your CLI commands clean and reproducible, you can define all training arguments in a YAML configuration file:",Ct,S,It,q,Bt,K,ae='TRL CLI natively supports <a href="https://huggingface.co/docs/accelerate" rel="nofollow">🤗 Accelerate</a>, making it easy to scale training across multiple GPUs, machines, or use advanced setups like DeepSpeed — all from the same CLI.',Xt,tt,pe='You can pass any <code>accelerate launch</code> arguments directly to <code>trl</code>, such as <code>--num_processes</code>. For more information see <a href="https://huggingface.co/docs/accelerate/en/basic_tutorials/launch#using-accelerate-launch" rel="nofollow">Using accelerate launch</a>.',Ft,k,Rt,et,vt,lt,re='The <code>--accelerate_config</code> flag lets you easily configure distributed training with <a href="https://github.com/huggingface/accelerate" rel="nofollow">🤗 Accelerate</a>. This flag accepts either:',Yt,st,fe="<li>the name of a predefined config profile (built into TRL), or</li> <li>a path to a custom Accelerate YAML config file.</li>",Lt,nt,Gt,at,me="TRL provides several ready-to-use Accelerate configs to simplify common training setups:",Qt,pt,ie="<thead><tr><th>Name</th> <th>Description</th></tr></thead> <tbody><tr><td><code>fsdp1</code></td> <td>Fully Sharded Data Parallel Stage 1</td></tr> <tr><td><code>fsdp2</code></td> <td>Fully Sharded Data Parallel Stage 2</td></tr> <tr><td><code>zero1</code></td> <td>DeepSpeed ZeRO Stage 1</td></tr> <tr><td><code>zero2</code></td> <td>DeepSpeed ZeRO Stage 2</td></tr> <tr><td><code>zero3</code></td> <td>DeepSpeed ZeRO Stage 3</td></tr> <tr><td><code>multi_gpu</code></td> <td>Multi-GPU training</td></tr> <tr><td><code>single_gpu</code></td> <td>Single-GPU training</td></tr></tbody>",Vt,rt,$e="To use one of these, just pass the name to <code>--accelerate_config</code>. TRL will automatically load the corresponding config file from <code>trl/accelerate_config/</code>.",St,ft,kt,x,xt,mt,Nt,it,oe="You can use dataset mixtures to combine multiple datasets into a single training dataset. This is useful for training on diverse data sources or when you want to mix different types of data.",zt,N,Ht,$t,ce='To see all the available keywords for defining dataset mixtures, refer to the <a href="/docs/trl/pr_5607/en/script_utils#trl.scripts.utils.DatasetConfig">scripts.utils.DatasetConfig</a> and <a href="/docs/trl/pr_5607/en/script_utils#trl.DatasetMixtureConfig">DatasetMixtureConfig</a> classes.',Et,ot,At,ct,we="You can get the system information by running the following command:",Pt,wt,Dt,ut,ue="This will print out the system information, including the GPU information, the CUDA version, the PyTorch version, the transformers version, the TRL version, and any optional dependencies that are installed.",Ot,gt,qt,Jt,ge="This information is required when reporting an issue.",Kt,ht,te,dt,ee;return i=new Ie({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),$=new Q({props:{title:"Command Line Interfaces (CLIs)",local:"command-line-interfaces-clis",headingTag:"h1"}}),j=new Q({props:{title:"Commands",local:"commands",headingTag:"h2"}}),F=new Q({props:{title:"Training Commands",local:"training-commands",headingTag:"h3"}}),z=new Q({props:{title:"Other Commands",local:"other-commands",headingTag:"h3"}}),E=new Q({props:{title:"Fine-Tuning with the TRL CLI",local:"fine-tuning-with-the-trl-cli",headingTag:"h2"}}),A=new Q({props:{title:"Basic Usage",local:"basic-usage",headingTag:"h3"}}),V=new Mt({props:{id:"trainer",options:["SFT","DPO","Reward","GRPO","RLOO","KTO"],$$slots:{default:[Ge]},$$scope:{ctx:_}}}),D=new Q({props:{title:"Using Configuration Files",local:"using-configuration-files",headingTag:"h3"}}),S=new Mt({props:{id:"trainer",options:["SFT","DPO","Reward","GRPO","RLOO","KTO"],$$slots:{default:[ze]},$$scope:{ctx:_}}}),q=new Q({props:{title:"Scaling Up with Accelerate",local:"scaling-up-with-accelerate",headingTag:"h3"}}),k=new Mt({props:{id:"trainer",options:["SFT","DPO","Reward","GRPO","RLOO","KTO"],$$slots:{default:[qe]},$$scope:{ctx:_}}}),et=new Q({props:{title:"Using --accelerate_config for Accelerate Configuration",local:"using---accelerateconfig-for-accelerate-configuration",headingTag:"h3"}}),nt=new Q({props:{title:"Predefined Config Profiles",local:"predefined-config-profiles",headingTag:"h4"}}),ft=new Q({props:{title:"Example Usage",local:"example-usage",headingTag:"h4"}}),x=new Mt({props:{id:"trainer",options:["SFT","DPO","Reward","GRPO","RLOO","KTO"],$$slots:{default:[al]},$$scope:{ctx:_}}}),mt=new Q({props:{title:"Using dataset mixtures",local:"using-dataset-mixtures",headingTag:"h3"}}),N=new Mt({props:{id:"trainer",options:["SFT","DPO","Reward","GRPO","RLOO","KTO"],$$slots:{default:[ol]},$$scope:{ctx:_}}}),ot=new Q({props:{title:"Getting the System Information",local:"getting-the-system-information",headingTag:"h2"}}),wt=new Z({props:{code:"dHJsJTIwZW52",highlighted:'trl <span class="hljs-built_in">env</span>',wrap:!1}}),gt=new Z({props:{code:"Q29weS1wYXN0ZSUyMHRoZSUyMGZvbGxvd2luZyUyMGluZm9ybWF0aW9uJTIwd2hlbiUyMHJlcG9ydGluZyUyMGFuJTIwaXNzdWUlM0ElMEElMEEtJTIwUGxhdGZvcm0lM0ElMjBMaW51eC01LjE1LjAtMTA0OC1hd3MteDg2XzY0LXdpdGgtZ2xpYmMyLjMxJTBBLSUyMFB5dGhvbiUyMHZlcnNpb24lM0ElMjAzLjExLjklMEEtJTIwUHlUb3JjaCUyMHZlcnNpb24lM0ElMjAyLjQuMSUwQS0lMjBhY2NlbGVyYXRvcihzKSUzQSUyME5WSURJQSUyMEgxMDAlMjA4MEdCJTIwSEJNMyUwQS0lMjBUcmFuc2Zvcm1lcnMlMjB2ZXJzaW9uJTNBJTIwNC40NS4wLmRldjAlMEEtJTIwQWNjZWxlcmF0ZSUyMHZlcnNpb24lM0ElMjAwLjM0LjIlMEEtJTIwQWNjZWxlcmF0ZSUyMGNvbmZpZyUzQSUyMCUwQSUyMCUyMC0lMjBjb21wdXRlX2Vudmlyb25tZW50JTNBJTIwTE9DQUxfTUFDSElORSUwQSUyMCUyMC0lMjBkaXN0cmlidXRlZF90eXBlJTNBJTIwREVFUFNQRUVEJTBBJTIwJTIwLSUyMG1peGVkX3ByZWNpc2lvbiUzQSUyMG5vJTBBJTIwJTIwLSUyMHVzZV9jcHUlM0ElMjBGYWxzZSUwQSUyMCUyMC0lMjBkZWJ1ZyUzQSUyMEZhbHNlJTBBJTIwJTIwLSUyMG51bV9wcm9jZXNzZXMlM0ElMjA0JTBBJTIwJTIwLSUyMG1hY2hpbmVfcmFuayUzQSUyMDAlMEElMjAlMjAtJTIwbnVtX21hY2hpbmVzJTNBJTIwMSUwQSUyMCUyMC0lMjByZHp2X2JhY2tlbmQlM0ElMjBzdGF0aWMlMEElMjAlMjAtJTIwc2FtZV9uZXR3b3JrJTNBJTIwVHJ1ZSUwQSUyMCUyMC0lMjBtYWluX3RyYWluaW5nX2Z1bmN0aW9uJTNBJTIwbWFpbiUwQSUyMCUyMC0lMjBlbmFibGVfY3B1X2FmZmluaXR5JTNBJTIwRmFsc2UlMEElMjAlMjAtJTIwZGVlcHNwZWVkX2NvbmZpZyUzQSUyMCU3QidncmFkaWVudF9hY2N1bXVsYXRpb25fc3RlcHMnJTNBJTIwNCUyQyUyMCdvZmZsb2FkX29wdGltaXplcl9kZXZpY2UnJTNBJTIwJ25vbmUnJTJDJTIwJ29mZmxvYWRfcGFyYW1fZGV2aWNlJyUzQSUyMCdub25lJyUyQyUyMCd6ZXJvM19pbml0X2ZsYWcnJTNBJTIwRmFsc2UlMkMlMjAnemVyb19zdGFnZSclM0ElMjAyJTdEJTBBJTIwJTIwLSUyMGRvd25jYXN0X2JmMTYlM0ElMjBubyUwQSUyMCUyMC0lMjB0cHVfdXNlX2NsdXN0ZXIlM0ElMjBGYWxzZSUwQSUyMCUyMC0lMjB0cHVfdXNlX3N1ZG8lM0ElMjBGYWxzZSUwQSUyMCUyMC0lMjB0cHVfZW52JTNBJTIwJTVCJTVEJTBBLSUyMERhdGFzZXRzJTIwdmVyc2lvbiUzQSUyMDMuMC4wJTBBLSUyMEhGJTIwSHViJTIwdmVyc2lvbiUzQSUyMDAuMjQuNyUwQS0lMjBUUkwlMjB2ZXJzaW9uJTNBJTIwMC4xMi4wLmRldjAlMkJhY2I0ZDcwJTBBLSUyMGJpdHNhbmRieXRlcyUyMHZlcnNpb24lM0ElMjAwLjQxLjElMEEtJTIwRGVlcFNwZWVkJTIwdmVyc2lvbiUzQSUyMDAuMTUuMSUwQS0lMjBEaWZmdXNlcnMlMjB2ZXJzaW9uJTNBJTIwMC4zMC4zJTBBLSUyMExpZ2VyLUtlcm5lbCUyMHZlcnNpb24lM0ElMjAwLjMuMCUwQS0lMjBMTE0tQmxlbmRlciUyMHZlcnNpb24lM0ElMjAwLjAuMiUwQS0lMjBPcGVuQUklMjB2ZXJzaW9uJTNBJTIwMS40Ni4wJTBBLSUyMFBFRlQlMjB2ZXJzaW9uJTNBJTIwMC4xMi4wJTBBLSUyMHZMTE0lMjB2ZXJzaW9uJTNBJTIwbm90JTIwaW5zdGFsbGVk",highlighted:`Copy-paste the following information when reporting an issue: | |
| - Platform: Linux-5.15.0-1048-aws-x86_64-with-glibc2.31 | |
| - Python version: 3.11.9 | |
| - PyTorch version: 2.4.1 | |
| - accelerator(s): NVIDIA H100 80GB HBM3 | |
| - Transformers version: 4.45.0.dev0 | |
| - Accelerate version: 0.34.2 | |
| - Accelerate config: | |
| - compute_environment: LOCAL_MACHINE | |
| - distributed_type: DEEPSPEED | |
| - mixed_precision: no | |
| - use_cpu: False | |
| - debug: False | |
| - num_processes: 4 | |
| - machine_rank: 0 | |
| - num_machines: 1 | |
| - rdzv_backend: static | |
| - same_network: True | |
| - main_training_function: main | |
| - enable_cpu_affinity: False | |
| - deepspeed_config: {'gradient_accumulation_steps': 4, 'offload_optimizer_device': 'none', 'offload_param_device': 'none', 'zero3_init_flag': False, 'zero_stage': 2} | |
| - downcast_bf16: no | |
| - tpu_use_cluster: False | |
| - tpu_use_sudo: False | |
| - tpu_env: [] | |
| - Datasets version: 3.0.0 | |
| - HF Hub version: 0.24.7 | |
| - TRL version: 0.12.0.dev0+acb4d70 | |
| - bitsandbytes version: 0.41.1 | |
| - DeepSpeed version: 0.15.1 | |
| - Diffusers version: 0.30.3 | |
| - Liger-Kernel version: 0.3.0 | |
| - LLM-Blender version: 0.0.2 | |
| - OpenAI version: 1.46.0 | |
| - PEFT version: 0.12.0 | |
| - vLLM version: not installed`,wrap:!1}}),ht=new Be({props:{source:"https://github.com/huggingface/trl/blob/main/docs/source/clis.md"}}),{c(){s=W("meta"),m=w(),l=W("p"),b=w(),g(i.$$.fragment),p=w(),g($.$$.fragment),e=w(),o=W("p"),o.textContent=U,c=w(),g(j.$$.fragment),t=w(),n=W("p"),n.textContent=L,v=w(),g(F.$$.fragment),Y=w(),R=W("ul"),R.innerHTML=G,Tt=w(),g(z.$$.fragment),bt=w(),H=W("ul"),H.innerHTML=le,yt=w(),g(E.$$.fragment),jt=w(),g(A.$$.fragment),Ut=w(),P=W("p"),P.textContent=se,_t=w(),g(V.$$.fragment),Zt=w(),g(D.$$.fragment),Wt=w(),O=W("p"),O.textContent=ne,Ct=w(),g(S.$$.fragment),It=w(),g(q.$$.fragment),Bt=w(),K=W("p"),K.innerHTML=ae,Xt=w(),tt=W("p"),tt.innerHTML=pe,Ft=w(),g(k.$$.fragment),Rt=w(),g(et.$$.fragment),vt=w(),lt=W("p"),lt.innerHTML=re,Yt=w(),st=W("ul"),st.innerHTML=fe,Lt=w(),g(nt.$$.fragment),Gt=w(),at=W("p"),at.textContent=me,Qt=w(),pt=W("table"),pt.innerHTML=ie,Vt=w(),rt=W("p"),rt.innerHTML=$e,St=w(),g(ft.$$.fragment),kt=w(),g(x.$$.fragment),xt=w(),g(mt.$$.fragment),Nt=w(),it=W("p"),it.textContent=oe,zt=w(),g(N.$$.fragment),Ht=w(),$t=W("p"),$t.innerHTML=ce,Et=w(),g(ot.$$.fragment),At=w(),ct=W("p"),ct.textContent=we,Pt=w(),g(wt.$$.fragment),Dt=w(),ut=W("p"),ut.textContent=ue,Ot=w(),g(gt.$$.fragment),qt=w(),Jt=W("p"),Jt.textContent=ge,Kt=w(),g(ht.$$.fragment),te=w(),dt=W("p"),this.h()},l(a){const y=We("svelte-u9bgzb",document.head);s=C(y,"META",{name:!0,content:!0}),y.forEach(r),m=u(a),l=C(a,"P",{}),be(l).forEach(r),b=u(a),J(i.$$.fragment,a),p=u(a),J($.$$.fragment,a),e=u(a),o=C(a,"P",{"data-svelte-h":!0}),I(o)!=="svelte-t2cqvz"&&(o.textContent=U),c=u(a),J(j.$$.fragment,a),t=u(a),n=C(a,"P",{"data-svelte-h":!0}),I(n)!=="svelte-ygyifu"&&(n.textContent=L),v=u(a),J(F.$$.fragment,a),Y=u(a),R=C(a,"UL",{"data-svelte-h":!0}),I(R)!=="svelte-1d74btz"&&(R.innerHTML=G),Tt=u(a),J(z.$$.fragment,a),bt=u(a),H=C(a,"UL",{"data-svelte-h":!0}),I(H)!=="svelte-edzbrw"&&(H.innerHTML=le),yt=u(a),J(E.$$.fragment,a),jt=u(a),J(A.$$.fragment,a),Ut=u(a),P=C(a,"P",{"data-svelte-h":!0}),I(P)!=="svelte-18cyqau"&&(P.textContent=se),_t=u(a),J(V.$$.fragment,a),Zt=u(a),J(D.$$.fragment,a),Wt=u(a),O=C(a,"P",{"data-svelte-h":!0}),I(O)!=="svelte-b5yyxt"&&(O.textContent=ne),Ct=u(a),J(S.$$.fragment,a),It=u(a),J(q.$$.fragment,a),Bt=u(a),K=C(a,"P",{"data-svelte-h":!0}),I(K)!=="svelte-azbtdl"&&(K.innerHTML=ae),Xt=u(a),tt=C(a,"P",{"data-svelte-h":!0}),I(tt)!=="svelte-184kioa"&&(tt.innerHTML=pe),Ft=u(a),J(k.$$.fragment,a),Rt=u(a),J(et.$$.fragment,a),vt=u(a),lt=C(a,"P",{"data-svelte-h":!0}),I(lt)!=="svelte-z1d0qa"&&(lt.innerHTML=re),Yt=u(a),st=C(a,"UL",{"data-svelte-h":!0}),I(st)!=="svelte-1agl91y"&&(st.innerHTML=fe),Lt=u(a),J(nt.$$.fragment,a),Gt=u(a),at=C(a,"P",{"data-svelte-h":!0}),I(at)!=="svelte-clmz98"&&(at.textContent=me),Qt=u(a),pt=C(a,"TABLE",{"data-svelte-h":!0}),I(pt)!=="svelte-argtw3"&&(pt.innerHTML=ie),Vt=u(a),rt=C(a,"P",{"data-svelte-h":!0}),I(rt)!=="svelte-k2cgsp"&&(rt.innerHTML=$e),St=u(a),J(ft.$$.fragment,a),kt=u(a),J(x.$$.fragment,a),xt=u(a),J(mt.$$.fragment,a),Nt=u(a),it=C(a,"P",{"data-svelte-h":!0}),I(it)!=="svelte-11qebc7"&&(it.textContent=oe),zt=u(a),J(N.$$.fragment,a),Ht=u(a),$t=C(a,"P",{"data-svelte-h":!0}),I($t)!=="svelte-7148la"&&($t.innerHTML=ce),Et=u(a),J(ot.$$.fragment,a),At=u(a),ct=C(a,"P",{"data-svelte-h":!0}),I(ct)!=="svelte-1ncagwe"&&(ct.textContent=we),Pt=u(a),J(wt.$$.fragment,a),Dt=u(a),ut=C(a,"P",{"data-svelte-h":!0}),I(ut)!=="svelte-13mw8c0"&&(ut.textContent=ue),Ot=u(a),J(gt.$$.fragment,a),qt=u(a),Jt=C(a,"P",{"data-svelte-h":!0}),I(Jt)!=="svelte-g7cib5"&&(Jt.textContent=ge),Kt=u(a),J(ht.$$.fragment,a),te=u(a),dt=C(a,"P",{}),be(dt).forEach(r),this.h()},h(){ye(s,"name","hf:doc:metadata"),ye(s,"content",wl)},m(a,y){Ce(document.head,s),f(a,m,y),f(a,l,y),f(a,b,y),h(i,a,y),f(a,p,y),h($,a,y),f(a,e,y),f(a,o,y),f(a,c,y),h(j,a,y),f(a,t,y),f(a,n,y),f(a,v,y),h(F,a,y),f(a,Y,y),f(a,R,y),f(a,Tt,y),h(z,a,y),f(a,bt,y),f(a,H,y),f(a,yt,y),h(E,a,y),f(a,jt,y),h(A,a,y),f(a,Ut,y),f(a,P,y),f(a,_t,y),h(V,a,y),f(a,Zt,y),h(D,a,y),f(a,Wt,y),f(a,O,y),f(a,Ct,y),h(S,a,y),f(a,It,y),h(q,a,y),f(a,Bt,y),f(a,K,y),f(a,Xt,y),f(a,tt,y),f(a,Ft,y),h(k,a,y),f(a,Rt,y),h(et,a,y),f(a,vt,y),f(a,lt,y),f(a,Yt,y),f(a,st,y),f(a,Lt,y),h(nt,a,y),f(a,Gt,y),f(a,at,y),f(a,Qt,y),f(a,pt,y),f(a,Vt,y),f(a,rt,y),f(a,St,y),h(ft,a,y),f(a,kt,y),h(x,a,y),f(a,xt,y),h(mt,a,y),f(a,Nt,y),f(a,it,y),f(a,zt,y),h(N,a,y),f(a,Ht,y),f(a,$t,y),f(a,Et,y),h(ot,a,y),f(a,At,y),f(a,ct,y),f(a,Pt,y),h(wt,a,y),f(a,Dt,y),f(a,ut,y),f(a,Ot,y),h(gt,a,y),f(a,qt,y),f(a,Jt,y),f(a,Kt,y),h(ht,a,y),f(a,te,y),f(a,dt,y),ee=!0},p(a,[y]){const Je={};y&2&&(Je.$$scope={dirty:y,ctx:a}),V.$set(Je);const he={};y&2&&(he.$$scope={dirty:y,ctx:a}),S.$set(he);const de={};y&2&&(de.$$scope={dirty:y,ctx:a}),k.$set(de);const Me={};y&2&&(Me.$$scope={dirty:y,ctx:a}),x.$set(Me);const Te={};y&2&&(Te.$$scope={dirty:y,ctx:a}),N.$set(Te)},i(a){ee||(d(i.$$.fragment,a),d($.$$.fragment,a),d(j.$$.fragment,a),d(F.$$.fragment,a),d(z.$$.fragment,a),d(E.$$.fragment,a),d(A.$$.fragment,a),d(V.$$.fragment,a),d(D.$$.fragment,a),d(S.$$.fragment,a),d(q.$$.fragment,a),d(k.$$.fragment,a),d(et.$$.fragment,a),d(nt.$$.fragment,a),d(ft.$$.fragment,a),d(x.$$.fragment,a),d(mt.$$.fragment,a),d(N.$$.fragment,a),d(ot.$$.fragment,a),d(wt.$$.fragment,a),d(gt.$$.fragment,a),d(ht.$$.fragment,a),ee=!0)},o(a){M(i.$$.fragment,a),M($.$$.fragment,a),M(j.$$.fragment,a),M(F.$$.fragment,a),M(z.$$.fragment,a),M(E.$$.fragment,a),M(A.$$.fragment,a),M(V.$$.fragment,a),M(D.$$.fragment,a),M(S.$$.fragment,a),M(q.$$.fragment,a),M(k.$$.fragment,a),M(et.$$.fragment,a),M(nt.$$.fragment,a),M(ft.$$.fragment,a),M(x.$$.fragment,a),M(mt.$$.fragment,a),M(N.$$.fragment,a),M(ot.$$.fragment,a),M(wt.$$.fragment,a),M(gt.$$.fragment,a),M(ht.$$.fragment,a),ee=!1},d(a){a&&(r(m),r(l),r(b),r(p),r(e),r(o),r(c),r(t),r(n),r(v),r(Y),r(R),r(Tt),r(bt),r(H),r(yt),r(jt),r(Ut),r(P),r(_t),r(Zt),r(Wt),r(O),r(Ct),r(It),r(Bt),r(K),r(Xt),r(tt),r(Ft),r(Rt),r(vt),r(lt),r(Yt),r(st),r(Lt),r(Gt),r(at),r(Qt),r(pt),r(Vt),r(rt),r(St),r(kt),r(xt),r(Nt),r(it),r(zt),r(Ht),r($t),r(Et),r(At),r(ct),r(Pt),r(Dt),r(ut),r(Ot),r(qt),r(Jt),r(Kt),r(te),r(dt)),r(s),T(i,a),T($,a),T(j,a),T(F,a),T(z,a),T(E,a),T(A,a),T(V,a),T(D,a),T(S,a),T(q,a),T(k,a),T(et,a),T(nt,a),T(ft,a),T(x,a),T(mt,a),T(N,a),T(ot,a),T(wt,a),T(gt,a),T(ht,a)}}}const wl='{"title":"Command Line Interfaces (CLIs)","local":"command-line-interfaces-clis","sections":[{"title":"Commands","local":"commands","sections":[{"title":"Training Commands","local":"training-commands","sections":[],"depth":3},{"title":"Other Commands","local":"other-commands","sections":[],"depth":3}],"depth":2},{"title":"Fine-Tuning with the TRL CLI","local":"fine-tuning-with-the-trl-cli","sections":[{"title":"Basic Usage","local":"basic-usage","sections":[],"depth":3},{"title":"Using Configuration Files","local":"using-configuration-files","sections":[],"depth":3},{"title":"Scaling Up with Accelerate","local":"scaling-up-with-accelerate","sections":[],"depth":3},{"title":"Using --accelerate_config for Accelerate Configuration","local":"using---accelerateconfig-for-accelerate-configuration","sections":[{"title":"Predefined Config Profiles","local":"predefined-config-profiles","sections":[],"depth":4},{"title":"Example Usage","local":"example-usage","sections":[],"depth":4}],"depth":3},{"title":"Using dataset mixtures","local":"using-dataset-mixtures","sections":[],"depth":3}],"depth":2},{"title":"Getting the System Information","local":"getting-the-system-information","sections":[],"depth":2}],"depth":1}';function ul(_){return Ue(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Tl extends _e{constructor(s){super(),Ze(this,s,ul,cl,je,{})}}export{Tl as component}; | |
Xet Storage Details
- Size:
- 69.9 kB
- Xet hash:
- 9a69c12adeaa0cbc0864f1510478a7168ffe533be88c5bf4a5d3a60162d1be35
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.