Buckets:
// Compiled Svelte page module for the lighteval documentation page
// "Use SGLang as backend".
//
// NOTE(review): this is generated build output. The `source` prop passed to the
// `ds` component below points at docs/source/use-sglang-as-backend.mdx in the
// lighteval repository; content changes presumably belong there — confirm.
//
// The single-letter imports are helpers from the bundled runtime chunks. Their
// exact semantics are not visible in this file; the comments below describe
// only how they are used here.
import { s as Ts, o as os, n as is } from "../chunks/scheduler.7da89386.js";
import {
  S as Js,
  i as Ms,
  g as u,
  s as p,
  r as c,
  A as us,
  h as w,
  f as a,
  c as m,
  j as ps,
  u as i,
  x as v,
  k as ms,
  y as ws,
  a as e,
  v as T,
  d as o,
  t as J,
  w as M,
} from "../chunks/index.20910acc.js";
import { T as rs } from "../chunks/Tip.53e22153.js";
import { C as x } from "../chunks/CodeBlock.143bd81e.js";
import { H as cs, E as ds } from "../chunks/index.c9cd5e8b.js";

/**
 * Default-slot fragment for the informational tip (`warning: false`):
 * a single <p> linking to the sglang server-arguments documentation.
 *
 * @param {*} W - Fragment context; not read by this fragment.
 * @returns {{c: Function, l: Function, m: Function, p: Function, d: Function}}
 *   Fragment object with create (c), claim (l), mount (m), update (p) and
 *   destroy (d) hooks.
 */
function fs(W) {
  let t;
  const r =
    'Documentation for the config file of sglang can be found <a href="https://docs.sglang.ai/backend/server_arguments.html" rel="nofollow">here</a>';

  return {
    c() {
      t = u("p");
      t.innerHTML = r;
    },
    l(n) {
      t = w(n, "P", { "data-svelte-h": true });
      // Only overwrite the claimed markup when its hash differs from the
      // compile-time hash of `r`.
      if (v(t) !== "svelte-1lvb592") {
        t.innerHTML = r;
      }
    },
    m(n, d) {
      e(n, t, d);
    },
    p: is,
    d(n) {
      if (n) {
        a(t);
      }
    },
  };
}

/**
 * Default-slot fragment for the warning tip (`warning: true`):
 * a single <p> with advice for out-of-memory situations.
 *
 * @param {*} W - Fragment context; not read by this fragment.
 * @returns {{c: Function, l: Function, m: Function, p: Function, d: Function}}
 *   Fragment object with create (c), claim (l), mount (m), update (p) and
 *   destroy (d) hooks.
 */
function hs(W) {
  let t;
  const r = [
    "In the case of OOM issues, you might need to reduce the context size of the",
    "model as well as reduce the <code>mem_fraction_static</code> and <code>chunked_prefill_size</code> parameter.",
  ].join("\n");

  return {
    c() {
      t = u("p");
      t.innerHTML = r;
    },
    l(n) {
      t = w(n, "P", { "data-svelte-h": true });
      if (v(t) !== "svelte-11bvtvc") {
        t.innerHTML = r;
      }
    },
    m(n, d) {
      e(n, t, d);
    },
    p: is,
    d(n) {
      if (n) {
        a(t);
      }
    },
  };
}

/**
 * Top-level fragment of the page.
 *
 * Local bindings, in document order:
 *   t                      <meta name="hf:doc:metadata"> appended to document.head
 *   n, X                   empty <p> elements at the start and end of the page
 *   j, B                   heading components (h1 page title, h2 "Use a config file")
 *   I, b, y, U, Z          <p> elements filled from K, ss, ls, as, es
 *   g, $, _, A, G          code-block components (3 CLI samples, config-file CLI, YAML)
 *   f, h                   tip components (info tip -> fs, warning tip -> hs)
 *   E                      component receiving the GitHub `source` URL of the page
 *   r, d, N, C, H, Y, k, V, F, S, z, Q, R, q, L, D, P
 *                          separator nodes returned by p() / m(s)
 *   O                      flag set once mounted / intro'd, cleared on outro
 *
 * @param {*} W - Fragment context, forwarded as `$$scope.ctx` to both tips.
 * @returns {object} Fragment object with c/l/h/m/p/i/o/d hooks.
 */
function js(W) {
  let t, r, n, d, j, N, I, C, g, H, b, Y, y, k, $, V, U, F, _, S, B, z, Z, Q;
  let A, R, f, q, G, L, h, D, E, P, X, O;

  // Paragraph HTML. Lines are joined with "\n" exactly as in the page source.
  const K = [
    "Lighteval allows you to use <code>sglang</code> as backend allowing great speedups.",
    "To use, simply change the <code>model_args</code> to reflect the arguments you want to pass to sglang.",
  ].join("\n");
  const ss = [
    "<code>sglang</code> is able to distribute the model across multiple GPUs using data",
    "parallelism and tensor parallelism.",
    "You can choose the parallelism method by setting in the <code>model_args</code>.",
  ].join("\n");
  const ls =
    "For example if you have 4 GPUs you can split it across using <code>tp_size</code>:";
  const as =
    "Or, if your model fits on a single GPU, you can use <code>dp_size</code> to speed up the evaluation:";
  const es = [
    "For more advanced configurations, you can use a config file for the model.",
    "An example of a config file is shown below and can be found at <code>examples/model_configs/sglang_model_config.yaml</code>.",
  ].join("\n");

  j = new cs({
    props: {
      title: "Use SGLang as backend",
      local: "use-sglang-as-backend",
      headingTag: "h1",
    },
  });

  // For every code block, `code` is the base64 of the percent-encoded plain
  // text and `highlighted` is the same text as highlight.js markup; the
  // indentation of the continuation lines below matches the decoded `code`.
  g = new x({
    props: {
      code: "bGlnaHRldmFsJTIwc2dsYW5nJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIybW9kZWxfbmFtZSUzREh1Z2dpbmdGYWNlSDQlMkZ6ZXBoeXItN2ItYmV0YSUyQ2R0eXBlJTNEZmxvYXQxNiUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMmxlYWRlcmJvYXJkJTdDdHJ1dGhmdWxxYSUzQW1jJTdDMCU3QzAlMjI=",
      highlighted: [
        "lighteval sglang \\",
        '    <span class="hljs-string">"model_name=HuggingFaceH4/zephyr-7b-beta,dtype=float16"</span> \\',
        '    <span class="hljs-string">"leaderboard|truthfulqa:mc|0|0"</span>',
      ].join("\n"),
      wrap: false,
    },
  });

  $ = new x({
    props: {
      code: "bGlnaHRldmFsJTIwc2dsYW5nJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIybW9kZWxfbmFtZSUzREh1Z2dpbmdGYWNlSDQlMkZ6ZXBoeXItN2ItYmV0YSUyQ2R0eXBlJTNEZmxvYXQxNiUyQ3RwX3NpemUlM0Q0JTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIybGVhZGVyYm9hcmQlN0N0cnV0aGZ1bHFhJTNBbWMlN0MwJTdDMCUyMg==",
      highlighted: [
        "lighteval sglang \\",
        '    <span class="hljs-string">"model_name=HuggingFaceH4/zephyr-7b-beta,dtype=float16,tp_size=4"</span> \\',
        '    <span class="hljs-string">"leaderboard|truthfulqa:mc|0|0"</span>',
      ].join("\n"),
      wrap: false,
    },
  });

  _ = new x({
    props: {
      code: "bGlnaHRldmFsJTIwc2dsYW5nJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIybW9kZWxfbmFtZSUzREh1Z2dpbmdGYWNlSDQlMkZ6ZXBoeXItN2ItYmV0YSUyQ2R0eXBlJTNEZmxvYXQxNiUyQ2RwX3NpemUlM0Q0JTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIybGVhZGVyYm9hcmQlN0N0cnV0aGZ1bHFhJTNBbWMlN0MwJTdDMCUyMg==",
      highlighted: [
        "lighteval sglang \\",
        '    <span class="hljs-string">"model_name=HuggingFaceH4/zephyr-7b-beta,dtype=float16,dp_size=4"</span> \\',
        '    <span class="hljs-string">"leaderboard|truthfulqa:mc|0|0"</span>',
      ].join("\n"),
      wrap: false,
    },
  });

  B = new cs({
    props: {
      title: "Use a config file",
      local: "use-a-config-file",
      headingTag: "h2",
    },
  });

  A = new x({
    props: {
      code: "bGlnaHRldmFsJTIwc2dsYW5nJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIyZXhhbXBsZXMlMkZtb2RlbF9jb25maWdzJTJGc2dsYW5nX21vZGVsX2NvbmZpZy55YW1sJTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIybGVhZGVyYm9hcmQlN0N0cnV0aGZ1bHFhJTNBbWMlN0MwJTdDMCUyMg==",
      highlighted: [
        "lighteval sglang \\",
        '    <span class="hljs-string">"examples/model_configs/sglang_model_config.yaml"</span> \\',
        '    <span class="hljs-string">"leaderboard|truthfulqa:mc|0|0"</span>',
      ].join("\n"),
      wrap: false,
    },
  });

  f = new rs({
    props: {
      warning: false,
      $$slots: { default: [fs] },
      $$scope: { ctx: W },
    },
  });

  G = new x({
    props: {
      code: "bW9kZWxfcGFyYW1ldGVycyUzQSUwQSUyMCUyMCUyMCUyMG1vZGVsX25hbWUlM0ElMjAlMjJIdWdnaW5nRmFjZVRCJTJGU21vbExNLTEuN0ItSW5zdHJ1Y3QlMjIlMEElMjAlMjAlMjAlMjBkdHlwZSUzQSUyMCUyMmF1dG8lMjIlMEElMjAlMjAlMjAlMjB0cF9zaXplJTNBJTIwMSUwQSUyMCUyMCUyMCUyMGRwX3NpemUlM0ElMjAxJTBBJTIwJTIwJTIwJTIwY29udGV4dF9sZW5ndGglM0ElMjBudWxsJTBBJTIwJTIwJTIwJTIwcmFuZG9tX3NlZWQlM0ElMjAxJTBBJTIwJTIwJTIwJTIwdHJ1c3RfcmVtb3RlX2NvZGUlM0ElMjBGYWxzZSUwQSUyMCUyMCUyMCUyMHVzZV9jaGF0X3RlbXBsYXRlJTNBJTIwRmFsc2UlMEElMjAlMjAlMjAlMjBkZXZpY2UlM0ElMjAlMjJjdWRhJTIyJTBBJTIwJTIwJTIwJTIwc2tpcF90b2tlbml6ZXJfaW5pdCUzQSUyMEZhbHNlJTBBJTIwJTIwJTIwJTIwa3ZfY2FjaGVfZHR5cGUlM0ElMjAlMjJhdXRvJTIyJTBBJTIwJTIwJTIwJTIwYWRkX3NwZWNpYWxfdG9rZW5zJTNBJTIwVHJ1ZSUwQSUyMCUyMCUyMCUyMHBhaXJ3aXNlX3Rva2VuaXphdGlvbiUzQSUyMEZhbHNlJTBBJTIwJTIwJTIwJTIwc2FtcGxpbmdfYmFja2VuZCUzQSUyMG51bGwlMEElMjAlMjAlMjAlMjBhdHRlbnRpb25fYmFja2VuZCUzQSUyMG51bGwlMEElMjAlMjAlMjAlMjBtZW1fZnJhY3Rpb25fc3RhdGljJTNBJTIwMC44JTBBJTIwJTIwJTIwJTIwY2h1bmtlZF9wcmVmaWxsX3NpemUlM0ElMjA0MDk2JTBBJTIwJTIwJTIwJTIwZ2VuZXJhdGlvbl9wYXJhbWV0ZXJzJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwbWF4X25ld190b2tlbnMlM0ElMjAxMDI0JTBBJTIwJTIwJTIwJTIwJTIwJTIwbWluX25ld190b2tlbnMlM0ElMjAwJTBBJTIwJTIwJTIwJTIwJTIwJTIwdGVtcGVyYXR1cmUlM0ElMjAxLjAlMEElMjAlMjAlMjAlMjAlMjAlMjB0b3BfayUzQSUyMDUwJTBBJTIwJTIwJTIwJTIwJTIwJTIwbWluX3AlM0ElMjAwLjAlMEElMjAlMjAlMjAlMjAlMjAlMjB0b3BfcCUzQSUyMDEuMCUwQSUyMCUyMCUyMCUyMCUyMCUyMHByZXNlbmNlX3BlbmFsdHklM0ElMjAwLjAlMEElMjAlMjAlMjAlMjAlMjAlMjByZXBldGl0aW9uX3BlbmFsdHklM0ElMjAxLjAlMEElMjAlMjAlMjAlMjAlMjAlMjBmcmVxdWVuY3lfcGVuYWx0eSUzQSUyMDAuMA==",
      highlighted: [
        '<span class="hljs-attr">model_parameters:</span>',
        '    <span class="hljs-attr">model_name:</span> <span class="hljs-string">"HuggingFaceTB/SmolLM-1.7B-Instruct"</span>',
        '    <span class="hljs-attr">dtype:</span> <span class="hljs-string">"auto"</span>',
        '    <span class="hljs-attr">tp_size:</span> <span class="hljs-number">1</span>',
        '    <span class="hljs-attr">dp_size:</span> <span class="hljs-number">1</span>',
        '    <span class="hljs-attr">context_length:</span> <span class="hljs-literal">null</span>',
        '    <span class="hljs-attr">random_seed:</span> <span class="hljs-number">1</span>',
        '    <span class="hljs-attr">trust_remote_code:</span> <span class="hljs-literal">False</span>',
        '    <span class="hljs-attr">use_chat_template:</span> <span class="hljs-literal">False</span>',
        '    <span class="hljs-attr">device:</span> <span class="hljs-string">"cuda"</span>',
        '    <span class="hljs-attr">skip_tokenizer_init:</span> <span class="hljs-literal">False</span>',
        '    <span class="hljs-attr">kv_cache_dtype:</span> <span class="hljs-string">"auto"</span>',
        '    <span class="hljs-attr">add_special_tokens:</span> <span class="hljs-literal">True</span>',
        '    <span class="hljs-attr">pairwise_tokenization:</span> <span class="hljs-literal">False</span>',
        '    <span class="hljs-attr">sampling_backend:</span> <span class="hljs-literal">null</span>',
        '    <span class="hljs-attr">attention_backend:</span> <span class="hljs-literal">null</span>',
        '    <span class="hljs-attr">mem_fraction_static:</span> <span class="hljs-number">0.8</span>',
        '    <span class="hljs-attr">chunked_prefill_size:</span> <span class="hljs-number">4096</span>',
        '    <span class="hljs-attr">generation_parameters:</span>',
        '      <span class="hljs-attr">max_new_tokens:</span> <span class="hljs-number">1024</span>',
        '      <span class="hljs-attr">min_new_tokens:</span> <span class="hljs-number">0</span>',
        '      <span class="hljs-attr">temperature:</span> <span class="hljs-number">1.0</span>',
        '      <span class="hljs-attr">top_k:</span> <span class="hljs-number">50</span>',
        '      <span class="hljs-attr">min_p:</span> <span class="hljs-number">0.0</span>',
        '      <span class="hljs-attr">top_p:</span> <span class="hljs-number">1.0</span>',
        '      <span class="hljs-attr">presence_penalty:</span> <span class="hljs-number">0.0</span>',
        '      <span class="hljs-attr">repetition_penalty:</span> <span class="hljs-number">1.0</span>',
        '      <span class="hljs-attr">frequency_penalty:</span> <span class="hljs-number">0.0</span>',
      ].join("\n"),
      wrap: false,
    },
  });

  h = new rs({
    props: {
      warning: true,
      $$slots: { default: [hs] },
      $$scope: { ctx: W },
    },
  });

  E = new ds({
    props: {
      source:
        "https://github.com/huggingface/lighteval/blob/main/docs/source/use-sglang-as-backend.mdx",
    },
  });

  // Child components in document order; the bindings are never reassigned
  // after this point, so the list can be shared by the i/o/d hooks.
  const components = [j, g, $, _, B, A, f, G, h, E];

  return {
    // Create: build every node/component fragment from scratch.
    c() {
      t = u("meta");
      r = p();
      n = u("p");
      d = p();
      c(j.$$.fragment);
      N = p();
      I = u("p");
      I.innerHTML = K;
      C = p();
      c(g.$$.fragment);
      H = p();
      b = u("p");
      b.innerHTML = ss;
      Y = p();
      y = u("p");
      y.innerHTML = ls;
      k = p();
      c($.$$.fragment);
      V = p();
      U = u("p");
      U.innerHTML = as;
      F = p();
      c(_.$$.fragment);
      S = p();
      c(B.$$.fragment);
      z = p();
      Z = u("p");
      Z.innerHTML = es;
      Q = p();
      c(A.$$.fragment);
      R = p();
      c(f.$$.fragment);
      q = p();
      c(G.$$.fragment);
      L = p();
      c(h.$$.fragment);
      D = p();
      c(E.$$.fragment);
      P = p();
      X = u("p");
      this.h();
    },

    // Claim: same sequence as c(), but taking nodes from the list `s`.
    // Paragraph HTML is only rewritten when the claimed node's hash differs.
    l(s) {
      const l = us("svelte-u9bgzb", document.head);
      t = w(l, "META", { name: true, content: true });
      l.forEach(a);
      r = m(s);
      n = w(s, "P", {});
      ps(n).forEach(a);
      d = m(s);
      i(j.$$.fragment, s);
      N = m(s);
      I = w(s, "P", { "data-svelte-h": true });
      if (v(I) !== "svelte-b3ur2j") {
        I.innerHTML = K;
      }
      C = m(s);
      i(g.$$.fragment, s);
      H = m(s);
      b = w(s, "P", { "data-svelte-h": true });
      if (v(b) !== "svelte-1l2vbvu") {
        b.innerHTML = ss;
      }
      Y = m(s);
      y = w(s, "P", { "data-svelte-h": true });
      if (v(y) !== "svelte-6f58xy") {
        y.innerHTML = ls;
      }
      k = m(s);
      i($.$$.fragment, s);
      V = m(s);
      U = w(s, "P", { "data-svelte-h": true });
      if (v(U) !== "svelte-nbw9jb") {
        U.innerHTML = as;
      }
      F = m(s);
      i(_.$$.fragment, s);
      S = m(s);
      i(B.$$.fragment, s);
      z = m(s);
      Z = w(s, "P", { "data-svelte-h": true });
      if (v(Z) !== "svelte-tda3d8") {
        Z.innerHTML = es;
      }
      Q = m(s);
      i(A.$$.fragment, s);
      R = m(s);
      i(f.$$.fragment, s);
      q = m(s);
      i(G.$$.fragment, s);
      L = m(s);
      i(h.$$.fragment, s);
      D = m(s);
      i(E.$$.fragment, s);
      P = m(s);
      X = w(s, "P", {});
      ps(X).forEach(a);
      this.h();
    },

    // Shared tail of c() and l(): set the metadata attributes on <meta>.
    h() {
      ms(t, "name", "hf:doc:metadata");
      ms(t, "content", Is);
    },

    // Mount: <meta> goes into document.head, everything else into target `s`
    // before anchor `l`, in document order.
    m(s, l) {
      ws(document.head, t);
      e(s, r, l);
      e(s, n, l);
      e(s, d, l);
      T(j, s, l);
      e(s, N, l);
      e(s, I, l);
      e(s, C, l);
      T(g, s, l);
      e(s, H, l);
      e(s, b, l);
      e(s, Y, l);
      e(s, y, l);
      e(s, k, l);
      T($, s, l);
      e(s, V, l);
      e(s, U, l);
      e(s, F, l);
      T(_, s, l);
      e(s, S, l);
      T(B, s, l);
      e(s, z, l);
      e(s, Z, l);
      e(s, Q, l);
      T(A, s, l);
      e(s, R, l);
      T(f, s, l);
      e(s, q, l);
      T(G, s, l);
      e(s, L, l);
      T(h, s, l);
      e(s, D, l);
      T(E, s, l);
      e(s, P, l);
      e(s, X, l);
      O = true;
    },

    // Update: only the two tips receive changes; each gets a fresh $$scope
    // when bit 1 of the dirty mask `l` is set.
    p(s, [l]) {
      const ts = {};
      if (l & 2) {
        ts.$$scope = { dirty: l, ctx: s };
      }
      f.$set(ts);

      const ns = {};
      if (l & 2) {
        ns.$$scope = { dirty: l, ctx: s };
      }
      h.$set(ns);
    },

    // Intro: run once per mount cycle, guarded by O.
    i(s) {
      if (O) {
        return;
      }
      for (const component of components) {
        o(component.$$.fragment, s);
      }
      O = true;
    },

    // Outro: always runs, then clears O so a later intro runs again.
    o(s) {
      for (const component of components) {
        J(component.$$.fragment, s);
      }
      O = false;
    },

    // Destroy: detach body nodes only when `s` is truthy; the <meta> in
    // document.head and the child components are always torn down.
    d(s) {
      if (s) {
        const bodyNodes = [
          r, n, d, N, I, C, H, b, Y, y, k, V,
          U, F, S, z, Z, Q, R, q, L, D, P, X,
        ];
        for (const node of bodyNodes) {
          a(node);
        }
      }
      a(t);
      for (const component of components) {
        M(component, s);
      }
    },
  };
}

// JSON written into the `content` attribute of the hf:doc:metadata <meta> tag.
const Is =
  '{"title":"Use SGLang as backend","local":"use-sglang-as-backend","sections":[{"title":"Use a config file","local":"use-a-config-file","sections":[],"depth":2}],"depth":1}';

/**
 * Instance initialiser passed to `Ms`.
 *
 * Registers a callback via `os` that reads the `fw` query parameter from the
 * current URL and discards the value, then returns an empty context array.
 *
 * @param {*} W - Unused.
 * @returns {Array} Always an empty array.
 */
function gs(W) {
  os(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component; wires `gs` (instance) and `js` (fragment) together via `Ms`. */
class Bs extends Js {
  constructor(t) {
    super();
    Ms(this, t, gs, js, Ts, {});
  }
}

export { Bs as component };
Xet Storage Details
- Size:
- 11.2 kB
- Xet hash:
- 4fe06243310980822ca6251b9dde3e398be2d044bd30ce1f453bd781b23bf0e8
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.