Buckets:
| /**
 * Minified, machine-generated page module for the "Using SGLang as Backend"
 * documentation page (appears to be compiled Svelte output; the source URL
 * handed to Ye below points at docs/source/use-sglang-as-backend.mdx).
 *
 * Imports: runtime helpers from the scheduler and index chunks, the CodeBlock
 * component as q, and f / Ye from the MermaidChart chunk (f is instantiated
 * with title/local/headingTag props, Ye with a source URL).
 *
 * NOTE(review): every physical line here is wrapped in "| ... | |" markers, and
 * some line breaks fall in the middle of statements and string literals. Those
 * are not JavaScript and look like residue of the page this text was captured
 * from - confirm against the real build artifact before relying on any string
 * content byte-for-byte.
 */import{s as xe,n as ze,o as Ne}from"../chunks/scheduler.5f3e6389.js";import{S as Ve,i as Qe,e as M,s,c as i,h as Fe,a as u,d as t,b as n,f as He,g as p,j as d,k as be,l as Le,m as a,n as r,t as m,o,p as c}from"../chunks/index.373ab25c.js";import{C as q}from"../chunks/CodeBlock.9962115b.js";import{H as f,E as Ye}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.d67236f8.js";/**
 * Fragment factory for the page.
 *
 * Declares one local per node, separator and component, holds the static HTML
 * snippets as string constants (Ie, Ce, Be, Ge, ke, ve, Ze, Se, Ae, We, Ee),
 * instantiates the heading components (f), code blocks (q) and the Ye
 * component, and returns an object with the methods c, l, h, m, p, i, o and d.
 * Ue is a flag set by m()/i() and cleared by o().
 *
 * The parameter _e is not referenced anywhere in the body.
 */function qe(_e){let g,X,L,P,y,R,j,Ie=`Lighteval allows you to use SGLang as a backend, providing significant speedups for model evaluation. | |
| To use SGLang, simply change the <code>model_args</code> to reflect the arguments you want to pass to SGLang.`,D,w,O,$,K,J,ee,U,Ce=`SGLang can distribute the model across multiple GPUs using data parallelism and tensor parallelism. | |
| You can choose the parallelism method by setting the appropriate parameters in the <code>model_args</code>.`,le,b,te,_,Be="For example, if you have 4 GPUs, you can split the model across them using tensor parallelism with <code>tp_size</code>:",ae,I,se,C,ne,B,Ge="If your model fits on a single GPU, you can use data parallelism with <code>dp_size</code> to speed up the evaluation:",ie,G,pe,k,re,v,ke=`For more advanced configurations, you can use a YAML configuration file for the model. | |
| An example configuration file is shown below and can be found at <code>examples/model_configs/sglang_model_config.yaml</code>.`,me,Z,oe,T,ve='<p>Documentation for SGLang server arguments can be found <a href="https://docs.sglang.ai/backend/server_arguments.html" rel="nofollow">here</a></p>',ce,S,Me,h,Ze=`<p>In case of out-of-memory (OOM) issues, you might need to reduce the context size of the | |
| model as well as reduce the <code>mem_fraction_static</code> and <code>chunked_prefill_size</code> parameters.</p>`,ue,A,de,W,fe,E,Se="<li><code>mem_fraction_static</code>: Fraction of GPU memory to allocate for static tensors (default: 0.8)</li> <li><code>chunked_prefill_size</code>: Size of chunks for prefill operations (default: 4096)</li> <li><code>context_length</code>: Maximum context length for the model</li> <li><code>kv_cache_dtype</code>: Data type for key-value cache</li>",ge,H,Te,x,Ae="<li><code>tp_size</code>: Number of GPUs for tensor parallelism</li> <li><code>dp_size</code>: Number of GPUs for data parallelism</li>",he,z,ye,N,We="<li><code>dtype</code>: Data type for model weights (“auto”, “float16”, “bfloat16”, etc.)</li> <li><code>device</code>: Device to run the model on (“cuda”, “cpu”)</li> <li><code>trust_remote_code</code>: Whether to trust remote code from the model</li> <li><code>skip_tokenizer_init</code>: Skip tokenizer initialization for faster startup</li>",je,V,we,Q,Ee="<li><code>temperature</code>: Controls randomness in generation (0.0 = deterministic, 1.0 = random)</li> <li><code>top_p</code>: Nucleus sampling parameter</li> <li><code>top_k</code>: Top-k sampling parameter</li> <li><code>max_new_tokens</code>: Maximum number of tokens to generate</li> <li><code>repetition_penalty</code>: Penalty for repeating tokens</li> <li><code>presence_penalty</code>: Penalty for token presence</li> <li><code>frequency_penalty</code>: Penalty for token frequency</li>",$e,F,Je,Y,Ue;return y=new f({props:{title:"Using SGLang as Backend",local:"using-sglang-as-backend",headingTag:"h1"}}),w=new f({props:{title:"Basic Usage",local:"basic-usage",headingTag:"h2"}}),$=new q({props:{code:"bGlnaHRldmFsJTIwc2dsYW5nJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIybW9kZWxfbmFtZSUzREh1Z2dpbmdGYWNlSDQlMkZ6ZXBoeXItN2ItYmV0YSUyQ2R0eXBlJTNEZmxvYXQxNiUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMmxlYWRlcmJvYXJkJTdDdHJ1dGhmdWxxYSUzQW1jJTdDMCUyMg==",highlighted:`lighteval sglang \\ | |
| <span class="hljs-string">"model_name=HuggingFaceH4/zephyr-7b-beta,dtype=float16"</span> \\ | |
| <span class="hljs-string">"leaderboard|truthfulqa:mc|0"</span>`,wrap:!1}}),J=new f({props:{title:"Parallelism Options",local:"parallelism-options",headingTag:"h2"}}),b=new f({props:{title:"Tensor Parallelism",local:"tensor-parallelism",headingTag:"h3"}}),I=new q({props:{code:"bGlnaHRldmFsJTIwc2dsYW5nJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIybW9kZWxfbmFtZSUzREh1Z2dpbmdGYWNlSDQlMkZ6ZXBoeXItN2ItYmV0YSUyQ2R0eXBlJTNEZmxvYXQxNiUyQ3RwX3NpemUlM0Q0JTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIybGVhZGVyYm9hcmQlN0N0cnV0aGZ1bHFhJTNBbWMlN0MwJTIy",highlighted:`lighteval sglang \\ | |
| <span class="hljs-string">"model_name=HuggingFaceH4/zephyr-7b-beta,dtype=float16,tp_size=4"</span> \\ | |
| <span class="hljs-string">"leaderboard|truthfulqa:mc|0"</span>`,wrap:!1}}),C=new f({props:{title:"Data Parallelism",local:"data-parallelism",headingTag:"h3"}}),G=new q({props:{code:"bGlnaHRldmFsJTIwc2dsYW5nJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIybW9kZWxfbmFtZSUzREh1Z2dpbmdGYWNlSDQlMkZ6ZXBoeXItN2ItYmV0YSUyQ2R0eXBlJTNEZmxvYXQxNiUyQ2RwX3NpemUlM0Q0JTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIybGVhZGVyYm9hcmQlN0N0cnV0aGZ1bHFhJTNBbWMlN0MwJTIy",highlighted:`lighteval sglang \\ | |
| <span class="hljs-string">"model_name=HuggingFaceH4/zephyr-7b-beta,dtype=float16,dp_size=4"</span> \\ | |
| <span class="hljs-string">"leaderboard|truthfulqa:mc|0"</span>`,wrap:!1}}),k=new f({props:{title:"Using a Configuration File",local:"using-a-configuration-file",headingTag:"h2"}}),Z=new q({props:{code:"bGlnaHRldmFsJTIwc2dsYW5nJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIyZXhhbXBsZXMlMkZtb2RlbF9jb25maWdzJTJGc2dsYW5nX21vZGVsX2NvbmZpZy55YW1sJTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIybGVhZGVyYm9hcmQlN0N0cnV0aGZ1bHFhJTNBbWMlN0MwJTIy",highlighted:`lighteval sglang \\ | |
| <span class="hljs-string">"examples/model_configs/sglang_model_config.yaml"</span> \\ | |
| <span class="hljs-string">"leaderboard|truthfulqa:mc|0"</span>`,wrap:!1}}),S=new q({props:{code:"bW9kZWxfcGFyYW1ldGVycyUzQSUwQSUyMCUyMCUyMCUyMG1vZGVsX25hbWUlM0ElMjAlMjJIdWdnaW5nRmFjZVRCJTJGU21vbExNLTEuN0ItSW5zdHJ1Y3QlMjIlMEElMjAlMjAlMjAlMjBkdHlwZSUzQSUyMCUyMmF1dG8lMjIlMEElMjAlMjAlMjAlMjB0cF9zaXplJTNBJTIwMSUwQSUyMCUyMCUyMCUyMGRwX3NpemUlM0ElMjAxJTBBJTIwJTIwJTIwJTIwY29udGV4dF9sZW5ndGglM0ElMjBudWxsJTBBJTIwJTIwJTIwJTIwcmFuZG9tX3NlZWQlM0ElMjAxJTBBJTIwJTIwJTIwJTIwdHJ1c3RfcmVtb3RlX2NvZGUlM0ElMjBGYWxzZSUwQSUyMCUyMCUyMCUyMGRldmljZSUzQSUyMCUyMmN1ZGElMjIlMEElMjAlMjAlMjAlMjBza2lwX3Rva2VuaXplcl9pbml0JTNBJTIwRmFsc2UlMEElMjAlMjAlMjAlMjBrdl9jYWNoZV9kdHlwZSUzQSUyMCUyMmF1dG8lMjIlMEElMjAlMjAlMjAlMjBhZGRfc3BlY2lhbF90b2tlbnMlM0ElMjBUcnVlJTBBJTIwJTIwJTIwJTIwcGFpcndpc2VfdG9rZW5pemF0aW9uJTNBJTIwRmFsc2UlMEElMjAlMjAlMjAlMjBzYW1wbGluZ19iYWNrZW5kJTNBJTIwbnVsbCUwQSUyMCUyMCUyMCUyMGF0dGVudGlvbl9iYWNrZW5kJTNBJTIwbnVsbCUwQSUyMCUyMCUyMCUyMG1lbV9mcmFjdGlvbl9zdGF0aWMlM0ElMjAwLjglMEElMjAlMjAlMjAlMjBjaHVua2VkX3ByZWZpbGxfc2l6ZSUzQSUyMDQwOTYlMEElMjAlMjAlMjAlMjBnZW5lcmF0aW9uX3BhcmFtZXRlcnMlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjBtYXhfbmV3X3Rva2VucyUzQSUyMDEwMjQlMEElMjAlMjAlMjAlMjAlMjAlMjBtaW5fbmV3X3Rva2VucyUzQSUyMDAlMEElMjAlMjAlMjAlMjAlMjAlMjB0ZW1wZXJhdHVyZSUzQSUyMDEuMCUwQSUyMCUyMCUyMCUyMCUyMCUyMHRvcF9rJTNBJTIwNTAlMEElMjAlMjAlMjAlMjAlMjAlMjBtaW5fcCUzQSUyMDAuMCUwQSUyMCUyMCUyMCUyMCUyMCUyMHRvcF9wJTNBJTIwMS4wJTBBJTIwJTIwJTIwJTIwJTIwJTIwcHJlc2VuY2VfcGVuYWx0eSUzQSUyMDAuMCUwQSUyMCUyMCUyMCUyMCUyMCUyMHJlcGV0aXRpb25fcGVuYWx0eSUzQSUyMDEuMCUwQSUyMCUyMCUyMCUyMCUyMCUyMGZyZXF1ZW5jeV9wZW5hbHR5JTNBJTIwMC4w",highlighted:`<span class="hljs-attr">model_parameters:</span> | |
| <span class="hljs-attr">model_name:</span> <span class="hljs-string">"HuggingFaceTB/SmolLM-1.7B-Instruct"</span> | |
| <span class="hljs-attr">dtype:</span> <span class="hljs-string">"auto"</span> | |
| <span class="hljs-attr">tp_size:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-attr">dp_size:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-attr">context_length:</span> <span class="hljs-literal">null</span> | |
| <span class="hljs-attr">random_seed:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-attr">trust_remote_code:</span> <span class="hljs-literal">False</span> | |
| <span class="hljs-attr">device:</span> <span class="hljs-string">"cuda"</span> | |
| <span class="hljs-attr">skip_tokenizer_init:</span> <span class="hljs-literal">False</span> | |
| <span class="hljs-attr">kv_cache_dtype:</span> <span class="hljs-string">"auto"</span> | |
| <span class="hljs-attr">add_special_tokens:</span> <span class="hljs-literal">True</span> | |
| <span class="hljs-attr">pairwise_tokenization:</span> <span class="hljs-literal">False</span> | |
| <span class="hljs-attr">sampling_backend:</span> <span class="hljs-literal">null</span> | |
| <span class="hljs-attr">attention_backend:</span> <span class="hljs-literal">null</span> | |
| <span class="hljs-attr">mem_fraction_static:</span> <span class="hljs-number">0.8</span> | |
| <span class="hljs-attr">chunked_prefill_size:</span> <span class="hljs-number">4096</span> | |
| <span class="hljs-attr">generation_parameters:</span> | |
| <span class="hljs-attr">max_new_tokens:</span> <span class="hljs-number">1024</span> | |
| <span class="hljs-attr">min_new_tokens:</span> <span class="hljs-number">0</span> | |
| <span class="hljs-attr">temperature:</span> <span class="hljs-number">1.0</span> | |
| <span class="hljs-attr">top_k:</span> <span class="hljs-number">50</span> | |
| <span class="hljs-attr">min_p:</span> <span class="hljs-number">0.0</span> | |
| <span class="hljs-attr">top_p:</span> <span class="hljs-number">1.0</span> | |
| <span class="hljs-attr">presence_penalty:</span> <span class="hljs-number">0.0</span> | |
| <span class="hljs-attr">repetition_penalty:</span> <span class="hljs-number">1.0</span> | |
| <span class="hljs-attr">frequency_penalty:</span> <span class="hljs-number">0.0</span>`,wrap:!1}}),A=new f({props:{title:"Key SGLang Parameters",local:"key-sglang-parameters",headingTag:"h2"}}),W=new f({props:{title:"Memory Management",local:"memory-management",headingTag:"h3"}}),H=new f({props:{title:"Parallelism Settings",local:"parallelism-settings",headingTag:"h3"}}),z=new f({props:{title:"Model Configuration",local:"model-configuration",headingTag:"h3"}}),V=new f({props:{title:"Generation Parameters",local:"generation-parameters",headingTag:"h3"}}),F=new Ye({props:{source:"https://github.com/huggingface/lighteval/blob/main/docs/source/use-sglang-as-backend.mdx"}}),{/* c(): builds every node and child fragment through M()/s()/i(), assigns the static HTML strings via innerHTML, then calls this.h(). */c(){g=M("meta"),X=s(),L=M("p"),P=s(),i(y.$$.fragment),R=s(),j=M("p"),j.innerHTML=Ie,D=s(),i(w.$$.fragment),O=s(),i($.$$.fragment),K=s(),i(J.$$.fragment),ee=s(),U=M("p"),U.innerHTML=Ce,le=s(),i(b.$$.fragment),te=s(),_=M("p"),_.innerHTML=Be,ae=s(),i(I.$$.fragment),se=s(),i(C.$$.fragment),ne=s(),B=M("p"),B.innerHTML=Ge,ie=s(),i(G.$$.fragment),pe=s(),i(k.$$.fragment),re=s(),v=M("p"),v.innerHTML=ke,me=s(),i(Z.$$.fragment),oe=s(),T=M("blockquote"),T.innerHTML=ve,ce=s(),i(S.$$.fragment),Me=s(),h=M("blockquote"),h.innerHTML=Ze,ue=s(),i(A.$$.fragment),de=s(),i(W.$$.fragment),fe=s(),E=M("ul"),E.innerHTML=Se,ge=s(),i(H.$$.fragment),Te=s(),x=M("ul"),x.innerHTML=Ae,he=s(),i(z.$$.fragment),ye=s(),N=M("ul"),N.innerHTML=We,je=s(),i(V.$$.fragment),we=s(),Q=M("ul"),Q.innerHTML=Ee,$e=s(),i(F.$$.fragment),Je=s(),Y=M("p"),this.h()},/* l(e): parallel construction path that takes e and uses Fe()/u()/n()/p(); it assigns innerHTML only where d(node) differs from the expected "svelte-..." string, then calls this.h(). NOTE(review): presumably the hydration/claim path - confirm. */l(e){const 
l=Fe("svelte-u9bgzb",document.head);g=u(l,"META",{name:!0,content:!0}),l.forEach(t),X=n(e),L=u(e,"P",{}),He(L).forEach(t),P=n(e),p(y.$$.fragment,e),R=n(e),j=u(e,"P",{"data-svelte-h":!0}),d(j)!=="svelte-1bw11uc"&&(j.innerHTML=Ie),D=n(e),p(w.$$.fragment,e),O=n(e),p($.$$.fragment,e),K=n(e),p(J.$$.fragment,e),ee=n(e),U=u(e,"P",{"data-svelte-h":!0}),d(U)!=="svelte-10zaw8k"&&(U.innerHTML=Ce),le=n(e),p(b.$$.fragment,e),te=n(e),_=u(e,"P",{"data-svelte-h":!0}),d(_)!=="svelte-odna94"&&(_.innerHTML=Be),ae=n(e),p(I.$$.fragment,e),se=n(e),p(C.$$.fragment,e),ne=n(e),B=u(e,"P",{"data-svelte-h":!0}),d(B)!=="svelte-15m6api"&&(B.innerHTML=Ge),ie=n(e),p(G.$$.fragment,e),pe=n(e),p(k.$$.fragment,e),re=n(e),v=u(e,"P",{"data-svelte-h":!0}),d(v)!=="svelte-560e5r"&&(v.innerHTML=ke),me=n(e),p(Z.$$.fragment,e),oe=n(e),T=u(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),d(T)!=="svelte-b8ev60"&&(T.innerHTML=ve),ce=n(e),p(S.$$.fragment,e),Me=n(e),h=u(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),d(h)!=="svelte-loo1zp"&&(h.innerHTML=Ze),ue=n(e),p(A.$$.fragment,e),de=n(e),p(W.$$.fragment,e),fe=n(e),E=u(e,"UL",{"data-svelte-h":!0}),d(E)!=="svelte-h1z7e"&&(E.innerHTML=Se),ge=n(e),p(H.$$.fragment,e),Te=n(e),x=u(e,"UL",{"data-svelte-h":!0}),d(x)!=="svelte-1k89zlt"&&(x.innerHTML=Ae),he=n(e),p(z.$$.fragment,e),ye=n(e),N=u(e,"UL",{"data-svelte-h":!0}),d(N)!=="svelte-19ebxti"&&(N.innerHTML=We),je=n(e),p(V.$$.fragment,e),we=n(e),Q=u(e,"UL",{"data-svelte-h":!0}),d(Q)!=="svelte-1s61mt7"&&(Q.innerHTML=Ee),$e=n(e),p(F.$$.fragment,e),Je=n(e),Y=u(e,"P",{}),He(Y).forEach(t),this.h()},/* h(): sets name/content on the meta element g (content is Xe) and the "tip"/"warning" classes on blockquotes T and h. */h(){be(g,"name","hf:doc:metadata"),be(g,"content",Xe),be(T,"class","tip"),be(h,"class","warning")},/* m(e,l): passes g to Le() with document.head, then hands every node to a() and every component to r() with e and l, in page order, and sets Ue. */m(e,l){Le(document.head,g),a(e,X,l),a(e,L,l),a(e,P,l),r(y,e,l),a(e,R,l),a(e,j,l),a(e,D,l),r(w,e,l),a(e,O,l),r($,e,l),a(e,K,l),r(J,e,l),a(e,ee,l),a(e,U,l),a(e,le,l),r(b,e,l),a(e,te,l),a(e,_,l),a(e,ae,l),r(I,e,l),a(e,se,l),r(C,e,l),a(e,ne,l),a(e,B,l),a(e,ie,l),r(G,e,l),a(e,pe,l),r(k,e,l),a(e,re,l),a(e,v,l),a(e,me,l),r(Z,e,l),a(e,oe,l),a(e,T
,l),a(e,ce,l),r(S,e,l),a(e,Me,l),a(e,h,l),a(e,ue,l),r(A,e,l),a(e,de,l),r(W,e,l),a(e,fe,l),a(e,E,l),a(e,ge,l),r(H,e,l),a(e,Te,l),a(e,x,l),a(e,he,l),r(z,e,l),a(e,ye,l),a(e,N,l),a(e,je,l),r(V,e,l),a(e,we,l),a(e,Q,l),a(e,$e,l),r(F,e,l),a(e,Je,l),a(e,Y,l),Ue=!0},/* p: the imported ze (scheduler export n). NOTE(review): presumably a no-op because this page has no reactive state - confirm. */p:ze,/* i(e): unless Ue is already set, calls the imported m() on all 17 component fragments and sets Ue. */i(e){Ue||(m(y.$$.fragment,e),m(w.$$.fragment,e),m($.$$.fragment,e),m(J.$$.fragment,e),m(b.$$.fragment,e),m(I.$$.fragment,e),m(C.$$.fragment,e),m(G.$$.fragment,e),m(k.$$.fragment,e),m(Z.$$.fragment,e),m(S.$$.fragment,e),m(A.$$.fragment,e),m(W.$$.fragment,e),m(H.$$.fragment,e),m(z.$$.fragment,e),m(V.$$.fragment,e),m(F.$$.fragment,e),Ue=!0)},/* o(e): calls the imported o() on the same 17 component fragments and clears Ue. */o(e){o(y.$$.fragment,e),o(w.$$.fragment,e),o($.$$.fragment,e),o(J.$$.fragment,e),o(b.$$.fragment,e),o(I.$$.fragment,e),o(C.$$.fragment,e),o(G.$$.fragment,e),o(k.$$.fragment,e),o(Z.$$.fragment,e),o(S.$$.fragment,e),o(A.$$.fragment,e),o(W.$$.fragment,e),o(H.$$.fragment,e),o(z.$$.fragment,e),o(V.$$.fragment,e),o(F.$$.fragment,e),Ue=!1},/* d(e): when e is truthy calls t() on each element and separator; always calls t(g) and the imported c(component,e) for every component. */d(e){e&&(t(X),t(L),t(P),t(R),t(j),t(D),t(O),t(K),t(ee),t(U),t(le),t(te),t(_),t(ae),t(se),t(ne),t(B),t(ie),t(pe),t(re),t(v),t(me),t(oe),t(T),t(ce),t(Me),t(h),t(ue),t(de),t(fe),t(E),t(ge),t(Te),t(x),t(he),t(ye),t(N),t(je),t(we),t(Q),t($e),t(Je),t(Y)),t(g),c(y,e),c(w,e),c($,e),c(J,e),c(b,e),c(I,e),c(C,e),c(G,e),c(k,e),c(Z,e),c(S,e),c(A,e),c(W,e),c(H,e),c(z,e),c(V,e),c(F,e)}}}/** JSON text listing the page title, anchors and nested section headings (depth 1 to 3); h() in qe writes it to the content attribute of the "hf:doc:metadata" meta element. */const Xe='{"title":"Using SGLang as Backend","local":"using-sglang-as-backend","sections":[{"title":"Basic Usage","local":"basic-usage","sections":[],"depth":2},{"title":"Parallelism Options","local":"parallelism-options","sections":[{"title":"Tensor Parallelism","local":"tensor-parallelism","sections":[],"depth":3},{"title":"Data Parallelism","local":"data-parallelism","sections":[],"depth":3}],"depth":2},{"title":"Using a Configuration File","local":"using-a-configuration-file","sections":[],"depth":2},{"title":"Key SGLang Parameters","local":"key-sglang-parameters","sections":[{"title":"Memory 
Management","local":"memory-management","sections":[],"depth":3},{"title":"Parallelism Settings","local":"parallelism-settings","sections":[],"depth":3},{"title":"Model Configuration","local":"model-configuration","sections":[],"depth":3},{"title":"Generation Parameters","local":"generation-parameters","sections":[],"depth":3}],"depth":2}],"depth":1}';/** Instance function handed to Qe. Registers a callback through Ne that reads the "fw" query parameter from window.location.search and discards the value, then returns an empty array. The parameter _e is unused. NOTE(review): Ne is the scheduler export o, presumably onMount - confirm. */function Pe(_e){return Ne(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}/** Page component class: extends Ve and wires Pe, qe and xe together by calling Qe(this,g,Pe,qe,xe,{}) in its constructor. */class el extends Ve{constructor(g){super(),Qe(this,g,Pe,qe,xe,{})}}/* The class is exported under the name "component". */export{el as component}; | |
Xet Storage Details
- Size: 15.5 kB
- Xet hash: afef3274214bd1f0db361294eec257a7bfaa8ea60314ed82571ee05be73a1605
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.