Buckets:

rtrm's picture
download
raw
15.4 kB
/**
 * Compiled Svelte page module for the "Using SGLang as Backend" documentation
 * page.
 *
 * The module exports a single component class (`component`). Its DOM is built
 * by the fragment factory `qe`, which wires together static HTML snippets,
 * heading components and code-block components.
 *
 * NOTE(review): the single-letter imports are presumably Svelte's internal
 * runtime helpers (element/space creation, claim, mount, transitions, ...);
 * the comments below describe them only by how this file uses them.
 */
import { s as Le, n as Qe, o as Fe } from "../chunks/scheduler.3a17fb72.js";
import { S as Ve, i as Ye, e as M, s, c as i, h as Ne, a as u, d as t, b as n, f as He, g as p, j as f, k as Ie, l as Xe, m as a, n as m, t as r, o, p as c } from "../chunks/index.093f8863.js";
import { C as Re, H as d, E as Pe } from "../chunks/MermaidChart.svelte_svelte_type_style_lang.40db53ca.js";
import { C as R } from "../chunks/CodeBlock.627906db.js";

/**
 * Fragment factory for the page.
 *
 * @param {*} Ce - Context argument supplied by the runtime; unused here.
 * @returns {object} Fragment object exposing the lifecycle hooks
 *   `c` (create), `l` (claim/hydrate), `h` (set attributes), `m` (mount),
 *   `p` (update, a no-op), `i`/`o` (transition in/out) and `d` (destroy).
 */
function qe(Ce) {
  // DOM nodes (elements and the whitespace separators between them). They are
  // assigned in `c()` or `l()` and released in `d()`.
  let g, P, N, q, D, O, w, K, ee, le, te, _, ae, se, I, ne, ie, pe, k, me, re, oe, G, ce, Me, T, ue, fe, $, de, ge, Te, E, $e, he, H, ye, we, Q, je, Ue, V, Je, _e, X;
  // True while the child components are transitioned in.
  let be;

  // ---- Static HTML snippets injected through innerHTML ---------------------
  const Be = `Lighteval allows you to use SGLang as a backend, providing significant speedups for model evaluation.
To use SGLang, simply change the <code>model_args</code> to reflect the arguments you want to pass to SGLang.`;
  const ke = `SGLang can distribute the model across multiple GPUs using data parallelism and tensor parallelism.
You can choose the parallelism method by setting the appropriate parameters in the <code>model_args</code>.`;
  const Se = "For example, if you have 4 GPUs, you can split the model across them using tensor parallelism with <code>tp_size</code>:";
  const ve = "If your model fits on a single GPU, you can use data parallelism with <code>dp_size</code> to speed up the evaluation:";
  const Ge = `For more advanced configurations, you can use a YAML configuration file for the model.
An example configuration file is shown below and can be found at <code>examples/model_configs/sglang_model_config.yaml</code>.`;
  const xe = '<p>Documentation for SGLang server arguments can be found <a href="https://docs.sglang.ai/backend/server_arguments.html" rel="nofollow">here</a></p>';
  const Ae = `<p>In case of out-of-memory (OOM) issues, you might need to reduce the context size of the
model as well as reduce the <code>mem_fraction_static</code> and <code>chunked_prefill_size</code> parameters.</p>`;
  const Ze = "<li><code>mem_fraction_static</code>: Fraction of GPU memory to allocate for static tensors (default: 0.8)</li> <li><code>chunked_prefill_size</code>: Size of chunks for prefill operations (default: 4096)</li> <li><code>context_length</code>: Maximum context length for the model</li> <li><code>kv_cache_dtype</code>: Data type for key-value cache</li>";
  const We = "<li><code>tp_size</code>: Number of GPUs for tensor parallelism</li> <li><code>dp_size</code>: Number of GPUs for data parallelism</li>";
  const Ee = "<li><code>dtype</code>: Data type for model weights (“auto”, “float16”, “bfloat16”, etc.)</li> <li><code>device</code>: Device to run the model on (“cuda”, “cpu”)</li> <li><code>trust_remote_code</code>: Whether to trust remote code from the model</li> <li><code>skip_tokenizer_init</code>: Skip tokenizer initialization for faster startup</li>";
  const ze = "<li><code>temperature</code>: Controls randomness in generation (0.0 = deterministic, 1.0 = random)</li> <li><code>top_p</code>: Nucleus sampling parameter</li> <li><code>top_k</code>: Top-k sampling parameter</li> <li><code>max_new_tokens</code>: Maximum number of tokens to generate</li> <li><code>repetition_penalty</code>: Penalty for repeating tokens</li> <li><code>presence_penalty</code>: Penalty for token presence</li> <li><code>frequency_penalty</code>: Penalty for token frequency</li>";

  // ---- Child components, in document order ---------------------------------
  // For each code block, `code` is the base64 of the URL-encoded raw snippet
  // and `highlighted` is the same snippet as highlight.js markup. The
  // indentation inside `highlighted` mirrors the indentation encoded in `code`.
  const h = new Re({
    props: {
      containerStyle: "float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;",
    },
  });
  const y = new d({ props: { title: "Using SGLang as Backend", local: "using-sglang-as-backend", headingTag: "h1" } });
  const j = new d({ props: { title: "Basic Usage", local: "basic-usage", headingTag: "h2" } });
  const U = new R({
    props: {
      code: "bGlnaHRldmFsJTIwc2dsYW5nJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIybW9kZWxfbmFtZSUzREh1Z2dpbmdGYWNlSDQlMkZ6ZXBoeXItN2ItYmV0YSUyQ2R0eXBlJTNEZmxvYXQxNiUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMHRydXRoZnVscWElM0FtYw==",
      highlighted: `lighteval sglang \\
    <span class="hljs-string">&quot;model_name=HuggingFaceH4/zephyr-7b-beta,dtype=float16&quot;</span> \\
    truthfulqa:mc`,
      wrap: false,
    },
  });
  const J = new d({ props: { title: "Parallelism Options", local: "parallelism-options", headingTag: "h2" } });
  const b = new d({ props: { title: "Tensor Parallelism", local: "tensor-parallelism", headingTag: "h3" } });
  const C = new R({
    props: {
      code: "bGlnaHRldmFsJTIwc2dsYW5nJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIybW9kZWxfbmFtZSUzREh1Z2dpbmdGYWNlSDQlMkZ6ZXBoeXItN2ItYmV0YSUyQ2R0eXBlJTNEZmxvYXQxNiUyQ3RwX3NpemUlM0Q0JTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwdHJ1dGhmdWxxYSUzQW1j",
      highlighted: `lighteval sglang \\
    <span class="hljs-string">&quot;model_name=HuggingFaceH4/zephyr-7b-beta,dtype=float16,tp_size=4&quot;</span> \\
    truthfulqa:mc`,
      wrap: false,
    },
  });
  const B = new d({ props: { title: "Data Parallelism", local: "data-parallelism", headingTag: "h3" } });
  const S = new R({
    props: {
      code: "bGlnaHRldmFsJTIwc2dsYW5nJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIybW9kZWxfbmFtZSUzREh1Z2dpbmdGYWNlSDQlMkZ6ZXBoeXItN2ItYmV0YSUyQ2R0eXBlJTNEZmxvYXQxNiUyQ2RwX3NpemUlM0Q0JTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwdHJ1dGhmdWxxYSUzQW1j",
      highlighted: `lighteval sglang \\
    <span class="hljs-string">&quot;model_name=HuggingFaceH4/zephyr-7b-beta,dtype=float16,dp_size=4&quot;</span> \\
    truthfulqa:mc`,
      wrap: false,
    },
  });
  const v = new d({ props: { title: "Using a Configuration File", local: "using-a-configuration-file", headingTag: "h2" } });
  const x = new R({
    props: {
      code: "bGlnaHRldmFsJTIwc2dsYW5nJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIyZXhhbXBsZXMlMkZtb2RlbF9jb25maWdzJTJGc2dsYW5nX21vZGVsX2NvbmZpZy55YW1sJTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwdHJ1dGhmdWxxYSUzQW1j",
      highlighted: `lighteval sglang \\
    <span class="hljs-string">&quot;examples/model_configs/sglang_model_config.yaml&quot;</span> \\
    truthfulqa:mc`,
      wrap: false,
    },
  });
  const A = new R({
    props: {
      code: "bW9kZWxfcGFyYW1ldGVycyUzQSUwQSUyMCUyMCUyMCUyMG1vZGVsX25hbWUlM0ElMjAlMjJIdWdnaW5nRmFjZVRCJTJGU21vbExNLTEuN0ItSW5zdHJ1Y3QlMjIlMEElMjAlMjAlMjAlMjBkdHlwZSUzQSUyMCUyMmF1dG8lMjIlMEElMjAlMjAlMjAlMjB0cF9zaXplJTNBJTIwMSUwQSUyMCUyMCUyMCUyMGRwX3NpemUlM0ElMjAxJTBBJTIwJTIwJTIwJTIwY29udGV4dF9sZW5ndGglM0ElMjBudWxsJTBBJTIwJTIwJTIwJTIwcmFuZG9tX3NlZWQlM0ElMjAxJTBBJTIwJTIwJTIwJTIwdHJ1c3RfcmVtb3RlX2NvZGUlM0ElMjBGYWxzZSUwQSUyMCUyMCUyMCUyMGRldmljZSUzQSUyMCUyMmN1ZGElMjIlMEElMjAlMjAlMjAlMjBza2lwX3Rva2VuaXplcl9pbml0JTNBJTIwRmFsc2UlMEElMjAlMjAlMjAlMjBrdl9jYWNoZV9kdHlwZSUzQSUyMCUyMmF1dG8lMjIlMEElMjAlMjAlMjAlMjBhZGRfc3BlY2lhbF90b2tlbnMlM0ElMjBUcnVlJTBBJTIwJTIwJTIwJTIwcGFpcndpc2VfdG9rZW5pemF0aW9uJTNBJTIwRmFsc2UlMEElMjAlMjAlMjAlMjBzYW1wbGluZ19iYWNrZW5kJTNBJTIwbnVsbCUwQSUyMCUyMCUyMCUyMGF0dGVudGlvbl9iYWNrZW5kJTNBJTIwbnVsbCUwQSUyMCUyMCUyMCUyMG1lbV9mcmFjdGlvbl9zdGF0aWMlM0ElMjAwLjglMEElMjAlMjAlMjAlMjBjaHVua2VkX3ByZWZpbGxfc2l6ZSUzQSUyMDQwOTYlMEElMjAlMjAlMjAlMjBnZW5lcmF0aW9uX3BhcmFtZXRlcnMlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjBtYXhfbmV3X3Rva2VucyUzQSUyMDEwMjQlMEElMjAlMjAlMjAlMjAlMjAlMjBtaW5fbmV3X3Rva2VucyUzQSUyMDAlMEElMjAlMjAlMjAlMjAlMjAlMjB0ZW1wZXJhdHVyZSUzQSUyMDEuMCUwQSUyMCUyMCUyMCUyMCUyMCUyMHRvcF9rJTNBJTIwNTAlMEElMjAlMjAlMjAlMjAlMjAlMjBtaW5fcCUzQSUyMDAuMCUwQSUyMCUyMCUyMCUyMCUyMCUyMHRvcF9wJTNBJTIwMS4wJTBBJTIwJTIwJTIwJTIwJTIwJTIwcHJlc2VuY2VfcGVuYWx0eSUzQSUyMDAuMCUwQSUyMCUyMCUyMCUyMCUyMCUyMHJlcGV0aXRpb25fcGVuYWx0eSUzQSUyMDEuMCUwQSUyMCUyMCUyMCUyMCUyMCUyMGZyZXF1ZW5jeV9wZW5hbHR5JTNBJTIwMC4w",
      highlighted: `<span class="hljs-attr">model_parameters:</span>
    <span class="hljs-attr">model_name:</span> <span class="hljs-string">&quot;HuggingFaceTB/SmolLM-1.7B-Instruct&quot;</span>
    <span class="hljs-attr">dtype:</span> <span class="hljs-string">&quot;auto&quot;</span>
    <span class="hljs-attr">tp_size:</span> <span class="hljs-number">1</span>
    <span class="hljs-attr">dp_size:</span> <span class="hljs-number">1</span>
    <span class="hljs-attr">context_length:</span> <span class="hljs-literal">null</span>
    <span class="hljs-attr">random_seed:</span> <span class="hljs-number">1</span>
    <span class="hljs-attr">trust_remote_code:</span> <span class="hljs-literal">False</span>
    <span class="hljs-attr">device:</span> <span class="hljs-string">&quot;cuda&quot;</span>
    <span class="hljs-attr">skip_tokenizer_init:</span> <span class="hljs-literal">False</span>
    <span class="hljs-attr">kv_cache_dtype:</span> <span class="hljs-string">&quot;auto&quot;</span>
    <span class="hljs-attr">add_special_tokens:</span> <span class="hljs-literal">True</span>
    <span class="hljs-attr">pairwise_tokenization:</span> <span class="hljs-literal">False</span>
    <span class="hljs-attr">sampling_backend:</span> <span class="hljs-literal">null</span>
    <span class="hljs-attr">attention_backend:</span> <span class="hljs-literal">null</span>
    <span class="hljs-attr">mem_fraction_static:</span> <span class="hljs-number">0.8</span>
    <span class="hljs-attr">chunked_prefill_size:</span> <span class="hljs-number">4096</span>
    <span class="hljs-attr">generation_parameters:</span>
      <span class="hljs-attr">max_new_tokens:</span> <span class="hljs-number">1024</span>
      <span class="hljs-attr">min_new_tokens:</span> <span class="hljs-number">0</span>
      <span class="hljs-attr">temperature:</span> <span class="hljs-number">1.0</span>
      <span class="hljs-attr">top_k:</span> <span class="hljs-number">50</span>
      <span class="hljs-attr">min_p:</span> <span class="hljs-number">0.0</span>
      <span class="hljs-attr">top_p:</span> <span class="hljs-number">1.0</span>
      <span class="hljs-attr">presence_penalty:</span> <span class="hljs-number">0.0</span>
      <span class="hljs-attr">repetition_penalty:</span> <span class="hljs-number">1.0</span>
      <span class="hljs-attr">frequency_penalty:</span> <span class="hljs-number">0.0</span>`,
      wrap: false,
    },
  });
  const Z = new d({ props: { title: "Key SGLang Parameters", local: "key-sglang-parameters", headingTag: "h2" } });
  const W = new d({ props: { title: "Memory Management", local: "memory-management", headingTag: "h3" } });
  const z = new d({ props: { title: "Parallelism Settings", local: "parallelism-settings", headingTag: "h3" } });
  const L = new d({ props: { title: "Model Configuration", local: "model-configuration", headingTag: "h3" } });
  const F = new d({ props: { title: "Generation Parameters", local: "generation-parameters", headingTag: "h3" } });
  const Y = new Pe({
    props: { source: "https://github.com/huggingface/lighteval/blob/main/docs/source/use-sglang-as-backend.mdx" },
  });

  // Every child component in document order; transitions and teardown walk
  // this list in the same order the original unrolled calls did.
  const components = [h, y, j, U, J, b, C, B, S, v, x, A, Z, W, z, L, F, Y];

  return {
    /** Create all nodes and child fragments from scratch (client render). */
    c() {
      g = M("meta");
      P = s();
      N = M("p");
      q = s();
      i(h.$$.fragment);
      D = s();
      i(y.$$.fragment);
      O = s();
      w = M("p");
      w.innerHTML = Be;
      K = s();
      i(j.$$.fragment);
      ee = s();
      i(U.$$.fragment);
      le = s();
      i(J.$$.fragment);
      te = s();
      _ = M("p");
      _.innerHTML = ke;
      ae = s();
      i(b.$$.fragment);
      se = s();
      I = M("p");
      I.innerHTML = Se;
      ne = s();
      i(C.$$.fragment);
      ie = s();
      i(B.$$.fragment);
      pe = s();
      k = M("p");
      k.innerHTML = ve;
      me = s();
      i(S.$$.fragment);
      re = s();
      i(v.$$.fragment);
      oe = s();
      G = M("p");
      G.innerHTML = Ge;
      ce = s();
      i(x.$$.fragment);
      Me = s();
      T = M("blockquote");
      T.innerHTML = xe;
      ue = s();
      i(A.$$.fragment);
      fe = s();
      $ = M("blockquote");
      $.innerHTML = Ae;
      de = s();
      i(Z.$$.fragment);
      ge = s();
      i(W.$$.fragment);
      Te = s();
      E = M("ul");
      E.innerHTML = Ze;
      $e = s();
      i(z.$$.fragment);
      he = s();
      H = M("ul");
      H.innerHTML = We;
      ye = s();
      i(L.$$.fragment);
      we = s();
      Q = M("ul");
      Q.innerHTML = Ee;
      je = s();
      i(F.$$.fragment);
      Ue = s();
      V = M("ul");
      V.innerHTML = ze;
      Je = s();
      i(Y.$$.fragment);
      _e = s();
      X = M("p");
      this.h();
    },

    /**
     * Claim existing nodes (hydration path). For snippet nodes the innerHTML
     * is only rewritten when the node's recorded hash differs from the
     * expected one.
     */
    l(nodes) {
      const headNodes = Ne("svelte-u9bgzb", document.head);
      g = u(headNodes, "META", { name: true, content: true });
      headNodes.forEach(t);
      P = n(nodes);
      N = u(nodes, "P", {});
      He(N).forEach(t);
      q = n(nodes);
      p(h.$$.fragment, nodes);
      D = n(nodes);
      p(y.$$.fragment, nodes);
      O = n(nodes);
      w = u(nodes, "P", { "data-svelte-h": true });
      if (f(w) !== "svelte-1bw11uc") {
        w.innerHTML = Be;
      }
      K = n(nodes);
      p(j.$$.fragment, nodes);
      ee = n(nodes);
      p(U.$$.fragment, nodes);
      le = n(nodes);
      p(J.$$.fragment, nodes);
      te = n(nodes);
      _ = u(nodes, "P", { "data-svelte-h": true });
      if (f(_) !== "svelte-10zaw8k") {
        _.innerHTML = ke;
      }
      ae = n(nodes);
      p(b.$$.fragment, nodes);
      se = n(nodes);
      I = u(nodes, "P", { "data-svelte-h": true });
      if (f(I) !== "svelte-odna94") {
        I.innerHTML = Se;
      }
      ne = n(nodes);
      p(C.$$.fragment, nodes);
      ie = n(nodes);
      p(B.$$.fragment, nodes);
      pe = n(nodes);
      k = u(nodes, "P", { "data-svelte-h": true });
      if (f(k) !== "svelte-15m6api") {
        k.innerHTML = ve;
      }
      me = n(nodes);
      p(S.$$.fragment, nodes);
      re = n(nodes);
      p(v.$$.fragment, nodes);
      oe = n(nodes);
      G = u(nodes, "P", { "data-svelte-h": true });
      if (f(G) !== "svelte-560e5r") {
        G.innerHTML = Ge;
      }
      ce = n(nodes);
      p(x.$$.fragment, nodes);
      Me = n(nodes);
      T = u(nodes, "BLOCKQUOTE", { class: true, "data-svelte-h": true });
      if (f(T) !== "svelte-b8ev60") {
        T.innerHTML = xe;
      }
      ue = n(nodes);
      p(A.$$.fragment, nodes);
      fe = n(nodes);
      $ = u(nodes, "BLOCKQUOTE", { class: true, "data-svelte-h": true });
      if (f($) !== "svelte-loo1zp") {
        $.innerHTML = Ae;
      }
      de = n(nodes);
      p(Z.$$.fragment, nodes);
      ge = n(nodes);
      p(W.$$.fragment, nodes);
      Te = n(nodes);
      E = u(nodes, "UL", { "data-svelte-h": true });
      if (f(E) !== "svelte-h1z7e") {
        E.innerHTML = Ze;
      }
      $e = n(nodes);
      p(z.$$.fragment, nodes);
      he = n(nodes);
      H = u(nodes, "UL", { "data-svelte-h": true });
      if (f(H) !== "svelte-1k89zlt") {
        H.innerHTML = We;
      }
      ye = n(nodes);
      p(L.$$.fragment, nodes);
      we = n(nodes);
      Q = u(nodes, "UL", { "data-svelte-h": true });
      if (f(Q) !== "svelte-19ebxti") {
        Q.innerHTML = Ee;
      }
      je = n(nodes);
      p(F.$$.fragment, nodes);
      Ue = n(nodes);
      V = u(nodes, "UL", { "data-svelte-h": true });
      if (f(V) !== "svelte-1s61mt7") {
        V.innerHTML = ze;
      }
      Je = n(nodes);
      p(Y.$$.fragment, nodes);
      _e = n(nodes);
      X = u(nodes, "P", {});
      He(X).forEach(t);
      this.h();
    },

    /** Set the attributes shared by the create and claim paths. */
    h() {
      Ie(g, "name", "hf:doc:metadata");
      Ie(g, "content", De);
      Ie(T, "class", "tip");
      Ie($, "class", "warning");
    },

    /** Insert the meta tag into <head> and everything else into `target`. */
    m(target, anchor) {
      Xe(document.head, g);
      a(target, P, anchor);
      a(target, N, anchor);
      a(target, q, anchor);
      m(h, target, anchor);
      a(target, D, anchor);
      m(y, target, anchor);
      a(target, O, anchor);
      a(target, w, anchor);
      a(target, K, anchor);
      m(j, target, anchor);
      a(target, ee, anchor);
      m(U, target, anchor);
      a(target, le, anchor);
      m(J, target, anchor);
      a(target, te, anchor);
      a(target, _, anchor);
      a(target, ae, anchor);
      m(b, target, anchor);
      a(target, se, anchor);
      a(target, I, anchor);
      a(target, ne, anchor);
      m(C, target, anchor);
      a(target, ie, anchor);
      m(B, target, anchor);
      a(target, pe, anchor);
      a(target, k, anchor);
      a(target, me, anchor);
      m(S, target, anchor);
      a(target, re, anchor);
      m(v, target, anchor);
      a(target, oe, anchor);
      a(target, G, anchor);
      a(target, ce, anchor);
      m(x, target, anchor);
      a(target, Me, anchor);
      a(target, T, anchor);
      a(target, ue, anchor);
      m(A, target, anchor);
      a(target, fe, anchor);
      a(target, $, anchor);
      a(target, de, anchor);
      m(Z, target, anchor);
      a(target, ge, anchor);
      m(W, target, anchor);
      a(target, Te, anchor);
      a(target, E, anchor);
      a(target, $e, anchor);
      m(z, target, anchor);
      a(target, he, anchor);
      a(target, H, anchor);
      a(target, ye, anchor);
      m(L, target, anchor);
      a(target, we, anchor);
      a(target, Q, anchor);
      a(target, je, anchor);
      m(F, target, anchor);
      a(target, Ue, anchor);
      a(target, V, anchor);
      a(target, Je, anchor);
      m(Y, target, anchor);
      a(target, _e, anchor);
      a(target, X, anchor);
      be = true;
    },

    // The page has no reactive state, so updates are a no-op.
    p: Qe,

    /** Transition every child component in (once). */
    i(local) {
      if (be) {
        return;
      }
      for (const component of components) {
        r(component.$$.fragment, local);
      }
      be = true;
    },

    /** Transition every child component out. */
    o(local) {
      for (const component of components) {
        o(component.$$.fragment, local);
      }
      be = false;
    },

    /** Detach the DOM nodes (when `detaching`) and destroy the children. */
    d(detaching) {
      if (detaching) {
        const bodyNodes = [P, N, q, D, O, w, K, ee, le, te, _, ae, se, I, ne, ie, pe, k, me, re, oe, G, ce, Me, T, ue, fe, $, de, ge, Te, E, $e, he, H, ye, we, Q, je, Ue, V, Je, _e, X];
        for (const node of bodyNodes) {
          t(node);
        }
      }
      // The <head> meta tag is removed regardless of `detaching`.
      t(g);
      for (const component of components) {
        c(component, detaching);
      }
    },
  };
}

// JSON table of contents for the page, stored in the `hf:doc:metadata` meta tag.
const De = '{"title":"Using SGLang as Backend","local":"using-sglang-as-backend","sections":[{"title":"Basic Usage","local":"basic-usage","sections":[],"depth":2},{"title":"Parallelism Options","local":"parallelism-options","sections":[{"title":"Tensor Parallelism","local":"tensor-parallelism","sections":[],"depth":3},{"title":"Data Parallelism","local":"data-parallelism","sections":[],"depth":3}],"depth":2},{"title":"Using a Configuration File","local":"using-a-configuration-file","sections":[],"depth":2},{"title":"Key SGLang Parameters","local":"key-sglang-parameters","sections":[{"title":"Memory Management","local":"memory-management","sections":[],"depth":3},{"title":"Parallelism Settings","local":"parallelism-settings","sections":[],"depth":3},{"title":"Model Configuration","local":"model-configuration","sections":[],"depth":3},{"title":"Generation Parameters","local":"generation-parameters","sections":[],"depth":3}],"depth":2}],"depth":1}';

/**
 * Instance function for the component.
 *
 * Registers a callback through `Fe` that reads the `fw` query parameter; the
 * value is discarded, so the callback has no observable effect here.
 *
 * @param {*} Ce - Component instance argument supplied by the runtime; unused.
 * @returns {Array} Empty instance context (the page has no state).
 */
function Oe(Ce) {
  Fe(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component, exported under the name `component`. */
class al extends Ve {
  constructor(options) {
    super();
    Ye(this, options, Oe, qe, Le, {});
  }
}

export { al as component };

Xet Storage Details

Size:
15.4 kB
·
Xet hash:
6b40f778ef580b7849efce1fe195a2e766f2eca3df52d538600caa750a3ae1ff

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.