Buckets:
| import{s as Xe,n as Re,o as Qe}from"../chunks/scheduler.3a17fb72.js";import{S as Pe,i as Fe,e as u,s as a,c as i,h as Ne,a as f,d as s,b as n,f as ze,g as p,j as U,k as Le,l as Ye,m as t,n as m,t as r,o,p as M}from"../chunks/index.093f8863.js";import{C as qe,H as c,E as Oe}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.fd4683b8.js";import{C as Y}from"../chunks/CodeBlock.c9649b11.js";function De(ve){let y,q,F,O,$,D,T,K,h,Ve=`Lighteval allows you to use VLLM as a backend, providing significant speedups for model evaluation. | |
| To use VLLM, simply change the <code>model_args</code> to reflect the arguments you want to pass to VLLM.`,ee,g,xe='<p>Documentation for VLLM engine arguments can be found <a href="https://docs.vllm.ai/en/latest/serving/engine_args.html" rel="nofollow">here</a></p>',le,w,se,j,te,J,ae,b,Ge=`VLLM can distribute the model across multiple GPUs using data parallelism, pipeline parallelism, or tensor parallelism. | |
| You can choose the parallelism method by setting the appropriate parameters in the <code>model_args</code>.`,ne,C,ie,_,We="For example, if you have 4 GPUs, you can split the model across them using tensor parallelism:",pe,I,me,L,re,v,Be="If your model fits on a single GPU, you can use data parallelism to speed up the evaluation:",oe,V,Me,x,ue,G,Ee=`For more advanced configurations, you can use a YAML configuration file for the model. | |
| An example configuration file is shown below and can be found at <code>examples/model_configs/vllm_model_config.yaml</code>.`,fe,W,ce,B,Ue,d,Ae=`<p>In case of out-of-memory (OOM) issues, you might need to reduce the context size of the | |
| model as well as reduce the <code>gpu_memory_utilization</code> parameter.</p>`,ye,E,ge,A,de,H,He="<li><code>gpu_memory_utilization</code>: Controls how much GPU memory VLLM can use (default: 0.9)</li> <li><code>max_model_length</code>: Maximum sequence length for the model</li> <li><code>swap_space</code>: Amount of CPU memory to use for swapping (in GB)</li>",$e,Z,Te,k,Ze="<li><code>tensor_parallel_size</code>: Number of GPUs for tensor parallelism</li> <li><code>data_parallel_size</code>: Number of GPUs for data parallelism</li> <li><code>pipeline_parallel_size</code>: Number of GPUs for pipeline parallelism</li>",he,S,we,z,ke="<li><code>temperature</code>: Controls randomness in generation (0.0 = deterministic, 1.0 = random)</li> <li><code>top_p</code>: Nucleus sampling parameter</li> <li><code>top_k</code>: Top-k sampling parameter</li> <li><code>max_new_tokens</code>: Maximum number of tokens to generate</li> <li><code>repetition_penalty</code>: Penalty for repeating tokens</li>",je,X,Je,R,be,Q,Se="<li><strong>Out of Memory Errors</strong>: Reduce <code>gpu_memory_utilization</code> or <code>max_model_length</code></li> <li><strong>Worker Process Issues</strong>: Ensure <code>VLLM_WORKER_MULTIPROC_METHOD=spawn</code> is set for multi-GPU setups</li> <li><strong>Model Loading Errors</strong>: Check that the model name and revision are correct</li>",Ce,P,_e,N,Ie;return $=new qe({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),T=new c({props:{title:"Using VLLM as Backend",local:"using-vllm-as-backend",headingTag:"h1"}}),w=new c({props:{title:"Basic Usage",local:"basic-usage",headingTag:"h2"}}),j=new Y({props:{code:"bGlnaHRldmFsJTIwdmxsbSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMm1vZGVsX25hbWUlM0RIdWdnaW5nRmFjZUg0JTJGemVwaHlyLTdiLWJldGElMjIlMjAlNUMlMEElMjAlMjAlMjAlMjBpZmV2YWw=",highlighted:`lighteval vllm \\ | |
| <span class="hljs-string">"model_name=HuggingFaceH4/zephyr-7b-beta"</span> \\ | |
| ifeval`,wrap:!1}}),J=new c({props:{title:"Parallelism Options",local:"parallelism-options",headingTag:"h2"}}),C=new c({props:{title:"Tensor Parallelism",local:"tensor-parallelism",headingTag:"h3"}}),I=new Y({props:{code:"ZXhwb3J0JTIwVkxMTV9XT1JLRVJfTVVMVElQUk9DX01FVEhPRCUzRHNwYXduJTIwJTI2JTI2JTIwbGlnaHRldmFsJTIwdmxsbSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMm1vZGVsX25hbWUlM0RIdWdnaW5nRmFjZUg0JTJGemVwaHlyLTdiLWJldGElMkN0ZW5zb3JfcGFyYWxsZWxfc2l6ZSUzRDQlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjBpZmV2YWw=",highlighted:`<span class="hljs-built_in">export</span> VLLM_WORKER_MULTIPROC_METHOD=spawn && lighteval vllm \\ | |
| <span class="hljs-string">"model_name=HuggingFaceH4/zephyr-7b-beta,tensor_parallel_size=4"</span> \\ | |
| ifeval`,wrap:!1}}),L=new c({props:{title:"Data Parallelism",local:"data-parallelism",headingTag:"h3"}}),V=new Y({props:{code:"ZXhwb3J0JTIwVkxMTV9XT1JLRVJfTVVMVElQUk9DX01FVEhPRCUzRHNwYXduJTIwJTI2JTI2JTIwbGlnaHRldmFsJTIwdmxsbSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMm1vZGVsX25hbWUlM0RIdWdnaW5nRmFjZUg0JTJGemVwaHlyLTdiLWJldGElMkNkYXRhX3BhcmFsbGVsX3NpemUlM0Q0JTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwaWZldmFs",highlighted:`<span class="hljs-built_in">export</span> VLLM_WORKER_MULTIPROC_METHOD=spawn && lighteval vllm \\ | |
| <span class="hljs-string">"model_name=HuggingFaceH4/zephyr-7b-beta,data_parallel_size=4"</span> \\ | |
| ifeval`,wrap:!1}}),x=new c({props:{title:"Using a Configuration File",local:"using-a-configuration-file",headingTag:"h2"}}),W=new Y({props:{code:"bGlnaHRldmFsJTIwdmxsbSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMmV4YW1wbGVzJTJGbW9kZWxfY29uZmlncyUyRnZsbG1fbW9kZWxfY29uZmlnLnlhbWwlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjBpZmV2YWw=",highlighted:`lighteval vllm \\ | |
| <span class="hljs-string">"examples/model_configs/vllm_model_config.yaml"</span> \\ | |
| ifeval`,wrap:!1}}),B=new Y({props:{code:"bW9kZWxfcGFyYW1ldGVycyUzQSUwQSUyMCUyMCUyMCUyMG1vZGVsX25hbWUlM0ElMjAlMjJIdWdnaW5nRmFjZVRCJTJGU21vbExNLTEuN0ItSW5zdHJ1Y3QlMjIlMEElMjAlMjAlMjAlMjByZXZpc2lvbiUzQSUyMCUyMm1haW4lMjIlMEElMjAlMjAlMjAlMjBkdHlwZSUzQSUyMCUyMmJmbG9hdDE2JTIyJTBBJTIwJTIwJTIwJTIwdGVuc29yX3BhcmFsbGVsX3NpemUlM0ElMjAxJTBBJTIwJTIwJTIwJTIwZGF0YV9wYXJhbGxlbF9zaXplJTNBJTIwMSUwQSUyMCUyMCUyMCUyMHBpcGVsaW5lX3BhcmFsbGVsX3NpemUlM0ElMjAxJTBBJTIwJTIwJTIwJTIwZ3B1X21lbW9yeV91dGlsaXphdGlvbiUzQSUyMDAuOSUwQSUyMCUyMCUyMCUyMG1heF9tb2RlbF9sZW5ndGglM0ElMjAyMDQ4JTBBJTIwJTIwJTIwJTIwc3dhcF9zcGFjZSUzQSUyMDQlMEElMjAlMjAlMjAlMjBzZWVkJTNBJTIwMSUwQSUyMCUyMCUyMCUyMHRydXN0X3JlbW90ZV9jb2RlJTNBJTIwVHJ1ZSUwQSUyMCUyMCUyMCUyMGFkZF9zcGVjaWFsX3Rva2VucyUzQSUyMFRydWUlMEElMjAlMjAlMjAlMjBtdWx0aWNob2ljZV9jb250aW51YXRpb25zX3N0YXJ0X3NwYWNlJTNBJTIwVHJ1ZSUwQSUyMCUyMCUyMCUyMHBhaXJ3aXNlX3Rva2VuaXphdGlvbiUzQSUyMFRydWUlMEElMjAlMjAlMjAlMjBzdWJmb2xkZXIlM0ElMjBudWxsJTBBJTIwJTIwJTIwJTIwZ2VuZXJhdGlvbl9wYXJhbWV0ZXJzJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwcHJlc2VuY2VfcGVuYWx0eSUzQSUyMDAuMCUwQSUyMCUyMCUyMCUyMCUyMCUyMHJlcGV0aXRpb25fcGVuYWx0eSUzQSUyMDEuMCUwQSUyMCUyMCUyMCUyMCUyMCUyMGZyZXF1ZW5jeV9wZW5hbHR5JTNBJTIwMC4wJTBBJTIwJTIwJTIwJTIwJTIwJTIwdGVtcGVyYXR1cmUlM0ElMjAxLjAlMEElMjAlMjAlMjAlMjAlMjAlMjB0b3BfayUzQSUyMDUwJTBBJTIwJTIwJTIwJTIwJTIwJTIwbWluX3AlM0ElMjAwLjAlMEElMjAlMjAlMjAlMjAlMjAlMjB0b3BfcCUzQSUyMDEuMCUwQSUyMCUyMCUyMCUyMCUyMCUyMHNlZWQlM0ElMjA0MiUwQSUyMCUyMCUyMCUyMCUyMCUyMHN0b3BfdG9rZW5zJTNBJTIwbnVsbCUwQSUyMCUyMCUyMCUyMCUyMCUyMG1heF9uZXdfdG9rZW5zJTNBJTIwMTAyNCUwQSUyMCUyMCUyMCUyMCUyMCUyMG1pbl9uZXdfdG9rZW5zJTNBJTIwMA==",highlighted:`<span class="hljs-attr">model_parameters:</span> | |
| <span class="hljs-attr">model_name:</span> <span class="hljs-string">"HuggingFaceTB/SmolLM-1.7B-Instruct"</span> | |
| <span class="hljs-attr">revision:</span> <span class="hljs-string">"main"</span> | |
| <span class="hljs-attr">dtype:</span> <span class="hljs-string">"bfloat16"</span> | |
| <span class="hljs-attr">tensor_parallel_size:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-attr">data_parallel_size:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-attr">pipeline_parallel_size:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-attr">gpu_memory_utilization:</span> <span class="hljs-number">0.9</span> | |
| <span class="hljs-attr">max_model_length:</span> <span class="hljs-number">2048</span> | |
| <span class="hljs-attr">swap_space:</span> <span class="hljs-number">4</span> | |
| <span class="hljs-attr">seed:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-attr">trust_remote_code:</span> <span class="hljs-literal">True</span> | |
| <span class="hljs-attr">add_special_tokens:</span> <span class="hljs-literal">True</span> | |
| <span class="hljs-attr">multichoice_continuations_start_space:</span> <span class="hljs-literal">True</span> | |
| <span class="hljs-attr">pairwise_tokenization:</span> <span class="hljs-literal">True</span> | |
| <span class="hljs-attr">subfolder:</span> <span class="hljs-literal">null</span> | |
| <span class="hljs-attr">generation_parameters:</span> | |
| <span class="hljs-attr">presence_penalty:</span> <span class="hljs-number">0.0</span> | |
| <span class="hljs-attr">repetition_penalty:</span> <span class="hljs-number">1.0</span> | |
| <span class="hljs-attr">frequency_penalty:</span> <span class="hljs-number">0.0</span> | |
| <span class="hljs-attr">temperature:</span> <span class="hljs-number">1.0</span> | |
| <span class="hljs-attr">top_k:</span> <span class="hljs-number">50</span> | |
| <span class="hljs-attr">min_p:</span> <span class="hljs-number">0.0</span> | |
| <span class="hljs-attr">top_p:</span> <span class="hljs-number">1.0</span> | |
| <span class="hljs-attr">seed:</span> <span class="hljs-number">42</span> | |
| <span class="hljs-attr">stop_tokens:</span> <span class="hljs-literal">null</span> | |
| <span class="hljs-attr">max_new_tokens:</span> <span class="hljs-number">1024</span> | |
| <span class="hljs-attr">min_new_tokens:</span> <span class="hljs-number">0</span>`,wrap:!1}}),E=new c({props:{title:"Key VLLM Parameters",local:"key-vllm-parameters",headingTag:"h2"}}),A=new c({props:{title:"Memory Management",local:"memory-management",headingTag:"h3"}}),Z=new c({props:{title:"Parallelism Settings",local:"parallelism-settings",headingTag:"h3"}}),S=new c({props:{title:"Generation Parameters",local:"generation-parameters",headingTag:"h3"}}),X=new c({props:{title:"Troubleshooting",local:"troubleshooting",headingTag:"h2"}}),R=new c({props:{title:"Common Issues",local:"common-issues",headingTag:"h3"}}),P=new Oe({props:{source:"https://github.com/huggingface/lighteval/blob/main/docs/source/use-vllm-as-backend.mdx"}}),{c(){y=u("meta"),q=a(),F=u("p"),O=a(),i($.$$.fragment),D=a(),i(T.$$.fragment),K=a(),h=u("p"),h.innerHTML=Ve,ee=a(),g=u("blockquote"),g.innerHTML=xe,le=a(),i(w.$$.fragment),se=a(),i(j.$$.fragment),te=a(),i(J.$$.fragment),ae=a(),b=u("p"),b.innerHTML=Ge,ne=a(),i(C.$$.fragment),ie=a(),_=u("p"),_.textContent=We,pe=a(),i(I.$$.fragment),me=a(),i(L.$$.fragment),re=a(),v=u("p"),v.textContent=Be,oe=a(),i(V.$$.fragment),Me=a(),i(x.$$.fragment),ue=a(),G=u("p"),G.innerHTML=Ee,fe=a(),i(W.$$.fragment),ce=a(),i(B.$$.fragment),Ue=a(),d=u("blockquote"),d.innerHTML=Ae,ye=a(),i(E.$$.fragment),ge=a(),i(A.$$.fragment),de=a(),H=u("ul"),H.innerHTML=He,$e=a(),i(Z.$$.fragment),Te=a(),k=u("ul"),k.innerHTML=Ze,he=a(),i(S.$$.fragment),we=a(),z=u("ul"),z.innerHTML=ke,je=a(),i(X.$$.fragment),Je=a(),i(R.$$.fragment),be=a(),Q=u("ol"),Q.innerHTML=Se,Ce=a(),i(P.$$.fragment),_e=a(),N=u("p"),this.h()},l(e){const l=Ne("svelte-u9bgzb",document.head);y=f(l,"META",{name:!0,content:!0}),l.forEach(s),q=n(e),F=f(e,"P",{}),ze(F).forEach(s),O=n(e),p($.$$.fragment,e),D=n(e),p(T.$$.fragment,e),K=n(e),h=f(e,"P",{"data-svelte-h":!0}),U(h)!=="svelte-yn9yfv"&&(h.innerHTML=Ve),ee=n(e),g=f(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),U(g)!=="svelte-lk22ul"&&(g.innerHTML=xe),le=n(e),p(w.$$.fragment,e),se=n(e),p(j.$$.fragment,e),te=n(e),p(J.$$.fragment,e),ae=n(e),b=f(e,"P",{"data-svelte-h":!0}),U(b)!=="svelte-8yg3ux"&&(b.innerHTML=Ge),ne=n(e),p(C.$$.fragment,e),ie=n(e),_=f(e,"P",{"data-svelte-h":!0}),U(_)!=="svelte-1yq70yf"&&(_.textContent=We),pe=n(e),p(I.$$.fragment,e),me=n(e),p(L.$$.fragment,e),re=n(e),v=f(e,"P",{"data-svelte-h":!0}),U(v)!=="svelte-4fixmv"&&(v.textContent=Be),oe=n(e),p(V.$$.fragment,e),Me=n(e),p(x.$$.fragment,e),ue=n(e),G=f(e,"P",{"data-svelte-h":!0}),U(G)!=="svelte-1kxd4tc"&&(G.innerHTML=Ee),fe=n(e),p(W.$$.fragment,e),ce=n(e),p(B.$$.fragment,e),Ue=n(e),d=f(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),U(d)!=="svelte-14cogx9"&&(d.innerHTML=Ae),ye=n(e),p(E.$$.fragment,e),ge=n(e),p(A.$$.fragment,e),de=n(e),H=f(e,"UL",{"data-svelte-h":!0}),U(H)!=="svelte-9negh4"&&(H.innerHTML=He),$e=n(e),p(Z.$$.fragment,e),Te=n(e),k=f(e,"UL",{"data-svelte-h":!0}),U(k)!=="svelte-1ngmbm6"&&(k.innerHTML=Ze),he=n(e),p(S.$$.fragment,e),we=n(e),z=f(e,"UL",{"data-svelte-h":!0}),U(z)!=="svelte-a3kczx"&&(z.innerHTML=ke),je=n(e),p(X.$$.fragment,e),Je=n(e),p(R.$$.fragment,e),be=n(e),Q=f(e,"OL",{"data-svelte-h":!0}),U(Q)!=="svelte-togz7f"&&(Q.innerHTML=Se),Ce=n(e),p(P.$$.fragment,e),_e=n(e),N=f(e,"P",{}),ze(N).forEach(s),this.h()},h(){Le(y,"name","hf:doc:metadata"),Le(y,"content",Ke),Le(g,"class","tip"),Le(d,"class","warning")},m(e,l){Ye(document.head,y),t(e,q,l),t(e,F,l),t(e,O,l),m($,e,l),t(e,D,l),m(T,e,l),t(e,K,l),t(e,h,l),t(e,ee,l),t(e,g,l),t(e,le,l),m(w,e,l),t(e,se,l),m(j,e,l),t(e,te,l),m(J,e,l),t(e,ae,l),t(e,b,l),t(e,ne,l),m(C,e,l),t(e,ie,l),t(e,_,l),t(e,pe,l),m(I,e,l),t(e,me,l),m(L,e,l),t(e,re,l),t(e,v,l),t(e,oe,l),m(V,e,l),t(e,Me,l),m(x,e,l),t(e,ue,l),t(e,G,l),t(e,fe,l),m(W,e,l),t(e,ce,l),m(B,e,l),t(e,Ue,l),t(e,d,l),t(e,ye,l),m(E,e,l),t(e,ge,l),m(A,e,l),t(e,de,l),t(e,H,l),t(e,$e,l),m(Z,e,l),t(e,Te,l),t(e,k,l),t(e,he,l),m(S,e,l),t(e,we,l),t(e,z,l),t(e,je,l),m(X,e,l),t(e,Je,l),m(R,e,l),t(e,be,l),t(e,Q,l),t(e,Ce,l),m(P,e,l),t(e,_e,l),t(e,N,l),Ie=!0},p:Re,i(e){Ie||(r($.$$.fragment,e),r(T.$$.fragment,e),r(w.$$.fragment,e),r(j.$$.fragment,e),r(J.$$.fragment,e),r(C.$$.fragment,e),r(I.$$.fragment,e),r(L.$$.fragment,e),r(V.$$.fragment,e),r(x.$$.fragment,e),r(W.$$.fragment,e),r(B.$$.fragment,e),r(E.$$.fragment,e),r(A.$$.fragment,e),r(Z.$$.fragment,e),r(S.$$.fragment,e),r(X.$$.fragment,e),r(R.$$.fragment,e),r(P.$$.fragment,e),Ie=!0)},o(e){o($.$$.fragment,e),o(T.$$.fragment,e),o(w.$$.fragment,e),o(j.$$.fragment,e),o(J.$$.fragment,e),o(C.$$.fragment,e),o(I.$$.fragment,e),o(L.$$.fragment,e),o(V.$$.fragment,e),o(x.$$.fragment,e),o(W.$$.fragment,e),o(B.$$.fragment,e),o(E.$$.fragment,e),o(A.$$.fragment,e),o(Z.$$.fragment,e),o(S.$$.fragment,e),o(X.$$.fragment,e),o(R.$$.fragment,e),o(P.$$.fragment,e),Ie=!1},d(e){e&&(s(q),s(F),s(O),s(D),s(K),s(h),s(ee),s(g),s(le),s(se),s(te),s(ae),s(b),s(ne),s(ie),s(_),s(pe),s(me),s(re),s(v),s(oe),s(Me),s(ue),s(G),s(fe),s(ce),s(Ue),s(d),s(ye),s(ge),s(de),s(H),s($e),s(Te),s(k),s(he),s(we),s(z),s(je),s(Je),s(be),s(Q),s(Ce),s(_e),s(N)),s(y),M($,e),M(T,e),M(w,e),M(j,e),M(J,e),M(C,e),M(I,e),M(L,e),M(V,e),M(x,e),M(W,e),M(B,e),M(E,e),M(A,e),M(Z,e),M(S,e),M(X,e),M(R,e),M(P,e)}}}const Ke='{"title":"Using VLLM as Backend","local":"using-vllm-as-backend","sections":[{"title":"Basic Usage","local":"basic-usage","sections":[],"depth":2},{"title":"Parallelism Options","local":"parallelism-options","sections":[{"title":"Tensor Parallelism","local":"tensor-parallelism","sections":[],"depth":3},{"title":"Data Parallelism","local":"data-parallelism","sections":[],"depth":3}],"depth":2},{"title":"Using a Configuration File","local":"using-a-configuration-file","sections":[],"depth":2},{"title":"Key VLLM Parameters","local":"key-vllm-parameters","sections":[{"title":"Memory Management","local":"memory-management","sections":[],"depth":3},{"title":"Parallelism Settings","local":"parallelism-settings","sections":[],"depth":3},{"title":"Generation Parameters","local":"generation-parameters","sections":[],"depth":3}],"depth":2},{"title":"Troubleshooting","local":"troubleshooting","sections":[{"title":"Common Issues","local":"common-issues","sections":[],"depth":3}],"depth":2}],"depth":1}';function el(ve){return Qe(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class nl extends Pe{constructor(y){super(),Fe(this,y,el,De,Xe,{})}}export{nl as component}; | |
Xet Storage Details
- Size:
- 15.8 kB
- Xet hash:
- 7617a88cf61a87683ce725f2e4cc702afe7362bdfdde881aedf230d65dc13655
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.