Buckets:

rtrm's picture
download
raw
12.7 kB
/**
 * Generated, minified page module for the "Use VLLM as backend" documentation page.
 * It appears to be compiled Svelte output (see the "data-svelte-h" / "svelte-..." markers);
 * the helpers imported from ../chunks/* are not visible here, so the roles given below for
 * them are inferred from how they are called -- TODO confirm against the chunk sources.
 * The EditOnGithub props near the end point at docs/source/use-vllm-as-backend.mdx in
 * huggingface/lighteval, which is presumably where text changes should be made.
 *
 * Definitions, in order of appearance:
 *  - Ie(F): fragment holding a single <p> with the out-of-memory advice. It is passed as
 *    the default slot of the Tip component (Ue) that $e creates with warning:!0.
 *  - $e(F): fragment for the whole page. It instantiates three headings (ne), six code
 *    blocks (k), one Tip (Ue) and one EditOnGithub (ge), plus the <p> elements whose
 *    innerHTML comes from the template strings declared at its top. Each code block gets a
 *    `code` prop (looks like base64 of the percent-encoded snippet) and a `highlighted`
 *    prop (pre-rendered highlight.js HTML). The returned object exposes
 *    c / l / h / m / p / i / o / d; from the calls they make, these presumably create the
 *    nodes, claim already-rendered nodes, set the <meta> attributes, insert everything,
 *    forward $$scope changes to the Tip, run intro / outro (guarded by the `te` flag), and
 *    detach / destroy.
 *  - Ze: JSON string (title, local anchor, sections, depth) that h() assigns to the
 *    "content" attribute of the <meta> whose "name" is "hf:doc:metadata".
 *  - We(F): registers a callback through ye that reads the "fw" query parameter from
 *    window.location.search and discards the result; returns an empty array.
 *  - Ae: the component class wiring We and $e together; exported as `component`.
 *
 * NOTE(review): the paragraph string `ie` contains the doubled word "the the". It is left
 * untouched here because it is a runtime string paired with the "svelte-1sv8wkg" marker
 * (presumably derived from the text); fix it in the .mdx source and rebuild.
 */
import{s as ue,o as ye,n as he}from"../chunks/scheduler.7da89386.js";import{S as Je,i as fe,g as w,s as t,r as m,A as je,h as T,f as s,c as n,j as we,u as c,x as y,k as Te,y as be,a,v as r,d as M,t as o,w as d}from"../chunks/index.0b7befd3.js";import{T as Ue}from"../chunks/Tip.1e71740f.js";import{C as k}from"../chunks/CodeBlock.ce33a881.js";import{H as ne,E as ge}from"../chunks/EditOnGithub.0cb2bc8e.js";function Ie(F){let p,h=`In the case of OOM issues, you might need to reduce the context size of the
model as well as reduce the <code>gpu_memory_utilization</code> parameter.`;return{c(){p=w("p"),p.innerHTML=h},l(i){p=T(i,"P",{"data-svelte-h":!0}),y(p)!=="svelte-19r2iaz"&&(p.innerHTML=h)},m(i,R){a(i,p,R)},p:he,d(i){i&&s(p)}}}function $e(F){let p,h,i,R,J,N,f,pe=`Lighteval allows you to use <code>vllm</code> as backend allowing great speedups.
To use, simply change the <code>model_args</code> to reflect the arguments you want to pass to vllm.`,z,j,V,b,ie=`<code>vllm</code> is able to distribute the model across multiple GPUs using data
parallelism, pipeline parallelism or tensor parallelism.
You can choose the parallelism method by setting in the the <code>model_args</code>.`,L,U,me="For example if you have 4 GPUs you can split it across using <code>tensor_parallelism</code>:",X,g,x,I,ce="Or, if your model fits on a single GPU, you can use <code>data_parallelism</code> to speed up the evaluation:",Q,$,S,Z,Y,W,re=`For more advanced configurations, you can use a config file for the model.
An example of a config file is shown below and can be found at <code>examples/model_configs/vllm_model_config.yaml</code>.`,q,B,P,_,D,u,O,C,K,G,Me=`For special kinds of metrics like <code>Pass@K</code> or LiveCodeBench’s <code>codegen</code> metric, you may need to pass specific values like the number of
generations. This can be done in the <code>yaml</code> file in the following way:`,ee,v,le,A,oe=`An optional key <code>metric_options</code> can be passed in the yaml file,
using the name of the metric or metrics, as defined in the <code>Metric.metric_name</code>.
In this case, the <code>codegen_pass@1:16</code> metric defined in our tasks will have the <code>num_samples</code> updated to 16,
independently of the number defined by default.`,se,H,ae,E,te;return J=new ne({props:{title:"Use VLLM as backend",local:"use-vllm-as-backend",headingTag:"h1"}}),j=new k({props:{code:"bGlnaHRldmFsJTIwdmxsbSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMnByZXRyYWluZWQlM0RIdWdnaW5nRmFjZUg0JTJGemVwaHlyLTdiLWJldGElMkNkdHlwZSUzRGZsb2F0MTYlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJsZWFkZXJib2FyZCU3Q3RydXRoZnVscWElM0FtYyU3QzAlN0MwJTIy",highlighted:`lighteval vllm \\
<span class="hljs-string">&quot;pretrained=HuggingFaceH4/zephyr-7b-beta,dtype=float16&quot;</span> \\
<span class="hljs-string">&quot;leaderboard|truthfulqa:mc|0|0&quot;</span>`,wrap:!1}}),g=new k({props:{code:"ZXhwb3J0JTIwVkxMTV9XT1JLRVJfTVVMVElQUk9DX01FVEhPRCUzRHNwYXduJTIwJTI2JTI2JTIwbGlnaHRldmFsJTIwdmxsbSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMnByZXRyYWluZWQlM0RIdWdnaW5nRmFjZUg0JTJGemVwaHlyLTdiLWJldGElMkNkdHlwZSUzRGZsb2F0MTYlMkN0ZW5zb3JfcGFyYWxsZWxfc2l6ZSUzRDQlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJsZWFkZXJib2FyZCU3Q3RydXRoZnVscWElM0FtYyU3QzAlN0MwJTIy",highlighted:`<span class="hljs-built_in">export</span> VLLM_WORKER_MULTIPROC_METHOD=spawn &amp;&amp; lighteval vllm \\
<span class="hljs-string">&quot;pretrained=HuggingFaceH4/zephyr-7b-beta,dtype=float16,tensor_parallel_size=4&quot;</span> \\
<span class="hljs-string">&quot;leaderboard|truthfulqa:mc|0|0&quot;</span>`,wrap:!1}}),$=new k({props:{code:"bGlnaHRldmFsJTIwdmxsbSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMnByZXRyYWluZWQlM0RIdWdnaW5nRmFjZUg0JTJGemVwaHlyLTdiLWJldGElMkNkdHlwZSUzRGZsb2F0MTYlMkNkYXRhX3BhcmFsbGVsX3NpemUlM0Q0JTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIybGVhZGVyYm9hcmQlN0N0cnV0aGZ1bHFhJTNBbWMlN0MwJTdDMCUyMg==",highlighted:`lighteval vllm \\
<span class="hljs-string">&quot;pretrained=HuggingFaceH4/zephyr-7b-beta,dtype=float16,data_parallel_size=4&quot;</span> \\
<span class="hljs-string">&quot;leaderboard|truthfulqa:mc|0|0&quot;</span>`,wrap:!1}}),Z=new ne({props:{title:"Use a config file",local:"use-a-config-file",headingTag:"h2"}}),B=new k({props:{code:"bGlnaHRldmFsJTIwdmxsbSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMmV4YW1wbGVzJTJGbW9kZWxfY29uZmlncyUyRnZsbG1fbW9kZWxfY29uZmlnLnlhbWwlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJsZWFkZXJib2FyZCU3Q3RydXRoZnVscWElM0FtYyU3QzAlN0MwJTIy",highlighted:`lighteval vllm \\
<span class="hljs-string">&quot;examples/model_configs/vllm_model_config.yaml&quot;</span> \\
<span class="hljs-string">&quot;leaderboard|truthfulqa:mc|0|0&quot;</span>`,wrap:!1}}),_=new k({props:{code:"bW9kZWwlM0ElMjAlMjMlMjBNb2RlbCUyMHNwZWNpZmljJTIwcGFyYW1ldGVycyUwQSUyMCUyMGJhc2VfcGFyYW1zJTNBJTBBJTIwJTIwJTIwJTIwbW9kZWxfYXJncyUzQSUyMCUyMnByZXRyYWluZWQlM0RIdWdnaW5nRmFjZVRCJTJGU21vbExNLTEuN0IlMkNyZXZpc2lvbiUzRG1haW4lMkNkdHlwZSUzRGJmbG9hdDE2JTIyJTIwJTIzJTIwTW9kZWwlMjBhcmdzJTIwdGhhdCUyMHlvdSUyMHdvdWxkJTIwcGFzcyUyMGluJTIwdGhlJTIwY29tbWFuZCUyMGxpbmUlMEElMjAlMjBnZW5lcmF0aW9uJTNBJTIwJTIzJTIwR2VuZXJhdGlvbiUyMHNwZWNpZmljJTIwcGFyYW1ldGVycyUwQSUyMCUyMCUyMCUyMHRlbXBlcmF0dXJlJTNBJTIwMC4zJTBBJTIwJTIwJTIwJTIwcmVwZXRpdGlvbl9wZW5hbHR5JTNBJTIwMS4wJTBBJTIwJTIwJTIwJTIwZnJlcXVlbmN5X3BlbmFsdHklM0ElMjAwLjAlMEElMjAlMjAlMjAlMjBwcmVzZW5jZV9wZW5hbHR5JTNBJTIwMC4wJTBBJTIwJTIwJTIwJTIwc2VlZCUzQSUyMDQyJTBBJTIwJTIwJTIwJTIwdG9wX2slM0ElMjAwJTBBJTIwJTIwJTIwJTIwbWluX3AlM0ElMjAwLjAlMEElMjAlMjAlMjAlMjB0b3BfcCUzQSUyMDAuOQ==",highlighted:`<span class="hljs-attr">model:</span> <span class="hljs-comment"># Model specific parameters</span>
<span class="hljs-attr">base_params:</span>
<span class="hljs-attr">model_args:</span> <span class="hljs-string">&quot;pretrained=HuggingFaceTB/SmolLM-1.7B,revision=main,dtype=bfloat16&quot;</span> <span class="hljs-comment"># Model args that you would pass in the command line</span>
<span class="hljs-attr">generation:</span> <span class="hljs-comment"># Generation specific parameters</span>
<span class="hljs-attr">temperature:</span> <span class="hljs-number">0.3</span>
<span class="hljs-attr">repetition_penalty:</span> <span class="hljs-number">1.0</span>
<span class="hljs-attr">frequency_penalty:</span> <span class="hljs-number">0.0</span>
<span class="hljs-attr">presence_penalty:</span> <span class="hljs-number">0.0</span>
<span class="hljs-attr">seed:</span> <span class="hljs-number">42</span>
<span class="hljs-attr">top_k:</span> <span class="hljs-number">0</span>
<span class="hljs-attr">min_p:</span> <span class="hljs-number">0.0</span>
<span class="hljs-attr">top_p:</span> <span class="hljs-number">0.9</span>`,wrap:!1}}),u=new Ue({props:{warning:!0,$$slots:{default:[Ie]},$$scope:{ctx:F}}}),C=new ne({props:{title:"Dynamically changing the metric configuration",local:"dynamically-changing-the-metric-configuration",headingTag:"h2"}}),v=new k({props:{code:"bW9kZWwlM0ElMjAlMjMlMjBNb2RlbCUyMHNwZWNpZmljJTIwcGFyYW1ldGVycyUwQSUyMCUyMGJhc2VfcGFyYW1zJTNBJTBBJTIwJTIwJTIwJTIwbW9kZWxfYXJncyUzQSUyMCUyMnByZXRyYWluZWQlM0RIdWdnaW5nRmFjZVRCJTJGU21vbExNLTEuN0IlMkNyZXZpc2lvbiUzRG1haW4lMkNkdHlwZSUzRGJmbG9hdDE2JTIyJTIwJTIzJTIwTW9kZWwlMjBhcmdzJTIwdGhhdCUyMHlvdSUyMHdvdWxkJTIwcGFzcyUyMGluJTIwdGhlJTIwY29tbWFuZCUyMGxpbmUlMEElMjAlMjBnZW5lcmF0aW9uJTNBJTIwJTIzJTIwR2VuZXJhdGlvbiUyMHNwZWNpZmljJTIwcGFyYW1ldGVycyUwQSUyMCUyMCUyMCUyMHRlbXBlcmF0dXJlJTNBJTIwMC4zJTBBJTIwJTIwJTIwJTIwcmVwZXRpdGlvbl9wZW5hbHR5JTNBJTIwMS4wJTBBJTIwJTIwJTIwJTIwZnJlcXVlbmN5X3BlbmFsdHklM0ElMjAwLjAlMEElMjAlMjAlMjAlMjBwcmVzZW5jZV9wZW5hbHR5JTNBJTIwMC4wJTBBJTIwJTIwJTIwJTIwc2VlZCUzQSUyMDQyJTBBJTIwJTIwJTIwJTIwdG9wX2slM0ElMjAwJTBBJTIwJTIwJTIwJTIwbWluX3AlM0ElMjAwLjAlMEElMjAlMjAlMjAlMjB0b3BfcCUzQSUyMDAuOSUwQSUyMCUyMG1ldHJpY19vcHRpb25zJTNBJTIwJTIzJTIwT3B0aW9uYWwlMjBtZXRyaWMlMjBhcmd1bWVudHMlMEElMjAlMjAlMjAlMjBjb2RlZ2VuX3Bhc3MlNDAxJTNBMTYlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjBudW1fc2FtcGxlcyUzQSUyMDE2",highlighted:`<span class="hljs-attr">model:</span> <span class="hljs-comment"># Model specific parameters</span>
<span class="hljs-attr">base_params:</span>
<span class="hljs-attr">model_args:</span> <span class="hljs-string">&quot;pretrained=HuggingFaceTB/SmolLM-1.7B,revision=main,dtype=bfloat16&quot;</span> <span class="hljs-comment"># Model args that you would pass in the command line</span>
<span class="hljs-attr">generation:</span> <span class="hljs-comment"># Generation specific parameters</span>
<span class="hljs-attr">temperature:</span> <span class="hljs-number">0.3</span>
<span class="hljs-attr">repetition_penalty:</span> <span class="hljs-number">1.0</span>
<span class="hljs-attr">frequency_penalty:</span> <span class="hljs-number">0.0</span>
<span class="hljs-attr">presence_penalty:</span> <span class="hljs-number">0.0</span>
<span class="hljs-attr">seed:</span> <span class="hljs-number">42</span>
<span class="hljs-attr">top_k:</span> <span class="hljs-number">0</span>
<span class="hljs-attr">min_p:</span> <span class="hljs-number">0.0</span>
<span class="hljs-attr">top_p:</span> <span class="hljs-number">0.9</span>
<span class="hljs-attr">metric_options:</span> <span class="hljs-comment"># Optional metric arguments</span>
<span class="hljs-string">codegen_pass@1:16:</span>
<span class="hljs-attr">num_samples:</span> <span class="hljs-number">16</span>`,wrap:!1}}),H=new ge({props:{source:"https://github.com/huggingface/lighteval/blob/main/docs/source/use-vllm-as-backend.mdx"}}),{c(){p=w("meta"),h=t(),i=w("p"),R=t(),m(J.$$.fragment),N=t(),f=w("p"),f.innerHTML=pe,z=t(),m(j.$$.fragment),V=t(),b=w("p"),b.innerHTML=ie,L=t(),U=w("p"),U.innerHTML=me,X=t(),m(g.$$.fragment),x=t(),I=w("p"),I.innerHTML=ce,Q=t(),m($.$$.fragment),S=t(),m(Z.$$.fragment),Y=t(),W=w("p"),W.innerHTML=re,q=t(),m(B.$$.fragment),P=t(),m(_.$$.fragment),D=t(),m(u.$$.fragment),O=t(),m(C.$$.fragment),K=t(),G=w("p"),G.innerHTML=Me,ee=t(),m(v.$$.fragment),le=t(),A=w("p"),A.innerHTML=oe,se=t(),m(H.$$.fragment),ae=t(),E=w("p"),this.h()},l(e){const l=je("svelte-u9bgzb",document.head);p=T(l,"META",{name:!0,content:!0}),l.forEach(s),h=n(e),i=T(e,"P",{}),we(i).forEach(s),R=n(e),c(J.$$.fragment,e),N=n(e),f=T(e,"P",{"data-svelte-h":!0}),y(f)!=="svelte-1ki3g9h"&&(f.innerHTML=pe),z=n(e),c(j.$$.fragment,e),V=n(e),b=T(e,"P",{"data-svelte-h":!0}),y(b)!=="svelte-1sv8wkg"&&(b.innerHTML=ie),L=n(e),U=T(e,"P",{"data-svelte-h":!0}),y(U)!=="svelte-44w0qa"&&(U.innerHTML=me),X=n(e),c(g.$$.fragment,e),x=n(e),I=T(e,"P",{"data-svelte-h":!0}),y(I)!=="svelte-190jhqw"&&(I.innerHTML=ce),Q=n(e),c($.$$.fragment,e),S=n(e),c(Z.$$.fragment,e),Y=n(e),W=T(e,"P",{"data-svelte-h":!0}),y(W)!=="svelte-1j0san7"&&(W.innerHTML=re),q=n(e),c(B.$$.fragment,e),P=n(e),c(_.$$.fragment,e),D=n(e),c(u.$$.fragment,e),O=n(e),c(C.$$.fragment,e),K=n(e),G=T(e,"P",{"data-svelte-h":!0}),y(G)!=="svelte-1t6h94h"&&(G.innerHTML=Me),ee=n(e),c(v.$$.fragment,e),le=n(e),A=T(e,"P",{"data-svelte-h":!0}),y(A)!=="svelte-1vl85gq"&&(A.innerHTML=oe),se=n(e),c(H.$$.fragment,e),ae=n(e),E=T(e,"P",{}),we(E).forEach(s),this.h()},h(){Te(p,"name","hf:doc:metadata"),Te(p,"content",Ze)},m(e,l){be(document.head,p),a(e,h,l),a(e,i,l),a(e,R,l),r(J,e,l),a(e,N,l),a(e,f,l),a(e,z,l),r(j,e,l),a(e,V,l),a(e,b,l),a(e,L,l),a(e,U,l),a(e,X,l),r(g,e,l),a(e,x,l),a(e,I,l),a(e,Q
,l),r($,e,l),a(e,S,l),r(Z,e,l),a(e,Y,l),a(e,W,l),a(e,q,l),r(B,e,l),a(e,P,l),r(_,e,l),a(e,D,l),r(u,e,l),a(e,O,l),r(C,e,l),a(e,K,l),a(e,G,l),a(e,ee,l),r(v,e,l),a(e,le,l),a(e,A,l),a(e,se,l),r(H,e,l),a(e,ae,l),a(e,E,l),te=!0},p(e,[l]){const de={};l&2&&(de.$$scope={dirty:l,ctx:e}),u.$set(de)},i(e){te||(M(J.$$.fragment,e),M(j.$$.fragment,e),M(g.$$.fragment,e),M($.$$.fragment,e),M(Z.$$.fragment,e),M(B.$$.fragment,e),M(_.$$.fragment,e),M(u.$$.fragment,e),M(C.$$.fragment,e),M(v.$$.fragment,e),M(H.$$.fragment,e),te=!0)},o(e){o(J.$$.fragment,e),o(j.$$.fragment,e),o(g.$$.fragment,e),o($.$$.fragment,e),o(Z.$$.fragment,e),o(B.$$.fragment,e),o(_.$$.fragment,e),o(u.$$.fragment,e),o(C.$$.fragment,e),o(v.$$.fragment,e),o(H.$$.fragment,e),te=!1},d(e){e&&(s(h),s(i),s(R),s(N),s(f),s(z),s(V),s(b),s(L),s(U),s(X),s(x),s(I),s(Q),s(S),s(Y),s(W),s(q),s(P),s(D),s(O),s(K),s(G),s(ee),s(le),s(A),s(se),s(ae),s(E)),s(p),d(J,e),d(j,e),d(g,e),d($,e),d(Z,e),d(B,e),d(_,e),d(u,e),d(C,e),d(v,e),d(H,e)}}}const Ze='{"title":"Use VLLM as backend","local":"use-vllm-as-backend","sections":[{"title":"Use a config file","local":"use-a-config-file","sections":[],"depth":2},{"title":"Dynamically changing the metric configuration","local":"dynamically-changing-the-metric-configuration","sections":[],"depth":2}],"depth":1}';function We(F){return ye(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Ae extends Je{constructor(p){super(),fe(this,p,We,$e,ue,{})}}export{Ae as component};

Xet Storage Details

Size:
12.7 kB
·
Xet hash:
1ee9e9cd2125e1dd0c581f9f5a8b63ad5fa58362b53f9ba417991315efabe5d7

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.