Buckets:

HuggingFaceDocBuilder's picture
download
raw
16.9 kB
import { s as gt, a as $t, n as vt, o as Mt } from "../chunks/scheduler.3a17fb72.js";
import { S as yt, i as Ut, e as s, s as n, c as m, h as bt, a as o, d as l, b as i, f as Xe, g as r, j as p, k as y, l as kt, m as a, n as c, t as u, o as f, p as d } from "../chunks/index.093f8863.js";
import { C as wt, H as h, E as Ct } from "../chunks/MermaidChart.svelte_svelte_type_style_lang.5e7ea2bd.js";
import { C as ie } from "../chunks/CodeBlock.09235327.js";

/**
 * Fragment factory for the "Quick Tour" documentation page.
 *
 * Builds the static HTML snippets and child components of the page and returns
 * an object exposing the lifecycle hooks `c`, `l`, `h`, `m`, `p`, `i`, `o`, `d`.
 * The single-letter helpers are imported from `../chunks/index.*.js`, which is
 * not visible here; the role notes below are inferred from how each helper is
 * called and presumably follow Svelte's compiled-output conventions — verify
 * against that chunk.
 *
 * @param {*} Ve - Unused by this fragment (kept for the caller's signature).
 * @returns {object} Fragment object with the lifecycle hooks listed above.
 */
function Tt(Ve) {
  // DOM nodes, whitespace separators and child component instances. They are
  // assigned in `c()` / `l()` (nodes) or just below (components).
  let $, se, ae, oe, U, me, b, re, v, pe, k, ce, w, ue, g, De, fe, C, de, T, he, x, ge, L, $e, _, ve, H, Me, R, ye,
    j, Ue, G, be, Z, ke, W, we, E, Ce, Q, Te, J, xe, A, Le, S, _e, q, He, F, Re, I, je, Y, Ge, N, Ze, P, We, B, Ee,
    X, Qe, V, Je, z, Ae, O, Se, D, qe, K, Fe, M, Ie, ee, Ye, te, Ne, le, Pe, ne, Be;

  // Static content of the page. Line breaks inside the template literals are
  // part of the strings and must stay at column 0.
  const ze = `<p>We recommend using the <code>--help</code> flag to get more information about the
available options for each command.
<code>lighteval --help</code></p>`;
  const Oe = "Lighteval can be used with several different commands, each optimized for different evaluation scenarios.";
  const Ke = `<li><code>lighteval eval</code>: Use <a href="https://inspect.aisi.org.uk/" rel="nofollow">inspect-ai</a> as backend to evaluate and inspect your models ! (prefered way)</li> <li><code>lighteval accelerate</code>: Evaluate models on CPU or one or more GPUs using <a href="https://github.com/huggingface/accelerate" rel="nofollow">🤗
Accelerate</a></li> <li><code>lighteval nanotron</code>: Evaluate models in distributed settings using <a href="https://github.com/huggingface/nanotron" rel="nofollow">⚡️
Nanotron</a></li> <li><code>lighteval vllm</code>: Evaluate models on one or more GPUs using <a href="https://github.com/vllm-project/vllm" rel="nofollow">🚀
VLLM</a></li> <li><code>lighteval custom</code>: Evaluate custom models (can be anything)</li> <li><code>lighteval sglang</code>: Evaluate models using <a href="https://github.com/sgl-project/sglang" rel="nofollow">SGLang</a> as backend</li> <li><code>lighteval endpoint</code>: Evaluate models using various endpoints as backend
<ul><li><code>lighteval endpoint inference-endpoint</code>: Evaluate models using Hugging Face’s <a href="https://huggingface.co/inference-endpoints/dedicated" rel="nofollow">Inference Endpoints API</a></li> <li><code>lighteval endpoint tgi</code>: Evaluate models using <a href="https://huggingface.co/docs/text-generation-inference/en/index" rel="nofollow">🔗 Text Generation Inference</a> running locally</li> <li><code>lighteval endpoint litellm</code>: Evaluate models on any compatible API using <a href="https://www.litellm.ai/" rel="nofollow">LiteLLM</a></li> <li><code>lighteval endpoint inference-providers</code>: Evaluate models using <a href="https://huggingface.co/docs/inference-providers/en/index" rel="nofollow">HuggingFace’s inference providers</a> as backend</li></ul></li>`;
  const et = "<li><code>lighteval baseline</code>: Compute baselines for given tasks</li>";
  const tt = `<li><code>lighteval tasks</code>: List or inspect tasks
<ul><li><code>lighteval tasks list</code>: List all available tasks</li> <li><code>lighteval tasks inspect</code>: Inspect a specific task to see its configuration and samples</li> <li><code>lighteval tasks create</code>: Create a new task from a template</li></ul></li>`;
  const lt = `To evaluate <code>GPT-2</code> on the Truthful QA benchmark with <a href="https://github.com/huggingface/accelerate" rel="nofollow">🤗
Accelerate</a>, run:`;
  const at = "Tasks have a function applied at the sample level and one at the corpus level. For example,";
  const nt = `<li>an exact match can be applied per sample, then averaged over the corpus to give the final score</li> <li>samples can be left untouched before applying Corpus BLEU at the corpus level
etc.</li>`;
  const it = `If the task you are looking at has a sample level function (<code>sample_level_fn</code>) which can be parametrized, you can pass parameters in the CLI.
For example`;
  const st = `All officially supported tasks can be found at the <a href="available-tasks">tasks_list</a> and in the
<a href="https://github.com/huggingface/lighteval/tree/main/src/lighteval/tasks/extended" rel="nofollow">extended folder</a>.
Moreover, community-provided tasks can be found in the
<a href="https://github.com/huggingface/lighteval/tree/main/community_tasks" rel="nofollow">community</a> folder.`;
  const ot = `For more details on the implementation of the tasks, such as how prompts are constructed or which metrics are used, you can examine the
<a href="https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/default_tasks.py" rel="nofollow">implementation file</a>.`;
  const mt = `Running multiple tasks is supported, either with a comma-separated list or by specifying a file path.
The file should be structured like <a href="https://github.com/huggingface/lighteval/blob/main/examples/tasks/recommended_set.txt" rel="nofollow">examples/tasks/recommended_set.txt</a>.
When specifying a path to a file, it should start with <code>./</code>.`;
  const rt = `The <code>model-args</code> argument takes a string representing a list of model
arguments. The arguments allowed vary depending on the backend you use and
correspond to the fields of the model configurations.`;
  const pt = 'The model configurations can be found <a href="./package_reference/models">here</a>.';
  const ct = `All models allow you to post-process your reasoning model predictions
to remove the thinking tokens from the trace used to compute the metrics,
using <code>--remove-reasoning-tags</code> and <code>--reasoning-tags</code> to specify which
reasoning tags to remove (defaults to <code>&lt;think&gt;</code> and <code>&lt;/think&gt;</code>).`;
  const ut = `Here’s an example with <code>mistralai/Magistral-Small-2507</code> which outputs custom
thinking tokens:`;
  const ft = "To evaluate a model trained with Nanotron on a single GPU:";
  const dt = "<p>Nanotron models cannot be evaluated without torchrun.</p>";
  const ht = `The <code>nproc-per-node</code> argument should match the data, tensor, and pipeline
parallelism configured in the <code>lighteval_config_template.yaml</code> file.
That is: <code>nproc-per-node = data_parallelism * tensor_parallelism * pipeline_parallelism</code>.`;

  // Child components, in document order.
  U = new wt({ props: { containerStyle: "float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;" } });
  b = new h({ props: { title: "Quick Tour", local: "quick-tour", headingTag: "h1" } });
  w = new h({ props: { title: "Find your benchmark", local: "find-your-benchmark", headingTag: "h2" } });
  C = new h({ props: { title: "Available Commands", local: "available-commands", headingTag: "h2" } });
  T = new h({ props: { title: "Evaluation Backends", local: "evaluation-backends", headingTag: "h3" } });
  L = new h({ props: { title: "Evaluation Utils", local: "evaluation-utils", headingTag: "h3" } });
  H = new h({ props: { title: "Utils", local: "utils", headingTag: "h3" } });
  j = new h({ props: { title: "Basic Usage", local: "basic-usage", headingTag: "h2" } });
  // For every CodeBlock, `code` is base64 of the URI-encoded snippet and
  // `highlighted` is its HTML rendering; continuation lines in `highlighted`
  // carry the same leading spaces that the decoded `code` payload has.
  Z = new ie({
    props: {
      code: "bGlnaHRldmFsJTIwYWNjZWxlcmF0ZSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMCUyMm1vZGVsX25hbWUlM0RvcGVuYWktY29tbXVuaXR5JTJGZ3B0MiUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMHRydXRoZnVscWElM0FtYw==",
      highlighted: `lighteval accelerate \\
     <span class="hljs-string">&quot;model_name=openai-community/gpt2&quot;</span> \\
     truthfulqa:mc`,
      wrap: false,
    },
  });
  W = new h({ props: { title: "Task Specification", local: "task-specification", headingTag: "h3" } });
  A = new ie({
    props: {
      code: "JTdCdGFzayU3RCU0MCU3QnBhcmFtZXRlcl9uYW1lMSU3RCUzRCU3QnZhbHVlMSU3RCU0MCU3QnBhcmFtZXRlcl9uYW1lMiU3RCUzRCU3QnZhbHVlMiU3RCUyQy4uLiU3QzA=",
      highlighted: "{task}@{parameter_name1}={value1}@{parameter_name2}={value2},...|0",
      wrap: false,
    },
  });
  F = new h({ props: { title: "Running Multiple Tasks", local: "running-multiple-tasks", headingTag: "h3" } });
  Y = new ie({
    props: {
      code: "bGlnaHRldmFsJTIwYWNjZWxlcmF0ZSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMCUyMm1vZGVsX25hbWUlM0RvcGVuYWktY29tbXVuaXR5JTJGZ3B0MiUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMC4lMkZwYXRoJTJGdG8lMkZsaWdodGV2YWwlMkZleGFtcGxlcyUyRnRhc2tzJTJGcmVjb21tZW5kZWRfc2V0LnR4dCUwQSUyMyUyMG9yJTJDJTIwZS5nLiUyQyUyMCUyMnRydXRoZnVscWElM0FtYyU3QzAlMkNnc204ayU3QzMlMjI=",
      highlighted: `lighteval accelerate \\
     <span class="hljs-string">&quot;model_name=openai-community/gpt2&quot;</span> \\
     ./path/to/lighteval/examples/tasks/recommended_set.txt
<span class="hljs-comment"># or, e.g., &quot;truthfulqa:mc|0,gsm8k|3&quot;</span>`,
      wrap: false,
    },
  });
  N = new h({ props: { title: "Backend Configuration", local: "backend-configuration", headingTag: "h2" } });
  P = new h({ props: { title: "General Information", local: "general-information", headingTag: "h3" } });
  O = new ie({
    props: {
      code: "bGlnaHRldmFsJTIwdmxsbSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMm1vZGVsX25hbWUlM0RtaXN0cmFsYWklMkZNYWdpc3RyYWwtU21hbGwtMjUwNyUyQ2R0eXBlJTNEZmxvYXQxNiUyQ2RhdGFfcGFyYWxsZWxfc2l6ZSUzRDQlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjBhaW1lMjQlMjAlNUMlMEElMjAlMjAlMjAlMjAtLXJlbW92ZS1yZWFzb25pbmctdGFncyUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tcmVhc29uaW5nLXRhZ3MlM0QlMjIlNUIoJyU1QlRISU5LJTVEJyUyQyclNUIlMkZUSElOSyU1RCcpJTVEJTIy",
      highlighted: `lighteval vllm \\
    <span class="hljs-string">&quot;model_name=mistralai/Magistral-Small-2507,dtype=float16,data_parallel_size=4&quot;</span> \\
    aime24 \\
    --remove-reasoning-tags \\
    --reasoning-tags=<span class="hljs-string">&quot;[(&#x27;[THINK]&#x27;,&#x27;[/THINK]&#x27;)]&quot;</span>`,
      wrap: false,
    },
  });
  D = new h({ props: { title: "Nanotron", local: "nanotron", headingTag: "h3" } });
  ee = new ie({
    props: {
      code: "dG9yY2hydW4lMjAtLXN0YW5kYWxvbmUlMjAtLW5ub2RlcyUzRDElMjAtLW5wcm9jLXBlci1ub2RlJTNEMSUyMCU1QyUwQSUyMCUyMCUyMCUyMHNyYyUyRmxpZ2h0ZXZhbCUyRl9fbWFpbl9fLnB5JTIwbmFub3Ryb24lMjAlNUMlMEElMjAlMjAlMjAlMjAtLWNoZWNrcG9pbnQtY29uZmlnLXBhdGglMjAuLiUyRm5hbm90cm9uJTJGY2hlY2twb2ludHMlMkYxMCUyRmNvbmZpZy55YW1sJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1saWdodGV2YWwtY29uZmlnLXBhdGglMjBleGFtcGxlcyUyRm5hbm90cm9uJTJGbGlnaHRldmFsX2NvbmZpZ19vdmVycmlkZV90ZW1wbGF0ZS55YW1s",
      highlighted: `torchrun --standalone --nnodes=1 --nproc-per-node=1 \\
    src/lighteval/__main__.py nanotron \\
    --checkpoint-config-path ../nanotron/checkpoints/10/config.yaml \\
    --lighteval-config-path examples/nanotron/lighteval_config_override_template.yaml`,
      wrap: false,
    },
  });
  le = new Ct({ props: { source: "https://github.com/huggingface/lighteval/blob/main/docs/source/quicktour.mdx" } });

  // Every child component, in the order the hooks below visit them.
  const components = [U, b, w, C, T, L, H, j, Z, W, A, F, Y, N, P, O, D, ee, le];

  return {
    // Builds every node from scratch (`s(tag)` for elements, `n()` for the
    // separators between them) and lets each child build its own fragment.
    c() {
      $ = s("meta"); se = n();
      ae = s("p"); oe = n();
      m(U.$$.fragment); me = n();
      m(b.$$.fragment); re = n();
      v = s("blockquote"); v.innerHTML = ze; pe = n();
      k = s("p"); k.textContent = Oe; ce = n();
      m(w.$$.fragment); ue = n();
      g = s("iframe"); fe = n();
      m(C.$$.fragment); de = n();
      m(T.$$.fragment); he = n();
      x = s("ul"); x.innerHTML = Ke; ge = n();
      m(L.$$.fragment); $e = n();
      _ = s("ul"); _.innerHTML = et; ve = n();
      m(H.$$.fragment); Me = n();
      R = s("ul"); R.innerHTML = tt; ye = n();
      m(j.$$.fragment); Ue = n();
      G = s("p"); G.innerHTML = lt; be = n();
      m(Z.$$.fragment); ke = n();
      m(W.$$.fragment); we = n();
      E = s("p"); E.textContent = at; Ce = n();
      Q = s("ul"); Q.innerHTML = nt; Te = n();
      J = s("p"); J.innerHTML = it; xe = n();
      m(A.$$.fragment); Le = n();
      S = s("p"); S.innerHTML = st; _e = n();
      q = s("p"); q.innerHTML = ot; He = n();
      m(F.$$.fragment); Re = n();
      I = s("p"); I.innerHTML = mt; je = n();
      m(Y.$$.fragment); Ge = n();
      m(N.$$.fragment); Ze = n();
      m(P.$$.fragment); We = n();
      B = s("p"); B.innerHTML = rt; Ee = n();
      X = s("p"); X.innerHTML = pt; Qe = n();
      V = s("p"); V.innerHTML = ct; Je = n();
      z = s("p"); z.innerHTML = ut; Ae = n();
      m(O.$$.fragment); Se = n();
      m(D.$$.fragment); qe = n();
      K = s("p"); K.textContent = ft; Fe = n();
      M = s("blockquote"); M.innerHTML = dt; Ie = n();
      m(ee.$$.fragment); Ye = n();
      te = s("p"); te.innerHTML = ht; Ne = n();
      m(le.$$.fragment); Pe = n();
      ne = s("p");
      this.h();
    },

    // Same node sequence as `c()`, but obtained from the existing nodes in `e`
    // (presumably server-rendered markup being hydrated). Static content is
    // only rewritten when the hash returned by `p(node)` differs from the
    // expected "svelte-…" value.
    l(e) {
      const t = bt("svelte-u9bgzb", document.head);
      $ = o(t, "META", { name: true, content: true });
      t.forEach(l);
      se = i(e);
      ae = o(e, "P", {}); Xe(ae).forEach(l); oe = i(e);
      r(U.$$.fragment, e); me = i(e);
      r(b.$$.fragment, e); re = i(e);
      v = o(e, "BLOCKQUOTE", { class: true, "data-svelte-h": true });
      if (p(v) !== "svelte-1a4xnl6") v.innerHTML = ze;
      pe = i(e);
      k = o(e, "P", { "data-svelte-h": true });
      if (p(k) !== "svelte-1g3zc6w") k.textContent = Oe;
      ce = i(e);
      r(w.$$.fragment, e); ue = i(e);
      g = o(e, "IFRAME", { src: true, frameborder: true, width: true, height: true });
      Xe(g).forEach(l);
      fe = i(e);
      r(C.$$.fragment, e); de = i(e);
      r(T.$$.fragment, e); he = i(e);
      x = o(e, "UL", { "data-svelte-h": true });
      if (p(x) !== "svelte-1kzfh3j") x.innerHTML = Ke;
      ge = i(e);
      r(L.$$.fragment, e); $e = i(e);
      _ = o(e, "UL", { "data-svelte-h": true });
      if (p(_) !== "svelte-1hjb9o8") _.innerHTML = et;
      ve = i(e);
      r(H.$$.fragment, e); Me = i(e);
      R = o(e, "UL", { "data-svelte-h": true });
      if (p(R) !== "svelte-1uuqk27") R.innerHTML = tt;
      ye = i(e);
      r(j.$$.fragment, e); Ue = i(e);
      G = o(e, "P", { "data-svelte-h": true });
      if (p(G) !== "svelte-1jqacui") G.innerHTML = lt;
      be = i(e);
      r(Z.$$.fragment, e); ke = i(e);
      r(W.$$.fragment, e); we = i(e);
      E = o(e, "P", { "data-svelte-h": true });
      if (p(E) !== "svelte-1vkd3du") E.textContent = at;
      Ce = i(e);
      Q = o(e, "UL", { "data-svelte-h": true });
      if (p(Q) !== "svelte-1c8qycp") Q.innerHTML = nt;
      Te = i(e);
      J = o(e, "P", { "data-svelte-h": true });
      if (p(J) !== "svelte-15xqaj2") J.innerHTML = it;
      xe = i(e);
      r(A.$$.fragment, e); Le = i(e);
      S = o(e, "P", { "data-svelte-h": true });
      if (p(S) !== "svelte-e70eqq") S.innerHTML = st;
      _e = i(e);
      q = o(e, "P", { "data-svelte-h": true });
      if (p(q) !== "svelte-kxdhy6") q.innerHTML = ot;
      He = i(e);
      r(F.$$.fragment, e); Re = i(e);
      I = o(e, "P", { "data-svelte-h": true });
      if (p(I) !== "svelte-102effw") I.innerHTML = mt;
      je = i(e);
      r(Y.$$.fragment, e); Ge = i(e);
      r(N.$$.fragment, e); Ze = i(e);
      r(P.$$.fragment, e); We = i(e);
      B = o(e, "P", { "data-svelte-h": true });
      if (p(B) !== "svelte-17uu0an") B.innerHTML = rt;
      Ee = i(e);
      X = o(e, "P", { "data-svelte-h": true });
      if (p(X) !== "svelte-quwklj") X.innerHTML = pt;
      Qe = i(e);
      V = o(e, "P", { "data-svelte-h": true });
      if (p(V) !== "svelte-1eknffc") V.innerHTML = ct;
      Je = i(e);
      z = o(e, "P", { "data-svelte-h": true });
      if (p(z) !== "svelte-16brxvi") z.innerHTML = ut;
      Ae = i(e);
      r(O.$$.fragment, e); Se = i(e);
      r(D.$$.fragment, e); qe = i(e);
      K = o(e, "P", { "data-svelte-h": true });
      if (p(K) !== "svelte-l3or2u") K.textContent = ft;
      Fe = i(e);
      M = o(e, "BLOCKQUOTE", { class: true, "data-svelte-h": true });
      if (p(M) !== "svelte-1w9heqt") M.innerHTML = dt;
      Ie = i(e);
      r(ee.$$.fragment, e); Ye = i(e);
      te = o(e, "P", { "data-svelte-h": true });
      if (p(te) !== "svelte-s5fucd") te.innerHTML = ht;
      Ne = i(e);
      r(le.$$.fragment, e); Pe = i(e);
      ne = o(e, "P", {});
      Xe(ne).forEach(l);
      this.h();
    },

    // Applies the static attributes shared by the `c()` and `l()` paths.
    h() {
      y($, "name", "hf:doc:metadata");
      y($, "content", xt);
      y(v, "class", "tip");
      // `src` is only written when `$t` reports the current value differs,
      // which avoids re-assigning the iframe source needlessly.
      if (!$t(g.src, (De = "https://openevals-open-benchmark-index.hf.space"))) {
        y(g, "src", De);
      }
      y(g, "frameborder", "0");
      y(g, "width", "850");
      y(g, "height", "450");
      y(M, "class", "warning");
    },

    // Inserts the metadata tag into <head> and everything else into target `e`
    // before anchor `t`, in document order.
    m(e, t) {
      kt(document.head, $);
      a(e, se, t); a(e, ae, t); a(e, oe, t);
      c(U, e, t); a(e, me, t);
      c(b, e, t); a(e, re, t);
      a(e, v, t); a(e, pe, t);
      a(e, k, t); a(e, ce, t);
      c(w, e, t); a(e, ue, t);
      a(e, g, t); a(e, fe, t);
      c(C, e, t); a(e, de, t);
      c(T, e, t); a(e, he, t);
      a(e, x, t); a(e, ge, t);
      c(L, e, t); a(e, $e, t);
      a(e, _, t); a(e, ve, t);
      c(H, e, t); a(e, Me, t);
      a(e, R, t); a(e, ye, t);
      c(j, e, t); a(e, Ue, t);
      a(e, G, t); a(e, be, t);
      c(Z, e, t); a(e, ke, t);
      c(W, e, t); a(e, we, t);
      a(e, E, t); a(e, Ce, t);
      a(e, Q, t); a(e, Te, t);
      a(e, J, t); a(e, xe, t);
      c(A, e, t); a(e, Le, t);
      a(e, S, t); a(e, _e, t);
      a(e, q, t); a(e, He, t);
      c(F, e, t); a(e, Re, t);
      a(e, I, t); a(e, je, t);
      c(Y, e, t); a(e, Ge, t);
      c(N, e, t); a(e, Ze, t);
      c(P, e, t); a(e, We, t);
      a(e, B, t); a(e, Ee, t);
      a(e, X, t); a(e, Qe, t);
      a(e, V, t); a(e, Je, t);
      a(e, z, t); a(e, Ae, t);
      c(O, e, t); a(e, Se, t);
      c(D, e, t); a(e, qe, t);
      a(e, K, t); a(e, Fe, t);
      a(e, M, t); a(e, Ie, t);
      c(ee, e, t); a(e, Ye, t);
      a(e, te, t); a(e, Ne, t);
      c(le, e, t); a(e, Pe, t);
      a(e, ne, t);
      Be = true;
    },

    // The page has no reactive state, so the update hook is the imported `vt`.
    p: vt,

    // Runs `u` on every child fragment once; `Be` guards against repeating it.
    i(e) {
      if (Be) return;
      for (const component of components) u(component.$$.fragment, e);
      Be = true;
    },

    // Runs `f` on every child fragment and clears the `Be` guard.
    o(e) {
      for (const component of components) f(component.$$.fragment, e);
      Be = false;
    },

    // Tears the fragment down: when `e` is truthy the page-level nodes are
    // passed to `l`; the <head> metadata tag and the child components are
    // always released.
    d(e) {
      if (e) {
        const nodes = [
          se, ae, oe, me, re, v, pe, k, ce, ue, g, fe, de, he, x, ge, $e, _, ve, Me, R, ye, Ue, G, be, ke, we, E, Ce,
          Q, Te, J, xe, Le, S, _e, q, He, Re, I, je, Ge, Ze, We, B, Ee, X, Qe, V, Je, z, Ae, Se, qe, K, Fe, M, Ie, Ye,
          te, Ne, Pe, ne,
        ];
        for (const node of nodes) l(node);
      }
      l($);
      for (const component of components) d(component, e);
    },
  };
}

/** JSON string written to the `hf:doc:metadata` meta tag: the page's heading outline. */
const xt = '{"title":"Quick Tour","local":"quick-tour","sections":[{"title":"Find your benchmark","local":"find-your-benchmark","sections":[],"depth":2},{"title":"Available Commands","local":"available-commands","sections":[{"title":"Evaluation Backends","local":"evaluation-backends","sections":[],"depth":3},{"title":"Evaluation Utils","local":"evaluation-utils","sections":[],"depth":3},{"title":"Utils","local":"utils","sections":[],"depth":3}],"depth":2},{"title":"Basic Usage","local":"basic-usage","sections":[{"title":"Task Specification","local":"task-specification","sections":[],"depth":3},{"title":"Running Multiple Tasks","local":"running-multiple-tasks","sections":[],"depth":3}],"depth":2},{"title":"Backend Configuration","local":"backend-configuration","sections":[{"title":"General Information","local":"general-information","sections":[],"depth":3},{"title":"Nanotron","local":"nanotron","sections":[],"depth":3}],"depth":2}],"depth":1}';

/**
 * Instance function of the page component.
 *
 * Registers a callback through `Mt` that reads the `fw` query parameter and
 * discards the value, then returns an empty array (the component has no
 * instance state).
 *
 * @param {*} Ve - Unused.
 * @returns {Array} Always an empty array.
 */
function Lt(Ve) {
  Mt(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component: wires `Lt` (instance) and `Tt` (fragment) together via `Ut`. */
class Gt extends yt {
  constructor($) {
    super();
    Ut(this, $, Lt, Tt, gt, {});
  }
}

export { Gt as component };

Xet Storage Details

Size:
16.9 kB
·
Xet hash:
326e46d404f4dae2de9f500f599fa5a97b2dd03972cb7272e2661cc8c8b92e55

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.