Buckets:
/**
 * Compiled Svelte page module for the "Lighteval" documentation index.
 *
 * The module exports one component class (`Oe`, exported as `component`)
 * whose fragment is produced by `Be` and whose instance function is `We`.
 * All DOM/runtime helpers are imported under minified aliases from the
 * sibling `../chunks/*` files; their implementations are not visible here,
 * so the comments below only describe how this file uses them.
 *
 * NOTE(review): this is a build artifact. Copy problems in the strings
 * below (e.g. "prefered", the stray "**: Distributed training and
 * evaluation" tail on the last backend bullet) should be fixed in the
 * upstream document referenced by the `source` prop near the end of `Be`,
 * not here — the `svelte-…` hashes compared during hydration presumably
 * depend on the exact text (TODO confirm).
 */
import { s as He, a as Ie, n as Pe, o as Fe } from "../chunks/scheduler.3a17fb72.js";
import {
  S as Ge,
  i as ze,
  e as f,
  s as i,
  c as s,
  h as Ze,
  a as g,
  d as l,
  b as n,
  f as be,
  g as o,
  j as w,
  k as d,
  l as je,
  m as a,
  n as r,
  t as u,
  o as m,
  p,
} from "../chunks/index.093f8863.js";
import { C as Ae, H as c, E as qe } from "../chunks/MermaidChart.svelte_svelte_type_style_lang.5e7ea2bd.js";
import { C as Se } from "../chunks/CodeBlock.09235327.js";

/**
 * Builds the page fragment: an object exposing the methods
 * `c`, `l`, `h`, `m`, `p`, `i`, `o` and `d`.
 *
 * @param {*} we - Unused by this fragment.
 * @returns {object} Fragment object; see the per-method comments.
 */
function Be(we) {
  // Plain DOM nodes created in c() / claimed in l().
  let v, R, J, Q, V, D, M, O, b, X, K, N, T, Y, L, ee, te, _, le, ae, I, ie, ne, H, se;
  let oe, re, ue, me, pe, h, fe, ge, de, q, ce, $, he, $e, W;
  // iframe `src` values, assigned in h().
  let Ee, Ue;
  // Set to true by m()/i() and to false by o(); guards i() against re-running.
  let ve;

  // ---- Static text / HTML payloads -------------------------------------

  const ye = [
    "🤗 Lighteval is your all-in-one toolkit for evaluating Large Language Models",
    "(LLMs) across multiple backends with ease. Dive deep into your model’s",
    "performance by saving and exploring detailed, sample-by-sample results to debug",
    "and see how your models stack up.",
  ].join("\n");

  const ke =
    '<p>Share your evaluation results with the community by pushing them to the Hugging Face Hub. If you open Pull Requests on model repositories with evaluation results, we will automatically show the results on benchmark dataset repositories. Let’s decentralize evaluation! Check out the <a href="https://huggingface.co/docs/hub/eval-results" rel="nofollow">docs</a>.</p>';

  const Me = "Evaluate your models using the most popular and efficient inference backends:";

  // One entry per <li>; entries are separated by a single space in the markup.
  const xe = [
    '<li><code>eval</code>: Use <a href="https://inspect.aisi.org.uk/" rel="nofollow">inspect-ai</a> as backend to evaluate and inspect your models! (prefered way)</li>',
    '<li><code>transformers</code>: Evaluate models on CPU or one or more GPUs using <a href="https://github.com/huggingface/transformers" rel="nofollow">🤗\nAccelerate</a></li>',
    '<li><code>nanotron</code>: Evaluate models in distributed settings using <a href="https://github.com/huggingface/nanotron" rel="nofollow">⚡️\nNanotron</a></li>',
    '<li><code>vllm</code>: Evaluate models on one or more GPUs using <a href="https://github.com/vllm-project/vllm" rel="nofollow">🚀\nVLLM</a></li>',
    "<li><code>custom</code>: Evaluate custom models (can be anything)</li>",
    '<li><code>sglang</code>: Evaluate models using <a href="https://github.com/sgl-project/sglang" rel="nofollow">SGLang</a> as backend</li>',
    '<li><code>inference-endpoint</code>: Evaluate models using Hugging Face’s <a href="https://huggingface.co/inference-endpoints/dedicated" rel="nofollow">Inference Endpoints API</a></li>',
    '<li><code>tgi</code>: Evaluate models using <a href="https://huggingface.co/docs/text-generation-inference/en/index" rel="nofollow">🔗 Text Generation Inference</a> running locally</li>',
    '<li><code>litellm</code>: Evaluate models on any compatible API using <a href="https://www.litellm.ai/" rel="nofollow">LiteLLM</a></li>',
    '<li><code>inference-providers</code>: Evaluate models using <a href="https://huggingface.co/docs/inference-providers/en/index" rel="nofollow">HuggingFace’s inference providers</a> as backend**: Distributed training and evaluation</li>',
  ].join(" ");

  const Ce = [
    "<li><strong>Extensive Task Library</strong>: 1000s pre-built evaluation tasks</li>",
    "<li><strong>Custom Task Creation</strong>: Build your own evaluation tasks</li>",
    "<li><strong>Flexible Metrics</strong>: Support for custom metrics and scoring</li>",
    "<li><strong>Detailed Analysis</strong>: Sample-by-sample results for deep insights</li>",
  ].join(" ");

  const Te = [
    'Customization at your fingertips: create <a href="adding-a-custom-task">new tasks</a>,',
    '<a href="adding-a-new-metric">metrics</a> or <a href="evaluating-a-custom-model">model</a> tailored to your needs, or browse all our existing tasks and metrics.',
  ].join("\n");

  const Le = "Seamlessly experiment, benchmark, and store your results on the Hugging Face Hub, S3, or locally.";

  const _e = "Resulting Space:";

  // ---- Child components (constructed in document order) ----------------

  // Every heading is the component imported as `c` with the same three props.
  const makeHeading = (title, local, headingTag) => new c({ props: { title, local, headingTag } });

  const y = new Ae({
    props: {
      containerStyle: "float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;",
    },
  });
  const k = makeHeading("Lighteval", "lighteval", "h1");
  const x = makeHeading("Key Features", "key-features", "h2");
  const C = makeHeading("🚀 Multi-Backend Support", "-multi-backend-support", "h3");
  const E = makeHeading("📊 Comprehensive Evaluation", "-comprehensive-evaluation", "h3");
  const U = makeHeading("🔧 Easy Customization", "-easy-customization", "h3");
  const S = makeHeading("☁️ Seamless Integration", "-seamless-integration", "h3");
  const P = makeHeading("Quick Start", "quick-start", "h2");
  const F = makeHeading("Installation", "installation", "h3");
  const G = new Se({
    props: {
      code: "cGlwJTIwaW5zdGFsbCUyMGxpZ2h0ZXZhbA==",
      highlighted: "pip install lighteval",
      wrap: false,
    },
  });
  const z = makeHeading("Basic Usage", "basic-usage", "h3");
  const Z = makeHeading("Find a task", "find-a-task", "h4");
  const j = makeHeading(
    "Run your benchmark and push details to the hub",
    "run-your-benchmark-and-push-details-to-the-hub",
    "h4",
  );
  const A = new Se({
    props: {
      code: "bGlnaHRldmFsJTIwZXZhbCUyMCUyMmhmLWluZmVyZW5jZS1wcm92aWRlcnMlMkZvcGVuYWklMkZncHQtb3NzLTIwYiUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMGdwcWElM0FkaWFtb25kJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1idW5kbGUtZGlyJTIwZ3B0LW9zcy1idW5kbGUlMjAlNUMlMEElMjAlMjAlMjAlMjAtLXJlcG8taWQlMjBPcGVuRXZhbHMlMkZldmFscw==",
      // Continuation lines use four spaces, matching what `code` decodes to
      // (base64 -> percent-decoding yields "\" + newline + four spaces).
      highlighted: [
        'lighteval <span class="hljs-built_in">eval</span> <span class="hljs-string">"hf-inference-providers/openai/gpt-oss-20b"</span> \\',
        "    gpqa:diamond \\",
        "    --bundle-dir gpt-oss-bundle \\",
        "    --repo-id OpenEvals/evals",
      ].join("\n"),
      wrap: false,
    },
  });
  const B = new qe({
    props: { source: "https://github.com/huggingface/lighteval/blob/main/docs/source/index.mdx" },
  });

  // Single source of truth for the component order used by i(), o() and d().
  const components = [y, k, x, C, E, U, S, P, F, G, z, Z, j, A, B];

  return {
    // Creates every node from scratch with f(tag) / i(), asks each child
    // component's fragment to do the same via s(), then applies attributes.
    c() {
      v = f("meta");
      R = i();
      J = f("p");
      Q = i();
      s(y.$$.fragment);
      V = i();
      s(k.$$.fragment);
      D = i();
      M = f("p");
      M.textContent = ye;
      O = i();
      b = f("blockquote");
      b.innerHTML = ke;
      X = i();
      s(x.$$.fragment);
      K = i();
      s(C.$$.fragment);
      N = i();
      T = f("p");
      T.textContent = Me;
      Y = i();
      L = f("ul");
      L.innerHTML = xe;
      ee = i();
      s(E.$$.fragment);
      te = i();
      _ = f("ul");
      _.innerHTML = Ce;
      le = i();
      s(U.$$.fragment);
      ae = i();
      I = f("p");
      I.innerHTML = Te;
      ie = i();
      s(S.$$.fragment);
      ne = i();
      H = f("p");
      H.textContent = Le;
      se = i();
      s(P.$$.fragment);
      oe = i();
      s(F.$$.fragment);
      re = i();
      s(G.$$.fragment);
      ue = i();
      s(z.$$.fragment);
      me = i();
      s(Z.$$.fragment);
      pe = i();
      h = f("iframe");
      fe = i();
      s(j.$$.fragment);
      ge = i();
      s(A.$$.fragment);
      de = i();
      q = f("p");
      q.textContent = _e;
      ce = i();
      $ = f("iframe");
      he = i();
      s(B.$$.fragment);
      $e = i();
      W = f("p");
      this.h();
    },

    // Same node sequence as c(), but built through the g()/n()/o() helpers
    // from the node list `e` (presumably existing server-rendered nodes —
    // confirm against ../chunks/index). Static content is only rewritten
    // when w(node) does not return the expected "svelte-…" hash.
    l(e) {
      const headNodes = Ze("svelte-u9bgzb", document.head);
      v = g(headNodes, "META", { name: true, content: true });
      headNodes.forEach(l);
      R = n(e);
      J = g(e, "P", {});
      be(J).forEach(l);
      Q = n(e);
      o(y.$$.fragment, e);
      V = n(e);
      o(k.$$.fragment, e);
      D = n(e);
      M = g(e, "P", { "data-svelte-h": true });
      if (w(M) !== "svelte-5jd4gg") {
        M.textContent = ye;
      }
      O = n(e);
      b = g(e, "BLOCKQUOTE", { class: true, "data-svelte-h": true });
      if (w(b) !== "svelte-yucbvt") {
        b.innerHTML = ke;
      }
      X = n(e);
      o(x.$$.fragment, e);
      K = n(e);
      o(C.$$.fragment, e);
      N = n(e);
      T = g(e, "P", { "data-svelte-h": true });
      if (w(T) !== "svelte-hqmq3z") {
        T.textContent = Me;
      }
      Y = n(e);
      L = g(e, "UL", { "data-svelte-h": true });
      if (w(L) !== "svelte-fzdfht") {
        L.innerHTML = xe;
      }
      ee = n(e);
      o(E.$$.fragment, e);
      te = n(e);
      _ = g(e, "UL", { "data-svelte-h": true });
      if (w(_) !== "svelte-1kvd8am") {
        _.innerHTML = Ce;
      }
      le = n(e);
      o(U.$$.fragment, e);
      ae = n(e);
      I = g(e, "P", { "data-svelte-h": true });
      if (w(I) !== "svelte-d94dpm") {
        I.innerHTML = Te;
      }
      ie = n(e);
      o(S.$$.fragment, e);
      ne = n(e);
      H = g(e, "P", { "data-svelte-h": true });
      if (w(H) !== "svelte-12kzs6l") {
        H.textContent = Le;
      }
      se = n(e);
      o(P.$$.fragment, e);
      oe = n(e);
      o(F.$$.fragment, e);
      re = n(e);
      o(G.$$.fragment, e);
      ue = n(e);
      o(z.$$.fragment, e);
      me = n(e);
      o(Z.$$.fragment, e);
      pe = n(e);
      h = g(e, "IFRAME", { src: true, frameborder: true, width: true, height: true });
      be(h).forEach(l);
      fe = n(e);
      o(j.$$.fragment, e);
      ge = n(e);
      o(A.$$.fragment, e);
      de = n(e);
      q = g(e, "P", { "data-svelte-h": true });
      if (w(q) !== "svelte-xmt8gz") {
        q.textContent = _e;
      }
      ce = n(e);
      $ = g(e, "IFRAME", { src: true, frameborder: true, width: true, height: true });
      be($).forEach(l);
      he = n(e);
      o(B.$$.fragment, e);
      $e = n(e);
      W = g(e, "P", {});
      be(W).forEach(l);
      this.h();
    },

    // Applies attributes shared by the c() and l() paths.
    h() {
      d(v, "name", "hf:doc:metadata");
      d(v, "content", Je);
      d(b, "class", "tip");

      // `src` is only written when Ie() reports the current value as different.
      Ee = "https://openevals-open-benchmark-index.hf.space";
      if (!Ie(h.src, Ee)) {
        d(h, "src", Ee);
      }
      d(h, "frameborder", "0");
      d(h, "width", "850");
      d(h, "height", "450");

      Ue = "https://openevals-evals.static.hf.space";
      if (!Ie($.src, Ue)) {
        d($, "src", Ue);
      }
      d($, "frameborder", "0");
      d($, "width", "850");
      d($, "height", "450");
    },

    // Places the <meta> node in document.head, then passes every node and
    // child component to a()/r() with (e, t), in document order.
    m(e, t) {
      je(document.head, v);
      a(e, R, t);
      a(e, J, t);
      a(e, Q, t);
      r(y, e, t);
      a(e, V, t);
      r(k, e, t);
      a(e, D, t);
      a(e, M, t);
      a(e, O, t);
      a(e, b, t);
      a(e, X, t);
      r(x, e, t);
      a(e, K, t);
      r(C, e, t);
      a(e, N, t);
      a(e, T, t);
      a(e, Y, t);
      a(e, L, t);
      a(e, ee, t);
      r(E, e, t);
      a(e, te, t);
      a(e, _, t);
      a(e, le, t);
      r(U, e, t);
      a(e, ae, t);
      a(e, I, t);
      a(e, ie, t);
      r(S, e, t);
      a(e, ne, t);
      a(e, H, t);
      a(e, se, t);
      r(P, e, t);
      a(e, oe, t);
      r(F, e, t);
      a(e, re, t);
      r(G, e, t);
      a(e, ue, t);
      r(z, e, t);
      a(e, me, t);
      r(Z, e, t);
      a(e, pe, t);
      a(e, h, t);
      a(e, fe, t);
      r(j, e, t);
      a(e, ge, t);
      r(A, e, t);
      a(e, de, t);
      a(e, q, t);
      a(e, ce, t);
      a(e, $, t);
      a(e, he, t);
      r(B, e, t);
      a(e, $e, t);
      a(e, W, t);
      ve = true;
    },

    // Update hook: the page has no reactive state, so the imported `Pe` is used as-is.
    p: Pe,

    // Calls u() on every child fragment once, until o() resets the flag.
    i(e) {
      if (ve) {
        return;
      }
      for (const child of components) {
        u(child.$$.fragment, e);
      }
      ve = true;
    },

    // Calls m() on every child fragment and clears the flag checked by i().
    o(e) {
      for (const child of components) {
        m(child.$$.fragment, e);
      }
      ve = false;
    },

    // Teardown: body nodes are passed to l() only when `e` is truthy; the
    // head <meta> node is always passed to l(); every child component is
    // then handed to p() together with `e`.
    d(e) {
      if (e) {
        const bodyNodes = [
          R, J, Q, V, D, M, O, b, X, K, N, T, Y, L, ee, te, _, le, ae, I,
          ie, ne, H, se, oe, re, ue, me, pe, h, fe, ge, de, q, ce, $, he, $e, W,
        ];
        for (const node of bodyNodes) {
          l(node);
        }
      }
      l(v);
      for (const child of components) {
        p(child, e);
      }
    },
  };
}

// JSON table of contents written to the `hf:doc:metadata` <meta> tag in Be's h().
const Je = '{"title":"Lighteval","local":"lighteval","sections":[{"title":"Key Features","local":"key-features","sections":[{"title":"🚀 Multi-Backend Support","local":"-multi-backend-support","sections":[],"depth":3},{"title":"📊 Comprehensive Evaluation","local":"-comprehensive-evaluation","sections":[],"depth":3},{"title":"🔧 Easy Customization","local":"-easy-customization","sections":[],"depth":3},{"title":"☁️ Seamless Integration","local":"-seamless-integration","sections":[],"depth":3}],"depth":2},{"title":"Quick Start","local":"quick-start","sections":[{"title":"Installation","local":"installation","sections":[],"depth":3},{"title":"Basic Usage","local":"basic-usage","sections":[{"title":"Find a task","local":"find-a-task","sections":[],"depth":4},{"title":"Run your benchmark and push details to the hub","local":"run-your-benchmark-and-push-details-to-the-hub","sections":[],"depth":4}],"depth":3}],"depth":2}],"depth":1}';

/**
 * Instance function passed to `ze` by the component constructor.
 *
 * Registers a callback through `Fe` that reads the `fw` query parameter
 * from the current URL and discards the value, then returns an empty array.
 *
 * @param {*} we - Unused.
 * @returns {Array} Always an empty array.
 */
function We(we) {
  Fe(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component: wires `We` (instance) and `Be` (fragment) into the `Ge` base class via `ze`. */
class Oe extends Ge {
  constructor(v) {
    super();
    ze(this, v, We, Be, He, {});
  }
}

export { Oe as component };
Xet Storage Details
- Size:
- 10.6 kB
- Xet hash:
- afe9d9fd4cc8fe2efe8266aa050269c19d0942f37d8a17465cbb1c19a2c99655
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.