Buckets:
| import{s as Ue,a as Le,n as Ie,o as Se}from"../chunks/scheduler.3a17fb72.js";import{S as Pe,i as Ge,e as f,s as i,c as s,h as Fe,a as g,d as l,b as n,f as he,g as r,j as J,k as h,l as He,m as a,n as o,t as m,o as u,p}from"../chunks/index.093f8863.js";import{C as Ze,H as d,E as je}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.e28c70f3.js";import{C as _e}from"../chunks/CodeBlock.3509844b.js";function ze(ve){let v,q,W,R,b,Q,w,V,y,be=`🤗 Lighteval is your all-in-one toolkit for evaluating Large Language Models | |
| (LLMs) across multiple backends with ease. Dive deep into your model’s | |
| performance by saving and exploring detailed, sample-by-sample results to debug | |
| and see how your models stack up.`,D,k,X,M,N,x,we="Evaluate your models using the most popular and efficient inference backends:",K,C,ye=`<li><code>eval</code>: Use <a href="https://inspect.aisi.org.uk/" rel="nofollow">inspect-ai</a> as backend to evaluate and inspect your models ! (prefered way)</li> <li><code>transformers</code>: Evaluate models on CPU or one or more GPUs using <a href="https://github.com/huggingface/transformers" rel="nofollow">🤗 | |
| Accelerate</a></li> <li><code>nanotron</code>: Evaluate models in distributed settings using <a href="https://github.com/huggingface/nanotron" rel="nofollow">⚡️ | |
| Nanotron</a></li> <li><code>vllm</code>: Evaluate models on one or more GPUs using <a href="https://github.com/vllm-project/vllm" rel="nofollow">🚀 | |
| VLLM</a></li> <li><code>custom</code>: Evaluate custom models (can be anything)</li> <li><code>sglang</code>: Evaluate models using <a href="https://github.com/sgl-project/sglang" rel="nofollow">SGLang</a> as backend</li> <li><code>inference-endpoint</code>: Evaluate models using Hugging Face’s <a href="https://huggingface.co/inference-endpoints/dedicated" rel="nofollow">Inference Endpoints API</a></li> <li><code>tgi</code>: Evaluate models using <a href="https://huggingface.co/docs/text-generation-inference/en/index" rel="nofollow">🔗 Text Generation Inference</a> running locally</li> <li><code>litellm</code>: Evaluate models on any compatible API using <a href="https://www.litellm.ai/" rel="nofollow">LiteLLM</a></li> <li><code>inference-providers</code>: Evaluate models using <a href="https://huggingface.co/docs/inference-providers/en/index" rel="nofollow">HuggingFace’s inference providers</a> as backend**: Distributed training and evaluation</li>`,O,T,Y,E,ke="<li><strong>Extensive Task Library</strong>: 1000s pre-built evaluation tasks</li> <li><strong>Custom Task Creation</strong>: Build your own evaluation tasks</li> <li><strong>Flexible Metrics</strong>: Support for custom metrics and scoring</li> <li><strong>Detailed Analysis</strong>: Sample-by-sample results for deep insights</li>",ee,L,te,_,Me=`Customization at your fingertips: create <a href="adding-a-custom-task">new tasks</a>, | |
| <a href="adding-a-new-metric">metrics</a> or <a href="evaluating-a-custom-model">model</a> tailored to your needs, or browse all our existing tasks and metrics.`,le,U,ae,I,xe="Seamlessly experiment, benchmark, and store your results on the Hugging Face Hub, S3, or locally.",ie,S,ne,P,se,G,re,F,oe,H,me,$,Ce,ue,Z,pe,j,fe,z,Te="Resulting Space:",ge,c,Ee,de,A,$e,B,ce;return b=new Ze({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),w=new d({props:{title:"Lighteval",local:"lighteval",headingTag:"h1"}}),k=new d({props:{title:"Key Features",local:"key-features",headingTag:"h2"}}),M=new d({props:{title:"🚀 Multi-Backend Support",local:"-multi-backend-support",headingTag:"h3"}}),T=new d({props:{title:"📊 Comprehensive Evaluation",local:"-comprehensive-evaluation",headingTag:"h3"}}),L=new d({props:{title:"🔧 Easy Customization",local:"-easy-customization",headingTag:"h3"}}),U=new d({props:{title:"☁️ Seamless Integration",local:"-seamless-integration",headingTag:"h3"}}),S=new d({props:{title:"Quick Start",local:"quick-start",headingTag:"h2"}}),P=new d({props:{title:"Installation",local:"installation",headingTag:"h3"}}),G=new _e({props:{code:"cGlwJTIwaW5zdGFsbCUyMGxpZ2h0ZXZhbA==",highlighted:"pip install lighteval",wrap:!1}}),F=new d({props:{title:"Basic Usage",local:"basic-usage",headingTag:"h3"}}),H=new d({props:{title:"Find a task",local:"find-a-task",headingTag:"h4"}}),Z=new d({props:{title:"Run your benchmark and push details to the hub",local:"run-your-benchmark-and-push-details-to-the-hub",headingTag:"h4"}}),j=new _e({props:{code:"bGlnaHRldmFsJTIwZXZhbCUyMCUyMmhmLWluZmVyZW5jZS1wcm92aWRlcnMlMkZvcGVuYWklMkZncHQtb3NzLTIwYiUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMGdwcWElM0FkaWFtb25kJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1idW5kbGUtZGlyJTIwZ3B0LW9zcy1idW5kbGUlMjAlNUMlMEElMjAlMjAlMjAlMjAtLXJlcG8taWQlMjBPcGVuRXZhbHMlMkZldmFscw==",highlighted:`lighteval <span class="hljs-built_in">eval</span> <span class="hljs-string">"hf-inference-providers/openai/gpt-oss-20b"</span> \\ | |
| gpqa:diamond \\ | |
| --bundle-dir gpt-oss-bundle \\ | |
| --repo-id OpenEvals/evals`,wrap:!1}}),A=new je({props:{source:"https://github.com/huggingface/lighteval/blob/main/docs/source/index.mdx"}}),{c(){v=f("meta"),q=i(),W=f("p"),R=i(),s(b.$$.fragment),Q=i(),s(w.$$.fragment),V=i(),y=f("p"),y.textContent=be,D=i(),s(k.$$.fragment),X=i(),s(M.$$.fragment),N=i(),x=f("p"),x.textContent=we,K=i(),C=f("ul"),C.innerHTML=ye,O=i(),s(T.$$.fragment),Y=i(),E=f("ul"),E.innerHTML=ke,ee=i(),s(L.$$.fragment),te=i(),_=f("p"),_.innerHTML=Me,le=i(),s(U.$$.fragment),ae=i(),I=f("p"),I.textContent=xe,ie=i(),s(S.$$.fragment),ne=i(),s(P.$$.fragment),se=i(),s(G.$$.fragment),re=i(),s(F.$$.fragment),oe=i(),s(H.$$.fragment),me=i(),$=f("iframe"),ue=i(),s(Z.$$.fragment),pe=i(),s(j.$$.fragment),fe=i(),z=f("p"),z.textContent=Te,ge=i(),c=f("iframe"),de=i(),s(A.$$.fragment),$e=i(),B=f("p"),this.h()},l(e){const t=Fe("svelte-u9bgzb",document.head);v=g(t,"META",{name:!0,content:!0}),t.forEach(l),q=n(e),W=g(e,"P",{}),he(W).forEach(l),R=n(e),r(b.$$.fragment,e),Q=n(e),r(w.$$.fragment,e),V=n(e),y=g(e,"P",{"data-svelte-h":!0}),J(y)!=="svelte-5jd4gg"&&(y.textContent=be),D=n(e),r(k.$$.fragment,e),X=n(e),r(M.$$.fragment,e),N=n(e),x=g(e,"P",{"data-svelte-h":!0}),J(x)!=="svelte-hqmq3z"&&(x.textContent=we),K=n(e),C=g(e,"UL",{"data-svelte-h":!0}),J(C)!=="svelte-d6biun"&&(C.innerHTML=ye),O=n(e),r(T.$$.fragment,e),Y=n(e),E=g(e,"UL",{"data-svelte-h":!0}),J(E)!=="svelte-1kvd8am"&&(E.innerHTML=ke),ee=n(e),r(L.$$.fragment,e),te=n(e),_=g(e,"P",{"data-svelte-h":!0}),J(_)!=="svelte-d94dpm"&&(_.innerHTML=Me),le=n(e),r(U.$$.fragment,e),ae=n(e),I=g(e,"P",{"data-svelte-h":!0}),J(I)!=="svelte-12kzs6l"&&(I.textContent=xe),ie=n(e),r(S.$$.fragment,e),ne=n(e),r(P.$$.fragment,e),se=n(e),r(G.$$.fragment,e),re=n(e),r(F.$$.fragment,e),oe=n(e),r(H.$$.fragment,e),me=n(e),$=g(e,"IFRAME",{src:!0,frameborder:!0,width:!0,height:!0}),he($).forEach(l),ue=n(e),r(Z.$$.fragment,e),pe=n(e),r(j.$$.fragment,e),fe=n(e),z=g(e,"P",{"data-svelte-h":!0}),J(z)!=="svelte-xmt8gz"&&(z.textContent=Te),ge=n(e),c=g(e,"IFRAME",{src:!0,frameborder:!0,width:!0,height:!0}),he(c).forEach(l),de=n(e),r(A.$$.fragment,e),$e=n(e),B=g(e,"P",{}),he(B).forEach(l),this.h()},h(){h(v,"name","hf:doc:metadata"),h(v,"content",Ae),Le($.src,Ce="https://openevals-open-benchmark-index.hf.space")||h($,"src",Ce),h($,"frameborder","0"),h($,"width","850"),h($,"height","450"),Le(c.src,Ee="https://openevals-evals.static.hf.space")||h(c,"src",Ee),h(c,"frameborder","0"),h(c,"width","850"),h(c,"height","450")},m(e,t){He(document.head,v),a(e,q,t),a(e,W,t),a(e,R,t),o(b,e,t),a(e,Q,t),o(w,e,t),a(e,V,t),a(e,y,t),a(e,D,t),o(k,e,t),a(e,X,t),o(M,e,t),a(e,N,t),a(e,x,t),a(e,K,t),a(e,C,t),a(e,O,t),o(T,e,t),a(e,Y,t),a(e,E,t),a(e,ee,t),o(L,e,t),a(e,te,t),a(e,_,t),a(e,le,t),o(U,e,t),a(e,ae,t),a(e,I,t),a(e,ie,t),o(S,e,t),a(e,ne,t),o(P,e,t),a(e,se,t),o(G,e,t),a(e,re,t),o(F,e,t),a(e,oe,t),o(H,e,t),a(e,me,t),a(e,$,t),a(e,ue,t),o(Z,e,t),a(e,pe,t),o(j,e,t),a(e,fe,t),a(e,z,t),a(e,ge,t),a(e,c,t),a(e,de,t),o(A,e,t),a(e,$e,t),a(e,B,t),ce=!0},p:Ie,i(e){ce||(m(b.$$.fragment,e),m(w.$$.fragment,e),m(k.$$.fragment,e),m(M.$$.fragment,e),m(T.$$.fragment,e),m(L.$$.fragment,e),m(U.$$.fragment,e),m(S.$$.fragment,e),m(P.$$.fragment,e),m(G.$$.fragment,e),m(F.$$.fragment,e),m(H.$$.fragment,e),m(Z.$$.fragment,e),m(j.$$.fragment,e),m(A.$$.fragment,e),ce=!0)},o(e){u(b.$$.fragment,e),u(w.$$.fragment,e),u(k.$$.fragment,e),u(M.$$.fragment,e),u(T.$$.fragment,e),u(L.$$.fragment,e),u(U.$$.fragment,e),u(S.$$.fragment,e),u(P.$$.fragment,e),u(G.$$.fragment,e),u(F.$$.fragment,e),u(H.$$.fragment,e),u(Z.$$.fragment,e),u(j.$$.fragment,e),u(A.$$.fragment,e),ce=!1},d(e){e&&(l(q),l(W),l(R),l(Q),l(V),l(y),l(D),l(X),l(N),l(x),l(K),l(C),l(O),l(Y),l(E),l(ee),l(te),l(_),l(le),l(ae),l(I),l(ie),l(ne),l(se),l(re),l(oe),l(me),l($),l(ue),l(pe),l(fe),l(z),l(ge),l(c),l(de),l($e),l(B)),l(v),p(b,e),p(w,e),p(k,e),p(M,e),p(T,e),p(L,e),p(U,e),p(S,e),p(P,e),p(G,e),p(F,e),p(H,e),p(Z,e),p(j,e),p(A,e)}}}const Ae='{"title":"Lighteval","local":"lighteval","sections":[{"title":"Key Features","local":"key-features","sections":[{"title":"🚀 Multi-Backend Support","local":"-multi-backend-support","sections":[],"depth":3},{"title":"📊 Comprehensive Evaluation","local":"-comprehensive-evaluation","sections":[],"depth":3},{"title":"🔧 Easy Customization","local":"-easy-customization","sections":[],"depth":3},{"title":"☁️ Seamless Integration","local":"-seamless-integration","sections":[],"depth":3}],"depth":2},{"title":"Quick Start","local":"quick-start","sections":[{"title":"Installation","local":"installation","sections":[],"depth":3},{"title":"Basic Usage","local":"basic-usage","sections":[{"title":"Find a task","local":"find-a-task","sections":[],"depth":4},{"title":"Run your benchmark and push details to the hub","local":"run-your-benchmark-and-push-details-to-the-hub","sections":[],"depth":4}],"depth":3}],"depth":2}],"depth":1}';function Je(ve){return Se(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Qe extends Pe{constructor(v){super(),Ge(this,v,Je,ze,Ue,{})}}export{Qe as component}; | |
Xet Storage Details
- Size:
- 10 kB
- Xet hash:
- 49f215b98866e3eda9b0aedc873a4e9afee3b7151693c714076f01d093fe7d29
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.