Buckets:
/*
 * Minified page module for the "Evaluate your model with Inspect-AI" documentation page.
 * NOTE(review): this looks like compiled Svelte output (helpers come from ../chunks/scheduler
 * and ../chunks/index) — confirm before hand-editing; the real source is probably the .mdx
 * file referenced in the `source` prop further down.
 * NOTE(review): the `| ` / ` | |` delimiters at line edges and inside the template literals
 * look like table-rendering artifacts from wherever this text was copied. They are left
 * untouched here because they sit inside runtime strings / code positions.
 */
| import{s as Mt,a as pt,n as ut,o as ft}from"../chunks/scheduler.3a17fb72.js";import{S as wt,i as yt,e as i,s as a,c as r,h as ot,a as m,d as l,b as n,f as Be,g as M,j as u,k as p,l as bt,m as s,n as f,t as w,o as y,p as o}from"../chunks/index.093f8863.js";import{C as ct,H as rt,E as ht}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.5e7ea2bd.js";import{C as b}from"../chunks/CodeBlock.09235327.js";
/**
 * Fragment factory for the page. The parameter Ye is never read.
 *
 * Declares the static text/HTML snippets and instantiates the child components
 * (ct, rt, ht and the CodeBlock `b`, each given a base64 `code` prop plus pre-highlighted HTML),
 * then returns an object with these methods:
 *   c()    - creates the nodes with i(tag)/a() and calls r() on every child's $$.fragment, then this.h().
 *   l(e)   - counterpart of c() that works from an existing parent `e` via ot/m/n/M/Be/u
 *            (presumably claims server-rendered DOM during hydration — TODO confirm), then this.h().
 *   h()    - sets attributes through p(): the hf:doc:metadata <meta> name/content (content = vt),
 *            the "warning" class, both iframes' src/frameborder/width/height, and the
 *            `start` values "2".."10" on the single-item <ol> elements.
 *   m(e,t) - passes T to bt(document.head, ...), hands every other node/child to s()/f()
 *            with (e, t) in page order, and sets Re to true.
 *   p      - the imported `ut` (no per-update logic is defined in this module).
 *   i(e)   - when Re is falsy, calls w() on every child fragment, then sets Re to true.
 *   o(e)   - calls y() on every child fragment and sets Re to false.
 *   d(e)   - when e is truthy calls l() on each node created here; always calls l(T) and
 *            o(child, e) for every child component.
 */
function Tt(Ye){let T,le,ee,se,I,ae,W,ne,k,Ne=`Pick the right benchmarks with our benchmark finder: | |
| Search by language, task type, dataset name, or keywords.`,ie,v,ze="<p>Not all tasks are compatible with inspect-ai’s API as of yet, we are working on converting all of them !</p>",me,c,Qe,pe,G,Ee="Once you’ve chosen a benchmark, run it with <code>lighteval eval</code>. Below are examples for common setups.",re,x,Me,S,Fe="<li>Evaluate a model via Hugging Face Inference Providers.</li>",ue,H,fe,$,Xe="<li>Run multiple evals at the same time.</li>",we,_,ye,d,qe="<li>Compare providers for the same model.</li>",oe,V,be,A,Pe="You can also compare every providers serving one model in one line:",ce,R,he,U,De="<li>Evaluate a vLLM or SGLang model.</li>",Te,B,ve,g,Oe="<li>See the impact of few-shot on your model.</li>",$e,Y,de,Z,Ke="<li>Optimize custom server connections.</li>",Ue,N,ge,C,et="<li>Use multiple epochs for more reliable results.</li>",Ze,z,Ce,J,tt="<li>Push to the Hub to share results.</li>",Je,Q,je,E,lt="Resulting Space:",Le,h,st,Ie,j,at="<li>Change model behaviour</li>",We,F,nt="You can use any argument defined in inspect-ai’s API.",ke,X,Ge,L,it="<li>Use model-args to use any inference provider specific argument.</li>",xe,q,Se,P,He,D,mt="LightEval prints a per-model results table:",_e,O,Ve,K,Ae,te,Re;return I=new ct({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),W=new rt({props:{title:"Evaluate your model with Inspect-AI",local:"evaluate-your-model-with-inspect-ai",headingTag:"h1"}}),x=new rt({props:{title:"Examples",local:"examples",headingTag:"h3"}}),H=new b({props:{code:"bGlnaHRldmFsJTIwZXZhbCUyMCUyMmhmLWluZmVyZW5jZS1wcm92aWRlcnMlMkZvcGVuYWklMkZncHQtb3NzLTIwYiUyMiUyMGdwcWElM0FkaWFtb25k",highlighted:'lighteval <span class="hljs-built_in">eval</span> <span class="hljs-string">"hf-inference-providers/openai/gpt-oss-20b"</span> gpqa:diamond',wrap:!1}}),_=new 
b({props:{code:"bGlnaHRldmFsJTIwZXZhbCUyMCUyMmhmLWluZmVyZW5jZS1wcm92aWRlcnMlMkZvcGVuYWklMkZncHQtb3NzLTIwYiUyMiUyMGdwcWElM0FkaWFtb25kJTJDYWltZTI1",highlighted:'lighteval <span class="hljs-built_in">eval</span> <span class="hljs-string">"hf-inference-providers/openai/gpt-oss-20b"</span> gpqa:diamond,aime25',wrap:!1}}),V=new b({props:{code:"bGlnaHRldmFsJTIwZXZhbCUyMCU1QyUwQSUyMCUyMCUyMCUyMGhmLWluZmVyZW5jZS1wcm92aWRlcnMlMkZvcGVuYWklMkZncHQtb3NzLTIwYiUzQWZpcmV3b3Jrcy1haSUyMCU1QyUwQSUyMCUyMCUyMCUyMGhmLWluZmVyZW5jZS1wcm92aWRlcnMlMkZvcGVuYWklMkZncHQtb3NzLTIwYiUzQXRvZ2V0aGVyJTIwJTVDJTBBJTIwJTIwJTIwJTIwaGYtaW5mZXJlbmNlLXByb3ZpZGVycyUyRm9wZW5haSUyRmdwdC1vc3MtMjBiJTNBbmViaXVzJTIwJTVDJTBBJTIwJTIwJTIwJTIwZ3BxYSUzQWRpYW1vbmQ=",highlighted:`lighteval <span class="hljs-built_in">eval</span> \\ | |
| hf-inference-providers/openai/gpt-oss-20b:fireworks-ai \\ | |
| hf-inference-providers/openai/gpt-oss-20b:together \\ | |
| hf-inference-providers/openai/gpt-oss-20b:nebius \\ | |
| gpqa:diamond`,wrap:!1}}),R=new b({props:{code:"JTIwJTIwJTIwJTIwaGYtaW5mZXJlbmNlLXByb3ZpZGVycyUyRm9wZW5haSUyRmdwdC1vc3MtMjBiJTNBYWxsJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIybGlnaHRldmFsJTdDZ3BxYSUzQWRpYW1vbmQlN0MwJTIy",highlighted:` hf-inference-providers/openai/gpt-oss-20b:all \\ | |
| <span class="hljs-string">"lighteval|gpqa:diamond|0"</span>`,wrap:!1}}),B=new b({props:{code:"bGlnaHRldmFsJTIwZXZhbCUyMHZsbG0lMkZIdWdnaW5nRmFjZVRCJTJGU21vbExNLTEzNU0tSW5zdHJ1Y3QlMjBncHFhJTNBZGlhbW9uZA==",highlighted:'lighteval <span class="hljs-built_in">eval</span> vllm/HuggingFaceTB/SmolLM-135M-Instruct gpqa:diamond',wrap:!1}}),Y=new b({props:{code:"bGlnaHRldmFsJTIwZXZhbCUyMGhmLWluZmVyZW5jZS1wcm92aWRlcnMlMkZvcGVuYWklMkZncHQtb3NzLTIwYiUyMCUyMmdzbThrJTdDMCUyQ2dzbThrJTdDNSUyMg==",highlighted:'lighteval <span class="hljs-built_in">eval</span> hf-inference-providers/openai/gpt-oss-20b <span class="hljs-string">"gsm8k|0,gsm8k|5"</span>',wrap:!1}}),N=new b({props:{code:"bGlnaHRldmFsJTIwZXZhbCUyMGhmLWluZmVyZW5jZS1wcm92aWRlcnMlMkZvcGVuYWklMkZncHQtb3NzLTIwYiUyMGdzbThrJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1tYXgtY29ubmVjdGlvbnMlMjA1MCUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tdGltZW91dCUyMDMwJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1yZXRyeS1vbi1lcnJvciUyMDElMjAlNUMlMEElMjAlMjAlMjAlMjAtLW1heC1yZXRyaWVzJTIwMSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbWF4LXNhbXBsZXMlMjAxMA==",highlighted:`lighteval <span class="hljs-built_in">eval</span> hf-inference-providers/openai/gpt-oss-20b gsm8k \\ | |
| --max-connections 50 \\ | |
| --<span class="hljs-built_in">timeout</span> 30 \\ | |
| --retry-on-error 1 \\ | |
| --max-retries 1 \\ | |
| --max-samples 10`,wrap:!1}}),z=new b({props:{code:"bGlnaHRldmFsJTIwZXZhbCUyMGhmLWluZmVyZW5jZS1wcm92aWRlcnMlMkZvcGVuYWklMkZncHQtb3NzLTIwYiUyMGFpbWUyNSUyMC0tZXBvY2hzJTIwMTYlMjAtLWVwb2Nocy1yZWR1Y2VyJTIwJTIycGFzc19hdF80JTIy",highlighted:'lighteval <span class="hljs-built_in">eval</span> hf-inference-providers/openai/gpt-oss-20b aime25 --epochs 16 --epochs-reducer <span class="hljs-string">"pass_at_4"</span>',wrap:!1}}),Q=new b({props:{code:"bGlnaHRldmFsJTIwZXZhbCUyMGhmLWluZmVyZW5jZS1wcm92aWRlcnMlMkZvcGVuYWklMkZncHQtb3NzLTIwYiUyMGhsZSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tYnVuZGxlLWRpciUyMGdwdC1vc3MtYnVuZGxlJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1yZXBvLWlkJTIwT3BlbkV2YWxzJTJGZXZhbHMlMjAlNUMlMEElMjAlMjAlMjAlMjAtLW1heC1zYW1wbGVzJTIwMTAw",highlighted:`lighteval <span class="hljs-built_in">eval</span> hf-inference-providers/openai/gpt-oss-20b hle \\ | |
| --bundle-dir gpt-oss-bundle \\ | |
| --repo-id OpenEvals/evals \\ | |
| --max-samples 100`,wrap:!1}}),X=new b({props:{code:"bGlnaHRldmFsJTIwZXZhbCUyMGhmLWluZmVyZW5jZS1wcm92aWRlcnMlMkZvcGVuYWklMkZncHQtb3NzLTIwYiUyMGFpbWUyNSUyMC0tdGVtcGVyYXR1cmUlMjAwLjE=",highlighted:'lighteval <span class="hljs-built_in">eval</span> hf-inference-providers/openai/gpt-oss-20b aime25 --temperature 0.1',wrap:!1}}),q=new b({props:{code:"bGlnaHRldmFsJTIwZXZhbCUyMGdvb2dsZSUyRmdlbWluaS0yLjUtcHJvJTIwYWltZTI1JTIwLS1tb2RlbC1hcmdzJTIwbG9jYXRpb24lM0R1cy1lYXN0NQ==",highlighted:'lighteval <span class="hljs-built_in">eval</span> google/gemini-2.5-pro aime25 --model-args location=us-east5',wrap:!1}}),P=new b({props:{code:"bGlnaHRldmFsJTIwZXZhbCUyMG9wZW5haSUyRmdwdC00byUyMGdwcWElM0FkaWFtb25kJTIwLS1tb2RlbC1hcmdzJTIwc2VydmljZV90aWVyJTNEZmxleCUyQ2NsaWVudF90aW1lb3V0JTNEMTIwMA==",highlighted:'lighteval <span class="hljs-built_in">eval</span> openai/gpt-4o gpqa:diamond --model-args service_tier=flex,client_timeout=1200',wrap:!1}}),O=new b({props:{code:"Q29tcGxldGVkJTIwYWxsJTIwdGFza3MlMjBpbiUyMCdsaWdodGV2YWwtbG9ncyclMjBzdWNjZXNzZnVsbHklMEElMEElN0MlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBNb2RlbCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3Q2dwcWElN0NncHFhJTNBZGlhbW9uZCU3QyUwQSU3Qy0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLSU3Qy0tLSUzQSU3Qy0tLS0tLS0tLS0tJTNBJTdDJTBBJTdDdmxsbSUyRkh1Z2dpbmdGYWNlVEIlMkZTbW9sTE0tMTM1TS1JbnN0cnVjdCU3QzAuMDElN0MlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAwLjAxJTdDJTBBJTBBcmVzdWx0cyUyMHNhdmVkJTIwdG8lMjBsaWdodGV2YWwtbG9ncyUwQXJ1biUyMCUyMmluc3BlY3QlMjB2aWV3JTIwLS1sb2ctZGlyJTIwbGlnaHRldmFsLWxvZ3MlMjIlMjB0byUyMHZpZXclMjB0aGUlMjByZXN1bHRz",highlighted:`<span class="hljs-title class_">Completed</span> all tasks <span class="hljs-keyword">in</span> <span class="hljs-string">'lighteval-logs'</span> successfully | |
| | <span class="hljs-title class_">Model</span> |gpqa|<span class="hljs-symbol">gpqa:</span>diamond| | |
| |---------------------------------------|---<span class="hljs-symbol">:|-----------</span><span class="hljs-symbol">:|</span> | |
| |vllm/<span class="hljs-title class_">HuggingFaceTB</span>/<span class="hljs-title class_">SmolLM</span><span class="hljs-number">-135</span>M-<span class="hljs-title class_">Instruct</span>|<span class="hljs-number">0.01</span>| <span class="hljs-number">0.01</span>| | |
| results saved to lighteval-logs | |
| run <span class="hljs-string">"inspect view --log-dir lighteval-logs"</span> to view the results`,wrap:!1}}),K=new ht({props:{source:"https://github.com/huggingface/lighteval/blob/main/docs/source/inspect-ai.mdx"}}),{c(){T=i("meta"),le=a(),ee=i("p"),se=a(),r(I.$$.fragment),ae=a(),r(W.$$.fragment),ne=a(),k=i("p"),k.textContent=Ne,ie=a(),v=i("blockquote"),v.innerHTML=ze,me=a(),c=i("iframe"),pe=a(),G=i("p"),G.innerHTML=Ee,re=a(),r(x.$$.fragment),Me=a(),S=i("ol"),S.innerHTML=Fe,ue=a(),r(H.$$.fragment),fe=a(),$=i("ol"),$.innerHTML=Xe,we=a(),r(_.$$.fragment),ye=a(),d=i("ol"),d.innerHTML=qe,oe=a(),r(V.$$.fragment),be=a(),A=i("p"),A.textContent=Pe,ce=a(),r(R.$$.fragment),he=a(),U=i("ol"),U.innerHTML=De,Te=a(),r(B.$$.fragment),ve=a(),g=i("ol"),g.innerHTML=Oe,$e=a(),r(Y.$$.fragment),de=a(),Z=i("ol"),Z.innerHTML=Ke,Ue=a(),r(N.$$.fragment),ge=a(),C=i("ol"),C.innerHTML=et,Ze=a(),r(z.$$.fragment),Ce=a(),J=i("ol"),J.innerHTML=tt,Je=a(),r(Q.$$.fragment),je=a(),E=i("p"),E.textContent=lt,Le=a(),h=i("iframe"),Ie=a(),j=i("ol"),j.innerHTML=at,We=a(),F=i("p"),F.textContent=nt,ke=a(),r(X.$$.fragment),Ge=a(),L=i("ol"),L.innerHTML=it,xe=a(),r(q.$$.fragment),Se=a(),r(P.$$.fragment),He=a(),D=i("p"),D.textContent=mt,_e=a(),r(O.$$.fragment),Ve=a(),r(K.$$.fragment),Ae=a(),te=i("p"),this.h()},l(e){const 
t=ot("svelte-u9bgzb",document.head);T=m(t,"META",{name:!0,content:!0}),t.forEach(l),le=n(e),ee=m(e,"P",{}),Be(ee).forEach(l),se=n(e),M(I.$$.fragment,e),ae=n(e),M(W.$$.fragment,e),ne=n(e),k=m(e,"P",{"data-svelte-h":!0}),u(k)!=="svelte-wvxpw7"&&(k.textContent=Ne),ie=n(e),v=m(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),u(v)!=="svelte-13z5e58"&&(v.innerHTML=ze),me=n(e),c=m(e,"IFRAME",{src:!0,frameborder:!0,width:!0,height:!0}),Be(c).forEach(l),pe=n(e),G=m(e,"P",{"data-svelte-h":!0}),u(G)!=="svelte-1s8wmkx"&&(G.innerHTML=Ee),re=n(e),M(x.$$.fragment,e),Me=n(e),S=m(e,"OL",{"data-svelte-h":!0}),u(S)!=="svelte-1n0ds6r"&&(S.innerHTML=Fe),ue=n(e),M(H.$$.fragment,e),fe=n(e),$=m(e,"OL",{start:!0,"data-svelte-h":!0}),u($)!=="svelte-158v33h"&&($.innerHTML=Xe),we=n(e),M(_.$$.fragment,e),ye=n(e),d=m(e,"OL",{start:!0,"data-svelte-h":!0}),u(d)!=="svelte-12sgcy5"&&(d.innerHTML=qe),oe=n(e),M(V.$$.fragment,e),be=n(e),A=m(e,"P",{"data-svelte-h":!0}),u(A)!=="svelte-r3sluq"&&(A.textContent=Pe),ce=n(e),M(R.$$.fragment,e),he=n(e),U=m(e,"OL",{start:!0,"data-svelte-h":!0}),u(U)!=="svelte-h63d4v"&&(U.innerHTML=De),Te=n(e),M(B.$$.fragment,e),ve=n(e),g=m(e,"OL",{start:!0,"data-svelte-h":!0}),u(g)!=="svelte-1lgb0oc"&&(g.innerHTML=Oe),$e=n(e),M(Y.$$.fragment,e),de=n(e),Z=m(e,"OL",{start:!0,"data-svelte-h":!0}),u(Z)!=="svelte-xic7d6"&&(Z.innerHTML=Ke),Ue=n(e),M(N.$$.fragment,e),ge=n(e),C=m(e,"OL",{start:!0,"data-svelte-h":!0}),u(C)!=="svelte-z8xpgu"&&(C.innerHTML=et),Ze=n(e),M(z.$$.fragment,e),Ce=n(e),J=m(e,"OL",{start:!0,"data-svelte-h":!0}),u(J)!=="svelte-bkjaqd"&&(J.innerHTML=tt),Je=n(e),M(Q.$$.fragment,e),je=n(e),E=m(e,"P",{"data-svelte-h":!0}),u(E)!=="svelte-xmt8gz"&&(E.textContent=lt),Le=n(e),h=m(e,"IFRAME",{src:!0,frameborder:!0,width:!0,height:!0}),Be(h).forEach(l),Ie=n(e),j=m(e,"OL",{start:!0,"data-svelte-h":!0}),u(j)!=="svelte-65smkt"&&(j.innerHTML=at),We=n(e),F=m(e,"P",{"data-svelte-h":!0}),u(F)!=="svelte-1hiqzh0"&&(F.textContent=nt),ke=n(e),M(X.$$.fragment,e),Ge=n(e),L=m(e,"OL",{start:!
0,"data-svelte-h":!0}),u(L)!=="svelte-p39luw"&&(L.innerHTML=it),xe=n(e),M(q.$$.fragment,e),Se=n(e),M(P.$$.fragment,e),He=n(e),D=m(e,"P",{"data-svelte-h":!0}),u(D)!=="svelte-g5np0a"&&(D.textContent=mt),_e=n(e),M(O.$$.fragment,e),Ve=n(e),M(K.$$.fragment,e),Ae=n(e),te=m(e,"P",{}),Be(te).forEach(l),this.h()},h(){p(T,"name","hf:doc:metadata"),p(T,"content",vt),p(v,"class","warning"),pt(c.src,Qe="https://openevals-open-benchmark-index.hf.space")||p(c,"src",Qe),p(c,"frameborder","0"),p(c,"width","850"),p(c,"height","450"),p($,"start","2"),p(d,"start","3"),p(U,"start","4"),p(g,"start","5"),p(Z,"start","6"),p(C,"start","7"),p(J,"start","8"),pt(h.src,st="https://openevals-evals.static.hf.space")||p(h,"src",st),p(h,"frameborder","0"),p(h,"width","850"),p(h,"height","450"),p(j,"start","9"),p(L,"start","10")},m(e,t){bt(document.head,T),s(e,le,t),s(e,ee,t),s(e,se,t),f(I,e,t),s(e,ae,t),f(W,e,t),s(e,ne,t),s(e,k,t),s(e,ie,t),s(e,v,t),s(e,me,t),s(e,c,t),s(e,pe,t),s(e,G,t),s(e,re,t),f(x,e,t),s(e,Me,t),s(e,S,t),s(e,ue,t),f(H,e,t),s(e,fe,t),s(e,$,t),s(e,we,t),f(_,e,t),s(e,ye,t),s(e,d,t),s(e,oe,t),f(V,e,t),s(e,be,t),s(e,A,t),s(e,ce,t),f(R,e,t),s(e,he,t),s(e,U,t),s(e,Te,t),f(B,e,t),s(e,ve,t),s(e,g,t),s(e,$e,t),f(Y,e,t),s(e,de,t),s(e,Z,t),s(e,Ue,t),f(N,e,t),s(e,ge,t),s(e,C,t),s(e,Ze,t),f(z,e,t),s(e,Ce,t),s(e,J,t),s(e,Je,t),f(Q,e,t),s(e,je,t),s(e,E,t),s(e,Le,t),s(e,h,t),s(e,Ie,t),s(e,j,t),s(e,We,t),s(e,F,t),s(e,ke,t),f(X,e,t),s(e,Ge,t),s(e,L,t),s(e,xe,t),f(q,e,t),s(e,Se,t),f(P,e,t),s(e,He,t),s(e,D,t),s(e,_e,t),f(O,e,t),s(e,Ve,t),f(K,e,t),s(e,Ae,t),s(e,te,t),Re=!0},p:ut,i(e){Re||(w(I.$$.fragment,e),w(W.$$.fragment,e),w(x.$$.fragment,e),w(H.$$.fragment,e),w(_.$$.fragment,e),w(V.$$.fragment,e),w(R.$$.fragment,e),w(B.$$.fragment,e),w(Y.$$.fragment,e),w(N.$$.fragment,e),w(z.$$.fragment,e),w(Q.$$.fragment,e),w(X.$$.fragment,e),w(q.$$.fragment,e),w(P.$$.fragment,e),w(O.$$.fragment,e),w(K.$$.fragment,e),Re=!0)},o(e){y(I.$$.fragment,e),y(W.$$.fragment,e),y(x.$$.fragment,e),y(H.$$.fragment,e),y(_.$$.
fragment,e),y(V.$$.fragment,e),y(R.$$.fragment,e),y(B.$$.fragment,e),y(Y.$$.fragment,e),y(N.$$.fragment,e),y(z.$$.fragment,e),y(Q.$$.fragment,e),y(X.$$.fragment,e),y(q.$$.fragment,e),y(P.$$.fragment,e),y(O.$$.fragment,e),y(K.$$.fragment,e),Re=!1},d(e){e&&(l(le),l(ee),l(se),l(ae),l(ne),l(k),l(ie),l(v),l(me),l(c),l(pe),l(G),l(re),l(Me),l(S),l(ue),l(fe),l($),l(we),l(ye),l(d),l(oe),l(be),l(A),l(ce),l(he),l(U),l(Te),l(ve),l(g),l($e),l(de),l(Z),l(Ue),l(ge),l(C),l(Ze),l(Ce),l(J),l(Je),l(je),l(E),l(Le),l(h),l(Ie),l(j),l(We),l(F),l(ke),l(Ge),l(L),l(xe),l(Se),l(He),l(D),l(_e),l(Ve),l(Ae),l(te)),l(T),o(I,e),o(W,e),o(x,e),o(H,e),o(_,e),o(V,e),o(R,e),o(B,e),o(Y,e),o(N,e),o(z,e),o(Q,e),o(X,e),o(q,e),o(P,e),o(O,e),o(K,e)}}}
/* JSON string with the page title, anchor and section outline; Tt's h() writes it into the
   content attribute of the "hf:doc:metadata" <meta> element. */
const vt='{"title":"Evaluate your model with Inspect-AI","local":"evaluate-your-model-with-inspect-ai","sections":[{"title":"Examples","local":"examples","sections":[],"depth":3}],"depth":1}';
/* Instance function passed to yt by Ct's constructor. Registers a callback with ft that reads
   the "fw" query parameter from window.location.search and discards the value, then returns
   an empty array. The parameter Ye is unused. */
function $t(Ye){return ft(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
/* Component class, exported as `component`. The constructor forwards its options T to yt
   together with $t, Tt, Mt and an empty object. */
class Ct extends wt{constructor(T){super(),yt(this,T,$t,Tt,Mt,{})}}export{Ct as component}; | |
Xet Storage Details
- Size:
- 14.6 kB
- Xet hash:
- 05628929bb4946777f83d9a96d3275b172a6a68d9172769997f70e0fa44a1819
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.