Buckets:
/**
 * Compiled Svelte page component for the lighteval documentation page
 * "Adding a New Metric".
 *
 * This file is build output. The short binding names below are aliases of
 * whatever the shared chunk modules export; they are kept as-is so the module
 * keeps matching those chunks. The roles noted next to the imports are
 * inferred from how each helper is called in this file (presumably the usual
 * Svelte runtime helpers) -- confirm against the chunk sources before relying
 * on them.
 */
import { s as ve, o as Ie, n as ge } from "../chunks/scheduler.7da89386.js";
// ve: passed to `ke` as the 5th argument; Ie: takes a callback in `Ae`;
// ge: used as the no-op `p` (update) hook of the static fragments.
import {
  S as Ze, // base class of the exported component
  i as ke, // component initialiser, called from the constructor
  g as r, // creates an element by tag name
  s as a, // creates the separator node placed between blocks
  r as o, // calls a child component's fragment "create" step
  A as Ce, // selects previously rendered <head> nodes by hash
  h as m, // claims an existing element during hydration
  f as t, // detaches a node
  c as i, // claims an existing separator node during hydration
  j as _e, // returns the child nodes of an element
  u as M, // claims a child component's fragment during hydration
  x as c, // reads the hash compared against the "svelte-..." literals
  k as $e, // sets an attribute
  y as Ge, // appends a node to a parent
  a as s, // inserts a node into a target before an anchor
  v as u, // mounts a child component
  d, // called on each child fragment from the `i` (intro) hook
  t as f, // called on each child fragment from the `o` (outro) hook
  w as h, // destroys a child component
} from "../chunks/index.20910acc.js";
import { T as Ue } from "../chunks/Tip.53e22153.js";
import { C as z } from "../chunks/CodeBlock.143bd81e.js";
import { H as Xe, E as Re } from "../chunks/getInferenceSnippets.360e857f.js";

/** JSON metadata written to the `<meta name="hf:doc:metadata">` tag by `Be`. */
const We = '{"title":"Adding a New Metric","local":"adding-a-new-metric","sections":[],"depth":1}';

/**
 * Default-slot fragment of the first (non-warning) tip: a single static <p>.
 *
 * @param {*} N - fragment context (unused; the content is static).
 * @returns {{c: Function, l: Function, m: Function, p: Function, d: Function}}
 *   create / claim / mount / update / destroy hooks.
 */
function Ee(N) {
  let n;
  // NOTE(review): the anchor below has an empty href, so the link goes nowhere.
  // The intended target is not visible in this file -- fix it in the page source.
  const y = 'To see an example of a custom metric added along with a custom task, look at <a href="">the IFEval custom task</a>.';
  return {
    c() {
      n = r("p");
      n.innerHTML = y;
    },
    l(p) {
      n = m(p, "P", { "data-svelte-h": true });
      // Only rewrite the markup when the claimed node's hash differs.
      if (c(n) !== "svelte-73seg9") n.innerHTML = y;
    },
    m(p, j) {
      s(p, n, j);
    },
    p: ge,
    d(p) {
      if (p) t(n);
    },
  };
}

/**
 * Default-slot fragment of the second (warning) tip: a single static <p>.
 *
 * @param {*} N - fragment context (unused; the content is static).
 * @returns {{c: Function, l: Function, m: Function, p: Function, d: Function}}
 *   create / claim / mount / update / destroy hooks.
 */
function Ve(N) {
  let n;
  const y = `To contribute your custom metric to the lighteval repo, you would first need
to install the required dev dependencies by running <code>pip install -e .[dev]</code>
and then run <code>pre-commit install</code> to install the pre-commit hooks.`;
  return {
    c() {
      n = r("p");
      n.innerHTML = y;
    },
    l(p) {
      n = m(p, "P", { "data-svelte-h": true });
      if (c(n) !== "svelte-l65q4m") n.innerHTML = y;
    },
    m(p, j) {
      s(p, n, j);
    },
    p: ge,
    d(p) {
      if (p) t(n);
    },
  };
}

/**
 * Root fragment of the page: heading, prose paragraphs, two tips, seven code
 * blocks and the trailing `Re` component that receives the page's source URL.
 *
 * Statement order inside `c`, `l` and `m` is significant: nodes are created,
 * claimed and inserted in document order.
 *
 * @param {*} N - fragment context, forwarded to the tips as `$$scope.ctx`.
 * @returns {object} fragment with hooks `c` (create), `l` (claim), `h` (set
 *   attributes), `m` (mount), `p` (update), `i`/`o` (intro/outro), `d` (destroy).
 */
function Be(N) {
  // DOM nodes, assigned in `c` or `l`.
  let n, y, p, j, Y, b, L, _, S, x, Q, $, F, q, g, P, D, I, K, O, k, ee, le, G, te, se, R, ne, ae, V, ie, pe, W, re, me, H;
  // True once the intro pass has run; reset by the outro pass.
  let ce;

  // Static paragraph / list contents.
  const oe = `First, check if you can use one of the parametrized functions in
<a href="package_reference/metrics#corpus-metrics">Corpus Metrics</a> or
<a href="package_reference/metrics#sample-metrics">Sample Metrics</a>.`;
  const Me = "If not, you can use the <code>custom_task</code> system to register your new metric:";
  const ue = "<li>Create a new Python file which should contain the full logic of your metric.</li> <li>The file also needs to start with these imports</li>";
  const de = `You need to define a sample level metric, all sample level metrics will have the same signature, taking a
<code>~lighteval.types.Doc</code> and a <code>~lighteval.types.ModelResponse</code>. The metric should return a float or a
boolean.`;
  const fe = "Here the sample level metric only returns one metric, if you want to return multiple metrics per sample you need to return a dictionary with the metrics as keys and the values as values.";
  const he = "Then, you can define an aggregation function if needed, a common aggregation function is <code>np.mean</code>.";
  const ye = `Finally, you can define your metric. If it’s a sample level metric, you can use the following code
with <a href="/docs/lighteval/pr_860/en/package_reference/metrics#lighteval.metrics.utils.metric_utils.SampleLevelMetric">SampleLevelMetric</a>:`;
  const je = `If your metric defines multiple metrics per sample, you can use the following code
with <a href="/docs/lighteval/pr_860/en/package_reference/metrics#lighteval.metrics.utils.metric_utils.SampleLevelMetricGrouping">SampleLevelMetricGrouping</a>:`;
  const we = `To finish, add the following, so that it adds your metric to our metrics list
when loaded as a module.`;
  const Je = "You can then give your custom metric to lighteval by using <code>--custom-tasks path_to_your_file</code> when launching it.";

  // Child components. For every code block, `code` is the base64 of the
  // percent-encoded snippet and `highlighted` is the same snippet as markup;
  // the indentation inside `highlighted` mirrors what `code` decodes to.
  const T = new Xe({
    props: { title: "Adding a New Metric", local: "adding-a-new-metric", headingTag: "h1" },
  });
  const w = new Ue({
    props: { warning: false, $$slots: { default: [Ee] }, $$scope: { ctx: N } },
  });
  const J = new Ue({
    props: { warning: true, $$slots: { default: [Ve] }, $$scope: { ctx: N } },
  });
  const U = new z({
    props: {
      code: "ZnJvbSUyMGFlbnVtJTIwaW1wb3J0JTIwZXh0ZW5kX2VudW0lMEFmcm9tJTIwbGlnaHRldmFsLm1ldHJpY3MlMjBpbXBvcnQlMjBNZXRyaWNz",
      highlighted: `<span class="hljs-keyword">from</span> aenum <span class="hljs-keyword">import</span> extend_enum
<span class="hljs-keyword">from</span> lighteval.metrics <span class="hljs-keyword">import</span> Metrics`,
      wrap: false,
    },
  });
  const v = new z({
    props: {
      code: "ZGVmJTIwY3VzdG9tX21ldHJpYyhkb2MlM0ElMjBEb2MlMkMlMjBtb2RlbF9yZXNwb25zZSUzQSUyME1vZGVsUmVzcG9uc2UpJTIwLSUzRSUyMGJvb2wlM0ElMEElMjAlMjAlMjAlMjByZXNwb25zZSUyMCUzRCUyMG1vZGVsX3Jlc3BvbnNlLnRleHQlNUIwJTVEJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwcmVzcG9uc2UlMjAlM0QlM0QlMjBkb2MuY2hvaWNlcyU1QmRvYy5nb2xkX2luZGV4JTVE",
      highlighted: `<span class="hljs-keyword">def</span> <span class="hljs-title function_">custom_metric</span>(<span class="hljs-params">doc: Doc, model_response: ModelResponse</span>) -> <span class="hljs-built_in">bool</span>:
    response = model_response.text[<span class="hljs-number">0</span>]
    <span class="hljs-keyword">return</span> response == doc.choices[doc.gold_index]`,
      wrap: false,
    },
  });
  const Z = new z({
    props: {
      code: "ZGVmJTIwY3VzdG9tX21ldHJpYyhkb2MlM0ElMjBEb2MlMkMlMjBtb2RlbF9yZXNwb25zZSUzQSUyME1vZGVsUmVzcG9uc2UpJTIwLSUzRSUyMGRpY3QlM0ElMEElMjAlMjAlMjAlMjByZXNwb25zZSUyMCUzRCUyMG1vZGVsX3Jlc3BvbnNlLnRleHQlNUIwJTVEJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwJTdCJTIyYWNjdXJhY3klMjIlM0ElMjByZXNwb25zZSUyMCUzRCUzRCUyMGRvYy5jaG9pY2VzJTVCZG9jLmdvbGRfaW5kZXglNUQlMkMlMjAlMjJvdGhlcl9tZXRyaWMlMjIlM0ElMjAwLjUlN0Q=",
      highlighted: `<span class="hljs-keyword">def</span> <span class="hljs-title function_">custom_metric</span>(<span class="hljs-params">doc: Doc, model_response: ModelResponse</span>) -> <span class="hljs-built_in">dict</span>:
    response = model_response.text[<span class="hljs-number">0</span>]
    <span class="hljs-keyword">return</span> {<span class="hljs-string">"accuracy"</span>: response == doc.choices[doc.gold_index], <span class="hljs-string">"other_metric"</span>: <span class="hljs-number">0.5</span>}`,
      wrap: false,
    },
  });
  const C = new z({
    props: {
      code: "ZGVmJTIwYWdnX2Z1bmN0aW9uKGl0ZW1zKSUzQSUwQSUyMCUyMCUyMCUyMGZsYXRfaXRlbXMlMjAlM0QlMjAlNUJpdGVtJTIwZm9yJTIwc3VibGlzdCUyMGluJTIwaXRlbXMlMjBmb3IlMjBpdGVtJTIwaW4lMjBzdWJsaXN0JTVEJTBBJTIwJTIwJTIwJTIwc2NvcmUlMjAlM0QlMjBzdW0oZmxhdF9pdGVtcyklMjAlMkYlMjBsZW4oZmxhdF9pdGVtcyklMEElMjAlMjAlMjAlMjByZXR1cm4lMjBzY29yZQ==",
      highlighted: `<span class="hljs-keyword">def</span> <span class="hljs-title function_">agg_function</span>(<span class="hljs-params">items</span>):
    flat_items = [item <span class="hljs-keyword">for</span> sublist <span class="hljs-keyword">in</span> items <span class="hljs-keyword">for</span> item <span class="hljs-keyword">in</span> sublist]
    score = <span class="hljs-built_in">sum</span>(flat_items) / <span class="hljs-built_in">len</span>(flat_items)
    <span class="hljs-keyword">return</span> score`,
      wrap: false,
    },
  });
  const X = new z({
    props: {
      code: "bXlfY3VzdG9tX21ldHJpYyUyMCUzRCUyMFNhbXBsZUxldmVsTWV0cmljKCUwQSUyMCUyMCUyMCUyMG1ldHJpY19uYW1lJTNEJTdCY3VzdG9tX21ldHJpY19uYW1lJTdEJTJDJTBBJTIwJTIwJTIwJTIwaGlnaGVyX2lzX2JldHRlciUzRCU3QmVpdGhlciUyMFRydWUlMjBvciUyMEZhbHNlJTdEJTJDJTBBJTIwJTIwJTIwJTIwY2F0ZWdvcnklM0QlN0JTYW1wbGluZ01ldGhvZCU3RCUyQyUwQSUyMCUyMCUyMCUyMHNhbXBsZV9sZXZlbF9mbiUzRGN1c3RvbV9tZXRyaWMlMkMlMEElMjAlMjAlMjAlMjBjb3JwdXNfbGV2ZWxfZm4lM0RhZ2dfZnVuY3Rpb24lMkMlMEEp",
      highlighted: `my_custom_metric = SampleLevelMetric(
    metric_name={custom_metric_name},
    higher_is_better={either <span class="hljs-literal">True</span> <span class="hljs-keyword">or</span> <span class="hljs-literal">False</span>},
    category={SamplingMethod},
    sample_level_fn=custom_metric,
    corpus_level_fn=agg_function,
)`,
      wrap: false,
    },
  });
  const E = new z({
    props: {
      code: "Y3VzdG9tX21ldHJpYyUyMCUzRCUyMFNhbXBsZUxldmVsTWV0cmljR3JvdXBpbmcoJTBBJTIwJTIwJTIwJTIwbWV0cmljX25hbWUlM0QlN0JzdWJtZXRyaWNfbmFtZXMlN0QlMkMlMEElMjAlMjAlMjAlMjBoaWdoZXJfaXNfYmV0dGVyJTNEJTdCbiUzQSUyMCU3QlRydWUlMjBvciUyMEZhbHNlJTdEJTIwZm9yJTIwbiUyMGluJTIwc3VibWV0cmljX25hbWVzJTdEJTJDJTBBJTIwJTIwJTIwJTIwY2F0ZWdvcnklM0QlN0JTYW1wbGluZ01ldGhvZCU3RCUyQyUwQSUyMCUyMCUyMCUyMHNhbXBsZV9sZXZlbF9mbiUzRGN1c3RvbV9tZXRyaWMlMkMlMEElMjAlMjAlMjAlMjBjb3JwdXNfbGV2ZWxfZm4lM0QlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJhY2N1cmFjeSUyMiUzQSUyMG5wLm1lYW4lMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJvdGhlcl9tZXRyaWMlMjIlM0ElMjBhZ2dfZnVuY3Rpb24lMkMlMEElMjAlMjAlMjAlMjAlN0QlMkMlMEEp",
      highlighted: `custom_metric = SampleLevelMetricGrouping(
    metric_name={submetric_names},
    higher_is_better={n: {<span class="hljs-literal">True</span> <span class="hljs-keyword">or</span> <span class="hljs-literal">False</span>} <span class="hljs-keyword">for</span> n <span class="hljs-keyword">in</span> submetric_names},
    category={SamplingMethod},
    sample_level_fn=custom_metric,
    corpus_level_fn={
        <span class="hljs-string">"accuracy"</span>: np.mean,
        <span class="hljs-string">"other_metric"</span>: agg_function,
    },
)`,
      wrap: false,
    },
  });
  const B = new z({
    props: {
      code: "JTIzJTIwQWRkcyUyMHRoZSUyMG1ldHJpYyUyMHRvJTIwdGhlJTIwbWV0cmljJTIwbGlzdCElMEFleHRlbmRfZW51bShNZXRyaWNzJTJDJTIwJTIybWV0cmljX25hbWUlMjIlMkMlMjBtZXRyaWNfZnVuY3Rpb24pJTBBaWYlMjBfX25hbWVfXyUyMCUzRCUzRCUyMCUyMl9fbWFpbl9fJTIyJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoJTIySW1wb3J0ZWQlMjBtZXRyaWMlMjIp",
      highlighted: `<span class="hljs-comment"># Adds the metric to the metric list!</span>
extend_enum(Metrics, <span class="hljs-string">"metric_name"</span>, metric_function)
<span class="hljs-keyword">if</span> __name__ == <span class="hljs-string">"__main__"</span>:
    <span class="hljs-built_in">print</span>(<span class="hljs-string">"Imported metric"</span>)`,
      wrap: false,
    },
  });
  const A = new Re({
    props: {
      source: "https://github.com/huggingface/lighteval/blob/main/docs/source/adding-a-new-metric.mdx",
    },
  });

  // Every child component, in document order; shared by `i`, `o` and `d`.
  const components = [T, w, J, U, v, Z, C, X, E, B, A];

  return {
    // Create all nodes from scratch.
    c() {
      n = r("meta");
      y = a();
      p = r("p");
      j = a();
      o(T.$$.fragment);
      Y = a();
      b = r("p");
      b.innerHTML = oe;
      L = a();
      _ = r("p");
      _.innerHTML = Me;
      S = a();
      o(w.$$.fragment);
      x = a();
      o(J.$$.fragment);
      Q = a();
      $ = r("ul");
      $.innerHTML = ue;
      F = a();
      o(U.$$.fragment);
      q = a();
      g = r("p");
      g.innerHTML = de;
      P = a();
      o(v.$$.fragment);
      D = a();
      I = r("p");
      I.textContent = fe;
      K = a();
      o(Z.$$.fragment);
      O = a();
      k = r("p");
      k.innerHTML = he;
      ee = a();
      o(C.$$.fragment);
      le = a();
      G = r("p");
      G.innerHTML = ye;
      te = a();
      o(X.$$.fragment);
      se = a();
      R = r("p");
      R.innerHTML = je;
      ne = a();
      o(E.$$.fragment);
      ae = a();
      V = r("p");
      V.textContent = we;
      ie = a();
      o(B.$$.fragment);
      pe = a();
      W = r("p");
      W.innerHTML = Je;
      re = a();
      o(A.$$.fragment);
      me = a();
      H = r("p");
      this.h();
    },

    // Claim existing nodes under `e`; static content is only rewritten when
    // the node's hash does not match the expected "svelte-..." value.
    l(e) {
      const l = Ce("svelte-u9bgzb", document.head);
      n = m(l, "META", { name: true, content: true });
      l.forEach(t);
      y = i(e);
      p = m(e, "P", {});
      _e(p).forEach(t);
      j = i(e);
      M(T.$$.fragment, e);
      Y = i(e);
      b = m(e, "P", { "data-svelte-h": true });
      if (c(b) !== "svelte-111c5am") b.innerHTML = oe;
      L = i(e);
      _ = m(e, "P", { "data-svelte-h": true });
      if (c(_) !== "svelte-9nbkyl") _.innerHTML = Me;
      S = i(e);
      M(w.$$.fragment, e);
      x = i(e);
      M(J.$$.fragment, e);
      Q = i(e);
      $ = m(e, "UL", { "data-svelte-h": true });
      if (c($) !== "svelte-43knas") $.innerHTML = ue;
      F = i(e);
      M(U.$$.fragment, e);
      q = i(e);
      g = m(e, "P", { "data-svelte-h": true });
      if (c(g) !== "svelte-vzuqxp") g.innerHTML = de;
      P = i(e);
      M(v.$$.fragment, e);
      D = i(e);
      I = m(e, "P", { "data-svelte-h": true });
      if (c(I) !== "svelte-kqocna") I.textContent = fe;
      K = i(e);
      M(Z.$$.fragment, e);
      O = i(e);
      k = m(e, "P", { "data-svelte-h": true });
      if (c(k) !== "svelte-1cz39lw") k.innerHTML = he;
      ee = i(e);
      M(C.$$.fragment, e);
      le = i(e);
      G = m(e, "P", { "data-svelte-h": true });
      if (c(G) !== "svelte-18ynjr4") G.innerHTML = ye;
      te = i(e);
      M(X.$$.fragment, e);
      se = i(e);
      R = m(e, "P", { "data-svelte-h": true });
      if (c(R) !== "svelte-rbykqz") R.innerHTML = je;
      ne = i(e);
      M(E.$$.fragment, e);
      ae = i(e);
      V = m(e, "P", { "data-svelte-h": true });
      if (c(V) !== "svelte-ijjk5i") V.textContent = we;
      ie = i(e);
      M(B.$$.fragment, e);
      pe = i(e);
      W = m(e, "P", { "data-svelte-h": true });
      if (c(W) !== "svelte-am1wmy") W.innerHTML = Je;
      re = i(e);
      M(A.$$.fragment, e);
      me = i(e);
      H = m(e, "P", {});
      _e(H).forEach(t);
      this.h();
    },

    // Set the attributes of the metadata <meta> tag.
    h() {
      $e(n, "name", "hf:doc:metadata");
      $e(n, "content", We);
    },

    // Insert everything into target `e` before anchor `l`; the <meta> tag
    // goes to document.head instead.
    m(e, l) {
      Ge(document.head, n);
      s(e, y, l);
      s(e, p, l);
      s(e, j, l);
      u(T, e, l);
      s(e, Y, l);
      s(e, b, l);
      s(e, L, l);
      s(e, _, l);
      s(e, S, l);
      u(w, e, l);
      s(e, x, l);
      u(J, e, l);
      s(e, Q, l);
      s(e, $, l);
      s(e, F, l);
      u(U, e, l);
      s(e, q, l);
      s(e, g, l);
      s(e, P, l);
      u(v, e, l);
      s(e, D, l);
      s(e, I, l);
      s(e, K, l);
      u(Z, e, l);
      s(e, O, l);
      s(e, k, l);
      s(e, ee, l);
      u(C, e, l);
      s(e, le, l);
      s(e, G, l);
      s(e, te, l);
      u(X, e, l);
      s(e, se, l);
      s(e, R, l);
      s(e, ne, l);
      u(E, e, l);
      s(e, ae, l);
      s(e, V, l);
      s(e, ie, l);
      u(B, e, l);
      s(e, pe, l);
      s(e, W, l);
      s(e, re, l);
      u(A, e, l);
      s(e, me, l);
      s(e, H, l);
      ce = true;
    },

    // Forward a `$$scope` change (dirty bit 2) to the two tips; nothing else
    // on the page is dynamic.
    p(e, [l]) {
      const Te = {};
      if (l & 2) Te.$$scope = { dirty: l, ctx: e };
      w.$set(Te);
      const be = {};
      if (l & 2) be.$$scope = { dirty: l, ctx: e };
      J.$set(be);
    },

    // Intro pass over every child fragment; skipped when already done.
    i(e) {
      if (ce) return;
      for (const component of components) d(component.$$.fragment, e);
      ce = true;
    },

    // Outro pass over every child fragment.
    o(e) {
      for (const component of components) f(component.$$.fragment, e);
      ce = false;
    },

    // Tear down: body nodes are detached only when `e` is truthy, the <meta>
    // tag always is, then every child component is destroyed.
    d(e) {
      if (e) {
        const bodyNodes = [y, p, j, Y, b, L, _, S, x, Q, $, F, q, g, P, D, I, K, O, k, ee, le, G, te, se, R, ne, ae, V, ie, pe, W, re, me, H];
        for (const node of bodyNodes) t(node);
      }
      t(n);
      for (const component of components) h(component, e);
    },
  };
}

/**
 * Instance function handed to `ke`. Registers a callback through `Ie` that
 * reads the `fw` query parameter and discards the value, then returns an
 * empty context array.
 *
 * @param {*} N - unused.
 * @returns {Array} always `[]`.
 */
function Ae(N) {
  Ie(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component: wires `Ae` (instance) and `Be` (fragment) together. */
class Se extends Ze {
  constructor(n) {
    super();
    ke(this, n, Ae, Be, ve, {});
  }
}

export { Se as component };
Xet Storage Details
- Size:
- 12.6 kB
- Xet hash:
- bcfcd4fb65b5c63cb1e2521908c1b3d0712cbb6f2dba5245d49a7c2cd18eb905
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.