Buckets:
/**
 * Minified, compiled Svelte component module for the "Adding a New Metric"
 * documentation page (see the `title` passed to `We` and the metadata in `Be`).
 * The original source is the .mdx file referenced in the `Xe` props near the end.
 *
 * Module layout:
 *  - Imports: runtime helpers from the `scheduler` and `index` chunks, plus the
 *    `Tip` ($e), `CodeBlock` (E) and `EditOnGithub` (We, Xe) chunks. The helper
 *    names are minified; their roles (element creation, claiming, mounting,
 *    detaching, ...) are presumably the usual Svelte internals -- TODO confirm
 *    against the chunks.
 *  - Ne(H): fragment factory for a single <p> whose innerHTML is the IFEval
 *    note. It is passed as the default slot of the first $e instance
 *    (warning: false).
 *    NOTE(review): the <a> in that string has an empty href; the link target
 *    looks lost upstream -- confirm against the .mdx source.
 *  - Ve(H): fragment factory for a single <p> with the contribution
 *    instructions; default slot of the second $e instance (warning: true).
 *  - Ge(H): main page fragment. It instantiates one We heading, two $e
 *    instances, seven E code blocks (each with a `code` string that appears to
 *    be base64 text and a pre-highlighted HTML string) and one Xe instance
 *    carrying the source URL. The returned object exposes:
 *      c() create nodes, l(e) claim nodes from existing DOM `e`,
 *      h() set the <meta> `name`/`content` attributes (content is `Be`),
 *      m(e,t) append the <meta> to document.head and insert the rest into `e`,
 *      p(e,[t]) forward `$$scope` to the two $e instances when `t & 2` is set,
 *      i(e)/o(e) call `f`/`d` on each child fragment, guarded by `ce`,
 *      d(e) detach nodes and call `y` on each child component.
 *    The c/l/m sequences follow the same node order; keep them in sync if this
 *    file is ever edited by hand.
 *  - Be: JSON string with the page metadata (title, local anchor, sections,
 *    depth) used as the <meta> content.
 *  - Ae(H): instance function. Registers a callback through `Ze` that reads the
 *    `fw` query parameter from window.location.search and discards the result,
 *    then returns an empty array.
 *    NOTE(review): `Ze` is presumably Svelte's onMount -- confirm.
 *  - Se: component class extending `ve`; its constructor calls
 *    ke(this, a, Ae, Ge, Ce, {}). Exported as `component`.
 *
 * NOTE(review): every physical line below starts with "| " and most end with
 * " | |". This looks like table-markup residue from extraction rather than
 * part of the build output, and it also sits inside the multi-line template
 * literals -- verify against the real artifact before relying on this copy.
 */
| import{s as Ce,o as Ze,n as ge}from"../chunks/scheduler.7da89386.js";import{S as ve,i as ke,g as r,s as n,r as M,A as Ie,h as m,f as l,c as i,j as _e,u as o,x as c,k as be,y as Re,a as s,v as u,d as f,t as d,w as y}from"../chunks/index.0b7befd3.js";import{T as $e}from"../chunks/Tip.1e71740f.js";import{C as E}from"../chunks/CodeBlock.ce33a881.js";import{H as We,E as Xe}from"../chunks/EditOnGithub.0cb2bc8e.js";function Ne(H){let a,j='To see an example of a custom metric added along with a custom task, look at <a href="">the IFEval custom task</a>.';return{c(){a=r("p"),a.innerHTML=j},l(p){a=m(p,"P",{"data-svelte-h":!0}),c(a)!=="svelte-73seg9"&&(a.innerHTML=j)},m(p,h){s(p,a,h)},p:ge,d(p){p&&l(a)}}}function Ve(H){let a,j=`To contribute your custom metric to the lighteval repo, you would first need | |
| to install the required dev dependencies by running <code>pip install -e .[dev]</code> | |
| and then run <code>pre-commit install</code> to install the pre-commit hooks.`;return{c(){a=r("p"),a.innerHTML=j},l(p){a=m(p,"P",{"data-svelte-h":!0}),c(a)!=="svelte-l65q4m"&&(a.innerHTML=j)},m(p,h){s(p,a,h)},p:ge,d(p){p&&l(a)}}}function Ge(H){let a,j,p,h,T,Y,U,Me=`First, check if you can use one of the parametrized functions in | |
| <a href="package_reference/metrics#corpus-metrics">Corpus Metrics</a> or | |
| <a href="package_reference/metrics#sample-metrics">Sample Metrics</a>.`,Q,_,oe="If not, you can use the <code>custom_task</code> system to register your new metric:",S,w,x,J,L,b,ue="<li>Create a new Python file which should contain the full logic of your metric.</li> <li>The file also needs to start with these imports</li>",F,$,q,g,fe="You need to define a sample level metric:",P,C,D,Z,de="Here the sample level metric only returns one metric, if you want to return multiple metrics per sample you need to return a dictionary with the metrics as keys and the values as values.",K,v,O,k,ye="Then, you can define an aggregation function if needed, a common aggregation function is <code>np.mean</code>.",ee,I,te,R,je=`Finally, you can define your metric. If it’s a sample level metric, you can use the following code | |
| with <a href="/docs/lighteval/pr_476/en/package_reference/metrics#lighteval.metrics.utils.metric_utils.SampleLevelMetric">SampleLevelMetric</a>:`,le,W,se,X,he=`If your metric defines multiple metrics per sample, you can use the following code | |
| with <a href="/docs/lighteval/pr_476/en/package_reference/metrics#lighteval.metrics.utils.metric_utils.SampleLevelMetricGrouping">SampleLevelMetricGrouping</a>:`,ae,N,ne,V,we=`To finish, add the following, so that it adds your metric to our metrics list | |
| when loaded as a module.`,ie,G,pe,B,Je="You can then give your custom metric to lighteval by using <code>--custom-tasks path_to_your_file</code> when launching it.",re,A,me,z,ce;return T=new We({props:{title:"Adding a New Metric",local:"adding-a-new-metric",headingTag:"h1"}}),w=new $e({props:{warning:!1,$$slots:{default:[Ne]},$$scope:{ctx:H}}}),J=new $e({props:{warning:!0,$$slots:{default:[Ve]},$$scope:{ctx:H}}}),$=new E({props:{code:"ZnJvbSUyMGFlbnVtJTIwaW1wb3J0JTIwZXh0ZW5kX2VudW0lMEFmcm9tJTIwbGlnaHRldmFsLm1ldHJpY3MlMjBpbXBvcnQlMjBNZXRyaWNz",highlighted:`<span class="hljs-keyword">from</span> aenum <span class="hljs-keyword">import</span> extend_enum | |
| <span class="hljs-keyword">from</span> lighteval.metrics <span class="hljs-keyword">import</span> Metrics`,wrap:!1}}),C=new E({props:{code:"ZGVmJTIwY3VzdG9tX21ldHJpYyhwcmVkaWN0aW9ucyUzQSUyMGxpc3QlNUJzdHIlNUQlMkMlMjBmb3JtYXR0ZWRfZG9jJTNBJTIwRG9jJTJDJTIwKiprd2FyZ3MpJTIwLSUzRSUyMGJvb2wlM0ElMEElMjAlMjAlMjAlMjByZXNwb25zZSUyMCUzRCUyMHByZWRpY3Rpb25zJTVCMCU1RCUwQSUyMCUyMCUyMCUyMHJldHVybiUyMHJlc3BvbnNlJTIwJTNEJTNEJTIwZm9ybWF0dGVkX2RvYy5jaG9pY2VzJTVCZm9ybWF0dGVkX2RvYy5nb2xkX2luZGV4JTVE",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">custom_metric</span>(<span class="hljs-params">predictions: <span class="hljs-built_in">list</span>[<span class="hljs-built_in">str</span>], formatted_doc: Doc, **kwargs</span>) -> <span class="hljs-built_in">bool</span>: | |
| response = predictions[<span class="hljs-number">0</span>] | |
| <span class="hljs-keyword">return</span> response == formatted_doc.choices[formatted_doc.gold_index]`,wrap:!1}}),v=new E({props:{code:"ZGVmJTIwY3VzdG9tX21ldHJpYyhwcmVkaWN0aW9ucyUzQSUyMGxpc3QlNUJzdHIlNUQlMkMlMjBmb3JtYXR0ZWRfZG9jJTNBJTIwRG9jJTJDJTIwKiprd2FyZ3MpJTIwLSUzRSUyMGRpY3QlM0ElMEElMjAlMjAlMjAlMjByZXNwb25zZSUyMCUzRCUyMHByZWRpY3Rpb25zJTVCMCU1RCUwQSUyMCUyMCUyMCUyMHJldHVybiUyMCU3QiUyMmFjY3VyYWN5JTIyJTNBJTIwcmVzcG9uc2UlMjAlM0QlM0QlMjBmb3JtYXR0ZWRfZG9jLmNob2ljZXMlNUJmb3JtYXR0ZWRfZG9jLmdvbGRfaW5kZXglNUQlMkMlMjAlMjJvdGhlcl9tZXRyaWMlMjIlM0ElMjAwLjUlN0Q=",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">custom_metric</span>(<span class="hljs-params">predictions: <span class="hljs-built_in">list</span>[<span class="hljs-built_in">str</span>], formatted_doc: Doc, **kwargs</span>) -> <span class="hljs-built_in">dict</span>: | |
| response = predictions[<span class="hljs-number">0</span>] | |
| <span class="hljs-keyword">return</span> {<span class="hljs-string">"accuracy"</span>: response == formatted_doc.choices[formatted_doc.gold_index], <span class="hljs-string">"other_metric"</span>: <span class="hljs-number">0.5</span>}`,wrap:!1}}),I=new E({props:{code:"ZGVmJTIwYWdnX2Z1bmN0aW9uKGl0ZW1zKSUzQSUwQSUyMCUyMCUyMCUyMGZsYXRfaXRlbXMlMjAlM0QlMjAlNUJpdGVtJTIwZm9yJTIwc3VibGlzdCUyMGluJTIwaXRlbXMlMjBmb3IlMjBpdGVtJTIwaW4lMjBzdWJsaXN0JTVEJTBBJTIwJTIwJTIwJTIwc2NvcmUlMjAlM0QlMjBzdW0oZmxhdF9pdGVtcyklMjAlMkYlMjBsZW4oZmxhdF9pdGVtcyklMEElMjAlMjAlMjAlMjByZXR1cm4lMjBzY29yZQ==",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">agg_function</span>(<span class="hljs-params">items</span>): | |
| flat_items = [item <span class="hljs-keyword">for</span> sublist <span class="hljs-keyword">in</span> items <span class="hljs-keyword">for</span> item <span class="hljs-keyword">in</span> sublist] | |
| score = <span class="hljs-built_in">sum</span>(flat_items) / <span class="hljs-built_in">len</span>(flat_items) | |
| <span class="hljs-keyword">return</span> score`,wrap:!1}}),W=new E({props:{code:"bXlfY3VzdG9tX21ldHJpYyUyMCUzRCUyMFNhbXBsZUxldmVsTWV0cmljKCUwQSUyMCUyMCUyMCUyMG1ldHJpY19uYW1lJTNEJTdCY3VzdG9tX21ldHJpY19uYW1lJTdEJTJDJTBBJTIwJTIwJTIwJTIwaGlnaGVyX2lzX2JldHRlciUzRCU3QmVpdGhlciUyMFRydWUlMjBvciUyMEZhbHNlJTdEJTJDJTBBJTIwJTIwJTIwJTIwY2F0ZWdvcnklM0QlN0JNZXRyaWNDYXRlZ29yeSU3RCUyQyUwQSUyMCUyMCUyMCUyMHVzZV9jYXNlJTNEJTdCTWV0cmljVXNlQ2FzZSU3RCUyQyUwQSUyMCUyMCUyMCUyMHNhbXBsZV9sZXZlbF9mbiUzRGN1c3RvbV9tZXRyaWMlMkMlMEElMjAlMjAlMjAlMjBjb3JwdXNfbGV2ZWxfZm4lM0RhZ2dfZnVuY3Rpb24lMkMlMEEp",highlighted:`my_custom_metric = SampleLevelMetric( | |
| metric_name={custom_metric_name}, | |
| higher_is_better={either <span class="hljs-literal">True</span> <span class="hljs-keyword">or</span> <span class="hljs-literal">False</span>}, | |
| category={MetricCategory}, | |
| use_case={MetricUseCase}, | |
| sample_level_fn=custom_metric, | |
| corpus_level_fn=agg_function, | |
| )`,wrap:!1}}),N=new E({props:{code:"Y3VzdG9tX21ldHJpYyUyMCUzRCUyMFNhbXBsZUxldmVsTWV0cmljR3JvdXBpbmcoJTBBJTIwJTIwJTIwJTIwbWV0cmljX25hbWUlM0QlN0JzdWJtZXRyaWNfbmFtZXMlN0QlMkMlMEElMjAlMjAlMjAlMjBoaWdoZXJfaXNfYmV0dGVyJTNEJTdCbiUzQSUyMCU3QlRydWUlMjBvciUyMEZhbHNlJTdEJTIwZm9yJTIwbiUyMGluJTIwc3VibWV0cmljX25hbWVzJTdEJTJDJTBBJTIwJTIwJTIwJTIwY2F0ZWdvcnklM0QlN0JNZXRyaWNDYXRlZ29yeSU3RCUyQyUwQSUyMCUyMCUyMCUyMHVzZV9jYXNlJTNEJTdCTWV0cmljVXNlQ2FzZSU3RCUyQyUwQSUyMCUyMCUyMCUyMHNhbXBsZV9sZXZlbF9mbiUzRGN1c3RvbV9tZXRyaWMlMkMlMEElMjAlMjAlMjAlMjBjb3JwdXNfbGV2ZWxfZm4lM0QlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJhY2N1cmFjeSUyMiUzQSUyMG5wLm1lYW4lMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJvdGhlcl9tZXRyaWMlMjIlM0ElMjBhZ2dfZnVuY3Rpb24lMkMlMEElMjAlMjAlMjAlMjAlN0QlMkMlMEEp",highlighted:`custom_metric = SampleLevelMetricGrouping( | |
| metric_name={submetric_names}, | |
| higher_is_better={n: {<span class="hljs-literal">True</span> <span class="hljs-keyword">or</span> <span class="hljs-literal">False</span>} <span class="hljs-keyword">for</span> n <span class="hljs-keyword">in</span> submetric_names}, | |
| category={MetricCategory}, | |
| use_case={MetricUseCase}, | |
| sample_level_fn=custom_metric, | |
| corpus_level_fn={ | |
| <span class="hljs-string">"accuracy"</span>: np.mean, | |
| <span class="hljs-string">"other_metric"</span>: agg_function, | |
| }, | |
| )`,wrap:!1}}),G=new E({props:{code:"JTIzJTIwQWRkcyUyMHRoZSUyMG1ldHJpYyUyMHRvJTIwdGhlJTIwbWV0cmljJTIwbGlzdCElMEFleHRlbmRfZW51bShNZXRyaWNzJTJDJTIwJTIybWV0cmljX25hbWUlMjIlMkMlMjBtZXRyaWNfZnVuY3Rpb24pJTBBaWYlMjBfX25hbWVfXyUyMCUzRCUzRCUyMCUyMl9fbWFpbl9fJTIyJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoJTIySW1wb3J0ZWQlMjBtZXRyaWMlMjIp",highlighted:`<span class="hljs-comment"># Adds the metric to the metric list!</span> | |
| extend_enum(Metrics, <span class="hljs-string">"metric_name"</span>, metric_function) | |
| <span class="hljs-keyword">if</span> __name__ == <span class="hljs-string">"__main__"</span>: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">"Imported metric"</span>)`,wrap:!1}}),A=new Xe({props:{source:"https://github.com/huggingface/lighteval/blob/main/docs/source/adding-a-new-metric.mdx"}}),{c(){a=r("meta"),j=n(),p=r("p"),h=n(),M(T.$$.fragment),Y=n(),U=r("p"),U.innerHTML=Me,Q=n(),_=r("p"),_.innerHTML=oe,S=n(),M(w.$$.fragment),x=n(),M(J.$$.fragment),L=n(),b=r("ul"),b.innerHTML=ue,F=n(),M($.$$.fragment),q=n(),g=r("p"),g.textContent=fe,P=n(),M(C.$$.fragment),D=n(),Z=r("p"),Z.textContent=de,K=n(),M(v.$$.fragment),O=n(),k=r("p"),k.innerHTML=ye,ee=n(),M(I.$$.fragment),te=n(),R=r("p"),R.innerHTML=je,le=n(),M(W.$$.fragment),se=n(),X=r("p"),X.innerHTML=he,ae=n(),M(N.$$.fragment),ne=n(),V=r("p"),V.textContent=we,ie=n(),M(G.$$.fragment),pe=n(),B=r("p"),B.innerHTML=Je,re=n(),M(A.$$.fragment),me=n(),z=r("p"),this.h()},l(e){const t=Ie("svelte-u9bgzb",document.head);a=m(t,"META",{name:!0,content:!0}),t.forEach(l),j=i(e),p=m(e,"P",{}),_e(p).forEach(l),h=i(e),o(T.$$.fragment,e),Y=i(e),U=m(e,"P",{"data-svelte-h":!0}),c(U)!=="svelte-111c5am"&&(U.innerHTML=Me),Q=i(e),_=m(e,"P",{"data-svelte-h":!0}),c(_)!=="svelte-9nbkyl"&&(_.innerHTML=oe),S=i(e),o(w.$$.fragment,e),x=i(e),o(J.$$.fragment,e),L=i(e),b=m(e,"UL",{"data-svelte-h":!0}),c(b)!=="svelte-43knas"&&(b.innerHTML=ue),F=i(e),o($.$$.fragment,e),q=i(e),g=m(e,"P",{"data-svelte-h":!0}),c(g)!=="svelte-11wquls"&&(g.textContent=fe),P=i(e),o(C.$$.fragment,e),D=i(e),Z=m(e,"P",{"data-svelte-h":!0}),c(Z)!=="svelte-kqocna"&&(Z.textContent=de),K=i(e),o(v.$$.fragment,e),O=i(e),k=m(e,"P",{"data-svelte-h":!0}),c(k)!=="svelte-1cz39lw"&&(k.innerHTML=ye),ee=i(e),o(I.$$.fragment,e),te=i(e),R=m(e,"P",{"data-svelte-h":!0}),c(R)!=="svelte-1ccy8vb"&&(R.innerHTML=je),le=i(e),o(W.$$.fragment,e),se=i(e),X=m(e,"P",{"data-svelte-h":!0}),c(X)!=="svelte-1qexim4"&&(X.innerHTML=he),ae=i(e),o(N.$$.fragment,e),ne=i(e),V=m(e,"P",{"data-svelte-h":!0}),c(V)!=="svelte-ijjk5i"&&(V.textContent=we),ie=i(e),o(G.$$.fragment,e),pe=i(e),B=m(e,"P",
{"data-svelte-h":!0}),c(B)!=="svelte-am1wmy"&&(B.innerHTML=Je),re=i(e),o(A.$$.fragment,e),me=i(e),z=m(e,"P",{}),_e(z).forEach(l),this.h()},h(){be(a,"name","hf:doc:metadata"),be(a,"content",Be)},m(e,t){Re(document.head,a),s(e,j,t),s(e,p,t),s(e,h,t),u(T,e,t),s(e,Y,t),s(e,U,t),s(e,Q,t),s(e,_,t),s(e,S,t),u(w,e,t),s(e,x,t),u(J,e,t),s(e,L,t),s(e,b,t),s(e,F,t),u($,e,t),s(e,q,t),s(e,g,t),s(e,P,t),u(C,e,t),s(e,D,t),s(e,Z,t),s(e,K,t),u(v,e,t),s(e,O,t),s(e,k,t),s(e,ee,t),u(I,e,t),s(e,te,t),s(e,R,t),s(e,le,t),u(W,e,t),s(e,se,t),s(e,X,t),s(e,ae,t),u(N,e,t),s(e,ne,t),s(e,V,t),s(e,ie,t),u(G,e,t),s(e,pe,t),s(e,B,t),s(e,re,t),u(A,e,t),s(e,me,t),s(e,z,t),ce=!0},p(e,[t]){const Te={};t&2&&(Te.$$scope={dirty:t,ctx:e}),w.$set(Te);const Ue={};t&2&&(Ue.$$scope={dirty:t,ctx:e}),J.$set(Ue)},i(e){ce||(f(T.$$.fragment,e),f(w.$$.fragment,e),f(J.$$.fragment,e),f($.$$.fragment,e),f(C.$$.fragment,e),f(v.$$.fragment,e),f(I.$$.fragment,e),f(W.$$.fragment,e),f(N.$$.fragment,e),f(G.$$.fragment,e),f(A.$$.fragment,e),ce=!0)},o(e){d(T.$$.fragment,e),d(w.$$.fragment,e),d(J.$$.fragment,e),d($.$$.fragment,e),d(C.$$.fragment,e),d(v.$$.fragment,e),d(I.$$.fragment,e),d(W.$$.fragment,e),d(N.$$.fragment,e),d(G.$$.fragment,e),d(A.$$.fragment,e),ce=!1},d(e){e&&(l(j),l(p),l(h),l(Y),l(U),l(Q),l(_),l(S),l(x),l(L),l(b),l(F),l(q),l(g),l(P),l(D),l(Z),l(K),l(O),l(k),l(ee),l(te),l(R),l(le),l(se),l(X),l(ae),l(ne),l(V),l(ie),l(pe),l(B),l(re),l(me),l(z)),l(a),y(T,e),y(w,e),y(J,e),y($,e),y(C,e),y(v,e),y(I,e),y(W,e),y(N,e),y(G,e),y(A,e)}}}const Be='{"title":"Adding a New Metric","local":"adding-a-new-metric","sections":[],"depth":1}';function Ae(H){return Ze(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Se extends ve{constructor(a){super(),ke(this,a,Ae,Ge,Ce,{})}}export{Se as component}; | |
Xet Storage Details
- Size:
- 12.8 kB
- Xet hash:
- 306ddca32c95e8d0dfbf5687df1fa0e1d4211a170832fd33e238be7d15fef2b4
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.