Buckets:

rtrm's picture
download
raw
27.3 kB
/**
 * Minified ES module for the "Adding a Custom Task" documentation page.
 *
 * Exports the class `Ve` under the name `component`. Every runtime helper is
 * imported from sibling `../chunks/` modules under single-letter aliases.
 *
 * NOTE(review): this looks like Svelte compiler output - confirm, and prefer
 * changing the page source referenced near the end of `Be`
 * (docs/source/adding-a-custom-task.mdx) over hand-editing this file.
 *
 * Quoted ("..." / '...') string literals in this file must stay on a single
 * physical line: a raw line break inside them is a SyntaxError. Only the
 * backtick template literals may span lines, and their line breaks are part
 * of the rendered text.
 */
import{s as de,o as $e,n as fe}from"../chunks/scheduler.7da89386.js";import{S as Ce,i as ge,g as c,s as a,r as M,A as Ie,h as r,f as s,c as n,j as je,u as i,x as J,k as he,y as be,a as l,v as p,d as m,t as o,w as y}from"../chunks/index.20910acc.js";import{T as we}from"../chunks/Tip.53e22153.js";import{C as d}from"../chunks/CodeBlock.143bd81e.js";import{H as U,E as ke}from"../chunks/getInferenceSnippets.375cdad5.js";
/**
 * Fragment factory used as the default slot of the `we` instance `w`
 * (warning: false). Builds a single <p> whose innerHTML links to the
 * community_tasks directory. In `l`, the HTML is only re-applied when
 * `J(u)` does not return "svelte-hvwsma".
 */
function Ze(ot){let u,j='You can find examples of custom tasks in the <a href="https://github.com/huggingface/lighteval/tree/main/community_tasks" rel="nofollow">community_tasks</a> directory.';return{c(){u=c("p"),u.innerHTML=j},l(T){u=r(T,"P",{"data-svelte-h":!0}),J(u)!=="svelte-hvwsma"&&(u.innerHTML=j)},m(T,h){l(T,u,h)},p:fe,d(T){T&&s(u)}}}
/**
 * Fragment factory used as the default slot of the `we` instance `f`
 * (warning: true). Same shape as `Ze`; the paragraph explains the dev
 * dependency and pre-commit setup needed to contribute a task.
 */
function Ae(ot){let u,j=`To contribute your custom task to the Lighteval repository, you would first need
to install the required dev dependencies by running <code>pip install -e .[dev]</code>
and then run <code>pre-commit install</code> to install the pre-commit hooks.`;return{c(){u=c("p"),u.innerHTML=j},l(T){u=r(T,"P",{"data-svelte-h":!0}),J(u)!=="svelte-1qp1uy3"&&(u.innerHTML=j)},m(T,h){l(T,u,h)},p:fe,d(T){T&&s(u)}}}
/**
 * Main fragment factory for the page.
 *
 * Instantiates, in document order: heading components (`U`), the two `we`
 * tips (slots `Ze` and `Ae`), code-block components (`d`) and a final `ke`
 * component pointing at the page source on GitHub. Each `d` receives a
 * base64 `code` prop (it appears to hold the percent-encoded raw snippet -
 * TODO confirm against the CodeBlock chunk) and a pre-rendered `highlighted`
 * HTML prop.
 *
 * The returned object exposes `c`, `l`, `h`, `m`, `p`, `i`, `o` and `d`.
 * `h` sets the name/content attributes of the <meta> element from `_e`.
 * `p` only forwards `$$scope` to `w` and `f`, and only when bit 2 of the
 * dirty mask `e` is set. `i`/`o` toggle the local flag `ee` so repeated
 * calls to `i` do not re-run the `m(...)` calls.
 */
function Be(ot){let u,j,T,h,$,ut,C,se="Lighteval provides a flexible framework for creating custom evaluation tasks. This guide explains how to create and integrate new tasks into the evaluation system.",ct,g,rt,I,le="Before creating a custom task, consider which category it belongs to:",Ut,b,Tt,k,ae=`Core evaluations are evaluations that only require standard logic in their
metrics and processing, and that we will add to our test suite to ensure non-regression through time. They already see high usage in the community.`,Jt,Z,jt,A,ne=`Extended evaluations are evaluations that require custom logic in their
metrics (complex normalization, an LLM as a judge, etc.), that we added to
facilitate the life of users. They already see high usage in the community.`,ht,B,wt,_,Me="Community evaluations are submissions by the community of new tasks.",ft,v,ie="A popular community evaluation can move to become an extended or core evaluation over time.",dt,w,$t,G,Ct,f,gt,S,It,X,pe="First, create a Python file under the <code>community_tasks</code> directory.",bt,N,kt,E,me=`You need to define a prompt function that will convert a line from your
dataset to a document to be used for evaluation.`,Zt,V,At,R,Bt,Q,oe='You can either use an existing metric (defined in <code>lighteval.metrics.metrics.Metrics</code>) or <a href="adding-a-new-metric">create a custom one</a>.',_t,W,vt,q,Gt,x,St,z,Xt,F,Nt,Y,ye='You can define a task with or without subsets using <a href="/docs/lighteval/pr_990/en/package_reference/tasks#lighteval.tasks.lighteval_task.LightevalTaskConfig">LightevalTaskConfig</a>.',Et,H,Vt,L,Rt,D,Qt,P,ue=`If you want to create a task with multiple subsets, add them to the
<code>SAMPLE_SUBSETS</code> list and create a task for each subset.`,Wt,K,qt,O,xt,tt,ce="Then you need to add your task to the <code>TASKS_TABLE</code> list.",zt,et,Ft,st,Yt,lt,re=`If your task has requirements, you need to create a <code>requirement.txt</code> file with
only the required dependencies so that anyone can run your task.`,Ht,at,Lt,nt,Ue="Once your file is created, you can run the evaluation with the following command:",Dt,Mt,Pt,it,Kt,pt,Ot,mt,te,yt,ee;return $=new U({props:{title:"Adding a Custom Task",local:"adding-a-custom-task",headingTag:"h1"}}),g=new U({props:{title:"Task Categories",local:"task-categories",headingTag:"h2"}}),b=new U({props:{title:"Core Evaluations",local:"core-evaluations",headingTag:"h3"}}),Z=new U({props:{title:"Extended Evaluations",local:"extended-evaluations",headingTag:"h3"}}),B=new U({props:{title:"Community Evaluations",local:"community-evaluations",headingTag:"h3"}}),w=new we({props:{warning:!1,$$slots:{default:[Ze]},$$scope:{ctx:ot}}}),G=new U({props:{title:"Step-by-Step Creation of a Custom Task",local:"step-by-step-creation-of-a-custom-task",headingTag:"h2"}}),f=new we({props:{warning:!0,$$slots:{default:[Ae]},$$scope:{ctx:ot}}}),S=new U({props:{title:"Step 1: Create the Task File",local:"step-1-create-the-task-file",headingTag:"h3"}}),N=new U({props:{title:"Step 2: Define the Prompt Function",local:"step-2-define-the-prompt-function",headingTag:"h3"}}),V=new d({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC50YXNrcy5yZXF1ZXN0cyUyMGltcG9ydCUyMERvYyUwQSUwQSUyMyUyMERlZmluZSUyMGFzJTIwbWFueSUyMGFzJTIweW91JTIwbmVlZCUyMGZvciUyMHlvdXIlMjBkaWZmZXJlbnQlMjB0YXNrcyUwQWRlZiUyMHByb21wdF9mbihsaW5lJTNBJTIwZGljdCUyQyUyMHRhc2tfbmFtZSUzQSUyMHN0ciklM0ElMEElMjAlMjAlMjAlMjAlMjIlMjIlMjJEZWZpbmVzJTIwaG93JTIwdG8lMjBnbyUyMGZyb20lMjBhJTIwZGF0YXNldCUyMGxpbmUlMjB0byUyMGElMjBkb2MlMjBvYmplY3QuJTBBJTIwJTIwJTIwJTIwRm9sbG93JTIwZXhhbXBsZXMlMjBpbiUyMHNyYyUyRmxpZ2h0ZXZhbCUyRnRhc2tzJTJGZGVmYXVsdF9wcm9tcHRzLnB5JTJDJTIwb3IlMjBnZXQlMjBtb3JlJTIwaW5mbyUwQSUyMCUyMCUyMCUyMGFib3V0JTIwd2hhdCUyMHRoaXMlMjBmdW5jdGlvbiUyMHNob3VsZCUyMGRvJTIwaW4lMjB0aGUlMjBSRUFETUUuJTBBJTIwJTIwJTIwJTIwJTIyJTIyJTIyJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwRG9jKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRhc2tfbmFtZSUzRHRhc2tfbmFtZSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHF1ZXJ5JTNEbGluZSU1QiUyMnF1ZXN0aW9uJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwY2hvaWNlcyUzRCU1QmYlMjIlMjAlN0JjJTdEJTIyJTIwZm9yJTIwYyUyMGluJTIwbGluZSU1QiUyMmNob2ljZXMlMjIlNUQlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBnb2xkX2luZGV4JTNEbGluZSU1QiUyMmdvbGQlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAp",highlighted:`<span class="hljs-keyword">from</span> lighteval.tasks.requests <span class="hljs-keyword">import</span> Doc
<span class="hljs-comment"># Define as many as you need for your different tasks</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">prompt_fn</span>(<span class="hljs-params">line: <span class="hljs-built_in">dict</span>, task_name: <span class="hljs-built_in">str</span></span>):
<span class="hljs-string">&quot;&quot;&quot;Defines how to go from a dataset line to a doc object.
Follow examples in src/lighteval/tasks/default_prompts.py, or get more info
about what this function should do in the README.
&quot;&quot;&quot;</span>
<span class="hljs-keyword">return</span> Doc(
task_name=task_name,
query=line[<span class="hljs-string">&quot;question&quot;</span>],
choices=[<span class="hljs-string">f&quot; <span class="hljs-subst">{c}</span>&quot;</span> <span class="hljs-keyword">for</span> c <span class="hljs-keyword">in</span> line[<span class="hljs-string">&quot;choices&quot;</span>]],
gold_index=line[<span class="hljs-string">&quot;gold&quot;</span>],
)`,wrap:!1}}),R=new U({props:{title:"Step 3: Choose or Create Metrics",local:"step-3-choose-or-create-metrics",headingTag:"h3"}}),W=new U({props:{title:"Using Existing Metrics",local:"using-existing-metrics",headingTag:"h4"}}),q=new d({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC5tZXRyaWNzJTIwaW1wb3J0JTIwTWV0cmljcyUwQSUwQSUyMyUyMFVzZSUyMGFuJTIwZXhpc3RpbmclMjBtZXRyaWMlMEFtZXRyaWMlMjAlM0QlMjBNZXRyaWNzLkFDQ1VSQUNZ",highlighted:`<span class="hljs-keyword">from</span> lighteval.metrics <span class="hljs-keyword">import</span> Metrics
<span class="hljs-comment"># Use an existing metric</span>
metric = Metrics.ACCURACY`,wrap:!1}}),x=new U({props:{title:"Creating Custom Metrics",local:"creating-custom-metrics",headingTag:"h4"}}),z=new d({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC5tZXRyaWNzLnV0aWxzLm1ldHJpY191dGlscyUyMGltcG9ydCUyMFNhbXBsZUxldmVsTWV0cmljJTBBaW1wb3J0JTIwbnVtcHklMjBhcyUyMG5wJTBBJTBBY3VzdG9tX21ldHJpYyUyMCUzRCUyMFNhbXBsZUxldmVsTWV0cmljKCUwQSUyMCUyMCUyMCUyMG1ldHJpY19uYW1lJTNEJTIybXlfY3VzdG9tX21ldHJpY19uYW1lJTIyJTJDJTBBJTIwJTIwJTIwJTIwaGlnaGVyX2lzX2JldHRlciUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBjYXRlZ29yeSUzRCUyMmFjY3VyYWN5JTIyJTJDJTBBJTIwJTIwJTIwJTIwc2FtcGxlX2xldmVsX2ZuJTNEbGFtYmRhJTIweCUzQSUyMHglMkMlMjAlMjAlMjMlMjBIb3clMjB0byUyMGNvbXB1dGUlMjBzY29yZSUyMGZvciUyMG9uZSUyMHNhbXBsZSUwQSUyMCUyMCUyMCUyMGNvcnB1c19sZXZlbF9mbiUzRG5wLm1lYW4lMkMlMjAlMjAlMjMlMjBIb3clMjB0byUyMGFnZ3JlZ2F0ZSUyMHRoZSUyMHNhbXBsZSUyMG1ldHJpY3MlMEEp",highlighted:`<span class="hljs-keyword">from</span> lighteval.metrics.utils.metric_utils <span class="hljs-keyword">import</span> SampleLevelMetric
<span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
custom_metric = SampleLevelMetric(
metric_name=<span class="hljs-string">&quot;my_custom_metric_name&quot;</span>,
higher_is_better=<span class="hljs-literal">True</span>,
category=<span class="hljs-string">&quot;accuracy&quot;</span>,
sample_level_fn=<span class="hljs-keyword">lambda</span> x: x, <span class="hljs-comment"># How to compute score for one sample</span>
corpus_level_fn=np.mean, <span class="hljs-comment"># How to aggregate the sample metrics</span>
)`,wrap:!1}}),F=new U({props:{title:"Step 4: Define Your Task",local:"step-4-define-your-task",headingTag:"h3"}}),H=new U({props:{title:"Simple Task (No Subsets)",local:"simple-task-no-subsets",headingTag:"h4"}}),L=new d({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC50YXNrcy5saWdodGV2YWxfdGFzayUyMGltcG9ydCUyMExpZ2h0ZXZhbFRhc2tDb25maWclMEElMEElMjMlMjBUaGlzJTIwaXMlMjBob3clMjB5b3UlMjBjcmVhdGUlMjBhJTIwc2ltcGxlJTIwdGFzayUyMChsaWtlJTIwSGVsbGFTd2FnKSUyMHdoaWNoJTIwaGFzJTIwb25lJTIwc2luZ2xlJTIwc3Vic2V0JTBBJTIzJTIwYXR0YWNoZWQlMjB0byUyMGl0JTJDJTIwYW5kJTIwb25lJTIwZXZhbHVhdGlvbiUyMHBvc3NpYmxlLiUwQXRhc2slMjAlM0QlMjBMaWdodGV2YWxUYXNrQ29uZmlnKCUwQSUyMCUyMCUyMCUyMG5hbWUlM0QlMjJteW90aGVydGFzayUyMiUyQyUwQSUyMCUyMCUyMCUyMHByb21wdF9mdW5jdGlvbiUzRHByb21wdF9mbiUyQyUyMCUyMCUyMyUyME11c3QlMjBiZSUyMGRlZmluZWQlMjBpbiUyMHRoZSUyMGZpbGUlMjBvciUyMGltcG9ydGVkJTBBJTIwJTIwJTIwJTIwc3VpdGUlM0QlNUIlMjJjb21tdW5pdHklMjIlNUQlMkMlMEElMjAlMjAlMjAlMjBoZl9yZXBvJTNEJTIyeW91cl9kYXRhc2V0X3JlcG9fb25faGYlMjIlMkMlMEElMjAlMjAlMjAlMjBoZl9zdWJzZXQlM0QlMjJkZWZhdWx0JTIyJTJDJTBBJTIwJTIwJTIwJTIwaGZfYXZhaWxfc3BsaXRzJTNEJTVCJTIydHJhaW4lMjIlMkMlMjAlMjJ0ZXN0JTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwZXZhbHVhdGlvbl9zcGxpdHMlM0QlNUIlMjJ0ZXN0JTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwZmV3X3Nob3RzX3NwbGl0JTNEJTIydHJhaW4lMjIlMkMlMEElMjAlMjAlMjAlMjBmZXdfc2hvdHNfc2VsZWN0JTNEJTIycmFuZG9tX3NhbXBsaW5nX2Zyb21fdHJhaW4lMjIlMkMlMEElMjAlMjAlMjAlMjBtZXRyaWNzJTNEJTVCbWV0cmljJTVEJTJDJTIwJTIwJTIzJTIwU2VsZWN0JTIweW91ciUyMG1ldHJpYyUyMGluJTIwTWV0cmljcyUwQSUyMCUyMCUyMCUyMGdlbmVyYXRpb25fc2l6ZSUzRDI1NiUyQyUwQSUyMCUyMCUyMCUyMHN0b3Bfc2VxdWVuY2UlM0QlNUIlMjIlNUNuJTIyJTJDJTIwJTIyUXVlc3Rpb24lM0ElMjIlNUQlMkMlMEEp",highlighted:`<span class="hljs-keyword">from</span> lighteval.tasks.lighteval_task <span class="hljs-keyword">import</span> LightevalTaskConfig
<span class="hljs-comment"># This is how you create a simple task (like HellaSwag) which has one single subset</span>
<span class="hljs-comment"># attached to it, and one evaluation possible.</span>
task = LightevalTaskConfig(
name=<span class="hljs-string">&quot;myothertask&quot;</span>,
prompt_function=prompt_fn, <span class="hljs-comment"># Must be defined in the file or imported</span>
suite=[<span class="hljs-string">&quot;community&quot;</span>],
hf_repo=<span class="hljs-string">&quot;your_dataset_repo_on_hf&quot;</span>,
hf_subset=<span class="hljs-string">&quot;default&quot;</span>,
hf_avail_splits=[<span class="hljs-string">&quot;train&quot;</span>, <span class="hljs-string">&quot;test&quot;</span>],
evaluation_splits=[<span class="hljs-string">&quot;test&quot;</span>],
few_shots_split=<span class="hljs-string">&quot;train&quot;</span>,
few_shots_select=<span class="hljs-string">&quot;random_sampling_from_train&quot;</span>,
metrics=[metric], <span class="hljs-comment"># Select your metric in Metrics</span>
generation_size=<span class="hljs-number">256</span>,
stop_sequence=[<span class="hljs-string">&quot;\\n&quot;</span>, <span class="hljs-string">&quot;Question:&quot;</span>],
)`,wrap:!1}}),D=new U({props:{title:"Task with Multiple Subsets",local:"task-with-multiple-subsets",headingTag:"h4"}}),K=new d({props:{code:"U0FNUExFX1NVQlNFVFMlMjAlM0QlMjAlNUIlMjJzdWJzZXQxJTIyJTJDJTIwJTIyc3Vic2V0MiUyMiUyQyUyMCUyMnN1YnNldDMlMjIlNUQlMjAlMjAlMjMlMjBMaXN0JTIwb2YlMjBhbGwlMjB0aGUlMjBzdWJzZXRzJTIwdG8lMjB1c2UlMjBmb3IlMjB0aGlzJTIwZXZhbCUwQSUwQWNsYXNzJTIwQ3VzdG9tU3Vic2V0VGFzayhMaWdodGV2YWxUYXNrQ29uZmlnKSUzQSUwQSUyMCUyMCUyMCUyMGRlZiUyMF9faW5pdF9fKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHNlbGYlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBuYW1lJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaGZfc3Vic2V0JTJDJTBBJTIwJTIwJTIwJTIwKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN1cGVyKCkuX19pbml0X18oJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbmFtZSUzRG5hbWUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBoZl9zdWJzZXQlM0RoZl9zdWJzZXQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwcm9tcHRfZnVuY3Rpb24lM0Rwcm9tcHRfZm4lMkMlMjAlMjAlMjMlMjBNdXN0JTIwYmUlMjBkZWZpbmVkJTIwaW4lMjB0aGUlMjBmaWxlJTIwb3IlMjBpbXBvcnRlZCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGhmX3JlcG8lM0QlMjJ5b3VyX2RhdGFzZXRfbmFtZSUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG1ldHJpY3MlM0QlNUJjdXN0b21fbWV0cmljJTVEJTJDJTIwJTIwJTIzJTIwU2VsZWN0JTIweW91ciUyMG1ldHJpYyUyMGluJTIwTWV0cmljcyUyMG9yJTIwdXNlJTIweW91ciUyMGN1c3RvbV9tZXRyaWMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBoZl9hdmFpbF9zcGxpdHMlM0QlNUIlMjJ0cmFpbiUyMiUyQyUyMCUyMnRlc3QlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBldmFsdWF0aW9uX3NwbGl0cyUzRCU1QiUyMnRlc3QlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmZXdfc2hvdHNfc3BsaXQlM0QlMjJ0cmFpbiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZld19zaG90c19zZWxlY3QlM0QlMjJyYW5kb21fc2FtcGxpbmdfZnJvbV90cmFpbiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN1aXRlJTNEJTVCJTIyY29tbXVuaXR5JTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZ2VuZXJhdGlvbl9zaXplJTNEMjU2JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc3RvcF9zZXF1ZW5jZSUzRCU1QiUyMiU1Q24lMjIlMkMlMjAlMjJRdWVzdGlvbiUzQSUyMiU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCklMEElMEFTVUJTRVRfVEFTS1MlMjAlM0QlMjAlNUJDdXN0b21TdWJzZXRUYXNrKG5hbWUlM0RmJTIybXl0YXNrJTNBJTdCc3Vic2V0JTdEJTIyJTJDJTIwaGZfc3Vic2V0JTNEc3Vic2V0KSUyMGZvciUyMHN1YnNldCUyMGluJTIwU0FNUExFX1NVQlNFVFMlNUQ=",highlighted:`SAMPLE_SUBSETS = [<span class="hljs-string">&quot;subset1&quot;</span>, <span class="hljs-string">&quot;subset2&quot;</span>, <span class="hljs-string">&quot;subset3&quot;</span>] <span class="hljs-comment"># List of all the subsets to use for this eval</span>
<span class="hljs-keyword">class</span> <span class="hljs-title class_">CustomSubsetTask</span>(<span class="hljs-title class_ inherited__">LightevalTaskConfig</span>):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">
self,
name,
hf_subset,
</span>):
<span class="hljs-built_in">super</span>().__init__(
name=name,
hf_subset=hf_subset,
prompt_function=prompt_fn, <span class="hljs-comment"># Must be defined in the file or imported</span>
hf_repo=<span class="hljs-string">&quot;your_dataset_name&quot;</span>,
metrics=[custom_metric], <span class="hljs-comment"># Select your metric in Metrics or use your custom_metric</span>
hf_avail_splits=[<span class="hljs-string">&quot;train&quot;</span>, <span class="hljs-string">&quot;test&quot;</span>],
evaluation_splits=[<span class="hljs-string">&quot;test&quot;</span>],
few_shots_split=<span class="hljs-string">&quot;train&quot;</span>,
few_shots_select=<span class="hljs-string">&quot;random_sampling_from_train&quot;</span>,
suite=[<span class="hljs-string">&quot;community&quot;</span>],
generation_size=<span class="hljs-number">256</span>,
stop_sequence=[<span class="hljs-string">&quot;\\n&quot;</span>, <span class="hljs-string">&quot;Question:&quot;</span>],
)
SUBSET_TASKS = [CustomSubsetTask(name=<span class="hljs-string">f&quot;mytask:<span class="hljs-subst">{subset}</span>&quot;</span>, hf_subset=subset) <span class="hljs-keyword">for</span> subset <span class="hljs-keyword">in</span> SAMPLE_SUBSETS]`,wrap:!1}}),O=new U({props:{title:"Step 5: Add Tasks to the Table",local:"step-5-add-tasks-to-the-table",headingTag:"h3"}}),et=new d({props:{code:"JTIzJTIwU1RPUkUlMjBZT1VSJTIwRVZBTFMlMEElMEElMjMlMjBUYXNrcyUyMHdpdGglMjBzdWJzZXRzJTNBJTBBVEFTS1NfVEFCTEUlMjAlM0QlMjBTVUJTRVRfVEFTS1MlMEElMEElMjMlMjBUYXNrcyUyMHdpdGhvdXQlMjBzdWJzZXRzJTNBJTBBJTIzJTIwVEFTS1NfVEFCTEUlMjAlM0QlMjAlNUJ0YXNrJTVE",highlighted:`<span class="hljs-comment"># STORE YOUR EVALS</span>
<span class="hljs-comment"># Tasks with subsets:</span>
TASKS_TABLE = SUBSET_TASKS
<span class="hljs-comment"># Tasks without subsets:</span>
<span class="hljs-comment"># TASKS_TABLE = [task]</span>`,wrap:!1}}),st=new U({props:{title:"Step 6: Creating a requirement file",local:"step-6-creating-a-requirement-file",headingTag:"h3"}}),at=new U({props:{title:"Running Your Custom Task",local:"running-your-custom-task",headingTag:"h2"}}),Mt=new d({props:{code:"bGlnaHRldmFsJTIwYWNjZWxlcmF0ZSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMm1vZGVsX25hbWUlM0RIdWdnaW5nRmFjZUg0JTJGemVwaHlyLTdiLWJldGElMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJjb21tdW5pdHklN0MlN0JjdXN0b21fdGFzayU3RCU3QyU3QmZld3Nob3RzJTdEJTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1jdXN0b20tdGFza3MlMjAlN0JwYXRoX3RvX3lvdXJfY3VzdG9tX3Rhc2tfZmlsZSU3RA==",highlighted:`lighteval accelerate \\
<span class="hljs-string">&quot;model_name=HuggingFaceH4/zephyr-7b-beta&quot;</span> \\
<span class="hljs-string">&quot;community|{custom_task}|{fewshots}&quot;</span> \\
--custom-tasks {path_to_your_custom_task_file}`,wrap:!1}}),it=new U({props:{title:"Example Usage",local:"example-usage",headingTag:"h3"}}),pt=new d({props:{code:"JTIzJTIwUnVuJTIwYSUyMGN1c3RvbSUyMHRhc2slMjB3aXRoJTIwemVyby1zaG90JTIwZXZhbHVhdGlvbiUwQWxpZ2h0ZXZhbCUyMGFjY2VsZXJhdGUlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJtb2RlbF9uYW1lJTNEb3BlbmFpLWNvbW11bml0eSUyRmdwdDIlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJjb21tdW5pdHklN0NteW90aGVydGFzayU3QzAlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWN1c3RvbS10YXNrcyUyMGNvbW11bml0eV90YXNrcyUyRm15X2N1c3RvbV90YXNrLnB5JTBBJTBBJTIzJTIwUnVuJTIwYSUyMGN1c3RvbSUyMHRhc2slMjB3aXRoJTIwZmV3LXNob3QlMjBldmFsdWF0aW9uJTBBbGlnaHRldmFsJTIwYWNjZWxlcmF0ZSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMm1vZGVsX25hbWUlM0RvcGVuYWktY29tbXVuaXR5JTJGZ3B0MiUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMmNvbW11bml0eSU3Q215b3RoZXJ0YXNrJTdDMyUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tY3VzdG9tLXRhc2tzJTIwY29tbXVuaXR5X3Rhc2tzJTJGbXlfY3VzdG9tX3Rhc2sucHk=",highlighted:`<span class="hljs-comment"># Run a custom task with zero-shot evaluation</span>
lighteval accelerate \\
<span class="hljs-string">&quot;model_name=openai-community/gpt2&quot;</span> \\
<span class="hljs-string">&quot;community|myothertask|0&quot;</span> \\
--custom-tasks community_tasks/my_custom_task.py
<span class="hljs-comment"># Run a custom task with few-shot evaluation</span>
lighteval accelerate \\
<span class="hljs-string">&quot;model_name=openai-community/gpt2&quot;</span> \\
<span class="hljs-string">&quot;community|myothertask|3&quot;</span> \\
--custom-tasks community_tasks/my_custom_task.py`,wrap:!1}}),mt=new ke({props:{source:"https://github.com/huggingface/lighteval/blob/main/docs/source/adding-a-custom-task.mdx"}}),{c(){u=c("meta"),j=a(),T=c("p"),h=a(),M($.$$.fragment),ut=a(),C=c("p"),C.textContent=se,ct=a(),M(g.$$.fragment),rt=a(),I=c("p"),I.textContent=le,Ut=a(),M(b.$$.fragment),Tt=a(),k=c("p"),k.textContent=ae,Jt=a(),M(Z.$$.fragment),jt=a(),A=c("p"),A.textContent=ne,ht=a(),M(B.$$.fragment),wt=a(),_=c("p"),_.textContent=Me,ft=a(),v=c("p"),v.textContent=ie,dt=a(),M(w.$$.fragment),$t=a(),M(G.$$.fragment),Ct=a(),M(f.$$.fragment),gt=a(),M(S.$$.fragment),It=a(),X=c("p"),X.innerHTML=pe,bt=a(),M(N.$$.fragment),kt=a(),E=c("p"),E.textContent=me,Zt=a(),M(V.$$.fragment),At=a(),M(R.$$.fragment),Bt=a(),Q=c("p"),Q.innerHTML=oe,_t=a(),M(W.$$.fragment),vt=a(),M(q.$$.fragment),Gt=a(),M(x.$$.fragment),St=a(),M(z.$$.fragment),Xt=a(),M(F.$$.fragment),Nt=a(),Y=c("p"),Y.innerHTML=ye,Et=a(),M(H.$$.fragment),Vt=a(),M(L.$$.fragment),Rt=a(),M(D.$$.fragment),Qt=a(),P=c("p"),P.innerHTML=ue,Wt=a(),M(K.$$.fragment),qt=a(),M(O.$$.fragment),xt=a(),tt=c("p"),tt.innerHTML=ce,zt=a(),M(et.$$.fragment),Ft=a(),M(st.$$.fragment),Yt=a(),lt=c("p"),lt.innerHTML=re,Ht=a(),M(at.$$.fragment),Lt=a(),nt=c("p"),nt.textContent=Ue,Dt=a(),M(Mt.$$.fragment),Pt=a(),M(it.$$.fragment),Kt=a(),M(pt.$$.fragment),Ot=a(),M(mt.$$.fragment),te=a(),yt=c("p"),this.h()},l(t){const e=Ie("svelte-u9bgzb",document.head);u=r(e,"META",{name:!0,content:!0}),e.forEach(s),j=n(t),T=r(t,"P",{}),je(T).forEach(s),h=n(t),i($.$$.fragment,t),ut=n(t),C=r(t,"P",{"data-svelte-h":!0}),J(C)!=="svelte-r5gjmm"&&(C.textContent=se),ct=n(t),i(g.$$.fragment,t),rt=n(t),I=r(t,"P",{"data-svelte-h":!0}),J(I)!=="svelte-1m3yy4f"&&(I.textContent=le),Ut=n(t),i(b.$$.fragment,t),Tt=n(t),k=r(t,"P",{"data-svelte-h":!0}),J(k)!=="svelte-5bncqd"&&(k.textContent=ae),Jt=n(t),i(Z.$$.fragment,t),jt=n(t),A=r(t,"P",{"data-svelte-h":!0}),J(A)!=="svelte-shw9at"&&(A.textContent=ne),ht=n(t),i(B.$$.fragment,t),wt=n(t),_=r(t,"P",{"data-svelte-h":!0}),J(_)!=="svelte-17x9tm3"&&(_.textContent=Me),ft=n(t),v=r(t,"P",{"data-svelte-h":!0}),J(v)!=="svelte-c3h151"&&(v.textContent=ie),dt=n(t),i(w.$$.fragment,t),$t=n(t),i(G.$$.fragment,t),Ct=n(t),i(f.$$.fragment,t),gt=n(t),i(S.$$.fragment,t),It=n(t),X=r(t,"P",{"data-svelte-h":!0}),J(X)!=="svelte-esiyg6"&&(X.innerHTML=pe),bt=n(t),i(N.$$.fragment,t),kt=n(t),E=r(t,"P",{"data-svelte-h":!0}),J(E)!=="svelte-dnqoqp"&&(E.textContent=me),Zt=n(t),i(V.$$.fragment,t),At=n(t),i(R.$$.fragment,t),Bt=n(t),Q=r(t,"P",{"data-svelte-h":!0}),J(Q)!=="svelte-58kl57"&&(Q.innerHTML=oe),_t=n(t),i(W.$$.fragment,t),vt=n(t),i(q.$$.fragment,t),Gt=n(t),i(x.$$.fragment,t),St=n(t),i(z.$$.fragment,t),Xt=n(t),i(F.$$.fragment,t),Nt=n(t),Y=r(t,"P",{"data-svelte-h":!0}),J(Y)!=="svelte-1he5l3l"&&(Y.innerHTML=ye),Et=n(t),i(H.$$.fragment,t),Vt=n(t),i(L.$$.fragment,t),Rt=n(t),i(D.$$.fragment,t),Qt=n(t),P=r(t,"P",{"data-svelte-h":!0}),J(P)!=="svelte-2g4h75"&&(P.innerHTML=ue),Wt=n(t),i(K.$$.fragment,t),qt=n(t),i(O.$$.fragment,t),xt=n(t),tt=r(t,"P",{"data-svelte-h":!0}),J(tt)!=="svelte-7umcxy"&&(tt.innerHTML=ce),zt=n(t),i(et.$$.fragment,t),Ft=n(t),i(st.$$.fragment,t),Yt=n(t),lt=r(t,"P",{"data-svelte-h":!0}),J(lt)!=="svelte-c0uxql"&&(lt.innerHTML=re),Ht=n(t),i(at.$$.fragment,t),Lt=n(t),nt=r(t,"P",{"data-svelte-h":!0}),J(nt)!=="svelte-1yzen0k"&&(nt.textContent=Ue),Dt=n(t),i(Mt.$$.fragment,t),Pt=n(t),i(it.$$.fragment,t),Kt=n(t),i(pt.$$.fragment,t),Ot=n(t),i(mt.$$.fragment,t),te=n(t),yt=r(t,"P",{}),je(yt).forEach(s),this.h()},h(){he(u,"name","hf:doc:metadata"),he(u,"content",_e)},m(t,e){be(document.head,u),l(t,j,e),l(t,T,e),l(t,h,e),p($,t,e),l(t,ut,e),l(t,C,e),l(t,ct,e),p(g,t,e),l(t,rt,e),l(t,I,e),l(t,Ut,e),p(b,t,e),l(t,Tt,e),l(t,k,e),l(t,Jt,e),p(Z,t,e),l(t,jt,e),l(t,A,e),l(t,ht,e),p(B,t,e),l(t,wt,e),l(t,_,e),l(t,ft,e),l(t,v,e),l(t,dt,e),p(w,t,e),l(t,$t,e),p(G,t,e),l(t,Ct,e),p(f,t,e),l(t,gt,e),p(S,t,e),l(t,It,e),l(t,X,e),l(t,bt,e),p(N,t,e),l(t,kt,e),l(t,E,e),l(t,Zt,e),p(V,t,e),l(t,At,e),p(R,t,e),l(t,Bt,e),l(t,Q,e),l(t,_t,e),p(W,t,e),l(t,vt,e),p(q,t,e),l(t,Gt,e),p(x,t,e),l(t,St,e),p(z,t,e),l(t,Xt,e),p(F,t,e),l(t,Nt,e),l(t,Y,e),l(t,Et,e),p(H,t,e),l(t,Vt,e),p(L,t,e),l(t,Rt,e),p(D,t,e),l(t,Qt,e),l(t,P,e),l(t,Wt,e),p(K,t,e),l(t,qt,e),p(O,t,e),l(t,xt,e),l(t,tt,e),l(t,zt,e),p(et,t,e),l(t,Ft,e),p(st,t,e),l(t,Yt,e),l(t,lt,e),l(t,Ht,e),p(at,t,e),l(t,Lt,e),l(t,nt,e),l(t,Dt,e),p(Mt,t,e),l(t,Pt,e),p(it,t,e),l(t,Kt,e),p(pt,t,e),l(t,Ot,e),p(mt,t,e),l(t,te,e),l(t,yt,e),ee=!0},p(t,[e]){const Te={};e&2&&(Te.$$scope={dirty:e,ctx:t}),w.$set(Te);const Je={};e&2&&(Je.$$scope={dirty:e,ctx:t}),f.$set(Je)},i(t){ee||(m($.$$.fragment,t),m(g.$$.fragment,t),m(b.$$.fragment,t),m(Z.$$.fragment,t),m(B.$$.fragment,t),m(w.$$.fragment,t),m(G.$$.fragment,t),m(f.$$.fragment,t),m(S.$$.fragment,t),m(N.$$.fragment,t),m(V.$$.fragment,t),m(R.$$.fragment,t),m(W.$$.fragment,t),m(q.$$.fragment,t),m(x.$$.fragment,t),m(z.$$.fragment,t),m(F.$$.fragment,t),m(H.$$.fragment,t),m(L.$$.fragment,t),m(D.$$.fragment,t),m(K.$$.fragment,t),m(O.$$.fragment,t),m(et.$$.fragment,t),m(st.$$.fragment,t),m(at.$$.fragment,t),m(Mt.$$.fragment,t),m(it.$$.fragment,t),m(pt.$$.fragment,t),m(mt.$$.fragment,t),ee=!0)},o(t){o($.$$.fragment,t),o(g.$$.fragment,t),o(b.$$.fragment,t),o(Z.$$.fragment,t),o(B.$$.fragment,t),o(w.$$.fragment,t),o(G.$$.fragment,t),o(f.$$.fragment,t),o(S.$$.fragment,t),o(N.$$.fragment,t),o(V.$$.fragment,t),o(R.$$.fragment,t),o(W.$$.fragment,t),o(q.$$.fragment,t),o(x.$$.fragment,t),o(z.$$.fragment,t),o(F.$$.fragment,t),o(H.$$.fragment,t),o(L.$$.fragment,t),o(D.$$.fragment,t),o(K.$$.fragment,t),o(O.$$.fragment,t),o(et.$$.fragment,t),o(st.$$.fragment,t),o(at.$$.fragment,t),o(Mt.$$.fragment,t),o(it.$$.fragment,t),o(pt.$$.fragment,t),o(mt.$$.fragment,t),ee=!1},d(t){t&&(s(j),s(T),s(h),s(ut),s(C),s(ct),s(rt),s(I),s(Ut),s(Tt),s(k),s(Jt),s(jt),s(A),s(ht),s(wt),s(_),s(ft),s(v),s(dt),s($t),s(Ct),s(gt),s(It),s(X),s(bt),s(kt),s(E),s(Zt),s(At),s(Bt),s(Q),s(_t),s(vt),s(Gt),s(St),s(Xt),s(Nt),s(Y),s(Et),s(Vt),s(Rt),s(Qt),s(P),s(Wt),s(qt),s(xt),s(tt),s(zt),s(Ft),s(Yt),s(lt),s(Ht),s(Lt),s(nt),s(Dt),s(Pt),s(Kt),s(Ot),s(te),s(yt)),s(u),y($,t),y(g,t),y(b,t),y(Z,t),y(B,t),y(w,t),y(G,t),y(f,t),y(S,t),y(N,t),y(V,t),y(R,t),y(W,t),y(q,t),y(x,t),y(z,t),y(F,t),y(H,t),y(L,t),y(D,t),y(K,t),y(O,t),y(et,t),y(st,t),y(at,t),y(Mt,t),y(it,t),y(pt,t),y(mt,t)}}}
/**
 * Page metadata as a JSON string (title / local / sections / depth). `Be`'s
 * `h()` stores it in the `content` attribute of the <meta> element named
 * "hf:doc:metadata". Must remain on one physical line (single-quoted literal).
 */
const _e='{"title":"Adding a Custom Task","local":"adding-a-custom-task","sections":[{"title":"Task Categories","local":"task-categories","sections":[{"title":"Core Evaluations","local":"core-evaluations","sections":[],"depth":3},{"title":"Extended Evaluations","local":"extended-evaluations","sections":[],"depth":3},{"title":"Community Evaluations","local":"community-evaluations","sections":[],"depth":3}],"depth":2},{"title":"Step-by-Step Creation of a Custom Task","local":"step-by-step-creation-of-a-custom-task","sections":[{"title":"Step 1: Create the Task File","local":"step-1-create-the-task-file","sections":[],"depth":3},{"title":"Step 2: Define the Prompt Function","local":"step-2-define-the-prompt-function","sections":[],"depth":3},{"title":"Step 3: Choose or Create Metrics","local":"step-3-choose-or-create-metrics","sections":[{"title":"Using Existing Metrics","local":"using-existing-metrics","sections":[],"depth":4},{"title":"Creating Custom Metrics","local":"creating-custom-metrics","sections":[],"depth":4}],"depth":3},{"title":"Step 4: Define Your Task","local":"step-4-define-your-task","sections":[{"title":"Simple Task (No Subsets)","local":"simple-task-no-subsets","sections":[],"depth":4},{"title":"Task with Multiple Subsets","local":"task-with-multiple-subsets","sections":[],"depth":4}],"depth":3},{"title":"Step 5: Add Tasks to the Table","local":"step-5-add-tasks-to-the-table","sections":[],"depth":3},{"title":"Step 6: Creating a requirement file","local":"step-6-creating-a-requirement-file","sections":[],"depth":3}],"depth":2},{"title":"Running Your Custom Task","local":"running-your-custom-task","sections":[{"title":"Example Usage","local":"example-usage","sections":[],"depth":3}],"depth":2}],"depth":1}';
/**
 * Instance function handed to `ge` by `Ve`. Registers a callback through
 * `$e` that reads the `fw` query parameter from `window.location.search`
 * and discards the value, then returns an empty array.
 * NOTE(review): `$e` is presumably Svelte's onMount - confirm in the
 * scheduler chunk.
 */
function ve(ot){return $e(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
/**
 * Page component, exported as `component`. The constructor calls `super()`
 * and then passes the options object `u` to `ge` together with `ve`, `Be`,
 * `de` and an empty object.
 */
class Ve extends Ce{constructor(u){super(),ge(this,u,ve,Be,de,{})}}export{Ve as component};

Xet Storage Details

Size:
27.3 kB
·
Xet hash:
24673c220ddbe0dcd3802afa8b537cd309ad983a59461aa543b7b21417b15cce

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.