Buckets:

rtrm's picture
download
raw
26.8 kB
import{s as we,n as fe,o as de}from"../chunks/scheduler.7da89386.js";import{S as Ce,i as Ie,g as u,s as a,r as M,A as be,h as c,f as l,c as n,j as he,u as i,x as r,k as ee,y as ge,a as s,v as m,d as p,t as o,w as y}from"../chunks/index.20910acc.js";import{C as h}from"../chunks/CodeBlock.143bd81e.js";import{H as U,E as $e}from"../chunks/getInferenceSnippets.375cdad5.js";function ke(le){let T,pt,it,ot,w,yt,f,se="Lighteval provides a flexible framework for creating custom evaluation tasks. This guide explains how to create and integrate new tasks into the evaluation system.",ut,d,ct,C,ae="Before creating a custom task, consider which category it belongs to:",Ut,I,rt,b,ne=`Core evaluations are evaluations that only require standard logic in their
metrics and processing, and that we will add to our test suite to ensure non-regression through time. They already see high usage in the community.`,Tt,g,Jt,$,Me=`Extended evaluations are evaluations that require custom logic in their
metrics (complex normalization, an LLM as a judge, etc.), that we added to
facilitate the life of users. They already see high usage in the community.`,jt,k,ht,Z,ie="Community evaluations are submissions by the community of new tasks.",wt,A,me="A popular community evaluation can move to become an extended or core evaluation over time.",ft,J,pe='<p>You can find examples of custom tasks in the <a href="https://github.com/huggingface/lighteval/tree/main/community_tasks" rel="nofollow">community_tasks</a> directory.</p>',dt,B,Ct,j,oe=`<p>To contribute your custom task to the Lighteval repository, you would first need
to install the required dev dependencies by running <code>pip install -e .[dev]</code>
and then run <code>pre-commit install</code> to install the pre-commit hooks.</p>`,It,v,bt,G,ye="First, create a Python file under the <code>community_tasks</code> directory.",gt,S,$t,_,ue=`You need to define a prompt function that will convert a line from your
dataset to a document to be used for evaluation.`,kt,X,Zt,N,At,E,ce='You can either use an existing metric (defined in <code>lighteval.metrics.metrics.Metrics</code>) or <a href="adding-a-new-metric">create a custom one</a>.',Bt,V,vt,Q,Gt,R,St,W,_t,q,Xt,x,Ue='You can define a task with or without subsets using <a href="/docs/lighteval/pr_985/en/package_reference/tasks#lighteval.tasks.lighteval_task.LightevalTaskConfig">LightevalTaskConfig</a>.',Nt,z,Et,F,Vt,Y,Qt,H,re=`If you want to create a task with multiple subsets, add them to the
<code>SAMPLE_SUBSETS</code> list and create a task for each subset.`,Rt,L,Wt,D,qt,P,Te="Then you need to add your task to the <code>TASKS_TABLE</code> list.",xt,K,zt,O,Ft,tt,Je=`If your task has requirements, you need to create a <code>requirement.txt</code> file with
only the required dependencies so that anyone can run your task.`,Yt,et,Ht,lt,je="Once your file is created, you can run the evaluation with the following command:",Lt,st,Dt,at,Pt,nt,Kt,Mt,Ot,mt,te;return w=new U({props:{title:"Adding a Custom Task",local:"adding-a-custom-task",headingTag:"h1"}}),d=new U({props:{title:"Task Categories",local:"task-categories",headingTag:"h2"}}),I=new U({props:{title:"Core Evaluations",local:"core-evaluations",headingTag:"h3"}}),g=new U({props:{title:"Extended Evaluations",local:"extended-evaluations",headingTag:"h3"}}),k=new U({props:{title:"Community Evaluations",local:"community-evaluations",headingTag:"h3"}}),B=new U({props:{title:"Step-by-Step Creation of a Custom Task",local:"step-by-step-creation-of-a-custom-task",headingTag:"h2"}}),v=new U({props:{title:"Step 1: Create the Task File",local:"step-1-create-the-task-file",headingTag:"h3"}}),S=new U({props:{title:"Step 2: Define the Prompt Function",local:"step-2-define-the-prompt-function",headingTag:"h3"}}),X=new h({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC50YXNrcy5yZXF1ZXN0cyUyMGltcG9ydCUyMERvYyUwQSUwQSUyMyUyMERlZmluZSUyMGFzJTIwbWFueSUyMGFzJTIweW91JTIwbmVlZCUyMGZvciUyMHlvdXIlMjBkaWZmZXJlbnQlMjB0YXNrcyUwQWRlZiUyMHByb21wdF9mbihsaW5lJTNBJTIwZGljdCUyQyUyMHRhc2tfbmFtZSUzQSUyMHN0ciklM0ElMEElMjAlMjAlMjAlMjAlMjIlMjIlMjJEZWZpbmVzJTIwaG93JTIwdG8lMjBnbyUyMGZyb20lMjBhJTIwZGF0YXNldCUyMGxpbmUlMjB0byUyMGElMjBkb2MlMjBvYmplY3QuJTBBJTIwJTIwJTIwJTIwRm9sbG93JTIwZXhhbXBsZXMlMjBpbiUyMHNyYyUyRmxpZ2h0ZXZhbCUyRnRhc2tzJTJGZGVmYXVsdF9wcm9tcHRzLnB5JTJDJTIwb3IlMjBnZXQlMjBtb3JlJTIwaW5mbyUwQSUyMCUyMCUyMCUyMGFib3V0JTIwd2hhdCUyMHRoaXMlMjBmdW5jdGlvbiUyMHNob3VsZCUyMGRvJTIwaW4lMjB0aGUlMjBSRUFETUUuJTBBJTIwJTIwJTIwJTIwJTIyJTIyJTIyJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwRG9jKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRhc2tfbmFtZSUzRHRhc2tfbmFtZSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHF1ZXJ5JTNEbGluZSU1QiUyMnF1ZXN0aW9uJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwY2hvaWNlcyUzRCU1QmYlMjIlMjAlN0JjJTdEJTIyJTIwZm9yJTIwYyUyMGluJTIwbGluZSU1QiUyMmNob2ljZXMlMjIlNUQlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBnb2xkX2luZGV4JTNEbGluZSU1QiUyMmdvbGQlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAp",highlighted:`<span class="hljs-keyword">from</span> lighteval.tasks.requests <span class="hljs-keyword">import</span> Doc
<span class="hljs-comment"># Define as many as you need for your different tasks</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">prompt_fn</span>(<span class="hljs-params">line: <span class="hljs-built_in">dict</span>, task_name: <span class="hljs-built_in">str</span></span>):
<span class="hljs-string">&quot;&quot;&quot;Defines how to go from a dataset line to a doc object.
Follow examples in src/lighteval/tasks/default_prompts.py, or get more info
about what this function should do in the README.
&quot;&quot;&quot;</span>
<span class="hljs-keyword">return</span> Doc(
task_name=task_name,
query=line[<span class="hljs-string">&quot;question&quot;</span>],
choices=[<span class="hljs-string">f&quot; <span class="hljs-subst">{c}</span>&quot;</span> <span class="hljs-keyword">for</span> c <span class="hljs-keyword">in</span> line[<span class="hljs-string">&quot;choices&quot;</span>]],
gold_index=line[<span class="hljs-string">&quot;gold&quot;</span>],
)`,wrap:!1}}),N=new U({props:{title:"Step 3: Choose or Create Metrics",local:"step-3-choose-or-create-metrics",headingTag:"h3"}}),V=new U({props:{title:"Using Existing Metrics",local:"using-existing-metrics",headingTag:"h4"}}),Q=new h({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC5tZXRyaWNzJTIwaW1wb3J0JTIwTWV0cmljcyUwQSUwQSUyMyUyMFVzZSUyMGFuJTIwZXhpc3RpbmclMjBtZXRyaWMlMEFtZXRyaWMlMjAlM0QlMjBNZXRyaWNzLkFDQ1VSQUNZ",highlighted:`<span class="hljs-keyword">from</span> lighteval.metrics <span class="hljs-keyword">import</span> Metrics
<span class="hljs-comment"># Use an existing metric</span>
metric = Metrics.ACCURACY`,wrap:!1}}),R=new U({props:{title:"Creating Custom Metrics",local:"creating-custom-metrics",headingTag:"h4"}}),W=new h({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC5tZXRyaWNzLnV0aWxzLm1ldHJpY191dGlscyUyMGltcG9ydCUyMFNhbXBsZUxldmVsTWV0cmljJTBBaW1wb3J0JTIwbnVtcHklMjBhcyUyMG5wJTBBJTBBY3VzdG9tX21ldHJpYyUyMCUzRCUyMFNhbXBsZUxldmVsTWV0cmljKCUwQSUyMCUyMCUyMCUyMG1ldHJpY19uYW1lJTNEJTIybXlfY3VzdG9tX21ldHJpY19uYW1lJTIyJTJDJTBBJTIwJTIwJTIwJTIwaGlnaGVyX2lzX2JldHRlciUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBjYXRlZ29yeSUzRCUyMmFjY3VyYWN5JTIyJTJDJTBBJTIwJTIwJTIwJTIwc2FtcGxlX2xldmVsX2ZuJTNEbGFtYmRhJTIweCUzQSUyMHglMkMlMjAlMjAlMjMlMjBIb3clMjB0byUyMGNvbXB1dGUlMjBzY29yZSUyMGZvciUyMG9uZSUyMHNhbXBsZSUwQSUyMCUyMCUyMCUyMGNvcnB1c19sZXZlbF9mbiUzRG5wLm1lYW4lMkMlMjAlMjAlMjMlMjBIb3clMjB0byUyMGFnZ3JlZ2F0ZSUyMHRoZSUyMHNhbXBsZSUyMG1ldHJpY3MlMEEp",highlighted:`<span class="hljs-keyword">from</span> lighteval.metrics.utils.metric_utils <span class="hljs-keyword">import</span> SampleLevelMetric
<span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
custom_metric = SampleLevelMetric(
metric_name=<span class="hljs-string">&quot;my_custom_metric_name&quot;</span>,
higher_is_better=<span class="hljs-literal">True</span>,
category=<span class="hljs-string">&quot;accuracy&quot;</span>,
sample_level_fn=<span class="hljs-keyword">lambda</span> x: x, <span class="hljs-comment"># How to compute score for one sample</span>
corpus_level_fn=np.mean, <span class="hljs-comment"># How to aggregate the sample metrics</span>
)`,wrap:!1}}),q=new U({props:{title:"Step 4: Define Your Task",local:"step-4-define-your-task",headingTag:"h3"}}),z=new U({props:{title:"Simple Task (No Subsets)",local:"simple-task-no-subsets",headingTag:"h4"}}),F=new h({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC50YXNrcy5saWdodGV2YWxfdGFzayUyMGltcG9ydCUyMExpZ2h0ZXZhbFRhc2tDb25maWclMEElMEElMjMlMjBUaGlzJTIwaXMlMjBob3clMjB5b3UlMjBjcmVhdGUlMjBhJTIwc2ltcGxlJTIwdGFzayUyMChsaWtlJTIwSGVsbGFTd2FnKSUyMHdoaWNoJTIwaGFzJTIwb25lJTIwc2luZ2xlJTIwc3Vic2V0JTBBJTIzJTIwYXR0YWNoZWQlMjB0byUyMGl0JTJDJTIwYW5kJTIwb25lJTIwZXZhbHVhdGlvbiUyMHBvc3NpYmxlLiUwQXRhc2slMjAlM0QlMjBMaWdodGV2YWxUYXNrQ29uZmlnKCUwQSUyMCUyMCUyMCUyMG5hbWUlM0QlMjJteW90aGVydGFzayUyMiUyQyUwQSUyMCUyMCUyMCUyMHByb21wdF9mdW5jdGlvbiUzRHByb21wdF9mbiUyQyUyMCUyMCUyMyUyME11c3QlMjBiZSUyMGRlZmluZWQlMjBpbiUyMHRoZSUyMGZpbGUlMjBvciUyMGltcG9ydGVkJTBBJTIwJTIwJTIwJTIwc3VpdGUlM0QlNUIlMjJjb21tdW5pdHklMjIlNUQlMkMlMEElMjAlMjAlMjAlMjBoZl9yZXBvJTNEJTIyeW91cl9kYXRhc2V0X3JlcG9fb25faGYlMjIlMkMlMEElMjAlMjAlMjAlMjBoZl9zdWJzZXQlM0QlMjJkZWZhdWx0JTIyJTJDJTBBJTIwJTIwJTIwJTIwaGZfYXZhaWxfc3BsaXRzJTNEJTVCJTIydHJhaW4lMjIlMkMlMjAlMjJ0ZXN0JTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwZXZhbHVhdGlvbl9zcGxpdHMlM0QlNUIlMjJ0ZXN0JTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwZmV3X3Nob3RzX3NwbGl0JTNEJTIydHJhaW4lMjIlMkMlMEElMjAlMjAlMjAlMjBmZXdfc2hvdHNfc2VsZWN0JTNEJTIycmFuZG9tX3NhbXBsaW5nX2Zyb21fdHJhaW4lMjIlMkMlMEElMjAlMjAlMjAlMjBtZXRyaWNzJTNEJTVCbWV0cmljJTVEJTJDJTIwJTIwJTIzJTIwU2VsZWN0JTIweW91ciUyMG1ldHJpYyUyMGluJTIwTWV0cmljcyUwQSUyMCUyMCUyMCUyMGdlbmVyYXRpb25fc2l6ZSUzRDI1NiUyQyUwQSUyMCUyMCUyMCUyMHN0b3Bfc2VxdWVuY2UlM0QlNUIlMjIlNUNuJTIyJTJDJTIwJTIyUXVlc3Rpb24lM0ElMjIlNUQlMkMlMEEp",highlighted:`<span class="hljs-keyword">from</span> lighteval.tasks.lighteval_task <span class="hljs-keyword">import</span> LightevalTaskConfig
<span class="hljs-comment"># This is how you create a simple task (like HellaSwag) which has one single subset</span>
<span class="hljs-comment"># attached to it, and one evaluation possible.</span>
task = LightevalTaskConfig(
name=<span class="hljs-string">&quot;myothertask&quot;</span>,
prompt_function=prompt_fn, <span class="hljs-comment"># Must be defined in the file or imported</span>
suite=[<span class="hljs-string">&quot;community&quot;</span>],
hf_repo=<span class="hljs-string">&quot;your_dataset_repo_on_hf&quot;</span>,
hf_subset=<span class="hljs-string">&quot;default&quot;</span>,
hf_avail_splits=[<span class="hljs-string">&quot;train&quot;</span>, <span class="hljs-string">&quot;test&quot;</span>],
evaluation_splits=[<span class="hljs-string">&quot;test&quot;</span>],
few_shots_split=<span class="hljs-string">&quot;train&quot;</span>,
few_shots_select=<span class="hljs-string">&quot;random_sampling_from_train&quot;</span>,
metrics=[metric], <span class="hljs-comment"># Select your metric in Metrics</span>
generation_size=<span class="hljs-number">256</span>,
stop_sequence=[<span class="hljs-string">&quot;\\n&quot;</span>, <span class="hljs-string">&quot;Question:&quot;</span>],
)`,wrap:!1}}),Y=new U({props:{title:"Task with Multiple Subsets",local:"task-with-multiple-subsets",headingTag:"h4"}}),L=new h({props:{code:"U0FNUExFX1NVQlNFVFMlMjAlM0QlMjAlNUIlMjJzdWJzZXQxJTIyJTJDJTIwJTIyc3Vic2V0MiUyMiUyQyUyMCUyMnN1YnNldDMlMjIlNUQlMjAlMjAlMjMlMjBMaXN0JTIwb2YlMjBhbGwlMjB0aGUlMjBzdWJzZXRzJTIwdG8lMjB1c2UlMjBmb3IlMjB0aGlzJTIwZXZhbCUwQSUwQWNsYXNzJTIwQ3VzdG9tU3Vic2V0VGFzayhMaWdodGV2YWxUYXNrQ29uZmlnKSUzQSUwQSUyMCUyMCUyMCUyMGRlZiUyMF9faW5pdF9fKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHNlbGYlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBuYW1lJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaGZfc3Vic2V0JTJDJTBBJTIwJTIwJTIwJTIwKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN1cGVyKCkuX19pbml0X18oJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbmFtZSUzRG5hbWUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBoZl9zdWJzZXQlM0RoZl9zdWJzZXQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwcm9tcHRfZnVuY3Rpb24lM0Rwcm9tcHRfZm4lMkMlMjAlMjAlMjMlMjBNdXN0JTIwYmUlMjBkZWZpbmVkJTIwaW4lMjB0aGUlMjBmaWxlJTIwb3IlMjBpbXBvcnRlZCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGhmX3JlcG8lM0QlMjJ5b3VyX2RhdGFzZXRfbmFtZSUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG1ldHJpY3MlM0QlNUJjdXN0b21fbWV0cmljJTVEJTJDJTIwJTIwJTIzJTIwU2VsZWN0JTIweW91ciUyMG1ldHJpYyUyMGluJTIwTWV0cmljcyUyMG9yJTIwdXNlJTIweW91ciUyMGN1c3RvbV9tZXRyaWMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBoZl9hdmFpbF9zcGxpdHMlM0QlNUIlMjJ0cmFpbiUyMiUyQyUyMCUyMnRlc3QlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBldmFsdWF0aW9uX3NwbGl0cyUzRCU1QiUyMnRlc3QlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmZXdfc2hvdHNfc3BsaXQlM0QlMjJ0cmFpbiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZld19zaG90c19zZWxlY3QlM0QlMjJyYW5kb21fc2FtcGxpbmdfZnJvbV90cmFpbiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN1aXRlJTNEJTVCJTIyY29tbXVuaXR5JTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZ2VuZXJhdGlvbl9zaXplJTNEMjU2JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc3RvcF9zZXF1ZW5jZSUzRCU1QiUyMiU1Q24lMjIlMkMlMjAlMjJRdWVzdGlvbiUzQSUyMiU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCklMEElMEFTVUJTRVRfVEFTS1MlMjAlM0QlMjAlNUJDdXN0b21TdWJzZXRUYXNrKG5hbWUlM0RmJTIybXl0YXNrJTNBJTdCc3Vic2V0JTdEJTIyJTJDJTIwaGZfc3Vic2V0JTNEc3Vic2V0KSUyMGZvciUyMHN1YnNldCUyMGluJTIwU0FNUExFX1NVQlNFVFMlNUQ=",highlighted:`SAMPLE_SUBSETS = [<span class="hljs-string">&quot;subset1&quot;</span>, <span class="hljs-string">&quot;subset2&quot;</span>, <span class="hljs-string">&quot;subset3&quot;</span>] <span class="hljs-comment"># List of all the subsets to use for this eval</span>
<span class="hljs-keyword">class</span> <span class="hljs-title class_">CustomSubsetTask</span>(<span class="hljs-title class_ inherited__">LightevalTaskConfig</span>):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">
self,
name,
hf_subset,
</span>):
<span class="hljs-built_in">super</span>().__init__(
name=name,
hf_subset=hf_subset,
prompt_function=prompt_fn, <span class="hljs-comment"># Must be defined in the file or imported</span>
hf_repo=<span class="hljs-string">&quot;your_dataset_name&quot;</span>,
metrics=[custom_metric], <span class="hljs-comment"># Select your metric in Metrics or use your custom_metric</span>
hf_avail_splits=[<span class="hljs-string">&quot;train&quot;</span>, <span class="hljs-string">&quot;test&quot;</span>],
evaluation_splits=[<span class="hljs-string">&quot;test&quot;</span>],
few_shots_split=<span class="hljs-string">&quot;train&quot;</span>,
few_shots_select=<span class="hljs-string">&quot;random_sampling_from_train&quot;</span>,
suite=[<span class="hljs-string">&quot;community&quot;</span>],
generation_size=<span class="hljs-number">256</span>,
stop_sequence=[<span class="hljs-string">&quot;\\n&quot;</span>, <span class="hljs-string">&quot;Question:&quot;</span>],
)
SUBSET_TASKS = [CustomSubsetTask(name=<span class="hljs-string">f&quot;mytask:<span class="hljs-subst">{subset}</span>&quot;</span>, hf_subset=subset) <span class="hljs-keyword">for</span> subset <span class="hljs-keyword">in</span> SAMPLE_SUBSETS]`,wrap:!1}}),D=new U({props:{title:"Step 5: Add Tasks to the Table",local:"step-5-add-tasks-to-the-table",headingTag:"h3"}}),K=new h({props:{code:"JTIzJTIwU1RPUkUlMjBZT1VSJTIwRVZBTFMlMEElMEElMjMlMjBUYXNrcyUyMHdpdGglMjBzdWJzZXRzJTNBJTBBVEFTS1NfVEFCTEUlMjAlM0QlMjBTVUJTRVRfVEFTS1MlMEElMEElMjMlMjBUYXNrcyUyMHdpdGhvdXQlMjBzdWJzZXRzJTNBJTBBJTIzJTIwVEFTS1NfVEFCTEUlMjAlM0QlMjAlNUJ0YXNrJTVE",highlighted:`<span class="hljs-comment"># STORE YOUR EVALS</span>
<span class="hljs-comment"># Tasks with subsets:</span>
TASKS_TABLE = SUBSET_TASKS
<span class="hljs-comment"># Tasks without subsets:</span>
<span class="hljs-comment"># TASKS_TABLE = [task]</span>`,wrap:!1}}),O=new U({props:{title:"Step 6: Creating a requirement file",local:"step-6-creating-a-requirement-file",headingTag:"h3"}}),et=new U({props:{title:"Running Your Custom Task",local:"running-your-custom-task",headingTag:"h2"}}),st=new h({props:{code:"bGlnaHRldmFsJTIwYWNjZWxlcmF0ZSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMm1vZGVsX25hbWUlM0RIdWdnaW5nRmFjZUg0JTJGemVwaHlyLTdiLWJldGElMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJjb21tdW5pdHklN0MlN0JjdXN0b21fdGFzayU3RCU3QyU3QmZld3Nob3RzJTdEJTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1jdXN0b20tdGFza3MlMjAlN0JwYXRoX3RvX3lvdXJfY3VzdG9tX3Rhc2tfZmlsZSU3RA==",highlighted:`lighteval accelerate \\
<span class="hljs-string">&quot;model_name=HuggingFaceH4/zephyr-7b-beta&quot;</span> \\
<span class="hljs-string">&quot;community|{custom_task}|{fewshots}&quot;</span> \\
--custom-tasks {path_to_your_custom_task_file}`,wrap:!1}}),at=new U({props:{title:"Example Usage",local:"example-usage",headingTag:"h3"}}),nt=new h({props:{code:"JTIzJTIwUnVuJTIwYSUyMGN1c3RvbSUyMHRhc2slMjB3aXRoJTIwemVyby1zaG90JTIwZXZhbHVhdGlvbiUwQWxpZ2h0ZXZhbCUyMGFjY2VsZXJhdGUlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJtb2RlbF9uYW1lJTNEb3BlbmFpLWNvbW11bml0eSUyRmdwdDIlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJjb21tdW5pdHklN0NteW90aGVydGFzayU3QzAlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWN1c3RvbS10YXNrcyUyMGNvbW11bml0eV90YXNrcyUyRm15X2N1c3RvbV90YXNrLnB5JTBBJTBBJTIzJTIwUnVuJTIwYSUyMGN1c3RvbSUyMHRhc2slMjB3aXRoJTIwZmV3LXNob3QlMjBldmFsdWF0aW9uJTBBbGlnaHRldmFsJTIwYWNjZWxlcmF0ZSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMm1vZGVsX25hbWUlM0RvcGVuYWktY29tbXVuaXR5JTJGZ3B0MiUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMmNvbW11bml0eSU3Q215b3RoZXJ0YXNrJTdDMyUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tY3VzdG9tLXRhc2tzJTIwY29tbXVuaXR5X3Rhc2tzJTJGbXlfY3VzdG9tX3Rhc2sucHk=",highlighted:`<span class="hljs-comment"># Run a custom task with zero-shot evaluation</span>
lighteval accelerate \\
<span class="hljs-string">&quot;model_name=openai-community/gpt2&quot;</span> \\
<span class="hljs-string">&quot;community|myothertask|0&quot;</span> \\
--custom-tasks community_tasks/my_custom_task.py
<span class="hljs-comment"># Run a custom task with few-shot evaluation</span>
lighteval accelerate \\
<span class="hljs-string">&quot;model_name=openai-community/gpt2&quot;</span> \\
<span class="hljs-string">&quot;community|myothertask|3&quot;</span> \\
--custom-tasks community_tasks/my_custom_task.py`,wrap:!1}}),Mt=new $e({props:{source:"https://github.com/huggingface/lighteval/blob/main/docs/source/adding-a-custom-task.mdx"}}),{c(){T=u("meta"),pt=a(),it=u("p"),ot=a(),M(w.$$.fragment),yt=a(),f=u("p"),f.textContent=se,ut=a(),M(d.$$.fragment),ct=a(),C=u("p"),C.textContent=ae,Ut=a(),M(I.$$.fragment),rt=a(),b=u("p"),b.textContent=ne,Tt=a(),M(g.$$.fragment),Jt=a(),$=u("p"),$.textContent=Me,jt=a(),M(k.$$.fragment),ht=a(),Z=u("p"),Z.textContent=ie,wt=a(),A=u("p"),A.textContent=me,ft=a(),J=u("blockquote"),J.innerHTML=pe,dt=a(),M(B.$$.fragment),Ct=a(),j=u("blockquote"),j.innerHTML=oe,It=a(),M(v.$$.fragment),bt=a(),G=u("p"),G.innerHTML=ye,gt=a(),M(S.$$.fragment),$t=a(),_=u("p"),_.textContent=ue,kt=a(),M(X.$$.fragment),Zt=a(),M(N.$$.fragment),At=a(),E=u("p"),E.innerHTML=ce,Bt=a(),M(V.$$.fragment),vt=a(),M(Q.$$.fragment),Gt=a(),M(R.$$.fragment),St=a(),M(W.$$.fragment),_t=a(),M(q.$$.fragment),Xt=a(),x=u("p"),x.innerHTML=Ue,Nt=a(),M(z.$$.fragment),Et=a(),M(F.$$.fragment),Vt=a(),M(Y.$$.fragment),Qt=a(),H=u("p"),H.innerHTML=re,Rt=a(),M(L.$$.fragment),Wt=a(),M(D.$$.fragment),qt=a(),P=u("p"),P.innerHTML=Te,xt=a(),M(K.$$.fragment),zt=a(),M(O.$$.fragment),Ft=a(),tt=u("p"),tt.innerHTML=Je,Yt=a(),M(et.$$.fragment),Ht=a(),lt=u("p"),lt.textContent=je,Lt=a(),M(st.$$.fragment),Dt=a(),M(at.$$.fragment),Pt=a(),M(nt.$$.fragment),Kt=a(),M(Mt.$$.fragment),Ot=a(),mt=u("p"),this.h()},l(t){const e=be("svelte-u9bgzb",document.head);T=c(e,"META",{name:!0,content:!0}),e.forEach(l),pt=n(t),it=c(t,"P",{}),he(it).forEach(l),ot=n(t),i(w.$$.fragment,t),yt=n(t),f=c(t,"P",{"data-svelte-h":!0}),r(f)!=="svelte-r5gjmm"&&(f.textContent=se),ut=n(t),i(d.$$.fragment,t),ct=n(t),C=c(t,"P",{"data-svelte-h":!0}),r(C)!=="svelte-1m3yy4f"&&(C.textContent=ae),Ut=n(t),i(I.$$.fragment,t),rt=n(t),b=c(t,"P",{"data-svelte-h":!0}),r(b)!=="svelte-5bncqd"&&(b.textContent=ne),Tt=n(t),i(g.$$.fragment,t),Jt=n(t),$=c(t,"P",{"data-svelte-h":!0}),r($)!=="svelte-shw9at"&&($.textContent=Me),jt=n(t),i(k.$$.fragment,t),ht=n(t),Z=c(t,"P",{"data-svelte-h":!0}),r(Z)!=="svelte-17x9tm3"&&(Z.textContent=ie),wt=n(t),A=c(t,"P",{"data-svelte-h":!0}),r(A)!=="svelte-c3h151"&&(A.textContent=me),ft=n(t),J=c(t,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),r(J)!=="svelte-1y7m0b1"&&(J.innerHTML=pe),dt=n(t),i(B.$$.fragment,t),Ct=n(t),j=c(t,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),r(j)!=="svelte-elnchd"&&(j.innerHTML=oe),It=n(t),i(v.$$.fragment,t),bt=n(t),G=c(t,"P",{"data-svelte-h":!0}),r(G)!=="svelte-esiyg6"&&(G.innerHTML=ye),gt=n(t),i(S.$$.fragment,t),$t=n(t),_=c(t,"P",{"data-svelte-h":!0}),r(_)!=="svelte-dnqoqp"&&(_.textContent=ue),kt=n(t),i(X.$$.fragment,t),Zt=n(t),i(N.$$.fragment,t),At=n(t),E=c(t,"P",{"data-svelte-h":!0}),r(E)!=="svelte-58kl57"&&(E.innerHTML=ce),Bt=n(t),i(V.$$.fragment,t),vt=n(t),i(Q.$$.fragment,t),Gt=n(t),i(R.$$.fragment,t),St=n(t),i(W.$$.fragment,t),_t=n(t),i(q.$$.fragment,t),Xt=n(t),x=c(t,"P",{"data-svelte-h":!0}),r(x)!=="svelte-19fec27"&&(x.innerHTML=Ue),Nt=n(t),i(z.$$.fragment,t),Et=n(t),i(F.$$.fragment,t),Vt=n(t),i(Y.$$.fragment,t),Qt=n(t),H=c(t,"P",{"data-svelte-h":!0}),r(H)!=="svelte-2g4h75"&&(H.innerHTML=re),Rt=n(t),i(L.$$.fragment,t),Wt=n(t),i(D.$$.fragment,t),qt=n(t),P=c(t,"P",{"data-svelte-h":!0}),r(P)!=="svelte-7umcxy"&&(P.innerHTML=Te),xt=n(t),i(K.$$.fragment,t),zt=n(t),i(O.$$.fragment,t),Ft=n(t),tt=c(t,"P",{"data-svelte-h":!0}),r(tt)!=="svelte-c0uxql"&&(tt.innerHTML=Je),Yt=n(t),i(et.$$.fragment,t),Ht=n(t),lt=c(t,"P",{"data-svelte-h":!0}),r(lt)!=="svelte-1yzen0k"&&(lt.textContent=je),Lt=n(t),i(st.$$.fragment,t),Dt=n(t),i(at.$$.fragment,t),Pt=n(t),i(nt.$$.fragment,t),Kt=n(t),i(Mt.$$.fragment,t),Ot=n(t),mt=c(t,"P",{}),he(mt).forEach(l),this.h()},h(){ee(T,"name","hf:doc:metadata"),ee(T,"content",Ze),ee(J,"class","tip"),ee(j,"class","warning")},m(t,e){ge(document.head,T),s(t,pt,e),s(t,it,e),s(t,ot,e),m(w,t,e),s(t,yt,e),s(t,f,e),s(t,ut,e),m(d,t,e),s(t,ct,e),s(t,C,e),s(t,Ut,e),m(I,t,e),s(t,rt,e),s(t,b,e),s(t,Tt,e),m(g,t,e),s(t,Jt,e),s(t,$,e),s(t,jt,e),m(k,t,e),s(t,ht,e),s(t,Z,e),s(t,wt,e),s(t,A,e),s(t,ft,e),s(t,J,e),s(t,dt,e),m(B,t,e),s(t,Ct,e),s(t,j,e),s(t,It,e),m(v,t,e),s(t,bt,e),s(t,G,e),s(t,gt,e),m(S,t,e),s(t,$t,e),s(t,_,e),s(t,kt,e),m(X,t,e),s(t,Zt,e),m(N,t,e),s(t,At,e),s(t,E,e),s(t,Bt,e),m(V,t,e),s(t,vt,e),m(Q,t,e),s(t,Gt,e),m(R,t,e),s(t,St,e),m(W,t,e),s(t,_t,e),m(q,t,e),s(t,Xt,e),s(t,x,e),s(t,Nt,e),m(z,t,e),s(t,Et,e),m(F,t,e),s(t,Vt,e),m(Y,t,e),s(t,Qt,e),s(t,H,e),s(t,Rt,e),m(L,t,e),s(t,Wt,e),m(D,t,e),s(t,qt,e),s(t,P,e),s(t,xt,e),m(K,t,e),s(t,zt,e),m(O,t,e),s(t,Ft,e),s(t,tt,e),s(t,Yt,e),m(et,t,e),s(t,Ht,e),s(t,lt,e),s(t,Lt,e),m(st,t,e),s(t,Dt,e),m(at,t,e),s(t,Pt,e),m(nt,t,e),s(t,Kt,e),m(Mt,t,e),s(t,Ot,e),s(t,mt,e),te=!0},p:fe,i(t){te||(p(w.$$.fragment,t),p(d.$$.fragment,t),p(I.$$.fragment,t),p(g.$$.fragment,t),p(k.$$.fragment,t),p(B.$$.fragment,t),p(v.$$.fragment,t),p(S.$$.fragment,t),p(X.$$.fragment,t),p(N.$$.fragment,t),p(V.$$.fragment,t),p(Q.$$.fragment,t),p(R.$$.fragment,t),p(W.$$.fragment,t),p(q.$$.fragment,t),p(z.$$.fragment,t),p(F.$$.fragment,t),p(Y.$$.fragment,t),p(L.$$.fragment,t),p(D.$$.fragment,t),p(K.$$.fragment,t),p(O.$$.fragment,t),p(et.$$.fragment,t),p(st.$$.fragment,t),p(at.$$.fragment,t),p(nt.$$.fragment,t),p(Mt.$$.fragment,t),te=!0)},o(t){o(w.$$.fragment,t),o(d.$$.fragment,t),o(I.$$.fragment,t),o(g.$$.fragment,t),o(k.$$.fragment,t),o(B.$$.fragment,t),o(v.$$.fragment,t),o(S.$$.fragment,t),o(X.$$.fragment,t),o(N.$$.fragment,t),o(V.$$.fragment,t),o(Q.$$.fragment,t),o(R.$$.fragment,t),o(W.$$.fragment,t),o(q.$$.fragment,t),o(z.$$.fragment,t),o(F.$$.fragment,t),o(Y.$$.fragment,t),o(L.$$.fragment,t),o(D.$$.fragment,t),o(K.$$.fragment,t),o(O.$$.fragment,t),o(et.$$.fragment,t),o(st.$$.fragment,t),o(at.$$.fragment,t),o(nt.$$.fragment,t),o(Mt.$$.fragment,t),te=!1},d(t){t&&(l(pt),l(it),l(ot),l(yt),l(f),l(ut),l(ct),l(C),l(Ut),l(rt),l(b),l(Tt),l(Jt),l($),l(jt),l(ht),l(Z),l(wt),l(A),l(ft),l(J),l(dt),l(Ct),l(j),l(It),l(bt),l(G),l(gt),l($t),l(_),l(kt),l(Zt),l(At),l(E),l(Bt),l(vt),l(Gt),l(St),l(_t),l(Xt),l(x),l(Nt),l(Et),l(Vt),l(Qt),l(H),l(Rt),l(Wt),l(qt),l(P),l(xt),l(zt),l(Ft),l(tt),l(Yt),l(Ht),l(lt),l(Lt),l(Dt),l(Pt),l(Kt),l(Ot),l(mt)),l(T),y(w,t),y(d,t),y(I,t),y(g,t),y(k,t),y(B,t),y(v,t),y(S,t),y(X,t),y(N,t),y(V,t),y(Q,t),y(R,t),y(W,t),y(q,t),y(z,t),y(F,t),y(Y,t),y(L,t),y(D,t),y(K,t),y(O,t),y(et,t),y(st,t),y(at,t),y(nt,t),y(Mt,t)}}}const Ze='{"title":"Adding a Custom Task","local":"adding-a-custom-task","sections":[{"title":"Task Categories","local":"task-categories","sections":[{"title":"Core Evaluations","local":"core-evaluations","sections":[],"depth":3},{"title":"Extended Evaluations","local":"extended-evaluations","sections":[],"depth":3},{"title":"Community Evaluations","local":"community-evaluations","sections":[],"depth":3}],"depth":2},{"title":"Step-by-Step Creation of a Custom Task","local":"step-by-step-creation-of-a-custom-task","sections":[{"title":"Step 1: Create the Task File","local":"step-1-create-the-task-file","sections":[],"depth":3},{"title":"Step 2: Define the Prompt Function","local":"step-2-define-the-prompt-function","sections":[],"depth":3},{"title":"Step 3: Choose or Create Metrics","local":"step-3-choose-or-create-metrics","sections":[{"title":"Using Existing Metrics","local":"using-existing-metrics","sections":[],"depth":4},{"title":"Creating Custom Metrics","local":"creating-custom-metrics","sections":[],"depth":4}],"depth":3},{"title":"Step 4: Define Your Task","local":"step-4-define-your-task","sections":[{"title":"Simple Task (No Subsets)","local":"simple-task-no-subsets","sections":[],"depth":4},{"title":"Task with Multiple Subsets","local":"task-with-multiple-subsets","sections":[],"depth":4}],"depth":3},{"title":"Step 5: Add Tasks to the Table","local":"step-5-add-tasks-to-the-table","sections":[],"depth":3},{"title":"Step 6: Creating a requirement file","local":"step-6-creating-a-requirement-file","sections":[],"depth":3}],"depth":2},{"title":"Running Your Custom Task","local":"running-your-custom-task","sections":[{"title":"Example Usage","local":"example-usage","sections":[],"depth":3}],"depth":2}],"depth":1}';function Ae(le){return de(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class _e extends Ce{constructor(T){super(),Ie(this,T,Ae,ke,we,{})}}export{_e as component};

Xet Storage Details

Size:
26.8 kB
·
Xet hash:
5fc826081404dbd7d928c3bd1a90c51f4b380500443ec9581ddcf6cd9a8df1e9

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.