Buckets:

download
raw
23 kB
import{s as Dt,n as Pt,o as Kt}from"../chunks/scheduler.3a17fb72.js";import{S as Ot,i as te,e as r,s as a,c as M,h as ee,a as c,d as s,b as n,f as Lt,g as i,j as u,k as Rt,l as se,m as l,n as y,t as p,o as m,p as U}from"../chunks/index.093f8863.js";import{C as le,H as o,E as ae}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.6c54d4d9.js";import{C as J}from"../chunks/CodeBlock.248c3976.js";function ne(Vt){let T,tt,K,et,h,st,w,lt,f,Et="Lighteval provides a flexible framework for creating custom evaluation tasks. This guide explains how to create and integrate new tasks into the evaluation system.",at,d,nt,j,Nt=`<p>To contribute your task to the Lighteval repository, you would first need
to install the required dev dependencies by running <code>pip install -e .[dev]</code>
and then run <code>pre-commit install</code> to install the pre-commit hooks.</p>`,Mt,C,it,b,vt=`First, create a Python file or directory under the <code>src/lighteval/tasks/tasks</code> directory.
A directory is helpfull if you need to split your file into multiple ones, just make sure to have one of the file named <code>main.py</code>.`,yt,g,pt,$,Wt=`You need to define a prompt function that will convert a line from your
dataset to a document to be used for evaluation.`,mt,I,Ut,B,ot,k,zt='You can either use an existing metric (defined in <code>lighteval.metrics.metrics.Metrics</code>) or <a href="adding-a-new-metric">create a custom one</a>.',rt,Z,ct,A,ut,G,Tt,S,jt,_,Jt,X,qt='You can define a task with or without subsets using <a href="/docs/lighteval/pr_1233/en/package_reference/tasks#lighteval.tasks.lighteval_task.LightevalTaskConfig">LightevalTaskConfig</a>.',ht,Q,wt,R,ft,V,dt,E,Yt=`If you want to create a task with multiple subsets, add them to the
<code>SAMPLE_SUBSETS</code> list and create a task for each subset.`,Ct,N,bt,v,gt,W,Ft="Then you need to add your task to the <code>TASKS_TABLE</code> list.",$t,z,It,q,Bt,Y,xt=`If your task has requirements, you need to create a <code>requirement.txt</code> file with
only the required dependencies so that anyone can run your task.`,kt,F,Zt,x,Ht="Once your file is created, you can run the evaluation with the following command:",At,H,Gt,L,St,D,_t,P,Xt,O,Qt;return h=new le({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),w=new o({props:{title:"Adding a Custom Task",local:"adding-a-custom-task",headingTag:"h1"}}),d=new o({props:{title:"Step-by-Step Creation of a Task",local:"step-by-step-creation-of-a-task",headingTag:"h2"}}),C=new o({props:{title:"Step 1: Create the Task File",local:"step-1-create-the-task-file",headingTag:"h3"}}),g=new o({props:{title:"Step 2: Define the Prompt Function",local:"step-2-define-the-prompt-function",headingTag:"h3"}}),I=new J({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC50YXNrcy5yZXF1ZXN0cyUyMGltcG9ydCUyMERvYyUwQSUwQSUyMyUyMERlZmluZSUyMGFzJTIwbWFueSUyMGFzJTIweW91JTIwbmVlZCUyMGZvciUyMHlvdXIlMjBkaWZmZXJlbnQlMjB0YXNrcyUwQWRlZiUyMHByb21wdF9mbihsaW5lJTNBJTIwZGljdCUyQyUyMHRhc2tfbmFtZSUzQSUyMHN0ciklM0ElMEElMjAlMjAlMjAlMjAlMjIlMjIlMjJEZWZpbmVzJTIwaG93JTIwdG8lMjBnbyUyMGZyb20lMjBhJTIwZGF0YXNldCUyMGxpbmUlMjB0byUyMGElMjBkb2MlMjBvYmplY3QuJTBBJTIwJTIwJTIwJTIwRm9sbG93JTIwZXhhbXBsZXMlMjBpbiUyMHNyYyUyRmxpZ2h0ZXZhbCUyRnRhc2tzJTJGZGVmYXVsdF9wcm9tcHRzLnB5JTJDJTIwb3IlMjBnZXQlMjBtb3JlJTIwaW5mbyUwQSUyMCUyMCUyMCUyMGFib3V0JTIwd2hhdCUyMHRoaXMlMjBmdW5jdGlvbiUyMHNob3VsZCUyMGRvJTIwaW4lMjB0aGUlMjBSRUFETUUuJTBBJTIwJTIwJTIwJTIwJTIyJTIyJTIyJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwRG9jKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRhc2tfbmFtZSUzRHRhc2tfbmFtZSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHF1ZXJ5JTNEbGluZSU1QiUyMnF1ZXN0aW9uJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwY2hvaWNlcyUzRCU1QmYlMjIlMjAlN0JjJTdEJTIyJTIwZm9yJTIwYyUyMGluJTIwbGluZSU1QiUyMmNob2ljZXMlMjIlNUQlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBnb2xkX2luZGV4JTNEbGluZSU1QiUyMmdvbGQlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAp",highlighted:`<span class="hljs-keyword">from</span> lighteval.tasks.requests <span class="hljs-keyword">import</span> Doc
<span class="hljs-comment"># Define as many as you need for your different tasks</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">prompt_fn</span>(<span class="hljs-params">line: <span class="hljs-built_in">dict</span>, task_name: <span class="hljs-built_in">str</span></span>):
<span class="hljs-string">&quot;&quot;&quot;Defines how to go from a dataset line to a doc object.
Follow examples in src/lighteval/tasks/default_prompts.py, or get more info
about what this function should do in the README.
&quot;&quot;&quot;</span>
<span class="hljs-keyword">return</span> Doc(
task_name=task_name,
query=line[<span class="hljs-string">&quot;question&quot;</span>],
choices=[<span class="hljs-string">f&quot; <span class="hljs-subst">{c}</span>&quot;</span> <span class="hljs-keyword">for</span> c <span class="hljs-keyword">in</span> line[<span class="hljs-string">&quot;choices&quot;</span>]],
gold_index=line[<span class="hljs-string">&quot;gold&quot;</span>],
)`,wrap:!1}}),B=new o({props:{title:"Step 3: Choose or Create Metrics",local:"step-3-choose-or-create-metrics",headingTag:"h3"}}),Z=new o({props:{title:"Using Existing Metrics",local:"using-existing-metrics",headingTag:"h4"}}),A=new J({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC5tZXRyaWNzJTIwaW1wb3J0JTIwTWV0cmljcyUwQSUwQSUyMyUyMFVzZSUyMGFuJTIwZXhpc3RpbmclMjBtZXRyaWMlMEFtZXRyaWMlMjAlM0QlMjBNZXRyaWNzLkFDQ1VSQUNZ",highlighted:`<span class="hljs-keyword">from</span> lighteval.metrics <span class="hljs-keyword">import</span> Metrics
<span class="hljs-comment"># Use an existing metric</span>
metric = Metrics.ACCURACY`,wrap:!1}}),G=new o({props:{title:"Creating Custom Metrics",local:"creating-custom-metrics",headingTag:"h4"}}),S=new J({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC5tZXRyaWNzLnV0aWxzLm1ldHJpY191dGlscyUyMGltcG9ydCUyMFNhbXBsZUxldmVsTWV0cmljJTBBaW1wb3J0JTIwbnVtcHklMjBhcyUyMG5wJTBBJTBBY3VzdG9tX21ldHJpYyUyMCUzRCUyMFNhbXBsZUxldmVsTWV0cmljKCUwQSUyMCUyMCUyMCUyMG1ldHJpY19uYW1lJTNEJTIybXlfY3VzdG9tX21ldHJpY19uYW1lJTIyJTJDJTBBJTIwJTIwJTIwJTIwaGlnaGVyX2lzX2JldHRlciUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBjYXRlZ29yeSUzRCUyMmFjY3VyYWN5JTIyJTJDJTBBJTIwJTIwJTIwJTIwc2FtcGxlX2xldmVsX2ZuJTNEbGFtYmRhJTIweCUzQSUyMHglMkMlMjAlMjAlMjMlMjBIb3clMjB0byUyMGNvbXB1dGUlMjBzY29yZSUyMGZvciUyMG9uZSUyMHNhbXBsZSUwQSUyMCUyMCUyMCUyMGNvcnB1c19sZXZlbF9mbiUzRG5wLm1lYW4lMkMlMjAlMjAlMjMlMjBIb3clMjB0byUyMGFnZ3JlZ2F0ZSUyMHRoZSUyMHNhbXBsZSUyMG1ldHJpY3MlMEEp",highlighted:`<span class="hljs-keyword">from</span> lighteval.metrics.utils.metric_utils <span class="hljs-keyword">import</span> SampleLevelMetric
<span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
custom_metric = SampleLevelMetric(
metric_name=<span class="hljs-string">&quot;my_custom_metric_name&quot;</span>,
higher_is_better=<span class="hljs-literal">True</span>,
category=<span class="hljs-string">&quot;accuracy&quot;</span>,
sample_level_fn=<span class="hljs-keyword">lambda</span> x: x, <span class="hljs-comment"># How to compute score for one sample</span>
corpus_level_fn=np.mean, <span class="hljs-comment"># How to aggregate the sample metrics</span>
)`,wrap:!1}}),_=new o({props:{title:"Step 4: Define Your Task",local:"step-4-define-your-task",headingTag:"h3"}}),Q=new o({props:{title:"Simple Task (No Subsets)",local:"simple-task-no-subsets",headingTag:"h4"}}),R=new J({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC50YXNrcy5saWdodGV2YWxfdGFzayUyMGltcG9ydCUyMExpZ2h0ZXZhbFRhc2tDb25maWclMEElMEElMjMlMjBUaGlzJTIwaXMlMjBob3clMjB5b3UlMjBjcmVhdGUlMjBhJTIwc2ltcGxlJTIwdGFzayUyMChsaWtlJTIwSGVsbGFTd2FnKSUyMHdoaWNoJTIwaGFzJTIwb25lJTIwc2luZ2xlJTIwc3Vic2V0JTBBJTIzJTIwYXR0YWNoZWQlMjB0byUyMGl0JTJDJTIwYW5kJTIwb25lJTIwZXZhbHVhdGlvbiUyMHBvc3NpYmxlLiUwQXRhc2slMjAlM0QlMjBMaWdodGV2YWxUYXNrQ29uZmlnKCUwQSUyMCUyMCUyMCUyMG5hbWUlM0QlMjJteW90aGVydGFzayUyMiUyQyUwQSUyMCUyMCUyMCUyMHByb21wdF9mdW5jdGlvbiUzRHByb21wdF9mbiUyQyUyMCUyMCUyMyUyME11c3QlMjBiZSUyMGRlZmluZWQlMjBpbiUyMHRoZSUyMGZpbGUlMjBvciUyMGltcG9ydGVkJTBBJTIwJTIwJTIwJTIwaGZfcmVwbyUzRCUyMnlvdXJfZGF0YXNldF9yZXBvX29uX2hmJTIyJTJDJTBBJTIwJTIwJTIwJTIwaGZfc3Vic2V0JTNEJTIyZGVmYXVsdCUyMiUyQyUwQSUyMCUyMCUyMCUyMGhmX2F2YWlsX3NwbGl0cyUzRCU1QiUyMnRyYWluJTIyJTJDJTIwJTIydGVzdCUyMiU1RCUyQyUwQSUyMCUyMCUyMCUyMGV2YWx1YXRpb25fc3BsaXRzJTNEJTVCJTIydGVzdCUyMiU1RCUyQyUwQSUyMCUyMCUyMCUyMGZld19zaG90c19zcGxpdCUzRCUyMnRyYWluJTIyJTJDJTBBJTIwJTIwJTIwJTIwZmV3X3Nob3RzX3NlbGVjdCUzRCUyMnJhbmRvbV9zYW1wbGluZ19mcm9tX3RyYWluJTIyJTJDJTBBJTIwJTIwJTIwJTIwbWV0cmljcyUzRCU1Qm1ldHJpYyU1RCUyQyUyMCUyMCUyMyUyMFNlbGVjdCUyMHlvdXIlMjBtZXRyaWMlMjBpbiUyME1ldHJpY3MlMEElMjAlMjAlMjAlMjBnZW5lcmF0aW9uX3NpemUlM0QyNTYlMkMlMEElMjAlMjAlMjAlMjBzdG9wX3NlcXVlbmNlJTNEJTVCJTIyJTVDbiUyMiUyQyUyMCUyMlF1ZXN0aW9uJTNBJTIyJTVEJTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">from</span> lighteval.tasks.lighteval_task <span class="hljs-keyword">import</span> LightevalTaskConfig
<span class="hljs-comment"># This is how you create a simple task (like HellaSwag) which has one single subset</span>
<span class="hljs-comment"># attached to it, and one evaluation possible.</span>
task = LightevalTaskConfig(
name=<span class="hljs-string">&quot;myothertask&quot;</span>,
prompt_function=prompt_fn, <span class="hljs-comment"># Must be defined in the file or imported</span>
hf_repo=<span class="hljs-string">&quot;your_dataset_repo_on_hf&quot;</span>,
hf_subset=<span class="hljs-string">&quot;default&quot;</span>,
hf_avail_splits=[<span class="hljs-string">&quot;train&quot;</span>, <span class="hljs-string">&quot;test&quot;</span>],
evaluation_splits=[<span class="hljs-string">&quot;test&quot;</span>],
few_shots_split=<span class="hljs-string">&quot;train&quot;</span>,
few_shots_select=<span class="hljs-string">&quot;random_sampling_from_train&quot;</span>,
metrics=[metric], <span class="hljs-comment"># Select your metric in Metrics</span>
generation_size=<span class="hljs-number">256</span>,
stop_sequence=[<span class="hljs-string">&quot;\\n&quot;</span>, <span class="hljs-string">&quot;Question:&quot;</span>],
)`,wrap:!1}}),V=new o({props:{title:"Task with Multiple Subsets",local:"task-with-multiple-subsets",headingTag:"h4"}}),N=new J({props:{code:"U0FNUExFX1NVQlNFVFMlMjAlM0QlMjAlNUIlMjJzdWJzZXQxJTIyJTJDJTIwJTIyc3Vic2V0MiUyMiUyQyUyMCUyMnN1YnNldDMlMjIlNUQlMjAlMjAlMjMlMjBMaXN0JTIwb2YlMjBhbGwlMjB0aGUlMjBzdWJzZXRzJTIwdG8lMjB1c2UlMjBmb3IlMjB0aGlzJTIwZXZhbCUwQSUwQWNsYXNzJTIwQ3VzdG9tU3Vic2V0VGFzayhMaWdodGV2YWxUYXNrQ29uZmlnKSUzQSUwQSUyMCUyMCUyMCUyMGRlZiUyMF9faW5pdF9fKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHNlbGYlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBuYW1lJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaGZfc3Vic2V0JTJDJTBBJTIwJTIwJTIwJTIwKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN1cGVyKCkuX19pbml0X18oJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbmFtZSUzRG5hbWUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBoZl9zdWJzZXQlM0RoZl9zdWJzZXQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwcm9tcHRfZnVuY3Rpb24lM0Rwcm9tcHRfZm4lMkMlMjAlMjAlMjMlMjBNdXN0JTIwYmUlMjBkZWZpbmVkJTIwaW4lMjB0aGUlMjBmaWxlJTIwb3IlMjBpbXBvcnRlZCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGhmX3JlcG8lM0QlMjJ5b3VyX2RhdGFzZXRfbmFtZSUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG1ldHJpY3MlM0QlNUJjdXN0b21fbWV0cmljJTVEJTJDJTIwJTIwJTIzJTIwU2VsZWN0JTIweW91ciUyMG1ldHJpYyUyMGluJTIwTWV0cmljcyUyMG9yJTIwdXNlJTIweW91ciUyMGN1c3RvbV9tZXRyaWMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBoZl9hdmFpbF9zcGxpdHMlM0QlNUIlMjJ0cmFpbiUyMiUyQyUyMCUyMnRlc3QlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBldmFsdWF0aW9uX3NwbGl0cyUzRCU1QiUyMnRlc3QlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmZXdfc2hvdHNfc3BsaXQlM0QlMjJ0cmFpbiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZld19zaG90c19zZWxlY3QlM0QlMjJyYW5kb21fc2FtcGxpbmdfZnJvbV90cmFpbiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGdlbmVyYXRpb25fc2l6ZSUzRDI1NiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN0b3Bfc2VxdWVuY2UlM0QlNUIlMjIlNUNuJTIyJTJDJTIwJTIyUXVlc3Rpb24lM0ElMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjApJTBBJTBBU1VCU0VUX1RBU0tTJTIwJTNEJTIwJTVCQ3VzdG9tU3Vic2V0VGFzayhuYW1lJTNEZiUyMnRhc2slM0ElN0JzdWJzZXQlN0QlMjIlMkMlMjBoZl9zdWJzZXQlM0RzdWJzZXQpJTIwZm9yJTIwc3Vic2V0JTIwaW4lMjBTQU1QTEVfU1VCU0VUUyU1RA==",highlighted:`SAMPLE_SUBSETS = [<span class="hljs-string">&quot;subset1&quot;</span>, <span class="hljs-string">&quot;subset2&quot;</span>, <span class="hljs-string">&quot;subset3&quot;</span>] <span class="hljs-comment"># List of all the subsets to use for this eval</span>
<span class="hljs-keyword">class</span> <span class="hljs-title class_">CustomSubsetTask</span>(<span class="hljs-title class_ inherited__">LightevalTaskConfig</span>):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">
self,
name,
hf_subset,
</span>):
<span class="hljs-built_in">super</span>().__init__(
name=name,
hf_subset=hf_subset,
prompt_function=prompt_fn, <span class="hljs-comment"># Must be defined in the file or imported</span>
hf_repo=<span class="hljs-string">&quot;your_dataset_name&quot;</span>,
metrics=[custom_metric], <span class="hljs-comment"># Select your metric in Metrics or use your custom_metric</span>
hf_avail_splits=[<span class="hljs-string">&quot;train&quot;</span>, <span class="hljs-string">&quot;test&quot;</span>],
evaluation_splits=[<span class="hljs-string">&quot;test&quot;</span>],
few_shots_split=<span class="hljs-string">&quot;train&quot;</span>,
few_shots_select=<span class="hljs-string">&quot;random_sampling_from_train&quot;</span>,
generation_size=<span class="hljs-number">256</span>,
stop_sequence=[<span class="hljs-string">&quot;\\n&quot;</span>, <span class="hljs-string">&quot;Question:&quot;</span>],
)
SUBSET_TASKS = [CustomSubsetTask(name=<span class="hljs-string">f&quot;task:<span class="hljs-subst">{subset}</span>&quot;</span>, hf_subset=subset) <span class="hljs-keyword">for</span> subset <span class="hljs-keyword">in</span> SAMPLE_SUBSETS]`,wrap:!1}}),v=new o({props:{title:"Step 5: Add Tasks to the Table",local:"step-5-add-tasks-to-the-table",headingTag:"h3"}}),z=new J({props:{code:"JTIzJTIwU1RPUkUlMjBZT1VSJTIwRVZBTFMlMEElMEElMjMlMjBUYXNrcyUyMHdpdGglMjBzdWJzZXRzJTNBJTBBVEFTS1NfVEFCTEUlMjAlM0QlMjBTVUJTRVRfVEFTS1MlMEElMEElMjMlMjBUYXNrcyUyMHdpdGhvdXQlMjBzdWJzZXRzJTNBJTBBJTIzJTIwVEFTS1NfVEFCTEUlMjAlM0QlMjAlNUJ0YXNrJTVE",highlighted:`<span class="hljs-comment"># STORE YOUR EVALS</span>
<span class="hljs-comment"># Tasks with subsets:</span>
TASKS_TABLE = SUBSET_TASKS
<span class="hljs-comment"># Tasks without subsets:</span>
<span class="hljs-comment"># TASKS_TABLE = [task]</span>`,wrap:!1}}),q=new o({props:{title:"Step 6: Creating a requirement file",local:"step-6-creating-a-requirement-file",headingTag:"h3"}}),F=new o({props:{title:"Running Your Custom Task",local:"running-your-custom-task",headingTag:"h2"}}),H=new J({props:{code:"bGlnaHRldmFsJTIwYWNjZWxlcmF0ZSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMm1vZGVsX25hbWUlM0RIdWdnaW5nRmFjZUg0JTJGemVwaHlyLTdiLWJldGElMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAlN0J0YXNrJTdEJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1jdXN0b20tdGFza3MlMjAlN0JwYXRoX3RvX3lvdXJfY3VzdG9tX3Rhc2tfZmlsZSU3RA==",highlighted:`lighteval accelerate \\
<span class="hljs-string">&quot;model_name=HuggingFaceH4/zephyr-7b-beta&quot;</span> \\
{task} \\
--custom-tasks {path_to_your_custom_task_file}`,wrap:!1}}),L=new o({props:{title:"Example Usage",local:"example-usage",headingTag:"h3"}}),D=new J({props:{code:"JTIzJTIwUnVuJTIwYSUyMGN1c3RvbSUyMHRhc2slMjB3aXRoJTIwMyUyMHNob3QlMjBldmFsdWF0aW9uJTBBbGlnaHRldmFsJTIwYWNjZWxlcmF0ZSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMm1vZGVsX25hbWUlM0RvcGVuYWktY29tbXVuaXR5JTJGZ3B0MiUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMm15b3RoZXJ0YXNrJTdDMyUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tY3VzdG9tLXRhc2tzJTIwY29tbXVuaXR5X3Rhc2tzJTJGbXlfY3VzdG9tX3Rhc2sucHk=",highlighted:`<span class="hljs-comment"># Run a custom task with 3 shot evaluation</span>
lighteval accelerate \\
<span class="hljs-string">&quot;model_name=openai-community/gpt2&quot;</span> \\
<span class="hljs-string">&quot;myothertask|3&quot;</span> \\
--custom-tasks community_tasks/my_custom_task.py`,wrap:!1}}),P=new ae({props:{source:"https://github.com/huggingface/lighteval/blob/main/docs/source/adding-a-custom-task.mdx"}}),{c(){T=r("meta"),tt=a(),K=r("p"),et=a(),M(h.$$.fragment),st=a(),M(w.$$.fragment),lt=a(),f=r("p"),f.textContent=Et,at=a(),M(d.$$.fragment),nt=a(),j=r("blockquote"),j.innerHTML=Nt,Mt=a(),M(C.$$.fragment),it=a(),b=r("p"),b.innerHTML=vt,yt=a(),M(g.$$.fragment),pt=a(),$=r("p"),$.textContent=Wt,mt=a(),M(I.$$.fragment),Ut=a(),M(B.$$.fragment),ot=a(),k=r("p"),k.innerHTML=zt,rt=a(),M(Z.$$.fragment),ct=a(),M(A.$$.fragment),ut=a(),M(G.$$.fragment),Tt=a(),M(S.$$.fragment),jt=a(),M(_.$$.fragment),Jt=a(),X=r("p"),X.innerHTML=qt,ht=a(),M(Q.$$.fragment),wt=a(),M(R.$$.fragment),ft=a(),M(V.$$.fragment),dt=a(),E=r("p"),E.innerHTML=Yt,Ct=a(),M(N.$$.fragment),bt=a(),M(v.$$.fragment),gt=a(),W=r("p"),W.innerHTML=Ft,$t=a(),M(z.$$.fragment),It=a(),M(q.$$.fragment),Bt=a(),Y=r("p"),Y.innerHTML=xt,kt=a(),M(F.$$.fragment),Zt=a(),x=r("p"),x.textContent=Ht,At=a(),M(H.$$.fragment),Gt=a(),M(L.$$.fragment),St=a(),M(D.$$.fragment),_t=a(),M(P.$$.fragment),Xt=a(),O=r("p"),this.h()},l(t){const e=ee("svelte-u9bgzb",document.head);T=c(e,"META",{name:!0,content:!0}),e.forEach(s),tt=n(t),K=c(t,"P",{}),Lt(K).forEach(s),et=n(t),i(h.$$.fragment,t),st=n(t),i(w.$$.fragment,t),lt=n(t),f=c(t,"P",{"data-svelte-h":!0}),u(f)!=="svelte-r5gjmm"&&(f.textContent=Et),at=n(t),i(d.$$.fragment,t),nt=n(t),j=c(t,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),u(j)!=="svelte-1gpemye"&&(j.innerHTML=Nt),Mt=n(t),i(C.$$.fragment,t),it=n(t),b=c(t,"P",{"data-svelte-h":!0}),u(b)!=="svelte-t1j2vg"&&(b.innerHTML=vt),yt=n(t),i(g.$$.fragment,t),pt=n(t),$=c(t,"P",{"data-svelte-h":!0}),u($)!=="svelte-dnqoqp"&&($.textContent=Wt),mt=n(t),i(I.$$.fragment,t),Ut=n(t),i(B.$$.fragment,t),ot=n(t),k=c(t,"P",{"data-svelte-h":!0}),u(k)!=="svelte-58kl57"&&(k.innerHTML=zt),rt=n(t),i(Z.$$.fragment,t),ct=n(t),i(A.$$.fragment,t),ut=n(t),i(G.$$.fragment,t),Tt=n(t),i(S.$$.fragment,t),jt=n(t),i(_.$$.fragment,t),Jt=n(t),X=c(t,"P",{"data-svelte-h":!0}),u(X)!=="svelte-1d086wa"&&(X.innerHTML=qt),ht=n(t),i(Q.$$.fragment,t),wt=n(t),i(R.$$.fragment,t),ft=n(t),i(V.$$.fragment,t),dt=n(t),E=c(t,"P",{"data-svelte-h":!0}),u(E)!=="svelte-2g4h75"&&(E.innerHTML=Yt),Ct=n(t),i(N.$$.fragment,t),bt=n(t),i(v.$$.fragment,t),gt=n(t),W=c(t,"P",{"data-svelte-h":!0}),u(W)!=="svelte-7umcxy"&&(W.innerHTML=Ft),$t=n(t),i(z.$$.fragment,t),It=n(t),i(q.$$.fragment,t),Bt=n(t),Y=c(t,"P",{"data-svelte-h":!0}),u(Y)!=="svelte-c0uxql"&&(Y.innerHTML=xt),kt=n(t),i(F.$$.fragment,t),Zt=n(t),x=c(t,"P",{"data-svelte-h":!0}),u(x)!=="svelte-1yzen0k"&&(x.textContent=Ht),At=n(t),i(H.$$.fragment,t),Gt=n(t),i(L.$$.fragment,t),St=n(t),i(D.$$.fragment,t),_t=n(t),i(P.$$.fragment,t),Xt=n(t),O=c(t,"P",{}),Lt(O).forEach(s),this.h()},h(){Rt(T,"name","hf:doc:metadata"),Rt(T,"content",Me),Rt(j,"class","warning")},m(t,e){se(document.head,T),l(t,tt,e),l(t,K,e),l(t,et,e),y(h,t,e),l(t,st,e),y(w,t,e),l(t,lt,e),l(t,f,e),l(t,at,e),y(d,t,e),l(t,nt,e),l(t,j,e),l(t,Mt,e),y(C,t,e),l(t,it,e),l(t,b,e),l(t,yt,e),y(g,t,e),l(t,pt,e),l(t,$,e),l(t,mt,e),y(I,t,e),l(t,Ut,e),y(B,t,e),l(t,ot,e),l(t,k,e),l(t,rt,e),y(Z,t,e),l(t,ct,e),y(A,t,e),l(t,ut,e),y(G,t,e),l(t,Tt,e),y(S,t,e),l(t,jt,e),y(_,t,e),l(t,Jt,e),l(t,X,e),l(t,ht,e),y(Q,t,e),l(t,wt,e),y(R,t,e),l(t,ft,e),y(V,t,e),l(t,dt,e),l(t,E,e),l(t,Ct,e),y(N,t,e),l(t,bt,e),y(v,t,e),l(t,gt,e),l(t,W,e),l(t,$t,e),y(z,t,e),l(t,It,e),y(q,t,e),l(t,Bt,e),l(t,Y,e),l(t,kt,e),y(F,t,e),l(t,Zt,e),l(t,x,e),l(t,At,e),y(H,t,e),l(t,Gt,e),y(L,t,e),l(t,St,e),y(D,t,e),l(t,_t,e),y(P,t,e),l(t,Xt,e),l(t,O,e),Qt=!0},p:Pt,i(t){Qt||(p(h.$$.fragment,t),p(w.$$.fragment,t),p(d.$$.fragment,t),p(C.$$.fragment,t),p(g.$$.fragment,t),p(I.$$.fragment,t),p(B.$$.fragment,t),p(Z.$$.fragment,t),p(A.$$.fragment,t),p(G.$$.fragment,t),p(S.$$.fragment,t),p(_.$$.fragment,t),p(Q.$$.fragment,t),p(R.$$.fragment,t),p(V.$$.fragment,t),p(N.$$.fragment,t),p(v.$$.fragment,t),p(z.$$.fragment,t),p(q.$$.fragment,t),p(F.$$.fragment,t),p(H.$$.fragment,t),p(L.$$.fragment,t),p(D.$$.fragment,t),p(P.$$.fragment,t),Qt=!0)},o(t){m(h.$$.fragment,t),m(w.$$.fragment,t),m(d.$$.fragment,t),m(C.$$.fragment,t),m(g.$$.fragment,t),m(I.$$.fragment,t),m(B.$$.fragment,t),m(Z.$$.fragment,t),m(A.$$.fragment,t),m(G.$$.fragment,t),m(S.$$.fragment,t),m(_.$$.fragment,t),m(Q.$$.fragment,t),m(R.$$.fragment,t),m(V.$$.fragment,t),m(N.$$.fragment,t),m(v.$$.fragment,t),m(z.$$.fragment,t),m(q.$$.fragment,t),m(F.$$.fragment,t),m(H.$$.fragment,t),m(L.$$.fragment,t),m(D.$$.fragment,t),m(P.$$.fragment,t),Qt=!1},d(t){t&&(s(tt),s(K),s(et),s(st),s(lt),s(f),s(at),s(nt),s(j),s(Mt),s(it),s(b),s(yt),s(pt),s($),s(mt),s(Ut),s(ot),s(k),s(rt),s(ct),s(ut),s(Tt),s(jt),s(Jt),s(X),s(ht),s(wt),s(ft),s(dt),s(E),s(Ct),s(bt),s(gt),s(W),s($t),s(It),s(Bt),s(Y),s(kt),s(Zt),s(x),s(At),s(Gt),s(St),s(_t),s(Xt),s(O)),s(T),U(h,t),U(w,t),U(d,t),U(C,t),U(g,t),U(I,t),U(B,t),U(Z,t),U(A,t),U(G,t),U(S,t),U(_,t),U(Q,t),U(R,t),U(V,t),U(N,t),U(v,t),U(z,t),U(q,t),U(F,t),U(H,t),U(L,t),U(D,t),U(P,t)}}}const Me='{"title":"Adding a Custom Task","local":"adding-a-custom-task","sections":[{"title":"Step-by-Step Creation of a Task","local":"step-by-step-creation-of-a-task","sections":[{"title":"Step 1: Create the Task File","local":"step-1-create-the-task-file","sections":[],"depth":3},{"title":"Step 2: Define the Prompt Function","local":"step-2-define-the-prompt-function","sections":[],"depth":3},{"title":"Step 3: Choose or Create Metrics","local":"step-3-choose-or-create-metrics","sections":[{"title":"Using Existing Metrics","local":"using-existing-metrics","sections":[],"depth":4},{"title":"Creating Custom Metrics","local":"creating-custom-metrics","sections":[],"depth":4}],"depth":3},{"title":"Step 4: Define Your Task","local":"step-4-define-your-task","sections":[{"title":"Simple Task (No Subsets)","local":"simple-task-no-subsets","sections":[],"depth":4},{"title":"Task with Multiple Subsets","local":"task-with-multiple-subsets","sections":[],"depth":4}],"depth":3},{"title":"Step 5: Add Tasks to the Table","local":"step-5-add-tasks-to-the-table","sections":[],"depth":3},{"title":"Step 6: Creating a requirement file","local":"step-6-creating-a-requirement-file","sections":[],"depth":3}],"depth":2},{"title":"Running Your Custom Task","local":"running-your-custom-task","sections":[{"title":"Example Usage","local":"example-usage","sections":[],"depth":3}],"depth":2}],"depth":1}';function ie(Vt){return Kt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class oe extends Ot{constructor(T){super(),te(this,T,ie,ne,Dt,{})}}export{oe as component};

Xet Storage Details

Size:
23 kB
·
Xet hash:
49d50279a8b8a02ee80ae386080395295731b0a118a5328505b2184c1224f250

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.