Buckets:
| import{s as Dt,n as Pt,o as Kt}from"../chunks/scheduler.3a17fb72.js";import{S as Ot,i as tl,e as c,s as a,c as n,h as ll,a as r,d as s,b as M,f as Lt,g as i,j as T,k as Et,l as sl,m as e,n as p,t as y,o as m,p as o}from"../chunks/index.093f8863.js";import{C as el}from"../chunks/CopyLLMTxtMenu.ccc246bc.js";import{C as J}from"../chunks/CodeBlock.b126c7de.js";import{H as U,E as al}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.ff8099de.js";function Ml(Qt){let j,tt,K,lt,h,st,w,et,f,vt="Lighteval provides a flexible framework for creating custom evaluation tasks. This guide explains how to create and integrate new tasks into the evaluation system.",at,d,Mt,u,Rt=`<p>To contribute your task to the Lighteval repository, you would first need | |
| to install the required dev dependencies by running <code>pip install -e .[dev]</code> | |
| and then run <code>pre-commit install</code> to install the pre-commit hooks.</p>`,nt,I,it,b,Wt=`First, create a Python file or directory under the <code>src/lighteval/tasks/tasks</code> directory. | |
| A directory is helpfull if you need to split your file into multiple ones, just make sure to have one of the file named <code>main.py</code>.`,pt,C,yt,g,Vt=`You need to define a prompt function that will convert a line from your | |
| dataset to a document to be used for evaluation.`,mt,$,ot,Z,Ut,A,qt='You can either use an existing metric (defined in <code>lighteval.metrics.metrics.Metrics</code>) or <a href="adding-a-new-metric">create a custom one</a>.',ct,k,rt,B,Tt,G,jt,S,ut,N,Jt,X,zt='You can define a task with or without subsets using <a href="/docs/lighteval/pr_1032/en/package_reference/tasks#lighteval.tasks.lighteval_task.LightevalTaskConfig">LightevalTaskConfig</a>.',ht,_,wt,E,ft,Q,dt,v,Yt=`If you want to create a task with multiple subsets, add them to the | |
| <code>SAMPLE_SUBSETS</code> list and create a task for each subset.`,It,R,bt,W,Ct,V,Ft="Then you need to add your task to the <code>TASKS_TABLE</code> list.",gt,q,$t,z,Zt,Y,xt=`If your task has requirements, you need to create a <code>requirement.txt</code> file with | |
| only the required dependencies so that anyone can run your task.`,At,F,kt,x,Ht="Once your file is created, you can run the evaluation with the following command:",Bt,H,Gt,L,St,D,Nt,P,Xt,O,_t;return h=new el({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),w=new U({props:{title:"Adding a Custom Task",local:"adding-a-custom-task",headingTag:"h1"}}),d=new U({props:{title:"Step-by-Step Creation of a Task",local:"step-by-step-creation-of-a-task",headingTag:"h2"}}),I=new U({props:{title:"Step 1: Create the Task File",local:"step-1-create-the-task-file",headingTag:"h3"}}),C=new U({props:{title:"Step 2: Define the Prompt Function",local:"step-2-define-the-prompt-function",headingTag:"h3"}}),$=new J({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC50YXNrcy5yZXF1ZXN0cyUyMGltcG9ydCUyMERvYyUwQSUwQSUyMyUyMERlZmluZSUyMGFzJTIwbWFueSUyMGFzJTIweW91JTIwbmVlZCUyMGZvciUyMHlvdXIlMjBkaWZmZXJlbnQlMjB0YXNrcyUwQWRlZiUyMHByb21wdF9mbihsaW5lJTNBJTIwZGljdCUyQyUyMHRhc2tfbmFtZSUzQSUyMHN0ciklM0ElMEElMjAlMjAlMjAlMjAlMjIlMjIlMjJEZWZpbmVzJTIwaG93JTIwdG8lMjBnbyUyMGZyb20lMjBhJTIwZGF0YXNldCUyMGxpbmUlMjB0byUyMGElMjBkb2MlMjBvYmplY3QuJTBBJTIwJTIwJTIwJTIwRm9sbG93JTIwZXhhbXBsZXMlMjBpbiUyMHNyYyUyRmxpZ2h0ZXZhbCUyRnRhc2tzJTJGZGVmYXVsdF9wcm9tcHRzLnB5JTJDJTIwb3IlMjBnZXQlMjBtb3JlJTIwaW5mbyUwQSUyMCUyMCUyMCUyMGFib3V0JTIwd2hhdCUyMHRoaXMlMjBmdW5jdGlvbiUyMHNob3VsZCUyMGRvJTIwaW4lMjB0aGUlMjBSRUFETUUuJTBBJTIwJTIwJTIwJTIwJTIyJTIyJTIyJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwRG9jKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRhc2tfbmFtZSUzRHRhc2tfbmFtZSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHF1ZXJ5JTNEbGluZSU1QiUyMnF1ZXN0aW9uJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwY2hvaWNlcyUzRCU1QmYlMjIlMjAlN0JjJTdEJTIyJTIwZm9yJTIwYyUyMGluJTIwbGluZSU1QiUyMmNob2ljZXMlMjIlNUQlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBnb2xkX2luZGV4JTNEbGluZSU1QiUyMmdvbGQlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAp",highlighted:`<span class="hljs-keyword">from</span> lighteval.tasks.requests <span class="hljs-keyword">import</span> Doc | |
| <span class="hljs-comment"># Define as many as you need for your different tasks</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">prompt_fn</span>(<span class="hljs-params">line: <span class="hljs-built_in">dict</span>, task_name: <span class="hljs-built_in">str</span></span>): | |
| <span class="hljs-string">"""Defines how to go from a dataset line to a doc object. | |
| Follow examples in src/lighteval/tasks/default_prompts.py, or get more info | |
| about what this function should do in the README. | |
| """</span> | |
| <span class="hljs-keyword">return</span> Doc( | |
| task_name=task_name, | |
| query=line[<span class="hljs-string">"question"</span>], | |
| choices=[<span class="hljs-string">f" <span class="hljs-subst">{c}</span>"</span> <span class="hljs-keyword">for</span> c <span class="hljs-keyword">in</span> line[<span class="hljs-string">"choices"</span>]], | |
| gold_index=line[<span class="hljs-string">"gold"</span>], | |
| )`,wrap:!1}}),Z=new U({props:{title:"Step 3: Choose or Create Metrics",local:"step-3-choose-or-create-metrics",headingTag:"h3"}}),k=new U({props:{title:"Using Existing Metrics",local:"using-existing-metrics",headingTag:"h4"}}),B=new J({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC5tZXRyaWNzJTIwaW1wb3J0JTIwTWV0cmljcyUwQSUwQSUyMyUyMFVzZSUyMGFuJTIwZXhpc3RpbmclMjBtZXRyaWMlMEFtZXRyaWMlMjAlM0QlMjBNZXRyaWNzLkFDQ1VSQUNZ",highlighted:`<span class="hljs-keyword">from</span> lighteval.metrics <span class="hljs-keyword">import</span> Metrics | |
| <span class="hljs-comment"># Use an existing metric</span> | |
| metric = Metrics.ACCURACY`,wrap:!1}}),G=new U({props:{title:"Creating Custom Metrics",local:"creating-custom-metrics",headingTag:"h4"}}),S=new J({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC5tZXRyaWNzLnV0aWxzLm1ldHJpY191dGlscyUyMGltcG9ydCUyMFNhbXBsZUxldmVsTWV0cmljJTBBaW1wb3J0JTIwbnVtcHklMjBhcyUyMG5wJTBBJTBBY3VzdG9tX21ldHJpYyUyMCUzRCUyMFNhbXBsZUxldmVsTWV0cmljKCUwQSUyMCUyMCUyMCUyMG1ldHJpY19uYW1lJTNEJTIybXlfY3VzdG9tX21ldHJpY19uYW1lJTIyJTJDJTBBJTIwJTIwJTIwJTIwaGlnaGVyX2lzX2JldHRlciUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBjYXRlZ29yeSUzRCUyMmFjY3VyYWN5JTIyJTJDJTBBJTIwJTIwJTIwJTIwc2FtcGxlX2xldmVsX2ZuJTNEbGFtYmRhJTIweCUzQSUyMHglMkMlMjAlMjAlMjMlMjBIb3clMjB0byUyMGNvbXB1dGUlMjBzY29yZSUyMGZvciUyMG9uZSUyMHNhbXBsZSUwQSUyMCUyMCUyMCUyMGNvcnB1c19sZXZlbF9mbiUzRG5wLm1lYW4lMkMlMjAlMjAlMjMlMjBIb3clMjB0byUyMGFnZ3JlZ2F0ZSUyMHRoZSUyMHNhbXBsZSUyMG1ldHJpY3MlMEEp",highlighted:`<span class="hljs-keyword">from</span> lighteval.metrics.utils.metric_utils <span class="hljs-keyword">import</span> SampleLevelMetric | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| custom_metric = SampleLevelMetric( | |
| metric_name=<span class="hljs-string">"my_custom_metric_name"</span>, | |
| higher_is_better=<span class="hljs-literal">True</span>, | |
| category=<span class="hljs-string">"accuracy"</span>, | |
| sample_level_fn=<span class="hljs-keyword">lambda</span> x: x, <span class="hljs-comment"># How to compute score for one sample</span> | |
| corpus_level_fn=np.mean, <span class="hljs-comment"># How to aggregate the sample metrics</span> | |
| )`,wrap:!1}}),N=new U({props:{title:"Step 4: Define Your Task",local:"step-4-define-your-task",headingTag:"h3"}}),_=new U({props:{title:"Simple Task (No Subsets)",local:"simple-task-no-subsets",headingTag:"h4"}}),E=new J({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC50YXNrcy5saWdodGV2YWxfdGFzayUyMGltcG9ydCUyMExpZ2h0ZXZhbFRhc2tDb25maWclMEElMEElMjMlMjBUaGlzJTIwaXMlMjBob3clMjB5b3UlMjBjcmVhdGUlMjBhJTIwc2ltcGxlJTIwdGFzayUyMChsaWtlJTIwSGVsbGFTd2FnKSUyMHdoaWNoJTIwaGFzJTIwb25lJTIwc2luZ2xlJTIwc3Vic2V0JTBBJTIzJTIwYXR0YWNoZWQlMjB0byUyMGl0JTJDJTIwYW5kJTIwb25lJTIwZXZhbHVhdGlvbiUyMHBvc3NpYmxlLiUwQXRhc2slMjAlM0QlMjBMaWdodGV2YWxUYXNrQ29uZmlnKCUwQSUyMCUyMCUyMCUyMG5hbWUlM0QlMjJteW90aGVydGFzayUyMiUyQyUwQSUyMCUyMCUyMCUyMHByb21wdF9mdW5jdGlvbiUzRHByb21wdF9mbiUyQyUyMCUyMCUyMyUyME11c3QlMjBiZSUyMGRlZmluZWQlMjBpbiUyMHRoZSUyMGZpbGUlMjBvciUyMGltcG9ydGVkJTBBJTIwJTIwJTIwJTIwc3VpdGUlM0QlNUIlMjJjb21tdW5pdHklMjIlNUQlMkMlMEElMjAlMjAlMjAlMjBoZl9yZXBvJTNEJTIyeW91cl9kYXRhc2V0X3JlcG9fb25faGYlMjIlMkMlMEElMjAlMjAlMjAlMjBoZl9zdWJzZXQlM0QlMjJkZWZhdWx0JTIyJTJDJTBBJTIwJTIwJTIwJTIwaGZfYXZhaWxfc3BsaXRzJTNEJTVCJTIydHJhaW4lMjIlMkMlMjAlMjJ0ZXN0JTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwZXZhbHVhdGlvbl9zcGxpdHMlM0QlNUIlMjJ0ZXN0JTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwZmV3X3Nob3RzX3NwbGl0JTNEJTIydHJhaW4lMjIlMkMlMEElMjAlMjAlMjAlMjBmZXdfc2hvdHNfc2VsZWN0JTNEJTIycmFuZG9tX3NhbXBsaW5nX2Zyb21fdHJhaW4lMjIlMkMlMEElMjAlMjAlMjAlMjBtZXRyaWNzJTNEJTVCbWV0cmljJTVEJTJDJTIwJTIwJTIzJTIwU2VsZWN0JTIweW91ciUyMG1ldHJpYyUyMGluJTIwTWV0cmljcyUwQSUyMCUyMCUyMCUyMGdlbmVyYXRpb25fc2l6ZSUzRDI1NiUyQyUwQSUyMCUyMCUyMCUyMHN0b3Bfc2VxdWVuY2UlM0QlNUIlMjIlNUNuJTIyJTJDJTIwJTIyUXVlc3Rpb24lM0ElMjIlNUQlMkMlMEEp",highlighted:`<span class="hljs-keyword">from</span> lighteval.tasks.lighteval_task <span class="hljs-keyword">import</span> LightevalTaskConfig | |
| <span class="hljs-comment"># This is how you create a simple task (like HellaSwag) which has one single subset</span> | |
| <span class="hljs-comment"># attached to it, and one evaluation possible.</span> | |
| task = LightevalTaskConfig( | |
| name=<span class="hljs-string">"myothertask"</span>, | |
| prompt_function=prompt_fn, <span class="hljs-comment"># Must be defined in the file or imported</span> | |
| suite=[<span class="hljs-string">"community"</span>], | |
| hf_repo=<span class="hljs-string">"your_dataset_repo_on_hf"</span>, | |
| hf_subset=<span class="hljs-string">"default"</span>, | |
| hf_avail_splits=[<span class="hljs-string">"train"</span>, <span class="hljs-string">"test"</span>], | |
| evaluation_splits=[<span class="hljs-string">"test"</span>], | |
| few_shots_split=<span class="hljs-string">"train"</span>, | |
| few_shots_select=<span class="hljs-string">"random_sampling_from_train"</span>, | |
| metrics=[metric], <span class="hljs-comment"># Select your metric in Metrics</span> | |
| generation_size=<span class="hljs-number">256</span>, | |
| stop_sequence=[<span class="hljs-string">"\\n"</span>, <span class="hljs-string">"Question:"</span>], | |
| )`,wrap:!1}}),Q=new U({props:{title:"Task with Multiple Subsets",local:"task-with-multiple-subsets",headingTag:"h4"}}),R=new J({props:{code:"U0FNUExFX1NVQlNFVFMlMjAlM0QlMjAlNUIlMjJzdWJzZXQxJTIyJTJDJTIwJTIyc3Vic2V0MiUyMiUyQyUyMCUyMnN1YnNldDMlMjIlNUQlMjAlMjAlMjMlMjBMaXN0JTIwb2YlMjBhbGwlMjB0aGUlMjBzdWJzZXRzJTIwdG8lMjB1c2UlMjBmb3IlMjB0aGlzJTIwZXZhbCUwQSUwQWNsYXNzJTIwQ3VzdG9tU3Vic2V0VGFzayhMaWdodGV2YWxUYXNrQ29uZmlnKSUzQSUwQSUyMCUyMCUyMCUyMGRlZiUyMF9faW5pdF9fKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHNlbGYlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBuYW1lJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaGZfc3Vic2V0JTJDJTBBJTIwJTIwJTIwJTIwKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN1cGVyKCkuX19pbml0X18oJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbmFtZSUzRG5hbWUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBoZl9zdWJzZXQlM0RoZl9zdWJzZXQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwcm9tcHRfZnVuY3Rpb24lM0Rwcm9tcHRfZm4lMkMlMjAlMjAlMjMlMjBNdXN0JTIwYmUlMjBkZWZpbmVkJTIwaW4lMjB0aGUlMjBmaWxlJTIwb3IlMjBpbXBvcnRlZCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGhmX3JlcG8lM0QlMjJ5b3VyX2RhdGFzZXRfbmFtZSUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG1ldHJpY3MlM0QlNUJjdXN0b21fbWV0cmljJTVEJTJDJTIwJTIwJTIzJTIwU2VsZWN0JTIweW91ciUyMG1ldHJpYyUyMGluJTIwTWV0cmljcyUyMG9yJTIwdXNlJTIweW91ciUyMGN1c3RvbV9tZXRyaWMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBoZl9hdmFpbF9zcGxpdHMlM0QlNUIlMjJ0cmFpbiUyMiUyQyUyMCUyMnRlc3QlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBldmFsdWF0aW9uX3NwbGl0cyUzRCU1QiUyMnRlc3QlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmZXdfc2hvdHNfc3BsaXQlM0QlMjJ0cmFpbiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZld19zaG90c19zZWxlY3QlM0QlMjJyYW5kb21fc2FtcGxpbmdfZnJvbV90cmFpbiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN1aXRlJTNEJTVCJTIybGlnaHRldmFsJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZ2VuZXJhdGlvbl9zaXplJTNEMjU2JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc3RvcF9zZXF1ZW5jZSUzRCU1QiUyMiU1Q24lMjIlMkMlMjAlMjJRdWVzdGlvbiUzQSUyMiU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCklMEElMEFTVUJTRVRfVEFTS1MlMjAlM0QlMjAlNUJDdXN0b21TdWJzZXRUYXNrKG5hbWUlM0RmJTIydGFzayUzQSU3QnN1YnNldCU3RCUyMiUyQyUyMGhmX3N1YnNldCUzRHN1YnNldCklMjBmb3IlMjBzdWJzZXQlMjBpbiUyMFNBTVBMRV9TVUJTRVRTJTVE",highlighted:`SAMPLE_SUBSETS = [<span class="hljs-string">"subset1"</span>, <span class="hljs-string">"subset2"</span>, <span class="hljs-string">"subset3"</span>] <span class="hljs-comment"># List of all the subsets to use for this eval</span> | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">CustomSubsetTask</span>(<span class="hljs-title class_ inherited__">LightevalTaskConfig</span>): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params"> | |
| self, | |
| name, | |
| hf_subset, | |
| </span>): | |
| <span class="hljs-built_in">super</span>().__init__( | |
| name=name, | |
| hf_subset=hf_subset, | |
| prompt_function=prompt_fn, <span class="hljs-comment"># Must be defined in the file or imported</span> | |
| hf_repo=<span class="hljs-string">"your_dataset_name"</span>, | |
| metrics=[custom_metric], <span class="hljs-comment"># Select your metric in Metrics or use your custom_metric</span> | |
| hf_avail_splits=[<span class="hljs-string">"train"</span>, <span class="hljs-string">"test"</span>], | |
| evaluation_splits=[<span class="hljs-string">"test"</span>], | |
| few_shots_split=<span class="hljs-string">"train"</span>, | |
| few_shots_select=<span class="hljs-string">"random_sampling_from_train"</span>, | |
| suite=[<span class="hljs-string">"lighteval"</span>], | |
| generation_size=<span class="hljs-number">256</span>, | |
| stop_sequence=[<span class="hljs-string">"\\n"</span>, <span class="hljs-string">"Question:"</span>], | |
| ) | |
| SUBSET_TASKS = [CustomSubsetTask(name=<span class="hljs-string">f"task:<span class="hljs-subst">{subset}</span>"</span>, hf_subset=subset) <span class="hljs-keyword">for</span> subset <span class="hljs-keyword">in</span> SAMPLE_SUBSETS]`,wrap:!1}}),W=new U({props:{title:"Step 5: Add Tasks to the Table",local:"step-5-add-tasks-to-the-table",headingTag:"h3"}}),q=new J({props:{code:"JTIzJTIwU1RPUkUlMjBZT1VSJTIwRVZBTFMlMEElMEElMjMlMjBUYXNrcyUyMHdpdGglMjBzdWJzZXRzJTNBJTBBVEFTS1NfVEFCTEUlMjAlM0QlMjBTVUJTRVRfVEFTS1MlMEElMEElMjMlMjBUYXNrcyUyMHdpdGhvdXQlMjBzdWJzZXRzJTNBJTBBJTIzJTIwVEFTS1NfVEFCTEUlMjAlM0QlMjAlNUJ0YXNrJTVE",highlighted:`<span class="hljs-comment"># STORE YOUR EVALS</span> | |
| <span class="hljs-comment"># Tasks with subsets:</span> | |
| TASKS_TABLE = SUBSET_TASKS | |
| <span class="hljs-comment"># Tasks without subsets:</span> | |
| <span class="hljs-comment"># TASKS_TABLE = [task]</span>`,wrap:!1}}),z=new U({props:{title:"Step 6: Creating a requirement file",local:"step-6-creating-a-requirement-file",headingTag:"h3"}}),F=new U({props:{title:"Running Your Custom Task",local:"running-your-custom-task",headingTag:"h2"}}),H=new J({props:{code:"bGlnaHRldmFsJTIwYWNjZWxlcmF0ZSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMm1vZGVsX25hbWUlM0RIdWdnaW5nRmFjZUg0JTJGemVwaHlyLTdiLWJldGElMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJsaWdodGV2YWwlN0MlN0J0YXNrJTdEJTdDJTdCZmV3c2hvdHMlN0QlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWN1c3RvbS10YXNrcyUyMCU3QnBhdGhfdG9feW91cl9jdXN0b21fdGFza19maWxlJTdE",highlighted:`lighteval accelerate \\ | |
| <span class="hljs-string">"model_name=HuggingFaceH4/zephyr-7b-beta"</span> \\ | |
| <span class="hljs-string">"lighteval|{task}|{fewshots}"</span> \\ | |
| --custom-tasks {path_to_your_custom_task_file}`,wrap:!1}}),L=new U({props:{title:"Example Usage",local:"example-usage",headingTag:"h3"}}),D=new J({props:{code:"JTIzJTIwUnVuJTIwYSUyMGN1c3RvbSUyMHRhc2slMjB3aXRoJTIwemVyby1zaG90JTIwZXZhbHVhdGlvbiUwQWxpZ2h0ZXZhbCUyMGFjY2VsZXJhdGUlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJtb2RlbF9uYW1lJTNEb3BlbmFpLWNvbW11bml0eSUyRmdwdDIlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJsaWdodGV2YWwlN0NteW90aGVydGFzayU3QzAlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWN1c3RvbS10YXNrcyUyMGNvbW11bml0eV90YXNrcyUyRm15X2N1c3RvbV90YXNrLnB5JTBBJTBBJTIzJTIwUnVuJTIwYSUyMGN1c3RvbSUyMHRhc2slMjB3aXRoJTIwZmV3LXNob3QlMjBldmFsdWF0aW9uJTBBbGlnaHRldmFsJTIwYWNjZWxlcmF0ZSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMm1vZGVsX25hbWUlM0RvcGVuYWktY29tbXVuaXR5JTJGZ3B0MiUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMmxpZ2h0ZXZhbCU3Q215b3RoZXJ0YXNrJTdDMyUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tY3VzdG9tLXRhc2tzJTIwY29tbXVuaXR5X3Rhc2tzJTJGbXlfY3VzdG9tX3Rhc2sucHk=",highlighted:`<span class="hljs-comment"># Run a custom task with zero-shot evaluation</span> | |
| lighteval accelerate \\ | |
| <span class="hljs-string">"model_name=openai-community/gpt2"</span> \\ | |
| <span class="hljs-string">"lighteval|myothertask|0"</span> \\ | |
| --custom-tasks community_tasks/my_custom_task.py | |
| <span class="hljs-comment"># Run a custom task with few-shot evaluation</span> | |
| lighteval accelerate \\ | |
| <span class="hljs-string">"model_name=openai-community/gpt2"</span> \\ | |
| <span class="hljs-string">"lighteval|myothertask|3"</span> \\ | |
| --custom-tasks community_tasks/my_custom_task.py`,wrap:!1}}),P=new al({props:{source:"https://github.com/huggingface/lighteval/blob/main/docs/source/adding-a-custom-task.mdx"}}),{c(){j=c("meta"),tt=a(),K=c("p"),lt=a(),n(h.$$.fragment),st=a(),n(w.$$.fragment),et=a(),f=c("p"),f.textContent=vt,at=a(),n(d.$$.fragment),Mt=a(),u=c("blockquote"),u.innerHTML=Rt,nt=a(),n(I.$$.fragment),it=a(),b=c("p"),b.innerHTML=Wt,pt=a(),n(C.$$.fragment),yt=a(),g=c("p"),g.textContent=Vt,mt=a(),n($.$$.fragment),ot=a(),n(Z.$$.fragment),Ut=a(),A=c("p"),A.innerHTML=qt,ct=a(),n(k.$$.fragment),rt=a(),n(B.$$.fragment),Tt=a(),n(G.$$.fragment),jt=a(),n(S.$$.fragment),ut=a(),n(N.$$.fragment),Jt=a(),X=c("p"),X.innerHTML=zt,ht=a(),n(_.$$.fragment),wt=a(),n(E.$$.fragment),ft=a(),n(Q.$$.fragment),dt=a(),v=c("p"),v.innerHTML=Yt,It=a(),n(R.$$.fragment),bt=a(),n(W.$$.fragment),Ct=a(),V=c("p"),V.innerHTML=Ft,gt=a(),n(q.$$.fragment),$t=a(),n(z.$$.fragment),Zt=a(),Y=c("p"),Y.innerHTML=xt,At=a(),n(F.$$.fragment),kt=a(),x=c("p"),x.textContent=Ht,Bt=a(),n(H.$$.fragment),Gt=a(),n(L.$$.fragment),St=a(),n(D.$$.fragment),Nt=a(),n(P.$$.fragment),Xt=a(),O=c("p"),this.h()},l(t){const l=ll("svelte-u9bgzb",document.head);j=r(l,"META",{name:!0,content:!0}),l.forEach(s),tt=M(t),K=r(t,"P",{}),Lt(K).forEach(s),lt=M(t),i(h.$$.fragment,t),st=M(t),i(w.$$.fragment,t),et=M(t),f=r(t,"P",{"data-svelte-h":!0}),T(f)!=="svelte-r5gjmm"&&(f.textContent=vt),at=M(t),i(d.$$.fragment,t),Mt=M(t),u=r(t,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),T(u)!=="svelte-1gpemye"&&(u.innerHTML=Rt),nt=M(t),i(I.$$.fragment,t),it=M(t),b=r(t,"P",{"data-svelte-h":!0}),T(b)!=="svelte-t1j2vg"&&(b.innerHTML=Wt),pt=M(t),i(C.$$.fragment,t),yt=M(t),g=r(t,"P",{"data-svelte-h":!0}),T(g)!=="svelte-dnqoqp"&&(g.textContent=Vt),mt=M(t),i($.$$.fragment,t),ot=M(t),i(Z.$$.fragment,t),Ut=M(t),A=r(t,"P",{"data-svelte-h":!0}),T(A)!=="svelte-58kl57"&&(A.innerHTML=qt),ct=M(t),i(k.$$.fragment,t),rt=M(t),i(B.$$.fragment,t),Tt=M(t),i(G.$$.fragment,t),jt=M(t),i(S.$$.fragment,t),ut=M(t),i(N.$$.fragment,t),Jt=M(t),X=r(t,"P",{"data-svelte-h":!0}),T(X)!=="svelte-m0xnbh"&&(X.innerHTML=zt),ht=M(t),i(_.$$.fragment,t),wt=M(t),i(E.$$.fragment,t),ft=M(t),i(Q.$$.fragment,t),dt=M(t),v=r(t,"P",{"data-svelte-h":!0}),T(v)!=="svelte-2g4h75"&&(v.innerHTML=Yt),It=M(t),i(R.$$.fragment,t),bt=M(t),i(W.$$.fragment,t),Ct=M(t),V=r(t,"P",{"data-svelte-h":!0}),T(V)!=="svelte-7umcxy"&&(V.innerHTML=Ft),gt=M(t),i(q.$$.fragment,t),$t=M(t),i(z.$$.fragment,t),Zt=M(t),Y=r(t,"P",{"data-svelte-h":!0}),T(Y)!=="svelte-c0uxql"&&(Y.innerHTML=xt),At=M(t),i(F.$$.fragment,t),kt=M(t),x=r(t,"P",{"data-svelte-h":!0}),T(x)!=="svelte-1yzen0k"&&(x.textContent=Ht),Bt=M(t),i(H.$$.fragment,t),Gt=M(t),i(L.$$.fragment,t),St=M(t),i(D.$$.fragment,t),Nt=M(t),i(P.$$.fragment,t),Xt=M(t),O=r(t,"P",{}),Lt(O).forEach(s),this.h()},h(){Et(j,"name","hf:doc:metadata"),Et(j,"content",nl),Et(u,"class","warning")},m(t,l){sl(document.head,j),e(t,tt,l),e(t,K,l),e(t,lt,l),p(h,t,l),e(t,st,l),p(w,t,l),e(t,et,l),e(t,f,l),e(t,at,l),p(d,t,l),e(t,Mt,l),e(t,u,l),e(t,nt,l),p(I,t,l),e(t,it,l),e(t,b,l),e(t,pt,l),p(C,t,l),e(t,yt,l),e(t,g,l),e(t,mt,l),p($,t,l),e(t,ot,l),p(Z,t,l),e(t,Ut,l),e(t,A,l),e(t,ct,l),p(k,t,l),e(t,rt,l),p(B,t,l),e(t,Tt,l),p(G,t,l),e(t,jt,l),p(S,t,l),e(t,ut,l),p(N,t,l),e(t,Jt,l),e(t,X,l),e(t,ht,l),p(_,t,l),e(t,wt,l),p(E,t,l),e(t,ft,l),p(Q,t,l),e(t,dt,l),e(t,v,l),e(t,It,l),p(R,t,l),e(t,bt,l),p(W,t,l),e(t,Ct,l),e(t,V,l),e(t,gt,l),p(q,t,l),e(t,$t,l),p(z,t,l),e(t,Zt,l),e(t,Y,l),e(t,At,l),p(F,t,l),e(t,kt,l),e(t,x,l),e(t,Bt,l),p(H,t,l),e(t,Gt,l),p(L,t,l),e(t,St,l),p(D,t,l),e(t,Nt,l),p(P,t,l),e(t,Xt,l),e(t,O,l),_t=!0},p:Pt,i(t){_t||(y(h.$$.fragment,t),y(w.$$.fragment,t),y(d.$$.fragment,t),y(I.$$.fragment,t),y(C.$$.fragment,t),y($.$$.fragment,t),y(Z.$$.fragment,t),y(k.$$.fragment,t),y(B.$$.fragment,t),y(G.$$.fragment,t),y(S.$$.fragment,t),y(N.$$.fragment,t),y(_.$$.fragment,t),y(E.$$.fragment,t),y(Q.$$.fragment,t),y(R.$$.fragment,t),y(W.$$.fragment,t),y(q.$$.fragment,t),y(z.$$.fragment,t),y(F.$$.fragment,t),y(H.$$.fragment,t),y(L.$$.fragment,t),y(D.$$.fragment,t),y(P.$$.fragment,t),_t=!0)},o(t){m(h.$$.fragment,t),m(w.$$.fragment,t),m(d.$$.fragment,t),m(I.$$.fragment,t),m(C.$$.fragment,t),m($.$$.fragment,t),m(Z.$$.fragment,t),m(k.$$.fragment,t),m(B.$$.fragment,t),m(G.$$.fragment,t),m(S.$$.fragment,t),m(N.$$.fragment,t),m(_.$$.fragment,t),m(E.$$.fragment,t),m(Q.$$.fragment,t),m(R.$$.fragment,t),m(W.$$.fragment,t),m(q.$$.fragment,t),m(z.$$.fragment,t),m(F.$$.fragment,t),m(H.$$.fragment,t),m(L.$$.fragment,t),m(D.$$.fragment,t),m(P.$$.fragment,t),_t=!1},d(t){t&&(s(tt),s(K),s(lt),s(st),s(et),s(f),s(at),s(Mt),s(u),s(nt),s(it),s(b),s(pt),s(yt),s(g),s(mt),s(ot),s(Ut),s(A),s(ct),s(rt),s(Tt),s(jt),s(ut),s(Jt),s(X),s(ht),s(wt),s(ft),s(dt),s(v),s(It),s(bt),s(Ct),s(V),s(gt),s($t),s(Zt),s(Y),s(At),s(kt),s(x),s(Bt),s(Gt),s(St),s(Nt),s(Xt),s(O)),s(j),o(h,t),o(w,t),o(d,t),o(I,t),o(C,t),o($,t),o(Z,t),o(k,t),o(B,t),o(G,t),o(S,t),o(N,t),o(_,t),o(E,t),o(Q,t),o(R,t),o(W,t),o(q,t),o(z,t),o(F,t),o(H,t),o(L,t),o(D,t),o(P,t)}}}const nl='{"title":"Adding a Custom Task","local":"adding-a-custom-task","sections":[{"title":"Step-by-Step Creation of a Task","local":"step-by-step-creation-of-a-task","sections":[{"title":"Step 1: Create the Task File","local":"step-1-create-the-task-file","sections":[],"depth":3},{"title":"Step 2: Define the Prompt Function","local":"step-2-define-the-prompt-function","sections":[],"depth":3},{"title":"Step 3: Choose or Create Metrics","local":"step-3-choose-or-create-metrics","sections":[{"title":"Using Existing Metrics","local":"using-existing-metrics","sections":[],"depth":4},{"title":"Creating Custom Metrics","local":"creating-custom-metrics","sections":[],"depth":4}],"depth":3},{"title":"Step 4: Define Your Task","local":"step-4-define-your-task","sections":[{"title":"Simple Task (No Subsets)","local":"simple-task-no-subsets","sections":[],"depth":4},{"title":"Task with Multiple Subsets","local":"task-with-multiple-subsets","sections":[],"depth":4}],"depth":3},{"title":"Step 5: Add Tasks to the Table","local":"step-5-add-tasks-to-the-table","sections":[],"depth":3},{"title":"Step 6: Creating a requirement file","local":"step-6-creating-a-requirement-file","sections":[],"depth":3}],"depth":2},{"title":"Running Your Custom Task","local":"running-your-custom-task","sections":[{"title":"Example Usage","local":"example-usage","sections":[],"depth":3}],"depth":2}],"depth":1}';function il(Qt){return Kt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class cl extends Ot{constructor(j){super(),tl(this,j,il,Ml,Dt,{})}}export{cl as component}; | |
Xet Storage Details
- Size:
- 24.2 kB
- Xet hash:
- 2b5c9c3bce4876eb3f22b7768e544392bc820cac86d792bf9be6968d7f57b093
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.