Buckets:
| import{s as de,n as Ce,o as Ie}from"../chunks/scheduler.5f3e6389.js";import{S as be,i as ge,e as u,s as a,c as M,h as $e,a as U,d as l,b as n,f as fe,g as i,j as r,k as se,l as ke,m as s,n as m,t as p,o,p as y}from"../chunks/index.373ab25c.js";import{C as Ze,H as c,E as Ae}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.c2e0d06d.js";import{C as h}from"../chunks/CodeBlock.cd35d790.js";function Be(ae){let T,ot,mt,yt,w,ut,f,Ut,d,ne="Lighteval provides a flexible framework for creating custom evaluation tasks. This guide explains how to create and integrate new tasks into the evaluation system.",ct,C,rt,I,Me="Before creating a custom task, consider which category it belongs to:",Tt,b,Jt,g,ie=`Core evaluations are evaluations that only require standard logic in their | |
| metrics and processing, and that we will add to our test suite to ensure non-regression through time. They already see high usage in the community.`,jt,$,ht,k,me=`Extended evaluations are evaluations that require custom logic in their | |
| metrics (complex normalization, an LLM as a judge, etc.), that we added to | |
| facilitate the life of users. They already see high usage in the community.`,wt,Z,ft,A,pe="Community evaluations are submissions by the community of new tasks.",dt,B,oe="A popular community evaluation can move to become an extended or core evaluation over time.",Ct,J,ye='<p>You can find examples of custom tasks in the <a href="https://github.com/huggingface/lighteval/tree/main/community_tasks" rel="nofollow">community_tasks</a> directory.</p>',It,v,bt,j,ue=`<p>To contribute your custom task to the Lighteval repository, you would first need | |
| to install the required dev dependencies by running <code>pip install -e .[dev]</code> | |
| and then run <code>pre-commit install</code> to install the pre-commit hooks.</p>`,gt,G,$t,S,Ue="First, create a Python file under the <code>community_tasks</code> directory.",kt,_,Zt,X,ce=`You need to define a prompt function that will convert a line from your | |
| dataset to a document to be used for evaluation.`,At,N,Bt,E,vt,V,re='You can either use an existing metric (defined in <code>lighteval.metrics.metrics.Metrics</code>) or <a href="adding-a-new-metric">create a custom one</a>.',Gt,Q,St,R,_t,W,Xt,q,Nt,x,Et,z,Te='You can define a task with or without subsets using <a href="/docs/lighteval/pr_1027/en/package_reference/tasks#lighteval.tasks.lighteval_task.LightevalTaskConfig">LightevalTaskConfig</a>.',Vt,F,Qt,Y,Rt,H,Wt,L,Je=`If you want to create a task with multiple subsets, add them to the | |
| <code>SAMPLE_SUBSETS</code> list and create a task for each subset.`,qt,D,xt,P,zt,K,je="Then you need to add your task to the <code>TASKS_TABLE</code> list.",Ft,O,Yt,tt,Ht,et,he=`If your task has requirements, you need to create a <code>requirement.txt</code> file with | |
| only the required dependencies so that anyone can run your task.`,Lt,lt,Dt,st,we="Once your file is created, you can run the evaluation with the following command:",Pt,at,Kt,nt,Ot,Mt,te,it,ee,pt,le;return w=new Ze({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),f=new c({props:{title:"Adding a Custom Task",local:"adding-a-custom-task",headingTag:"h1"}}),C=new c({props:{title:"Task Categories",local:"task-categories",headingTag:"h2"}}),b=new c({props:{title:"Core Evaluations",local:"core-evaluations",headingTag:"h3"}}),$=new c({props:{title:"Extended Evaluations",local:"extended-evaluations",headingTag:"h3"}}),Z=new c({props:{title:"Community Evaluations",local:"community-evaluations",headingTag:"h3"}}),v=new c({props:{title:"Step-by-Step Creation of a Custom Task",local:"step-by-step-creation-of-a-custom-task",headingTag:"h2"}}),G=new c({props:{title:"Step 1: Create the Task File",local:"step-1-create-the-task-file",headingTag:"h3"}}),_=new c({props:{title:"Step 2: Define the Prompt Function",local:"step-2-define-the-prompt-function",headingTag:"h3"}}),N=new h({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC50YXNrcy5yZXF1ZXN0cyUyMGltcG9ydCUyMERvYyUwQSUwQSUyMyUyMERlZmluZSUyMGFzJTIwbWFueSUyMGFzJTIweW91JTIwbmVlZCUyMGZvciUyMHlvdXIlMjBkaWZmZXJlbnQlMjB0YXNrcyUwQWRlZiUyMHByb21wdF9mbihsaW5lJTNBJTIwZGljdCUyQyUyMHRhc2tfbmFtZSUzQSUyMHN0ciklM0ElMEElMjAlMjAlMjAlMjAlMjIlMjIlMjJEZWZpbmVzJTIwaG93JTIwdG8lMjBnbyUyMGZyb20lMjBhJTIwZGF0YXNldCUyMGxpbmUlMjB0byUyMGElMjBkb2MlMjBvYmplY3QuJTBBJTIwJTIwJTIwJTIwRm9sbG93JTIwZXhhbXBsZXMlMjBpbiUyMHNyYyUyRmxpZ2h0ZXZhbCUyRnRhc2tzJTJGZGVmYXVsdF9wcm9tcHRzLnB5JTJDJTIwb3IlMjBnZXQlMjBtb3JlJTIwaW5mbyUwQSUyMCUyMCUyMCUyMGFib3V0JTIwd2hhdCUyMHRoaXMlMjBmdW5jdGlvbiUyMHNob3VsZCUyMGRvJTIwaW4lMjB0aGUlMjBSRUFETUUuJTBBJTIwJTIwJTIwJTIwJTIyJTIyJTIyJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwRG9jKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRhc2tfbmFtZSUzRHRhc2tfbmFtZSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHF1ZXJ5JTNEbGluZSU1QiUyMnF1ZXN0aW9uJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwY2hvaWNlcyUzRCU1QmYlMjIlMjAlN0JjJTdEJTIyJTIwZm9yJTIwYyUyMGluJTIwbGluZSU1QiUyMmNob2ljZXMlMjIlNUQlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBnb2xkX2luZGV4JTNEbGluZSU1QiUyMmdvbGQlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAp",highlighted:`<span class="hljs-keyword">from</span> lighteval.tasks.requests <span class="hljs-keyword">import</span> Doc | |
| <span class="hljs-comment"># Define as many as you need for your different tasks</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">prompt_fn</span>(<span class="hljs-params">line: <span class="hljs-built_in">dict</span>, task_name: <span class="hljs-built_in">str</span></span>): | |
| <span class="hljs-string">"""Defines how to go from a dataset line to a doc object. | |
| Follow examples in src/lighteval/tasks/default_prompts.py, or get more info | |
| about what this function should do in the README. | |
| """</span> | |
| <span class="hljs-keyword">return</span> Doc( | |
| task_name=task_name, | |
| query=line[<span class="hljs-string">"question"</span>], | |
| choices=[<span class="hljs-string">f" <span class="hljs-subst">{c}</span>"</span> <span class="hljs-keyword">for</span> c <span class="hljs-keyword">in</span> line[<span class="hljs-string">"choices"</span>]], | |
| gold_index=line[<span class="hljs-string">"gold"</span>], | |
| )`,wrap:!1}}),E=new c({props:{title:"Step 3: Choose or Create Metrics",local:"step-3-choose-or-create-metrics",headingTag:"h3"}}),Q=new c({props:{title:"Using Existing Metrics",local:"using-existing-metrics",headingTag:"h4"}}),R=new h({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC5tZXRyaWNzJTIwaW1wb3J0JTIwTWV0cmljcyUwQSUwQSUyMyUyMFVzZSUyMGFuJTIwZXhpc3RpbmclMjBtZXRyaWMlMEFtZXRyaWMlMjAlM0QlMjBNZXRyaWNzLkFDQ1VSQUNZ",highlighted:`<span class="hljs-keyword">from</span> lighteval.metrics <span class="hljs-keyword">import</span> Metrics | |
| <span class="hljs-comment"># Use an existing metric</span> | |
| metric = Metrics.ACCURACY`,wrap:!1}}),W=new c({props:{title:"Creating Custom Metrics",local:"creating-custom-metrics",headingTag:"h4"}}),q=new h({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC5tZXRyaWNzLnV0aWxzLm1ldHJpY191dGlscyUyMGltcG9ydCUyMFNhbXBsZUxldmVsTWV0cmljJTBBaW1wb3J0JTIwbnVtcHklMjBhcyUyMG5wJTBBJTBBY3VzdG9tX21ldHJpYyUyMCUzRCUyMFNhbXBsZUxldmVsTWV0cmljKCUwQSUyMCUyMCUyMCUyMG1ldHJpY19uYW1lJTNEJTIybXlfY3VzdG9tX21ldHJpY19uYW1lJTIyJTJDJTBBJTIwJTIwJTIwJTIwaGlnaGVyX2lzX2JldHRlciUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBjYXRlZ29yeSUzRCUyMmFjY3VyYWN5JTIyJTJDJTBBJTIwJTIwJTIwJTIwc2FtcGxlX2xldmVsX2ZuJTNEbGFtYmRhJTIweCUzQSUyMHglMkMlMjAlMjAlMjMlMjBIb3clMjB0byUyMGNvbXB1dGUlMjBzY29yZSUyMGZvciUyMG9uZSUyMHNhbXBsZSUwQSUyMCUyMCUyMCUyMGNvcnB1c19sZXZlbF9mbiUzRG5wLm1lYW4lMkMlMjAlMjAlMjMlMjBIb3clMjB0byUyMGFnZ3JlZ2F0ZSUyMHRoZSUyMHNhbXBsZSUyMG1ldHJpY3MlMEEp",highlighted:`<span class="hljs-keyword">from</span> lighteval.metrics.utils.metric_utils <span class="hljs-keyword">import</span> SampleLevelMetric | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| custom_metric = SampleLevelMetric( | |
| metric_name=<span class="hljs-string">"my_custom_metric_name"</span>, | |
| higher_is_better=<span class="hljs-literal">True</span>, | |
| category=<span class="hljs-string">"accuracy"</span>, | |
| sample_level_fn=<span class="hljs-keyword">lambda</span> x: x, <span class="hljs-comment"># How to compute score for one sample</span> | |
| corpus_level_fn=np.mean, <span class="hljs-comment"># How to aggregate the sample metrics</span> | |
| )`,wrap:!1}}),x=new c({props:{title:"Step 4: Define Your Task",local:"step-4-define-your-task",headingTag:"h3"}}),F=new c({props:{title:"Simple Task (No Subsets)",local:"simple-task-no-subsets",headingTag:"h4"}}),Y=new h({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC50YXNrcy5saWdodGV2YWxfdGFzayUyMGltcG9ydCUyMExpZ2h0ZXZhbFRhc2tDb25maWclMEElMEElMjMlMjBUaGlzJTIwaXMlMjBob3clMjB5b3UlMjBjcmVhdGUlMjBhJTIwc2ltcGxlJTIwdGFzayUyMChsaWtlJTIwSGVsbGFTd2FnKSUyMHdoaWNoJTIwaGFzJTIwb25lJTIwc2luZ2xlJTIwc3Vic2V0JTBBJTIzJTIwYXR0YWNoZWQlMjB0byUyMGl0JTJDJTIwYW5kJTIwb25lJTIwZXZhbHVhdGlvbiUyMHBvc3NpYmxlLiUwQXRhc2slMjAlM0QlMjBMaWdodGV2YWxUYXNrQ29uZmlnKCUwQSUyMCUyMCUyMCUyMG5hbWUlM0QlMjJteW90aGVydGFzayUyMiUyQyUwQSUyMCUyMCUyMCUyMHByb21wdF9mdW5jdGlvbiUzRHByb21wdF9mbiUyQyUyMCUyMCUyMyUyME11c3QlMjBiZSUyMGRlZmluZWQlMjBpbiUyMHRoZSUyMGZpbGUlMjBvciUyMGltcG9ydGVkJTBBJTIwJTIwJTIwJTIwc3VpdGUlM0QlNUIlMjJjb21tdW5pdHklMjIlNUQlMkMlMEElMjAlMjAlMjAlMjBoZl9yZXBvJTNEJTIyeW91cl9kYXRhc2V0X3JlcG9fb25faGYlMjIlMkMlMEElMjAlMjAlMjAlMjBoZl9zdWJzZXQlM0QlMjJkZWZhdWx0JTIyJTJDJTBBJTIwJTIwJTIwJTIwaGZfYXZhaWxfc3BsaXRzJTNEJTVCJTIydHJhaW4lMjIlMkMlMjAlMjJ0ZXN0JTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwZXZhbHVhdGlvbl9zcGxpdHMlM0QlNUIlMjJ0ZXN0JTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwZmV3X3Nob3RzX3NwbGl0JTNEJTIydHJhaW4lMjIlMkMlMEElMjAlMjAlMjAlMjBmZXdfc2hvdHNfc2VsZWN0JTNEJTIycmFuZG9tX3NhbXBsaW5nX2Zyb21fdHJhaW4lMjIlMkMlMEElMjAlMjAlMjAlMjBtZXRyaWNzJTNEJTVCbWV0cmljJTVEJTJDJTIwJTIwJTIzJTIwU2VsZWN0JTIweW91ciUyMG1ldHJpYyUyMGluJTIwTWV0cmljcyUwQSUyMCUyMCUyMCUyMGdlbmVyYXRpb25fc2l6ZSUzRDI1NiUyQyUwQSUyMCUyMCUyMCUyMHN0b3Bfc2VxdWVuY2UlM0QlNUIlMjIlNUNuJTIyJTJDJTIwJTIyUXVlc3Rpb24lM0ElMjIlNUQlMkMlMEEp",highlighted:`<span class="hljs-keyword">from</span> lighteval.tasks.lighteval_task <span class="hljs-keyword">import</span> LightevalTaskConfig | |
| <span class="hljs-comment"># This is how you create a simple task (like HellaSwag) which has one single subset</span> | |
| <span class="hljs-comment"># attached to it, and one evaluation possible.</span> | |
| task = LightevalTaskConfig( | |
| name=<span class="hljs-string">"myothertask"</span>, | |
| prompt_function=prompt_fn, <span class="hljs-comment"># Must be defined in the file or imported</span> | |
| suite=[<span class="hljs-string">"community"</span>], | |
| hf_repo=<span class="hljs-string">"your_dataset_repo_on_hf"</span>, | |
| hf_subset=<span class="hljs-string">"default"</span>, | |
| hf_avail_splits=[<span class="hljs-string">"train"</span>, <span class="hljs-string">"test"</span>], | |
| evaluation_splits=[<span class="hljs-string">"test"</span>], | |
| few_shots_split=<span class="hljs-string">"train"</span>, | |
| few_shots_select=<span class="hljs-string">"random_sampling_from_train"</span>, | |
| metrics=[metric], <span class="hljs-comment"># Select your metric in Metrics</span> | |
| generation_size=<span class="hljs-number">256</span>, | |
| stop_sequence=[<span class="hljs-string">"\\n"</span>, <span class="hljs-string">"Question:"</span>], | |
| )`,wrap:!1}}),H=new c({props:{title:"Task with Multiple Subsets",local:"task-with-multiple-subsets",headingTag:"h4"}}),D=new h({props:{code:"U0FNUExFX1NVQlNFVFMlMjAlM0QlMjAlNUIlMjJzdWJzZXQxJTIyJTJDJTIwJTIyc3Vic2V0MiUyMiUyQyUyMCUyMnN1YnNldDMlMjIlNUQlMjAlMjAlMjMlMjBMaXN0JTIwb2YlMjBhbGwlMjB0aGUlMjBzdWJzZXRzJTIwdG8lMjB1c2UlMjBmb3IlMjB0aGlzJTIwZXZhbCUwQSUwQWNsYXNzJTIwQ3VzdG9tU3Vic2V0VGFzayhMaWdodGV2YWxUYXNrQ29uZmlnKSUzQSUwQSUyMCUyMCUyMCUyMGRlZiUyMF9faW5pdF9fKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHNlbGYlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBuYW1lJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaGZfc3Vic2V0JTJDJTBBJTIwJTIwJTIwJTIwKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN1cGVyKCkuX19pbml0X18oJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbmFtZSUzRG5hbWUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBoZl9zdWJzZXQlM0RoZl9zdWJzZXQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwcm9tcHRfZnVuY3Rpb24lM0Rwcm9tcHRfZm4lMkMlMjAlMjAlMjMlMjBNdXN0JTIwYmUlMjBkZWZpbmVkJTIwaW4lMjB0aGUlMjBmaWxlJTIwb3IlMjBpbXBvcnRlZCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGhmX3JlcG8lM0QlMjJ5b3VyX2RhdGFzZXRfbmFtZSUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG1ldHJpY3MlM0QlNUJjdXN0b21fbWV0cmljJTVEJTJDJTIwJTIwJTIzJTIwU2VsZWN0JTIweW91ciUyMG1ldHJpYyUyMGluJTIwTWV0cmljcyUyMG9yJTIwdXNlJTIweW91ciUyMGN1c3RvbV9tZXRyaWMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBoZl9hdmFpbF9zcGxpdHMlM0QlNUIlMjJ0cmFpbiUyMiUyQyUyMCUyMnRlc3QlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBldmFsdWF0aW9uX3NwbGl0cyUzRCU1QiUyMnRlc3QlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmZXdfc2hvdHNfc3BsaXQlM0QlMjJ0cmFpbiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZld19zaG90c19zZWxlY3QlM0QlMjJyYW5kb21fc2FtcGxpbmdfZnJvbV90cmFpbiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN1aXRlJTNEJTVCJTIyY29tbXVuaXR5JTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZ2VuZXJhdGlvbl9zaXplJTNEMjU2JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc3RvcF9zZXF1ZW5jZSUzRCU1QiUyMiU1Q24lMjIlMkMlMjAlMjJRdWVzdGlvbiUzQSUyMiU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCklMEElMEFTVUJTRVRfVEFTS1MlMjAlM0QlMjAlNUJDdXN0b21TdWJzZXRUYXNrKG5hbWUlM0RmJTIybXl0YXNrJTNBJTdCc3Vic2V0JTdEJTIyJTJDJTIwaGZfc3Vic2V0JTNEc3Vic2V0KSUyMGZvciUyMHN1YnNldCUyMGluJTIwU0FNUExFX1NVQlNFVFMlNUQ=",highlighted:`SAMPLE_SUBSETS = [<span class="hljs-string">"subset1"</span>, <span class="hljs-string">"subset2"</span>, <span class="hljs-string">"subset3"</span>] <span class="hljs-comment"># List of all the subsets to use for this eval</span> | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">CustomSubsetTask</span>(<span class="hljs-title class_ inherited__">LightevalTaskConfig</span>): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params"> | |
| self, | |
| name, | |
| hf_subset, | |
| </span>): | |
| <span class="hljs-built_in">super</span>().__init__( | |
| name=name, | |
| hf_subset=hf_subset, | |
| prompt_function=prompt_fn, <span class="hljs-comment"># Must be defined in the file or imported</span> | |
| hf_repo=<span class="hljs-string">"your_dataset_name"</span>, | |
| metrics=[custom_metric], <span class="hljs-comment"># Select your metric in Metrics or use your custom_metric</span> | |
| hf_avail_splits=[<span class="hljs-string">"train"</span>, <span class="hljs-string">"test"</span>], | |
| evaluation_splits=[<span class="hljs-string">"test"</span>], | |
| few_shots_split=<span class="hljs-string">"train"</span>, | |
| few_shots_select=<span class="hljs-string">"random_sampling_from_train"</span>, | |
| suite=[<span class="hljs-string">"community"</span>], | |
| generation_size=<span class="hljs-number">256</span>, | |
| stop_sequence=[<span class="hljs-string">"\\n"</span>, <span class="hljs-string">"Question:"</span>], | |
| ) | |
| SUBSET_TASKS = [CustomSubsetTask(name=<span class="hljs-string">f"mytask:<span class="hljs-subst">{subset}</span>"</span>, hf_subset=subset) <span class="hljs-keyword">for</span> subset <span class="hljs-keyword">in</span> SAMPLE_SUBSETS]`,wrap:!1}}),P=new c({props:{title:"Step 5: Add Tasks to the Table",local:"step-5-add-tasks-to-the-table",headingTag:"h3"}}),O=new h({props:{code:"JTIzJTIwU1RPUkUlMjBZT1VSJTIwRVZBTFMlMEElMEElMjMlMjBUYXNrcyUyMHdpdGglMjBzdWJzZXRzJTNBJTBBVEFTS1NfVEFCTEUlMjAlM0QlMjBTVUJTRVRfVEFTS1MlMEElMEElMjMlMjBUYXNrcyUyMHdpdGhvdXQlMjBzdWJzZXRzJTNBJTBBJTIzJTIwVEFTS1NfVEFCTEUlMjAlM0QlMjAlNUJ0YXNrJTVE",highlighted:`<span class="hljs-comment"># STORE YOUR EVALS</span> | |
| <span class="hljs-comment"># Tasks with subsets:</span> | |
| TASKS_TABLE = SUBSET_TASKS | |
| <span class="hljs-comment"># Tasks without subsets:</span> | |
| <span class="hljs-comment"># TASKS_TABLE = [task]</span>`,wrap:!1}}),tt=new c({props:{title:"Step 6: Creating a requirement file",local:"step-6-creating-a-requirement-file",headingTag:"h3"}}),lt=new c({props:{title:"Running Your Custom Task",local:"running-your-custom-task",headingTag:"h2"}}),at=new h({props:{code:"bGlnaHRldmFsJTIwYWNjZWxlcmF0ZSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMm1vZGVsX25hbWUlM0RIdWdnaW5nRmFjZUg0JTJGemVwaHlyLTdiLWJldGElMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJjb21tdW5pdHklN0MlN0JjdXN0b21fdGFzayU3RCU3QyU3QmZld3Nob3RzJTdEJTIyJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1jdXN0b20tdGFza3MlMjAlN0JwYXRoX3RvX3lvdXJfY3VzdG9tX3Rhc2tfZmlsZSU3RA==",highlighted:`lighteval accelerate \\ | |
| <span class="hljs-string">"model_name=HuggingFaceH4/zephyr-7b-beta"</span> \\ | |
| <span class="hljs-string">"community|{custom_task}|{fewshots}"</span> \\ | |
| --custom-tasks {path_to_your_custom_task_file}`,wrap:!1}}),nt=new c({props:{title:"Example Usage",local:"example-usage",headingTag:"h3"}}),Mt=new h({props:{code:"JTIzJTIwUnVuJTIwYSUyMGN1c3RvbSUyMHRhc2slMjB3aXRoJTIwemVyby1zaG90JTIwZXZhbHVhdGlvbiUwQWxpZ2h0ZXZhbCUyMGFjY2VsZXJhdGUlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJtb2RlbF9uYW1lJTNEb3BlbmFpLWNvbW11bml0eSUyRmdwdDIlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJjb21tdW5pdHklN0NteW90aGVydGFzayU3QzAlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWN1c3RvbS10YXNrcyUyMGNvbW11bml0eV90YXNrcyUyRm15X2N1c3RvbV90YXNrLnB5JTBBJTBBJTIzJTIwUnVuJTIwYSUyMGN1c3RvbSUyMHRhc2slMjB3aXRoJTIwZmV3LXNob3QlMjBldmFsdWF0aW9uJTBBbGlnaHRldmFsJTIwYWNjZWxlcmF0ZSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMm1vZGVsX25hbWUlM0RvcGVuYWktY29tbXVuaXR5JTJGZ3B0MiUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMmNvbW11bml0eSU3Q215b3RoZXJ0YXNrJTdDMyUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tY3VzdG9tLXRhc2tzJTIwY29tbXVuaXR5X3Rhc2tzJTJGbXlfY3VzdG9tX3Rhc2sucHk=",highlighted:`<span class="hljs-comment"># Run a custom task with zero-shot evaluation</span> | |
| lighteval accelerate \\ | |
| <span class="hljs-string">"model_name=openai-community/gpt2"</span> \\ | |
| <span class="hljs-string">"community|myothertask|0"</span> \\ | |
| --custom-tasks community_tasks/my_custom_task.py | |
| <span class="hljs-comment"># Run a custom task with few-shot evaluation</span> | |
| lighteval accelerate \\ | |
| <span class="hljs-string">"model_name=openai-community/gpt2"</span> \\ | |
| <span class="hljs-string">"community|myothertask|3"</span> \\ | |
| --custom-tasks community_tasks/my_custom_task.py`,wrap:!1}}),it=new Ae({props:{source:"https://github.com/huggingface/lighteval/blob/main/docs/source/adding-a-custom-task.mdx"}}),{c(){T=u("meta"),ot=a(),mt=u("p"),yt=a(),M(w.$$.fragment),ut=a(),M(f.$$.fragment),Ut=a(),d=u("p"),d.textContent=ne,ct=a(),M(C.$$.fragment),rt=a(),I=u("p"),I.textContent=Me,Tt=a(),M(b.$$.fragment),Jt=a(),g=u("p"),g.textContent=ie,jt=a(),M($.$$.fragment),ht=a(),k=u("p"),k.textContent=me,wt=a(),M(Z.$$.fragment),ft=a(),A=u("p"),A.textContent=pe,dt=a(),B=u("p"),B.textContent=oe,Ct=a(),J=u("blockquote"),J.innerHTML=ye,It=a(),M(v.$$.fragment),bt=a(),j=u("blockquote"),j.innerHTML=ue,gt=a(),M(G.$$.fragment),$t=a(),S=u("p"),S.innerHTML=Ue,kt=a(),M(_.$$.fragment),Zt=a(),X=u("p"),X.textContent=ce,At=a(),M(N.$$.fragment),Bt=a(),M(E.$$.fragment),vt=a(),V=u("p"),V.innerHTML=re,Gt=a(),M(Q.$$.fragment),St=a(),M(R.$$.fragment),_t=a(),M(W.$$.fragment),Xt=a(),M(q.$$.fragment),Nt=a(),M(x.$$.fragment),Et=a(),z=u("p"),z.innerHTML=Te,Vt=a(),M(F.$$.fragment),Qt=a(),M(Y.$$.fragment),Rt=a(),M(H.$$.fragment),Wt=a(),L=u("p"),L.innerHTML=Je,qt=a(),M(D.$$.fragment),xt=a(),M(P.$$.fragment),zt=a(),K=u("p"),K.innerHTML=je,Ft=a(),M(O.$$.fragment),Yt=a(),M(tt.$$.fragment),Ht=a(),et=u("p"),et.innerHTML=he,Lt=a(),M(lt.$$.fragment),Dt=a(),st=u("p"),st.textContent=we,Pt=a(),M(at.$$.fragment),Kt=a(),M(nt.$$.fragment),Ot=a(),M(Mt.$$.fragment),te=a(),M(it.$$.fragment),ee=a(),pt=u("p"),this.h()},l(t){const e=$e("svelte-u9bgzb",document.head);T=U(e,"META",{name:!0,content:!0}),e.forEach(l),ot=n(t),mt=U(t,"P",{}),fe(mt).forEach(l),yt=n(t),i(w.$$.fragment,t),ut=n(t),i(f.$$.fragment,t),Ut=n(t),d=U(t,"P",{"data-svelte-h":!0}),r(d)!=="svelte-r5gjmm"&&(d.textContent=ne),ct=n(t),i(C.$$.fragment,t),rt=n(t),I=U(t,"P",{"data-svelte-h":!0}),r(I)!=="svelte-1m3yy4f"&&(I.textContent=Me),Tt=n(t),i(b.$$.fragment,t),Jt=n(t),g=U(t,"P",{"data-svelte-h":!0}),r(g)!=="svelte-5bncqd"&&(g.textContent=ie),jt=n(t),i($.$$.fragment,t),ht=n(t),k=U(t,"P",{"data-svelte-h":!0}),r(k)!=="svelte-shw9at"&&(k.textContent=me),wt=n(t),i(Z.$$.fragment,t),ft=n(t),A=U(t,"P",{"data-svelte-h":!0}),r(A)!=="svelte-17x9tm3"&&(A.textContent=pe),dt=n(t),B=U(t,"P",{"data-svelte-h":!0}),r(B)!=="svelte-c3h151"&&(B.textContent=oe),Ct=n(t),J=U(t,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),r(J)!=="svelte-1y7m0b1"&&(J.innerHTML=ye),It=n(t),i(v.$$.fragment,t),bt=n(t),j=U(t,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),r(j)!=="svelte-elnchd"&&(j.innerHTML=ue),gt=n(t),i(G.$$.fragment,t),$t=n(t),S=U(t,"P",{"data-svelte-h":!0}),r(S)!=="svelte-esiyg6"&&(S.innerHTML=Ue),kt=n(t),i(_.$$.fragment,t),Zt=n(t),X=U(t,"P",{"data-svelte-h":!0}),r(X)!=="svelte-dnqoqp"&&(X.textContent=ce),At=n(t),i(N.$$.fragment,t),Bt=n(t),i(E.$$.fragment,t),vt=n(t),V=U(t,"P",{"data-svelte-h":!0}),r(V)!=="svelte-58kl57"&&(V.innerHTML=re),Gt=n(t),i(Q.$$.fragment,t),St=n(t),i(R.$$.fragment,t),_t=n(t),i(W.$$.fragment,t),Xt=n(t),i(q.$$.fragment,t),Nt=n(t),i(x.$$.fragment,t),Et=n(t),z=U(t,"P",{"data-svelte-h":!0}),r(z)!=="svelte-6j97sz"&&(z.innerHTML=Te),Vt=n(t),i(F.$$.fragment,t),Qt=n(t),i(Y.$$.fragment,t),Rt=n(t),i(H.$$.fragment,t),Wt=n(t),L=U(t,"P",{"data-svelte-h":!0}),r(L)!=="svelte-2g4h75"&&(L.innerHTML=Je),qt=n(t),i(D.$$.fragment,t),xt=n(t),i(P.$$.fragment,t),zt=n(t),K=U(t,"P",{"data-svelte-h":!0}),r(K)!=="svelte-7umcxy"&&(K.innerHTML=je),Ft=n(t),i(O.$$.fragment,t),Yt=n(t),i(tt.$$.fragment,t),Ht=n(t),et=U(t,"P",{"data-svelte-h":!0}),r(et)!=="svelte-c0uxql"&&(et.innerHTML=he),Lt=n(t),i(lt.$$.fragment,t),Dt=n(t),st=U(t,"P",{"data-svelte-h":!0}),r(st)!=="svelte-1yzen0k"&&(st.textContent=we),Pt=n(t),i(at.$$.fragment,t),Kt=n(t),i(nt.$$.fragment,t),Ot=n(t),i(Mt.$$.fragment,t),te=n(t),i(it.$$.fragment,t),ee=n(t),pt=U(t,"P",{}),fe(pt).forEach(l),this.h()},h(){se(T,"name","hf:doc:metadata"),se(T,"content",ve),se(J,"class","tip"),se(j,"class","warning")},m(t,e){ke(document.head,T),s(t,ot,e),s(t,mt,e),s(t,yt,e),m(w,t,e),s(t,ut,e),m(f,t,e),s(t,Ut,e),s(t,d,e),s(t,ct,e),m(C,t,e),s(t,rt,e),s(t,I,e),s(t,Tt,e),m(b,t,e),s(t,Jt,e),s(t,g,e),s(t,jt,e),m($,t,e),s(t,ht,e),s(t,k,e),s(t,wt,e),m(Z,t,e),s(t,ft,e),s(t,A,e),s(t,dt,e),s(t,B,e),s(t,Ct,e),s(t,J,e),s(t,It,e),m(v,t,e),s(t,bt,e),s(t,j,e),s(t,gt,e),m(G,t,e),s(t,$t,e),s(t,S,e),s(t,kt,e),m(_,t,e),s(t,Zt,e),s(t,X,e),s(t,At,e),m(N,t,e),s(t,Bt,e),m(E,t,e),s(t,vt,e),s(t,V,e),s(t,Gt,e),m(Q,t,e),s(t,St,e),m(R,t,e),s(t,_t,e),m(W,t,e),s(t,Xt,e),m(q,t,e),s(t,Nt,e),m(x,t,e),s(t,Et,e),s(t,z,e),s(t,Vt,e),m(F,t,e),s(t,Qt,e),m(Y,t,e),s(t,Rt,e),m(H,t,e),s(t,Wt,e),s(t,L,e),s(t,qt,e),m(D,t,e),s(t,xt,e),m(P,t,e),s(t,zt,e),s(t,K,e),s(t,Ft,e),m(O,t,e),s(t,Yt,e),m(tt,t,e),s(t,Ht,e),s(t,et,e),s(t,Lt,e),m(lt,t,e),s(t,Dt,e),s(t,st,e),s(t,Pt,e),m(at,t,e),s(t,Kt,e),m(nt,t,e),s(t,Ot,e),m(Mt,t,e),s(t,te,e),m(it,t,e),s(t,ee,e),s(t,pt,e),le=!0},p:Ce,i(t){le||(p(w.$$.fragment,t),p(f.$$.fragment,t),p(C.$$.fragment,t),p(b.$$.fragment,t),p($.$$.fragment,t),p(Z.$$.fragment,t),p(v.$$.fragment,t),p(G.$$.fragment,t),p(_.$$.fragment,t),p(N.$$.fragment,t),p(E.$$.fragment,t),p(Q.$$.fragment,t),p(R.$$.fragment,t),p(W.$$.fragment,t),p(q.$$.fragment,t),p(x.$$.fragment,t),p(F.$$.fragment,t),p(Y.$$.fragment,t),p(H.$$.fragment,t),p(D.$$.fragment,t),p(P.$$.fragment,t),p(O.$$.fragment,t),p(tt.$$.fragment,t),p(lt.$$.fragment,t),p(at.$$.fragment,t),p(nt.$$.fragment,t),p(Mt.$$.fragment,t),p(it.$$.fragment,t),le=!0)},o(t){o(w.$$.fragment,t),o(f.$$.fragment,t),o(C.$$.fragment,t),o(b.$$.fragment,t),o($.$$.fragment,t),o(Z.$$.fragment,t),o(v.$$.fragment,t),o(G.$$.fragment,t),o(_.$$.fragment,t),o(N.$$.fragment,t),o(E.$$.fragment,t),o(Q.$$.fragment,t),o(R.$$.fragment,t),o(W.$$.fragment,t),o(q.$$.fragment,t),o(x.$$.fragment,t),o(F.$$.fragment,t),o(Y.$$.fragment,t),o(H.$$.fragment,t),o(D.$$.fragment,t),o(P.$$.fragment,t),o(O.$$.fragment,t),o(tt.$$.fragment,t),o(lt.$$.fragment,t),o(at.$$.fragment,t),o(nt.$$.fragment,t),o(Mt.$$.fragment,t),o(it.$$.fragment,t),le=!1},d(t){t&&(l(ot),l(mt),l(yt),l(ut),l(Ut),l(d),l(ct),l(rt),l(I),l(Tt),l(Jt),l(g),l(jt),l(ht),l(k),l(wt),l(ft),l(A),l(dt),l(B),l(Ct),l(J),l(It),l(bt),l(j),l(gt),l($t),l(S),l(kt),l(Zt),l(X),l(At),l(Bt),l(vt),l(V),l(Gt),l(St),l(_t),l(Xt),l(Nt),l(Et),l(z),l(Vt),l(Qt),l(Rt),l(Wt),l(L),l(qt),l(xt),l(zt),l(K),l(Ft),l(Yt),l(Ht),l(et),l(Lt),l(Dt),l(st),l(Pt),l(Kt),l(Ot),l(te),l(ee),l(pt)),l(T),y(w,t),y(f,t),y(C,t),y(b,t),y($,t),y(Z,t),y(v,t),y(G,t),y(_,t),y(N,t),y(E,t),y(Q,t),y(R,t),y(W,t),y(q,t),y(x,t),y(F,t),y(Y,t),y(H,t),y(D,t),y(P,t),y(O,t),y(tt,t),y(lt,t),y(at,t),y(nt,t),y(Mt,t),y(it,t)}}}const ve='{"title":"Adding a Custom Task","local":"adding-a-custom-task","sections":[{"title":"Task Categories","local":"task-categories","sections":[{"title":"Core Evaluations","local":"core-evaluations","sections":[],"depth":3},{"title":"Extended Evaluations","local":"extended-evaluations","sections":[],"depth":3},{"title":"Community Evaluations","local":"community-evaluations","sections":[],"depth":3}],"depth":2},{"title":"Step-by-Step Creation of a Custom Task","local":"step-by-step-creation-of-a-custom-task","sections":[{"title":"Step 1: Create the Task File","local":"step-1-create-the-task-file","sections":[],"depth":3},{"title":"Step 2: Define the Prompt Function","local":"step-2-define-the-prompt-function","sections":[],"depth":3},{"title":"Step 3: Choose or Create Metrics","local":"step-3-choose-or-create-metrics","sections":[{"title":"Using Existing Metrics","local":"using-existing-metrics","sections":[],"depth":4},{"title":"Creating Custom Metrics","local":"creating-custom-metrics","sections":[],"depth":4}],"depth":3},{"title":"Step 4: Define Your Task","local":"step-4-define-your-task","sections":[{"title":"Simple Task (No Subsets)","local":"simple-task-no-subsets","sections":[],"depth":4},{"title":"Task with Multiple Subsets","local":"task-with-multiple-subsets","sections":[],"depth":4}],"depth":3},{"title":"Step 5: Add Tasks to the Table","local":"step-5-add-tasks-to-the-table","sections":[],"depth":3},{"title":"Step 6: Creating a requirement file","local":"step-6-creating-a-requirement-file","sections":[],"depth":3}],"depth":2},{"title":"Running Your Custom Task","local":"running-your-custom-task","sections":[{"title":"Example Usage","local":"example-usage","sections":[],"depth":3}],"depth":2}],"depth":1}';function Ge(ae){return Ie(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Ee extends be{constructor(T){super(),ge(this,T,Ge,Be,de,{})}}export{Ee as component}; | |
Xet Storage Details
- Size:
- 27.1 kB
- Xet hash:
- 936cfbaf0e903f3f3300aae21e1245726fb8dde04ec31866fec4b25471b156b9
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.