Buckets:

hf-doc-build/doc-dev / lighteval /pr_744 /en /contributing-to-multilingual-evaluations.html
rtrm's picture
download
raw
18.3 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Contributing to multilingual evaluations&quot;,&quot;local&quot;:&quot;contributing-to-multilingual-evaluations&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Contributing a small translation&quot;,&quot;local&quot;:&quot;contributing-a-small-translation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Contributing a new multilingual task&quot;,&quot;local&quot;:&quot;contributing-a-new-multilingual-task&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/lighteval/pr_744/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/entry/start.a615223c.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/chunks/scheduler.7da89386.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/chunks/singletons.8c5be8fd.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/chunks/paths.86a4d49d.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/entry/app.b0033d27.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/chunks/index.20910acc.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/nodes/0.c40ee5c5.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/nodes/5.3c2046e3.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/chunks/Tip.53e22153.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/chunks/CodeBlock.143bd81e.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/chunks/index.c9cd5e8b.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Contributing to multilingual evaluations&quot;,&quot;local&quot;:&quot;contributing-to-multilingual-evaluations&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Contributing a small translation&quot;,&quot;local&quot;:&quot;contributing-a-small-translation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Contributing a new multilingual task&quot;,&quot;local&quot;:&quot;contributing-a-new-multilingual-task&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="contributing-to-multilingual-evaluations" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#contributing-to-multilingual-evaluations"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Contributing to multilingual evaluations</span></h1> <h2 class="relative group"><a id="contributing-a-small-translation" class="header-link block pr-1.5 text-lg no-hover:hidden 
with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#contributing-a-small-translation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Contributing a small translation</span></h2> <p data-svelte-h="svelte-119kr4u">We define 19 <code>literals</code>, basic keywords or punctuation signs used when creating evaluation prompts in an automatic manner, such as <code>yes</code>, <code>no</code>, <code>because</code>, etc.</p> <p data-svelte-h="svelte-v5rsqz">We welcome translations in your language!</p> <p data-svelte-h="svelte-vrcxc3">To contribute, you’ll need to</p> <ol data-svelte-h="svelte-t7gcm7"><li>Open the <a href="https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/templates/utils/translation_literals.py" rel="nofollow">translation_literals</a> file</li> <li>Edit the file to add or expand the literal for your language of interest.</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" 
xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --> Language.ENGLISH: TranslationLiterals(
language=Language.ENGLISH,
question_word=<span class="hljs-string">&quot;question&quot;</span>, <span class="hljs-comment"># Usage: &quot;Question: How are you?&quot;</span>
answer=<span class="hljs-string">&quot;answer&quot;</span>, <span class="hljs-comment"># Usage: &quot;Answer: I am fine&quot;</span>
confirmation_word=<span class="hljs-string">&quot;right&quot;</span>, <span class="hljs-comment"># Usage: &quot;He is smart, right?&quot;</span>
yes=<span class="hljs-string">&quot;yes&quot;</span>, <span class="hljs-comment"># Usage: &quot;Yes, he is&quot;</span>
no=<span class="hljs-string">&quot;no&quot;</span>, <span class="hljs-comment"># Usage: &quot;No, he is not&quot;</span>
also=<span class="hljs-string">&quot;also&quot;</span>, <span class="hljs-comment"># Usage: &quot;Also, she is smart.&quot;</span>
cause_word=<span class="hljs-string">&quot;because&quot;</span>, <span class="hljs-comment"># Usage: &quot;She is smart, because she is tall&quot;</span>
effect_word=<span class="hljs-string">&quot;therefore&quot;</span>, <span class="hljs-comment"># Usage: &quot;He is tall therefore he is smart&quot;</span>
or_word=<span class="hljs-string">&quot;or&quot;</span>, <span class="hljs-comment"># Usage: &quot;He is tall or small&quot;</span>
true=<span class="hljs-string">&quot;true&quot;</span>, <span class="hljs-comment"># Usage: &quot;He is smart, true, false or neither?&quot;</span>
false=<span class="hljs-string">&quot;false&quot;</span>, <span class="hljs-comment"># Usage: &quot;He is smart, true, false or neither?&quot;</span>
neither=<span class="hljs-string">&quot;neither&quot;</span>, <span class="hljs-comment"># Usage: &quot;He is smart, true, false or neither?&quot;</span>
<span class="hljs-comment"># Punctuation and spacing: only adjust if your language uses something different than in English</span>
full_stop=<span class="hljs-string">&quot;.&quot;</span>,
comma=<span class="hljs-string">&quot;,&quot;</span>,
question_mark=<span class="hljs-string">&quot;?&quot;</span>,
exclamation_mark=<span class="hljs-string">&quot;!&quot;</span>,
word_space=<span class="hljs-string">&quot; &quot;</span>,
sentence_space=<span class="hljs-string">&quot; &quot;</span>,
colon=<span class="hljs-string">&quot;:&quot;</span>,
<span class="hljs-comment"># The first characters of your alphabet used in enumerations, if different from English</span>
indices=[<span class="hljs-string">&quot;A&quot;</span>, <span class="hljs-string">&quot;B&quot;</span>, <span class="hljs-string">&quot;C&quot;</span>, ...]
)<!-- HTML_TAG_END --></pre></div> <ol start="3" data-svelte-h="svelte-17788hx"><li>Open a PR with your modifications! And voilà!</li></ol> <h2 class="relative group"><a id="contributing-a-new-multilingual-task" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#contributing-a-new-multilingual-task"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Contributing a new multilingual task</span></h2> <p data-svelte-h="svelte-ghahgh">You should first read our guide on <a href="adding-a-custom-task">adding a custom task</a>, to better understand the different parameters we use.</p> <p data-svelte-h="svelte-1h6gb73">Then, you should take a look at the current <a href="https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/multilingual/tasks.py" rel="nofollow">multilingual tasks</a> file, to understand how they are defined. For multilingual evaluations, the <code>prompt_function</code> should be implemented by a language-adapted template. 
The template will take care of correct formatting, correct and consistent usage of language-adjusted prompt anchors (e.g. Question/Answer) and punctuation.</p> <p data-svelte-h="svelte-1732p7f">Browse the <a href="https://github.com/huggingface/lighteval/tree/main/src/lighteval/tasks/templates" rel="nofollow">list of all templates</a> to see which are best suited to your own task.</p> <p data-svelte-h="svelte-1vz5lzw">Then, when ready, to define your own task, you should:</p> <ol data-svelte-h="svelte-1rvvno4"><li>create a Python file as indicated in the above guide</li> <li>import the relevant templates for your task type (XNLI, Copa, Multiple choice, Question Answering, etc.)</li> <li>define one or a list of tasks for each relevant language and evaluation formulation (for multichoice) using our parametrizable <a href="/docs/lighteval/pr_744/en/package_reference/tasks#lighteval.tasks.lighteval_task.LightevalTaskConfig">LightevalTaskConfig</a> class</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform 
-translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->your_tasks = [
LightevalTaskConfig(
<span class="hljs-comment"># Name of your evaluation</span>
name=<span class="hljs-string">f&quot;evalname_<span class="hljs-subst">{language.value}</span>_<span class="hljs-subst">{formulation.name.lower()}</span>&quot;</span>,
<span class="hljs-comment"># The evaluation is community contributed</span>
suite=[<span class="hljs-string">&quot;community&quot;</span>],
<span class="hljs-comment"># This will automatically get the correct metrics for your chosen formulation</span>
metric=get_metrics_for_formulation(
formulation,
[
loglikelihood_acc_metric(normalization=<span class="hljs-literal">None</span>),
loglikelihood_acc_metric(normalization=LogProbTokenNorm()),
loglikelihood_acc_metric(normalization=LogProbCharNorm()),
],
),
<span class="hljs-comment"># In this function, you choose which template to follow and for which language and formulation</span>
prompt_function=get_template_prompt_function(
language=language,
<span class="hljs-comment"># then use the adapter to define the mapping between the</span>
<span class="hljs-comment"># keys of the template (left), and the keys of your dataset</span>
<span class="hljs-comment"># (right)</span>
<span class="hljs-comment"># To know which template keys are required and available,</span>
<span class="hljs-comment"># consult the appropriate adapter type and doc-string.</span>
adapter=<span class="hljs-keyword">lambda</span> line: {
<span class="hljs-string">&quot;key&quot;</span>: line[<span class="hljs-string">&quot;relevant_key&quot;</span>],
...
},
formulation=formulation,
),
<span class="hljs-comment"># You can also add specific filters to remove irrelevant samples</span>
hf_filter=<span class="hljs-keyword">lambda</span> line: line[<span class="hljs-string">&quot;label&quot;</span>] <span class="hljs-keyword">in</span> &lt;condition&gt;,
<span class="hljs-comment"># You then select your huggingface dataset as well as</span>
<span class="hljs-comment"># the splits available for evaluation</span>
hf_repo=&lt;dataset&gt;,
hf_subset=&lt;subset&gt;,
evaluation_splits=[<span class="hljs-string">&quot;train&quot;</span>],
hf_avail_splits=[<span class="hljs-string">&quot;train&quot;</span>],
)
<span class="hljs-keyword">for</span> language <span class="hljs-keyword">in</span> [
Language.YOUR_LANGUAGE, ...
]
<span class="hljs-keyword">for</span> formulation <span class="hljs-keyword">in</span> [MCFFormulation(), CFFormulation(), HybridFormulation()]
]<!-- HTML_TAG_END --></pre></div> <ol start="4" data-svelte-h="svelte-cwcyc7"><li>then, you can go back to the guide to test if your task is correctly implemented!</li></ol> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1x1h02k">All <a href="/docs/lighteval/pr_744/en/package_reference/tasks#lighteval.tasks.lighteval_task.LightevalTaskConfig">LightevalTaskConfig</a> parameters are strongly typed, including the inputs to the template function. Make sure to take advantage of your IDE’s functionality to make it easier to correctly fill these parameters.</p></div> <p data-svelte-h="svelte-24o9mw">Once everything is good, open a PR, and we’ll be happy to review it!</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/lighteval/blob/main/docs/source/contributing-to-multilingual-evaluations.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
	// Page-level bootstrap: publish the path configuration as a global, then
	// load the two client entry modules and hand control to `kit.start`.
	// NOTE(review): the global's name suggests it is read by the SvelteKit
	// client entry imported below — confirm against the bundle.
	__sveltekit_1kl62qe = {
		assets: "/docs/lighteval/pr_744/en",
		base: "/docs/lighteval/pr_744/en",
		env: {}
	};

	// `document.currentScript` is only set while this script runs
	// synchronously, so the parent element is captured before any promise
	// callback executes.
	const mountTarget = document.currentScript.parentElement;

	// Options forwarded unchanged to `kit.start`.
	const startOptions = {
		node_ids: [0, 5],
		data: [null, null],
		form: null,
		error: null
	};

	// Kick off both dynamic imports so they load in parallel.
	const kitModule = import("/docs/lighteval/pr_744/en/_app/immutable/entry/start.a615223c.js");
	const appModule = import("/docs/lighteval/pr_744/en/_app/immutable/entry/app.b0033d27.js");

	Promise.all([kitModule, appModule]).then((loaded) => {
		const kit = loaded[0];
		const app = loaded[1];
		kit.start(app, mountTarget, startOptions);
	});
}
</script>

Xet Storage Details

Size:
18.3 kB
·
Xet hash:
57c94d1368a0b5ebe0cf3038245288aaaa182d1e5ca25e466d59cbc181d9d20b

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.