Buckets:

hf-doc-build/doc-dev / lighteval /pr_744 /en /available-tasks.html
rtrm's picture
download
raw
56.6 kB
<meta charset="utf-8">
<!-- Page metadata: JSON-encoded title, anchor slug ("local") and section outline of this document. -->
<meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Available Tasks&quot;,&quot;local&quot;:&quot;available-tasks&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;List of tasks&quot;,&quot;local&quot;:&quot;list-of-tasks&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<!-- Page stylesheet. CSS must be linked with rel="stylesheet"; rel="modulepreload" is reserved for
     JavaScript modules, so a .css file linked that way is never applied. -->
<link rel="stylesheet" href="/docs/lighteval/pr_744/en/_app/immutable/assets/0.e3b0c442.css">
<!-- Preload hints for the JavaScript modules under _app/immutable (entry points, shared chunks, route nodes). -->
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/entry/start.a615223c.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/chunks/scheduler.7da89386.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/chunks/singletons.8c5be8fd.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/chunks/paths.86a4d49d.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/entry/app.b0033d27.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/chunks/index.20910acc.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/nodes/0.c40ee5c5.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/nodes/4.25921f1d.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/chunks/CodeBlock.143bd81e.js">
<link rel="modulepreload" href="/docs/lighteval/pr_744/en/_app/immutable/chunks/index.c9cd5e8b.js">
<!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Available Tasks&quot;,&quot;local&quot;:&quot;available-tasks&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;List of tasks&quot;,&quot;local&quot;:&quot;list-of-tasks&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END -->
<p></p>

<!-- Page title. The permalink anchor contains only a decorative (aria-hidden) icon, so aria-label
     supplies its accessible name. -->
<h1 class="relative group">
  <a class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" id="available-tasks" href="#available-tasks" aria-label="Permalink to Available Tasks"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
  <span>Available Tasks</span>
</h1>
<p data-svelte-h="svelte-t078aa">You can get a list of all the available tasks by running:</p>

<!-- Code excerpt with an icon-only copy button. Without aria-label the button's accessible name
     would come from the nested tooltip text ("Copied"). -->
<div class="code-block relative ">
  <div class="absolute top-2.5 right-4">
    <button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button" aria-label="Copy code excerpt to clipboard">
      <svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
      <!-- Tooltip (rendered with opacity-0 here); the inner empty div draws its arrow. -->
      <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0">
        <div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
        Copied
      </div>
    </button>
  </div>
  <pre class=""><!-- HTML_TAG_START -->lighteval tasks list<!-- HTML_TAG_END --></pre>
</div>
<p data-svelte-h="svelte-14p02ve">You can also inspect a specific task by running:</p>

<!-- Second code excerpt; same widget structure and the same explicit button label. -->
<div class="code-block relative ">
  <div class="absolute top-2.5 right-4">
    <button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button" aria-label="Copy code excerpt to clipboard">
      <svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
      <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0">
        <div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
        Copied
      </div>
    </button>
  </div>
  <pre class=""><!-- HTML_TAG_START -->lighteval tasks inspect &lt;task_name&gt;<!-- HTML_TAG_END --></pre>
</div>

<!-- Section heading; the permalink anchor is icon-only, so it is labelled explicitly. -->
<h2 class="relative group">
  <a class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" id="list-of-tasks" href="#list-of-tasks" aria-label="Permalink to List of tasks"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
  <span>List of tasks</span>
</h2>

<!-- Task identifiers: one outer list item per suite, each entry written as suite|task. -->
<ul data-svelte-h="svelte-1a8m62s"><li><p>bigbench:</p> <ul><li>bigbench|abstract_narrative_understanding</li> <li>bigbench|anachronisms</li> <li>bigbench|analogical_similarity</li> <li>bigbench|analytic_entailment</li> <li>bigbench|arithmetic_bb</li> <li>bigbench|ascii_word_recognition</li> <li>bigbench|authorship_verification</li> <li>bigbench|auto_categorization</li> <li>bigbench|auto_debugging</li> <li>bigbench|bbq_lite_json</li> <li>bigbench|bridging_anaphora_resolution_barqa</li> <li>bigbench|causal_judgment</li> <li>bigbench|cause_and_effect</li> <li>bigbench|checkmate_in_one</li> <li>bigbench|chess_state_tracking</li> 
<li>bigbench|chinese_remainder_theorem</li> <li>bigbench|cifar10_classification</li> <li>bigbench|code_line_description</li> <li>bigbench|codenames</li> <li>bigbench|color</li> <li>bigbench|common_morpheme</li> <li>bigbench|conceptual_combinations</li> <li>bigbench|conlang_translation</li> <li>bigbench|contextual_parametric_knowledge_conflicts</li> <li>bigbench|coqa_bb</li> <li>bigbench|crash_blossom</li> <li>bigbench|crass_ai</li> <li>bigbench|cryobiology_spanish</li> <li>bigbench|cryptonite</li> <li>bigbench|cs_algorithms</li> <li>bigbench|dark_humor_detection</li> <li>bigbench|date_understanding</li> <li>bigbench|disambiguation_qa</li> <li>bigbench|discourse_marker_prediction</li> <li>bigbench|disfl_qa</li> <li>bigbench|dyck_languages</li> <li>bigbench|elementary_math_qa</li> <li>bigbench|emoji_movie</li> <li>bigbench|emojis_emotion_prediction</li> <li>bigbench|empirical_judgments</li> <li>bigbench|english_proverbs</li> <li>bigbench|english_russian_proverbs</li> <li>bigbench|entailed_polarity</li> <li>bigbench|entailed_polarity_hindi</li> <li>bigbench|epistemic_reasoning</li> <li>bigbench|evaluating_information_essentiality</li> <li>bigbench|fact_checker</li> <li>bigbench|fantasy_reasoning</li> <li>bigbench|few_shot_nlg</li> <li>bigbench|figure_of_speech_detection</li> <li>bigbench|formal_fallacies_syllogisms_negation</li> <li>bigbench|gem</li> <li>bigbench|gender_inclusive_sentences_german</li> <li>bigbench|general_knowledge</li> <li>bigbench|geometric_shapes</li> <li>bigbench|goal_step_wikihow</li> <li>bigbench|gre_reading_comprehension</li> <li>bigbench|hhh_alignment</li> <li>bigbench|hindi_question_answering</li> <li>bigbench|hindu_knowledge</li> <li>bigbench|hinglish_toxicity</li> <li>bigbench|human_organs_senses</li> <li>bigbench|hyperbaton</li> <li>bigbench|identify_math_theorems</li> <li>bigbench|identify_odd_metaphor</li> <li>bigbench|implicatures</li> <li>bigbench|implicit_relations</li> <li>bigbench|intent_recognition</li> 
<li>bigbench|international_phonetic_alphabet_nli</li> <li>bigbench|international_phonetic_alphabet_transliterate</li> <li>bigbench|intersect_geometry</li> <li>bigbench|irony_identification</li> <li>bigbench|kanji_ascii</li> <li>bigbench|kannada</li> <li>bigbench|key_value_maps</li> <li>bigbench|known_unknowns</li> <li>bigbench|language_games</li> <li>bigbench|language_identification</li> <li>bigbench|linguistic_mappings</li> <li>bigbench|linguistics_puzzles</li> <li>bigbench|logic_grid_puzzle</li> <li>bigbench|logical_args</li> <li>bigbench|logical_deduction</li> <li>bigbench|logical_fallacy_detection</li> <li>bigbench|logical_sequence</li> <li>bigbench|mathematical_induction</li> <li>bigbench|matrixshapes</li> <li>bigbench|metaphor_boolean</li> <li>bigbench|metaphor_understanding</li> <li>bigbench|minute_mysteries_qa</li> <li>bigbench|misconceptions</li> <li>bigbench|misconceptions_russian</li> <li>bigbench|mnist_ascii</li> <li>bigbench|modified_arithmetic</li> <li>bigbench|moral_permissibility</li> <li>bigbench|movie_dialog_same_or_different</li> <li>bigbench|movie_recommendation</li> <li>bigbench|mult_data_wrangling</li> <li>bigbench|multiemo</li> <li>bigbench|natural_instructions</li> <li>bigbench|navigate</li> <li>bigbench|nonsense_words_grammar</li> <li>bigbench|novel_concepts</li> <li>bigbench|object_counting</li> <li>bigbench|odd_one_out</li> <li>bigbench|operators</li> <li>bigbench|paragraph_segmentation</li> <li>bigbench|parsinlu_qa</li> <li>bigbench|parsinlu_reading_comprehension</li> <li>bigbench|penguins_in_a_table</li> <li>bigbench|periodic_elements</li> <li>bigbench|persian_idioms</li> <li>bigbench|phrase_relatedness</li> <li>bigbench|physical_intuition</li> <li>bigbench|physics</li> <li>bigbench|physics_questions</li> <li>bigbench|play_dialog_same_or_different</li> <li>bigbench|polish_sequence_labeling</li> <li>bigbench|presuppositions_as_nli</li> <li>bigbench|qa_wikidata</li> <li>bigbench|question_selection</li> <li>bigbench|real_or_fake_text</li> 
<li>bigbench|reasoning_about_colored_objects</li> <li>bigbench|repeat_copy_logic</li> <li>bigbench|rephrase</li> <li>bigbench|rhyming</li> <li>bigbench|riddle_sense</li> <li>bigbench|ruin_names</li> <li>bigbench|salient_translation_error_detection</li> <li>bigbench|scientific_press_release</li> <li>bigbench|semantic_parsing_in_context_sparc</li> <li>bigbench|semantic_parsing_spider</li> <li>bigbench|sentence_ambiguity</li> <li>bigbench|similarities_abstraction</li> <li>bigbench|simp_turing_concept</li> <li>bigbench|simple_arithmetic_json</li> <li>bigbench|simple_arithmetic_json_multiple_choice</li> <li>bigbench|simple_arithmetic_json_subtasks</li> <li>bigbench|simple_arithmetic_multiple_targets_json</li> <li>bigbench|simple_ethical_questions</li> <li>bigbench|simple_text_editing</li> <li>bigbench|snarks</li> <li>bigbench|social_iqa</li> <li>bigbench|social_support</li> <li>bigbench|sports_understanding</li> <li>bigbench|strange_stories</li> <li>bigbench|strategyqa</li> <li>bigbench|sufficient_information</li> <li>bigbench|suicide_risk</li> <li>bigbench|swahili_english_proverbs</li> <li>bigbench|swedish_to_german_proverbs</li> <li>bigbench|symbol_interpretation</li> <li>bigbench|tellmewhy</li> <li>bigbench|temporal_sequences</li> <li>bigbench|tense</li> <li>bigbench|timedial</li> <li>bigbench|topical_chat</li> <li>bigbench|tracking_shuffled_objects</li> <li>bigbench|understanding_fables</li> <li>bigbench|undo_permutation</li> <li>bigbench|unit_conversion</li> <li>bigbench|unit_interpretation</li> <li>bigbench|unnatural_in_context_learning</li> <li>bigbench|vitaminc_fact_verification</li> <li>bigbench|what_is_the_tao</li> <li>bigbench|which_wiki_edit</li> <li>bigbench|wino_x_german</li> <li>bigbench|winowhy</li> <li>bigbench|word_sorting</li> <li>bigbench|word_unscrambling</li></ul></li> <li><p>harness:</p> <ul><li>harness|bbh:boolean_expressions</li> <li>harness|bbh:causal_judgment</li> <li>harness|bbh:date_understanding</li> <li>harness|bbh:disambiguation_qa</li> 
<li>harness|bbh:dyck_languages</li> <li>harness|bbh:formal_fallacies</li> <li>harness|bbh:geometric_shapes</li> <li>harness|bbh:hyperbaton</li> <li>harness|bbh:logical_deduction_five_objects</li> <li>harness|bbh:logical_deduction_seven_objects</li> <li>harness|bbh:logical_deduction_three_objects</li> <li>harness|bbh:movie_recommendation</li> <li>harness|bbh:multistep_arithmetic_two</li> <li>harness|bbh:navigate</li> <li>harness|bbh:object_counting</li> <li>harness|bbh:penguins_in_a_table</li> <li>harness|bbh:reasoning_about_colored_objects</li> <li>harness|bbh:ruin_names</li> <li>harness|bbh:salient_translation_error_detection</li> <li>harness|bbh:snarks</li> <li>harness|bbh:sports_understanding</li> <li>harness|bbh:temporal_sequences</li> <li>harness|bbh:tracking_shuffled_objects_five_objects</li> <li>harness|bbh:tracking_shuffled_objects_seven_objects</li> <li>harness|bbh:tracking_shuffled_objects_three_objects</li> <li>harness|bbh:web_of_lies</li> <li>harness|bbh:word_sorting</li> <li>harness|bigbench:causal_judgment</li> <li>harness|bigbench:date_understanding</li> <li>harness|bigbench:disambiguation_qa</li> <li>harness|bigbench:geometric_shapes</li> <li>harness|bigbench:logical_deduction_five_objects</li> <li>harness|bigbench:logical_deduction_seven_objects</li> <li>harness|bigbench:logical_deduction_three_objects</li> <li>harness|bigbench:movie_recommendation</li> <li>harness|bigbench:navigate</li> <li>harness|bigbench:reasoning_about_colored_objects</li> <li>harness|bigbench:ruin_names</li> <li>harness|bigbench:salient_translation_error_detection</li> <li>harness|bigbench:snarks</li> <li>harness|bigbench:sports_understanding</li> <li>harness|bigbench:temporal_sequences</li> <li>harness|bigbench:tracking_shuffled_objects_five_objects</li> <li>harness|bigbench:tracking_shuffled_objects_seven_objects</li> <li>harness|bigbench:tracking_shuffled_objects_three_objects</li> <li>harness|wikitext:103:document_level</li></ul></li> <li><p>helm:</p> 
<ul><li>helm|babi_qa</li> <li>helm|bbq</li> <li>helm|bbq:Age</li> <li>helm|bbq:Disability_status</li> <li>helm|bbq:Gender_identity</li> <li>helm|bbq:Physical_appearance</li> <li>helm|bbq:Race_ethnicity</li> <li>helm|bbq:Race_x_SES</li> <li>helm|bbq:Race_x_gender</li> <li>helm|bbq:Religion</li> <li>helm|bbq:SES</li> <li>helm|bbq:Sexual_orientation</li> <li>helm|bbq=Nationality</li> <li>helm|bigbench:auto_debugging</li> <li>helm|bigbench:bbq_lite_json:age_ambig</li> <li>helm|bigbench:bbq_lite_json:age_disambig</li> <li>helm|bigbench:bbq_lite_json:disability_status_ambig</li> <li>helm|bigbench:bbq_lite_json:disability_status_disambig</li> <li>helm|bigbench:bbq_lite_json:gender_identity_ambig</li> <li>helm|bigbench:bbq_lite_json:gender_identity_disambig</li> <li>helm|bigbench:bbq_lite_json:nationality_ambig</li> <li>helm|bigbench:bbq_lite_json:nationality_disambig</li> <li>helm|bigbench:bbq_lite_json:physical_appearance_ambig</li> <li>helm|bigbench:bbq_lite_json:physical_appearance_disambig</li> <li>helm|bigbench:bbq_lite_json:race_ethnicity_ambig</li> <li>helm|bigbench:bbq_lite_json:race_ethnicity_disambig</li> <li>helm|bigbench:bbq_lite_json:religion_ambig</li> <li>helm|bigbench:bbq_lite_json:religion_disambig</li> <li>helm|bigbench:bbq_lite_json:ses_ambig</li> <li>helm|bigbench:bbq_lite_json:ses_disambig</li> <li>helm|bigbench:bbq_lite_json:sexual_orientation_ambig</li> <li>helm|bigbench:bbq_lite_json:sexual_orientation_disambig</li> <li>helm|bigbench:code_line_description</li> <li>helm|bigbench:conceptual_combinations:contradictions</li> <li>helm|bigbench:conceptual_combinations:emergent_properties</li> <li>helm|bigbench:conceptual_combinations:fanciful_fictional_combinations</li> <li>helm|bigbench:conceptual_combinations:homonyms</li> <li>helm|bigbench:conceptual_combinations:invented_words</li> <li>helm|bigbench:conlang_translation:adna_from</li> <li>helm|bigbench:conlang_translation:adna_to</li> <li>helm|bigbench:conlang_translation:atikampe_from</li> 
<li>helm|bigbench:conlang_translation:atikampe_to</li> <li>helm|bigbench:conlang_translation:gornam_from</li> <li>helm|bigbench:conlang_translation:gornam_to</li> <li>helm|bigbench:conlang_translation:holuan_from</li> <li>helm|bigbench:conlang_translation:holuan_to</li> <li>helm|bigbench:conlang_translation:mkafala_from</li> <li>helm|bigbench:conlang_translation:mkafala_to</li> <li>helm|bigbench:conlang_translation:postpositive_english_from</li> <li>helm|bigbench:conlang_translation:postpositive_english_to</li> <li>helm|bigbench:conlang_translation:unapuri_from</li> <li>helm|bigbench:conlang_translation:unapuri_to</li> <li>helm|bigbench:conlang_translation:vaomi_from</li> <li>helm|bigbench:conlang_translation:vaomi_to</li> <li>helm|bigbench:emoji_movie</li> <li>helm|bigbench:formal_fallacies_syllogisms_negation</li> <li>helm|bigbench:hindu_knowledge</li> <li>helm|bigbench:known_unknowns</li> <li>helm|bigbench:language_identification</li> <li>helm|bigbench:linguistics_puzzles</li> <li>helm|bigbench:logic_grid_puzzle</li> <li>helm|bigbench:logical_deduction-five_objects</li> <li>helm|bigbench:logical_deduction-seven_objects</li> <li>helm|bigbench:logical_deduction-three_objects</li> <li>helm|bigbench:misconceptions_russian</li> <li>helm|bigbench:novel_concepts</li> <li>helm|bigbench:operators</li> <li>helm|bigbench:parsinlu_reading_comprehension</li> <li>helm|bigbench:play_dialog_same_or_different</li> <li>helm|bigbench:repeat_copy_logic</li> <li>helm|bigbench:strange_stories-boolean</li> <li>helm|bigbench:strange_stories-multiple_choice</li> <li>helm|bigbench:strategyqa</li> <li>helm|bigbench:symbol_interpretation-adversarial</li> <li>helm|bigbench:symbol_interpretation-emoji_agnostic</li> <li>helm|bigbench:symbol_interpretation-name_agnostic</li> <li>helm|bigbench:symbol_interpretation-plain</li> <li>helm|bigbench:symbol_interpretation-tricky</li> <li>helm|bigbench:vitaminc_fact_verification</li> <li>helm|bigbench:winowhy</li> <li>helm|blimp:adjunct_island</li> 
<li>helm|blimp:anaphor_gender_agreement</li> <li>helm|blimp:anaphor_number_agreement</li> <li>helm|blimp:animate_subject_passive</li> <li>helm|blimp:animate_subject_trans</li> <li>helm|blimp:causative</li> <li>helm|blimp:complex_NP_island</li> <li>helm|blimp:coordinate_structure_constraint_complex_left_branch</li> <li>helm|blimp:coordinate_structure_constraint_object_extraction</li> <li>helm|blimp:determiner_noun_agreement_1</li> <li>helm|blimp:determiner_noun_agreement_2</li> <li>helm|blimp:determiner_noun_agreement_irregular_1</li> <li>helm|blimp:determiner_noun_agreement_irregular_2</li> <li>helm|blimp:determiner_noun_agreement_with_adj_2</li> <li>helm|blimp:determiner_noun_agreement_with_adj_irregular_1</li> <li>helm|blimp:determiner_noun_agreement_with_adj_irregular_2</li> <li>helm|blimp:determiner_noun_agreement_with_adjective_1</li> <li>helm|blimp:distractor_agreement_relational_noun</li> <li>helm|blimp:distractor_agreement_relative_clause</li> <li>helm|blimp:drop_argument</li> <li>helm|blimp:ellipsis_n_bar_1</li> <li>helm|blimp:ellipsis_n_bar_2</li> <li>helm|blimp:existential_there_object_raising</li> <li>helm|blimp:existential_there_quantifiers_1</li> <li>helm|blimp:existential_there_quantifiers_2</li> <li>helm|blimp:existential_there_subject_raising</li> <li>helm|blimp:expletive_it_object_raising</li> <li>helm|blimp:inchoative</li> <li>helm|blimp:intransitive</li> <li>helm|blimp:irregular_past_participle_adjectives</li> <li>helm|blimp:irregular_past_participle_verbs</li> <li>helm|blimp:irregular_plural_subject_verb_agreement_1</li> <li>helm|blimp:irregular_plural_subject_verb_agreement_2</li> <li>helm|blimp:left_branch_island_echo_question</li> <li>helm|blimp:left_branch_island_simple_question</li> <li>helm|blimp:matrix_question_npi_licensor_present</li> <li>helm|blimp:npi_present_1</li> <li>helm|blimp:npi_present_2</li> <li>helm|blimp:only_npi_licensor_present</li> <li>helm|blimp:only_npi_scope</li> <li>helm|blimp:passive_1</li> 
<li>helm|blimp:passive_2</li> <li>helm|blimp:principle_A_c_command</li> <li>helm|blimp:principle_A_case_1</li> <li>helm|blimp:principle_A_case_2</li> <li>helm|blimp:principle_A_domain_1</li> <li>helm|blimp:principle_A_domain_2</li> <li>helm|blimp:principle_A_domain_3</li> <li>helm|blimp:principle_A_reconstruction</li> <li>helm|blimp:regular_plural_subject_verb_agreement_1</li> <li>helm|blimp:regular_plural_subject_verb_agreement_2</li> <li>helm|blimp:sentential_negation_npi_licensor_present</li> <li>helm|blimp:sentential_negation_npi_scope</li> <li>helm|blimp:sentential_subject_island</li> <li>helm|blimp:superlative_quantifiers_1</li> <li>helm|blimp:superlative_quantifiers_2</li> <li>helm|blimp:tough_vs_raising_1</li> <li>helm|blimp:tough_vs_raising_2</li> <li>helm|blimp:transitive</li> <li>helm|blimp:wh_island</li> <li>helm|blimp:wh_questions_object_gap</li> <li>helm|blimp:wh_questions_subject_gap</li> <li>helm|blimp:wh_questions_subject_gap_long_distance</li> <li>helm|blimp:wh_vs_that_no_gap</li> <li>helm|blimp:wh_vs_that_no_gap_long_distance</li> <li>helm|blimp:wh_vs_that_with_gap</li> <li>helm|blimp:wh_vs_that_with_gap_long_distance</li> <li>helm|bold</li> <li>helm|bold:gender</li> <li>helm|bold:political_ideology</li> <li>helm|bold:profession</li> <li>helm|bold:race</li> <li>helm|bold:religious_ideology</li> <li>helm|boolq</li> <li>helm|boolq:contrastset</li> <li>helm|civil_comments</li> <li>helm|civil_comments:LGBTQ</li> <li>helm|civil_comments:black</li> <li>helm|civil_comments:christian</li> <li>helm|civil_comments:female</li> <li>helm|civil_comments:male</li> <li>helm|civil_comments:muslim</li> <li>helm|civil_comments:other_religions</li> <li>helm|civil_comments:white</li> <li>helm|commonsenseqa</li> <li>helm|copyright:n_books_1000-extractions_per_book_1-prefix_length_125</li> <li>helm|copyright:n_books_1000-extractions_per_book_1-prefix_length_25</li> <li>helm|copyright:n_books_1000-extractions_per_book_1-prefix_length_5</li> 
<li>helm|copyright:n_books_1000-extractions_per_book_3-prefix_length_125</li> <li>helm|copyright:n_books_1000-extractions_per_book_3-prefix_length_25</li> <li>helm|copyright:n_books_1000-extractions_per_book_3-prefix_length_5</li> <li>helm|copyright:oh_the_places</li> <li>helm|copyright:pilot</li> <li>helm|copyright:popular_books-prefix_length_10</li> <li>helm|copyright:popular_books-prefix_length_125</li> <li>helm|copyright:popular_books-prefix_length_25</li> <li>helm|copyright:popular_books-prefix_length_250</li> <li>helm|copyright:popular_books-prefix_length_5</li> <li>helm|copyright:popular_books-prefix_length_50</li> <li>helm|copyright:prompt_num_line_1-min_lines_20</li> <li>helm|copyright:prompt_num_line_10-min_lines_20</li> <li>helm|copyright:prompt_num_line_5-min_lines_20</li> <li>helm|covid_dialogue</li> <li>helm|dyck_language:2</li> <li>helm|dyck_language:3</li> <li>helm|dyck_language:4</li> <li>helm|entity_data_imputation:Buy</li> <li>helm|entity_data_imputation:Restaurant</li> <li>helm|entity_matching:Abt_Buy</li> <li>helm|entity_matching:Amazon_Google</li> <li>helm|entity_matching:Beer</li> <li>helm|entity_matching:Company</li> <li>helm|entity_matching:DBLP_ACM</li> <li>helm|entity_matching:DBLP_GoogleScholar</li> <li>helm|entity_matching:Dirty_DBLP_ACM</li> <li>helm|entity_matching:Dirty_DBLP_GoogleScholar</li> <li>helm|entity_matching:Dirty_Walmart_Amazon</li> <li>helm|entity_matching:Dirty_iTunes_Amazon</li> <li>helm|entity_matching:Walmart_Amazon</li> <li>helm|entity_matching:iTunes_Amazon</li> <li>helm|entity_matching=Fodors_Zagats</li> <li>helm|hellaswag</li> <li>helm|imdb</li> <li>helm|imdb:contrastset</li> <li>helm|interactive_qa_mmlu:abstract_algebra</li> <li>helm|interactive_qa_mmlu:college_chemistry</li> <li>helm|interactive_qa_mmlu:global_facts</li> <li>helm|interactive_qa_mmlu:miscellaneous</li> <li>helm|interactive_qa_mmlu:nutrition</li> <li>helm|interactive_qa_mmlu:us_foreign_policy</li> <li>helm|legal_summarization:billsum</li> 
<li>helm|legal_summarization:eurlexsum</li> <li>helm|legal_summarization:multilexsum</li> <li>helm|legalsupport</li> <li>helm|lexglue:case_hold</li> <li>helm|lexglue:ecthr_a</li> <li>helm|lexglue:ecthr_b</li> <li>helm|lexglue:eurlex</li> <li>helm|lexglue:ledgar</li> <li>helm|lexglue:scotus</li> <li>helm|lexglue:unfair_tos</li> <li>helm|lextreme:brazilian_court_decisions_judgment</li> <li>helm|lextreme:brazilian_court_decisions_unanimity</li> <li>helm|lextreme:covid19_emergency_event</li> <li>helm|lextreme:german_argument_mining</li> <li>helm|lextreme:greek_legal_code_chapter</li> <li>helm|lextreme:greek_legal_code_subject</li> <li>helm|lextreme:greek_legal_code_volume</li> <li>helm|lextreme:greek_legal_ner</li> <li>helm|lextreme:legalnero</li> <li>helm|lextreme:lener_br</li> <li>helm|lextreme:mapa_coarse</li> <li>helm|lextreme:mapa_fine</li> <li>helm|lextreme:multi_eurlex_level_1</li> <li>helm|lextreme:multi_eurlex_level_2</li> <li>helm|lextreme:multi_eurlex_level_3</li> <li>helm|lextreme:online_terms_of_service_clause_topics</li> <li>helm|lextreme:online_terms_of_service_unfairness_levels</li> <li>helm|lextreme:swiss_judgment_prediction</li> <li>helm|lsat_qa</li> <li>helm|lsat_qa:assignment</li> <li>helm|lsat_qa:grouping</li> <li>helm|lsat_qa:miscellaneous</li> <li>helm|lsat_qa:ordering</li> <li>helm|me_q_sum</li> <li>helm|med_dialog:healthcaremagic</li> <li>helm|med_dialog:icliniq</li> <li>helm|med_mcqa</li> <li>helm|med_paragraph_simplification</li> <li>helm|med_qa</li> <li>helm|mmlu</li> <li>helm|mmlu:abstract_algebra</li> <li>helm|mmlu:anatomy</li> <li>helm|mmlu:astronomy</li> <li>helm|mmlu:business_ethics</li> <li>helm|mmlu:clinical_knowledge</li> <li>helm|mmlu:college_biology</li> <li>helm|mmlu:college_chemistry</li> <li>helm|mmlu:college_computer_science</li> <li>helm|mmlu:college_mathematics</li> <li>helm|mmlu:college_medicine</li> <li>helm|mmlu:college_physics</li> <li>helm|mmlu:computer_security</li> <li>helm|mmlu:conceptual_physics</li> 
<li>helm|mmlu:econometrics</li> <li>helm|mmlu:electrical_engineering</li> <li>helm|mmlu:elementary_mathematics</li> <li>helm|mmlu:formal_logic</li> <li>helm|mmlu:global_facts</li> <li>helm|mmlu:high_school_biology</li> <li>helm|mmlu:high_school_chemistry</li> <li>helm|mmlu:high_school_computer_science</li> <li>helm|mmlu:high_school_european_history</li> <li>helm|mmlu:high_school_geography</li> <li>helm|mmlu:high_school_government_and_politics</li> <li>helm|mmlu:high_school_macroeconomics</li> <li>helm|mmlu:high_school_mathematics</li> <li>helm|mmlu:high_school_microeconomics</li> <li>helm|mmlu:high_school_physics</li> <li>helm|mmlu:high_school_psychology</li> <li>helm|mmlu:high_school_statistics</li> <li>helm|mmlu:high_school_us_history</li> <li>helm|mmlu:high_school_world_history</li> <li>helm|mmlu:human_aging</li> <li>helm|mmlu:human_sexuality</li> <li>helm|mmlu:international_law</li> <li>helm|mmlu:jurisprudence</li> <li>helm|mmlu:logical_fallacies</li> <li>helm|mmlu:machine_learning</li> <li>helm|mmlu:management</li> <li>helm|mmlu:marketing</li> <li>helm|mmlu:medical_genetics</li> <li>helm|mmlu:miscellaneous</li> <li>helm|mmlu:moral_disputes</li> <li>helm|mmlu:moral_scenarios</li> <li>helm|mmlu:nutrition</li> <li>helm|mmlu:philosophy</li> <li>helm|mmlu:prehistory</li> <li>helm|mmlu:professional_accounting</li> <li>helm|mmlu:professional_law</li> <li>helm|mmlu:professional_medicine</li> <li>helm|mmlu:professional_psychology</li> <li>helm|mmlu:public_relations</li> <li>helm|mmlu:security_studies</li> <li>helm|mmlu:sociology</li> <li>helm|mmlu:us_foreign_policy</li> <li>helm|mmlu:virology</li> <li>helm|mmlu:world_religions</li> <li>helm|narrativeqa</li> <li>helm|numeracy:linear_example</li> <li>helm|numeracy:linear_standard</li> <li>helm|numeracy:parabola_example</li> <li>helm|numeracy:parabola_standard</li> <li>helm|numeracy:paraboloid_example</li> <li>helm|numeracy:paraboloid_standard</li> <li>helm|numeracy:plane_example</li> <li>helm|numeracy:plane_standard</li> 
<li>helm|openbookqa</li> <li>helm|piqa</li> <li>helm|pubmedqa</li> <li>helm|quac</li> <li>helm|raft:ade_corpus_v2</li> <li>helm|raft:banking_77</li> <li>helm|raft:neurips_impact_statement_risks</li> <li>helm|raft:one_stop_english</li> <li>helm|raft:overruling</li> <li>helm|raft:semiconductor_org_types</li> <li>helm|raft:systematic_review_inclusion</li> <li>helm|raft:tai_safety_research</li> <li>helm|raft:terms_of_service</li> <li>helm|raft:tweet_eval_hate</li> <li>helm|raft:twitter_complaints</li> <li>helm|real_toxicity_prompts</li> <li>helm|siqa</li> <li>helm|summarization:cnn-dm</li> <li>helm|summarization:xsum</li> <li>helm|summarization:xsum-sampled</li> <li>helm|synthetic_reasoning:induction</li> <li>helm|synthetic_reasoning:natural_easy</li> <li>helm|synthetic_reasoning:natural_hard</li> <li>helm|synthetic_reasoning:pattern_match</li> <li>helm|synthetic_reasoning:variable_substitution</li> <li>helm|the_pile:arxiv</li> <li>helm|the_pile:bibliotik</li> <li>helm|the_pile:commoncrawl</li> <li>helm|the_pile:dm-mathematics</li> <li>helm|the_pile:enron</li> <li>helm|the_pile:europarl</li> <li>helm|the_pile:freelaw</li> <li>helm|the_pile:github</li> <li>helm|the_pile:gutenberg</li> <li>helm|the_pile:hackernews</li> <li>helm|the_pile:nih-exporter</li> <li>helm|the_pile:opensubtitles</li> <li>helm|the_pile:openwebtext2</li> <li>helm|the_pile:pubmed-abstracts</li> <li>helm|the_pile:pubmed-central</li> <li>helm|the_pile:stackexchange</li> <li>helm|the_pile:upsto</li> <li>helm|the_pile:wikipedia</li> <li>helm|the_pile:youtubesubtitles</li> <li>helm|truthfulqa</li> <li>helm|twitterAAE:aa</li> <li>helm|twitterAAE:white</li> <li>helm|wikifact:applies_to_jurisdiction</li> <li>helm|wikifact:atomic_number</li> <li>helm|wikifact:author</li> <li>helm|wikifact:award_received</li> <li>helm|wikifact:basic_form_of_government</li> <li>helm|wikifact:capital</li> <li>helm|wikifact:capital_of</li> <li>helm|wikifact:central_bank</li> <li>helm|wikifact:composer</li> 
<li>helm|wikifact:continent</li> <li>helm|wikifact:country</li> <li>helm|wikifact:country_of_citizenship</li> <li>helm|wikifact:country_of_origin</li> <li>helm|wikifact:creator</li> <li>helm|wikifact:currency</li> <li>helm|wikifact:defendant</li> <li>helm|wikifact:developer</li> <li>helm|wikifact:diplomatic_relation</li> <li>helm|wikifact:director</li> <li>helm|wikifact:discoverer_or_inventor</li> <li>helm|wikifact:drug_or_therapy_used_for_treatment</li> <li>helm|wikifact:educated_at</li> <li>helm|wikifact:electron_configuration</li> <li>helm|wikifact:employer</li> <li>helm|wikifact:field_of_work</li> <li>helm|wikifact:file_extension</li> <li>helm|wikifact:genetic_association</li> <li>helm|wikifact:genre</li> <li>helm|wikifact:has_part</li> <li>helm|wikifact:head_of_government</li> <li>helm|wikifact:head_of_state</li> <li>helm|wikifact:headquarters_location</li> <li>helm|wikifact:industry</li> <li>helm|wikifact:influenced_by</li> <li>helm|wikifact:instance_of</li> <li>helm|wikifact:instrument</li> <li>helm|wikifact:language_of_work_or_name</li> <li>helm|wikifact:languages_spoken_written_or_signed</li> <li>helm|wikifact:laws_applied</li> <li>helm|wikifact:located_in_the_administrative_territorial_entity</li> <li>helm|wikifact:location</li> <li>helm|wikifact:location_of_discovery</li> <li>helm|wikifact:location_of_formation</li> <li>helm|wikifact:majority_opinion_by</li> <li>helm|wikifact:manufacturer</li> <li>helm|wikifact:measured_physical_quantity</li> <li>helm|wikifact:medical_condition_treated</li> <li>helm|wikifact:member_of</li> <li>helm|wikifact:member_of_political_party</li> <li>helm|wikifact:member_of_sports_team</li> <li>helm|wikifact:movement</li> <li>helm|wikifact:named_after</li> <li>helm|wikifact:native_language</li> <li>helm|wikifact:number_of_processor_cores</li> <li>helm|wikifact:occupation</li> <li>helm|wikifact:office_held_by_head_of_government</li> <li>helm|wikifact:office_held_by_head_of_state</li> <li>helm|wikifact:official_language</li> 
<li>helm|wikifact:operating_system</li> <li>helm|wikifact:original_language_of_film_or_TV_show</li> <li>helm|wikifact:original_network</li> <li>helm|wikifact:overrules</li> <li>helm|wikifact:owned_by</li> <li>helm|wikifact:part_of</li> <li>helm|wikifact:participating_team</li> <li>helm|wikifact:place_of_birth</li> <li>helm|wikifact:place_of_death</li> <li>helm|wikifact:plaintiff</li> <li>helm|wikifact:position_held</li> <li>helm|wikifact:position_played_on_team</li> <li>helm|wikifact:programming_language</li> <li>helm|wikifact:recommended_unit_of_measurement</li> <li>helm|wikifact:record_label</li> <li>helm|wikifact:religion</li> <li>helm|wikifact:repealed_by</li> <li>helm|wikifact:shares_border_with</li> <li>helm|wikifact:solved_by</li> <li>helm|wikifact:statement_describes</li> <li>helm|wikifact:stock_exchange</li> <li>helm|wikifact:subclass_of</li> <li>helm|wikifact:subsidiary</li> <li>helm|wikifact:symptoms_and_signs</li> <li>helm|wikifact:therapeutic_area</li> <li>helm|wikifact:time_of_discovery_or_invention</li> <li>helm|wikifact:twinned_administrative_body</li> <li>helm|wikifact:work_location</li> <li>helm|wikitext:103:document_level</li> <li>helm|wmt14:cs-en</li> <li>helm|wmt14:de-en</li> <li>helm|wmt14:fr-en</li> <li>helm|wmt14:hi-en</li> <li>helm|wmt14:ru-en</li></ul></li> <li><p>leaderboard:</p> <ul><li>leaderboard|arc:challenge</li> <li>leaderboard|gsm8k</li> <li>leaderboard|hellaswag</li> <li>leaderboard|mmlu:abstract_algebra</li> <li>leaderboard|mmlu:anatomy</li> <li>leaderboard|mmlu:astronomy</li> <li>leaderboard|mmlu:business_ethics</li> <li>leaderboard|mmlu:clinical_knowledge</li> <li>leaderboard|mmlu:college_biology</li> <li>leaderboard|mmlu:college_chemistry</li> <li>leaderboard|mmlu:college_computer_science</li> <li>leaderboard|mmlu:college_mathematics</li> <li>leaderboard|mmlu:college_medicine</li> <li>leaderboard|mmlu:college_physics</li> <li>leaderboard|mmlu:computer_security</li> <li>leaderboard|mmlu:conceptual_physics</li> 
<li>leaderboard|mmlu:econometrics</li> <li>leaderboard|mmlu:electrical_engineering</li> <li>leaderboard|mmlu:elementary_mathematics</li> <li>leaderboard|mmlu:formal_logic</li> <li>leaderboard|mmlu:global_facts</li> <li>leaderboard|mmlu:high_school_biology</li> <li>leaderboard|mmlu:high_school_chemistry</li> <li>leaderboard|mmlu:high_school_computer_science</li> <li>leaderboard|mmlu:high_school_european_history</li> <li>leaderboard|mmlu:high_school_geography</li> <li>leaderboard|mmlu:high_school_government_and_politics</li> <li>leaderboard|mmlu:high_school_macroeconomics</li> <li>leaderboard|mmlu:high_school_mathematics</li> <li>leaderboard|mmlu:high_school_microeconomics</li> <li>leaderboard|mmlu:high_school_physics</li> <li>leaderboard|mmlu:high_school_psychology</li> <li>leaderboard|mmlu:high_school_statistics</li> <li>leaderboard|mmlu:high_school_us_history</li> <li>leaderboard|mmlu:high_school_world_history</li> <li>leaderboard|mmlu:human_aging</li> <li>leaderboard|mmlu:human_sexuality</li> <li>leaderboard|mmlu:international_law</li> <li>leaderboard|mmlu:jurisprudence</li> <li>leaderboard|mmlu:logical_fallacies</li> <li>leaderboard|mmlu:machine_learning</li> <li>leaderboard|mmlu:management</li> <li>leaderboard|mmlu:marketing</li> <li>leaderboard|mmlu:medical_genetics</li> <li>leaderboard|mmlu:miscellaneous</li> <li>leaderboard|mmlu:moral_disputes</li> <li>leaderboard|mmlu:moral_scenarios</li> <li>leaderboard|mmlu:nutrition</li> <li>leaderboard|mmlu:philosophy</li> <li>leaderboard|mmlu:prehistory</li> <li>leaderboard|mmlu:professional_accounting</li> <li>leaderboard|mmlu:professional_law</li> <li>leaderboard|mmlu:professional_medicine</li> <li>leaderboard|mmlu:professional_psychology</li> <li>leaderboard|mmlu:public_relations</li> <li>leaderboard|mmlu:security_studies</li> <li>leaderboard|mmlu:sociology</li> <li>leaderboard|mmlu:us_foreign_policy</li> <li>leaderboard|mmlu:virology</li> <li>leaderboard|mmlu:world_religions</li> <li>leaderboard|truthfulqa:mc</li> 
<li>leaderboard|winogrande</li></ul></li> <li><p>lighteval:</p> <ul><li>lighteval|agieval:aqua-rat</li> <li>lighteval|agieval:gaokao-biology</li> <li>lighteval|agieval:gaokao-chemistry</li> <li>lighteval|agieval:gaokao-chinese</li> <li>lighteval|agieval:gaokao-english</li> <li>lighteval|agieval:gaokao-geography</li> <li>lighteval|agieval:gaokao-history</li> <li>lighteval|agieval:gaokao-mathqa</li> <li>lighteval|agieval:gaokao-physics</li> <li>lighteval|agieval:logiqa-en</li> <li>lighteval|agieval:logiqa-zh</li> <li>lighteval|agieval:lsat-ar</li> <li>lighteval|agieval:lsat-lr</li> <li>lighteval|agieval:lsat-rc</li> <li>lighteval|agieval:sat-en</li> <li>lighteval|agieval:sat-en-without-passage</li> <li>lighteval|agieval:sat-math</li> <li>lighteval|anli</li> <li>lighteval|anli:r1</li> <li>lighteval|anli:r2</li> <li>lighteval|anli:r3</li> <li>lighteval|arc:easy</li> <li>lighteval|arithmetic:1dc</li> <li>lighteval|arithmetic:2da</li> <li>lighteval|arithmetic:2dm</li> <li>lighteval|arithmetic:2ds</li> <li>lighteval|arithmetic:3da</li> <li>lighteval|arithmetic:3ds</li> <li>lighteval|arithmetic:4da</li> <li>lighteval|arithmetic:4ds</li> <li>lighteval|arithmetic:5da</li> <li>lighteval|arithmetic:5ds</li> <li>lighteval|asdiv</li> <li>lighteval|bigbench:causal_judgment</li> <li>lighteval|bigbench:date_understanding</li> <li>lighteval|bigbench:disambiguation_qa</li> <li>lighteval|bigbench:geometric_shapes</li> <li>lighteval|bigbench:logical_deduction_five_objects</li> <li>lighteval|bigbench:logical_deduction_seven_objects</li> <li>lighteval|bigbench:logical_deduction_three_objects</li> <li>lighteval|bigbench:movie_recommendation</li> <li>lighteval|bigbench:navigate</li> <li>lighteval|bigbench:reasoning_about_colored_objects</li> <li>lighteval|bigbench:ruin_names</li> <li>lighteval|bigbench:salient_translation_error_detection</li> <li>lighteval|bigbench:snarks</li> <li>lighteval|bigbench:sports_understanding</li> <li>lighteval|bigbench:temporal_sequences</li> 
<li>lighteval|bigbench:tracking_shuffled_objects_five_objects</li> <li>lighteval|bigbench:tracking_shuffled_objects_seven_objects</li> <li>lighteval|bigbench:tracking_shuffled_objects_three_objects</li> <li>lighteval|blimp:adjunct_island</li> <li>lighteval|blimp:anaphor_gender_agreement</li> <li>lighteval|blimp:anaphor_number_agreement</li> <li>lighteval|blimp:animate_subject_passive</li> <li>lighteval|blimp:animate_subject_trans</li> <li>lighteval|blimp:causative</li> <li>lighteval|blimp:complex_NP_island</li> <li>lighteval|blimp:coordinate_structure_constraint_complex_left_branch</li> <li>lighteval|blimp:coordinate_structure_constraint_object_extraction</li> <li>lighteval|blimp:determiner_noun_agreement_1</li> <li>lighteval|blimp:determiner_noun_agreement_2</li> <li>lighteval|blimp:determiner_noun_agreement_irregular_1</li> <li>lighteval|blimp:determiner_noun_agreement_irregular_2</li> <li>lighteval|blimp:determiner_noun_agreement_with_adj_2</li> <li>lighteval|blimp:determiner_noun_agreement_with_adj_irregular_1</li> <li>lighteval|blimp:determiner_noun_agreement_with_adj_irregular_2</li> <li>lighteval|blimp:determiner_noun_agreement_with_adjective_1</li> <li>lighteval|blimp:distractor_agreement_relational_noun</li> <li>lighteval|blimp:distractor_agreement_relative_clause</li> <li>lighteval|blimp:drop_argument</li> <li>lighteval|blimp:ellipsis_n_bar_1</li> <li>lighteval|blimp:ellipsis_n_bar_2</li> <li>lighteval|blimp:existential_there_object_raising</li> <li>lighteval|blimp:existential_there_quantifiers_1</li> <li>lighteval|blimp:existential_there_quantifiers_2</li> <li>lighteval|blimp:existential_there_subject_raising</li> <li>lighteval|blimp:expletive_it_object_raising</li> <li>lighteval|blimp:inchoative</li> <li>lighteval|blimp:intransitive</li> <li>lighteval|blimp:irregular_past_participle_adjectives</li> <li>lighteval|blimp:irregular_past_participle_verbs</li> <li>lighteval|blimp:irregular_plural_subject_verb_agreement_1</li> 
<li>lighteval|blimp:irregular_plural_subject_verb_agreement_2</li> <li>lighteval|blimp:left_branch_island_echo_question</li> <li>lighteval|blimp:left_branch_island_simple_question</li> <li>lighteval|blimp:matrix_question_npi_licensor_present</li> <li>lighteval|blimp:npi_present_1</li> <li>lighteval|blimp:npi_present_2</li> <li>lighteval|blimp:only_npi_licensor_present</li> <li>lighteval|blimp:only_npi_scope</li> <li>lighteval|blimp:passive_1</li> <li>lighteval|blimp:passive_2</li> <li>lighteval|blimp:principle_A_c_command</li> <li>lighteval|blimp:principle_A_case_1</li> <li>lighteval|blimp:principle_A_case_2</li> <li>lighteval|blimp:principle_A_domain_1</li> <li>lighteval|blimp:principle_A_domain_2</li> <li>lighteval|blimp:principle_A_domain_3</li> <li>lighteval|blimp:principle_A_reconstruction</li> <li>lighteval|blimp:regular_plural_subject_verb_agreement_1</li> <li>lighteval|blimp:regular_plural_subject_verb_agreement_2</li> <li>lighteval|blimp:sentential_negation_npi_licensor_present</li> <li>lighteval|blimp:sentential_negation_npi_scope</li> <li>lighteval|blimp:sentential_subject_island</li> <li>lighteval|blimp:superlative_quantifiers_1</li> <li>lighteval|blimp:superlative_quantifiers_2</li> <li>lighteval|blimp:tough_vs_raising_1</li> <li>lighteval|blimp:tough_vs_raising_2</li> <li>lighteval|blimp:transitive</li> <li>lighteval|blimp:wh_island</li> <li>lighteval|blimp:wh_questions_object_gap</li> <li>lighteval|blimp:wh_questions_subject_gap</li> <li>lighteval|blimp:wh_questions_subject_gap_long_distance</li> <li>lighteval|blimp:wh_vs_that_no_gap</li> <li>lighteval|blimp:wh_vs_that_no_gap_long_distance</li> <li>lighteval|blimp:wh_vs_that_with_gap</li> <li>lighteval|blimp:wh_vs_that_with_gap_long_distance</li> <li>lighteval|coqa</li> <li>lighteval|coqa_bb</li> <li>lighteval|drop</li> <li>lighteval|ethics:commonsense</li> <li>lighteval|ethics:deontology</li> <li>lighteval|ethics:justice</li> <li>lighteval|ethics:utilitarianism</li> <li>lighteval|ethics:virtue</li> 
<li>lighteval|glue:cola</li> <li>lighteval|glue:mnli</li> <li>lighteval|glue:mnli_mismatched</li> <li>lighteval|glue:mrpc</li> <li>lighteval|glue:qnli</li> <li>lighteval|glue:qqp</li> <li>lighteval|glue:rte</li> <li>lighteval|glue:sst2</li> <li>lighteval|glue:stsb</li> <li>lighteval|glue:wnli</li> <li>lighteval|gpqa</li> <li>lighteval|gsm8k</li> <li>lighteval|headqa:en</li> <li>lighteval|headqa:es</li> <li>lighteval|iwslt17:ar-en</li> <li>lighteval|iwslt17:de-en</li> <li>lighteval|iwslt17:en-ar</li> <li>lighteval|iwslt17:en-de</li> <li>lighteval|iwslt17:en-fr</li> <li>lighteval|iwslt17:en-ja</li> <li>lighteval|iwslt17:en-ko</li> <li>lighteval|iwslt17:en-zh</li> <li>lighteval|iwslt17:fr-en</li> <li>lighteval|iwslt17:ja-en</li> <li>lighteval|iwslt17:ko-en</li> <li>lighteval|iwslt17:zh-en</li> <li>lighteval|lambada:openai</li> <li>lighteval|lambada:openai:de</li> <li>lighteval|lambada:openai:en</li> <li>lighteval|lambada:openai:es</li> <li>lighteval|lambada:openai:fr</li> <li>lighteval|lambada:openai:it</li> <li>lighteval|lambada:openai_cloze</li> <li>lighteval|lambada:standard</li> <li>lighteval|lambada:standard_cloze</li> <li>lighteval|logiqa</li> <li>lighteval|math:algebra</li> <li>lighteval|math:counting_and_probability</li> <li>lighteval|math:geometry</li> <li>lighteval|math:intermediate_algebra</li> <li>lighteval|math:number_theory</li> <li>lighteval|math:prealgebra</li> <li>lighteval|math:precalculus</li> <li>lighteval|math_cot:algebra</li> <li>lighteval|math_cot:counting_and_probability</li> <li>lighteval|math_cot:geometry</li> <li>lighteval|math_cot:intermediate_algebra</li> <li>lighteval|math_cot:number_theory</li> <li>lighteval|math_cot:prealgebra</li> <li>lighteval|math_cot:precalculus</li> <li>lighteval|mathqa</li> <li>lighteval|mgsm:bn</li> <li>lighteval|mgsm:de</li> <li>lighteval|mgsm:en</li> <li>lighteval|mgsm:es</li> <li>lighteval|mgsm:fr</li> <li>lighteval|mgsm:ja</li> <li>lighteval|mgsm:ru</li> <li>lighteval|mgsm:sw</li> <li>lighteval|mgsm:te</li> 
<li>lighteval|mgsm:th</li> <li>lighteval|mgsm:zh</li> <li>lighteval|mtnt2019:en-fr</li> <li>lighteval|mtnt2019:en-ja</li> <li>lighteval|mtnt2019:fr-en</li> <li>lighteval|mtnt2019:ja-en</li> <li>lighteval|mutual</li> <li>lighteval|mutual_plus</li> <li>lighteval|openbookqa</li> <li>lighteval|piqa</li> <li>lighteval|prost</li> <li>lighteval|pubmedqa</li> <li>lighteval|qa4mre:2011</li> <li>lighteval|qa4mre:2012</li> <li>lighteval|qa4mre:2013</li> <li>lighteval|qasper</li> <li>lighteval|qasper_ll</li> <li>lighteval|race:high</li> <li>lighteval|sciq</li> <li>lighteval|storycloze:2016</li> <li>lighteval|storycloze:2018</li> <li>lighteval|super_glue:boolq</li> <li>lighteval|super_glue:cb</li> <li>lighteval|super_glue:copa</li> <li>lighteval|super_glue:multirc</li> <li>lighteval|super_glue:rte</li> <li>lighteval|super_glue:wic</li> <li>lighteval|super_glue:wsc</li> <li>lighteval|swag</li> <li>lighteval|the_pile:arxiv</li> <li>lighteval|the_pile:bookcorpus2</li> <li>lighteval|the_pile:books3</li> <li>lighteval|the_pile:dm-mathematics</li> <li>lighteval|the_pile:enron</li> <li>lighteval|the_pile:europarl</li> <li>lighteval|the_pile:freelaw</li> <li>lighteval|the_pile:github</li> <li>lighteval|the_pile:gutenberg</li> <li>lighteval|the_pile:hackernews</li> <li>lighteval|the_pile:nih-exporter</li> <li>lighteval|the_pile:opensubtitles</li> <li>lighteval|the_pile:openwebtext2</li> <li>lighteval|the_pile:philpapers</li> <li>lighteval|the_pile:pile-cc</li> <li>lighteval|the_pile:pubmed-abstracts</li> <li>lighteval|the_pile:pubmed-central</li> <li>lighteval|the_pile:stackexchange</li> <li>lighteval|the_pile:ubuntu-irc</li> <li>lighteval|the_pile:uspto</li> <li>lighteval|the_pile:wikipedia</li> <li>lighteval|the_pile:youtubesubtitles</li> <li>lighteval|toxigen</li> <li>lighteval|triviaqa</li> <li>lighteval|truthfulqa:gen</li> <li>lighteval|unscramble:anagrams1</li> <li>lighteval|unscramble:anagrams2</li> <li>lighteval|unscramble:cycle_letters</li> 
<li>lighteval|unscramble:random_insertion</li> <li>lighteval|unscramble:reversed_words</li> <li>lighteval|webqs</li> <li>lighteval|wikitext:2</li> <li>lighteval|wmt08:cs-en</li> <li>lighteval|wmt08:de-en</li> <li>lighteval|wmt08:en-cs</li> <li>lighteval|wmt08:en-de</li> <li>lighteval|wmt08:en-es</li> <li>lighteval|wmt08:en-fr</li> <li>lighteval|wmt08:en-hu</li> <li>lighteval|wmt08:es-en</li> <li>lighteval|wmt08:fr-en</li> <li>lighteval|wmt08:hu-en</li> <li>lighteval|wmt09:cs-en</li> <li>lighteval|wmt09:de-en</li> <li>lighteval|wmt09:en-cs</li> <li>lighteval|wmt09:en-de</li> <li>lighteval|wmt09:en-es</li> <li>lighteval|wmt09:en-fr</li> <li>lighteval|wmt09:en-hu</li> <li>lighteval|wmt09:en-it</li> <li>lighteval|wmt09:es-en</li> <li>lighteval|wmt09:fr-en</li> <li>lighteval|wmt09:hu-en</li> <li>lighteval|wmt09:it-en</li> <li>lighteval|wmt10:cs-en</li> <li>lighteval|wmt10:de-en</li> <li>lighteval|wmt10:en-cs</li> <li>lighteval|wmt10:en-de</li> <li>lighteval|wmt10:en-es</li> <li>lighteval|wmt10:en-fr</li> <li>lighteval|wmt10:es-en</li> <li>lighteval|wmt10:fr-en</li> <li>lighteval|wmt11:cs-en</li> <li>lighteval|wmt11:de-en</li> <li>lighteval|wmt11:en-cs</li> <li>lighteval|wmt11:en-de</li> <li>lighteval|wmt11:en-es</li> <li>lighteval|wmt11:en-fr</li> <li>lighteval|wmt11:es-en</li> <li>lighteval|wmt11:fr-en</li> <li>lighteval|wmt12:cs-en</li> <li>lighteval|wmt12:de-en</li> <li>lighteval|wmt12:en-cs</li> <li>lighteval|wmt12:en-de</li> <li>lighteval|wmt12:en-es</li> <li>lighteval|wmt12:en-fr</li> <li>lighteval|wmt12:es-en</li> <li>lighteval|wmt12:fr-en</li> <li>lighteval|wmt13:cs-en</li> <li>lighteval|wmt13:de-en</li> <li>lighteval|wmt13:en-cs</li> <li>lighteval|wmt13:en-de</li> <li>lighteval|wmt13:en-es</li> <li>lighteval|wmt13:en-fr</li> <li>lighteval|wmt13:en-ru</li> <li>lighteval|wmt13:es-en</li> <li>lighteval|wmt13:fr-en</li> <li>lighteval|wmt13:ru-en</li> <li>lighteval|wmt14:cs-en</li> <li>lighteval|wmt14:de-en</li> <li>lighteval|wmt14:en-cs</li> 
<li>lighteval|wmt14:en-de</li> <li>lighteval|wmt14:en-fr</li> <li>lighteval|wmt14:en-hi</li> <li>lighteval|wmt14:en-ru</li> <li>lighteval|wmt14:fr-en</li> <li>lighteval|wmt14:hi-en</li> <li>lighteval|wmt14:ru-en</li> <li>lighteval|wmt15:cs-en</li> <li>lighteval|wmt15:de-en</li> <li>lighteval|wmt15:en-cs</li> <li>lighteval|wmt15:en-de</li> <li>lighteval|wmt15:en-fi</li> <li>lighteval|wmt15:en-fr</li> <li>lighteval|wmt15:en-ru</li> <li>lighteval|wmt15:fi-en</li> <li>lighteval|wmt15:fr-en</li> <li>lighteval|wmt15:ru-en</li> <li>lighteval|wmt16:cs-en</li> <li>lighteval|wmt16:de-en</li> <li>lighteval|wmt16:en-cs</li> <li>lighteval|wmt16:en-de</li> <li>lighteval|wmt16:en-fi</li> <li>lighteval|wmt16:en-ro</li> <li>lighteval|wmt16:en-ru</li> <li>lighteval|wmt16:en-tr</li> <li>lighteval|wmt16:fi-en</li> <li>lighteval|wmt16:ro-en</li> <li>lighteval|wmt16:ru-en</li> <li>lighteval|wmt16:tr-en</li> <li>lighteval|wmt17:cs-en</li> <li>lighteval|wmt17:de-en</li> <li>lighteval|wmt17:en-cs</li> <li>lighteval|wmt17:en-de</li> <li>lighteval|wmt17:en-fi</li> <li>lighteval|wmt17:en-lv</li> <li>lighteval|wmt17:en-ru</li> <li>lighteval|wmt17:en-tr</li> <li>lighteval|wmt17:en-zh</li> <li>lighteval|wmt17:fi-en</li> <li>lighteval|wmt17:lv-en</li> <li>lighteval|wmt17:ru-en</li> <li>lighteval|wmt17:tr-en</li> <li>lighteval|wmt17:zh-en</li> <li>lighteval|wmt18:cs-en</li> <li>lighteval|wmt18:de-en</li> <li>lighteval|wmt18:en-cs</li> <li>lighteval|wmt18:en-de</li> <li>lighteval|wmt18:en-et</li> <li>lighteval|wmt18:en-fi</li> <li>lighteval|wmt18:en-ru</li> <li>lighteval|wmt18:en-tr</li> <li>lighteval|wmt18:en-zh</li> <li>lighteval|wmt18:et-en</li> <li>lighteval|wmt18:fi-en</li> <li>lighteval|wmt18:ru-en</li> <li>lighteval|wmt18:tr-en</li> <li>lighteval|wmt18:zh-en</li> <li>lighteval|wmt19:cs-de</li> <li>lighteval|wmt19:de-cs</li> <li>lighteval|wmt19:de-en</li> <li>lighteval|wmt19:de-fr</li> <li>lighteval|wmt19:en-cs</li> <li>lighteval|wmt19:en-de</li> <li>lighteval|wmt19:en-fi</li> 
<li>lighteval|wmt19:en-gu</li> <li>lighteval|wmt19:en-kk</li> <li>lighteval|wmt19:en-lt</li> <li>lighteval|wmt19:en-ru</li> <li>lighteval|wmt19:en-zh</li> <li>lighteval|wmt19:fi-en</li> <li>lighteval|wmt19:fr-de</li> <li>lighteval|wmt19:gu-en</li> <li>lighteval|wmt19:kk-en</li> <li>lighteval|wmt19:lt-en</li> <li>lighteval|wmt19:ru-en</li> <li>lighteval|wmt19:zh-en</li> <li>lighteval|wmt20:cs-en</li> <li>lighteval|wmt20:de-en</li> <li>lighteval|wmt20:de-fr</li> <li>lighteval|wmt20:en-cs</li> <li>lighteval|wmt20:en-de</li> <li>lighteval|wmt20:en-iu</li> <li>lighteval|wmt20:en-ja</li> <li>lighteval|wmt20:en-km</li> <li>lighteval|wmt20:en-pl</li> <li>lighteval|wmt20:en-ps</li> <li>lighteval|wmt20:en-ru</li> <li>lighteval|wmt20:en-ta</li> <li>lighteval|wmt20:en-zh</li> <li>lighteval|wmt20:fr-de</li> <li>lighteval|wmt20:iu-en</li> <li>lighteval|wmt20:ja-en</li> <li>lighteval|wmt20:km-en</li> <li>lighteval|wmt20:pl-en</li> <li>lighteval|wmt20:ps-en</li> <li>lighteval|wmt20:ru-en</li> <li>lighteval|wmt20:ta-en</li> <li>lighteval|wmt20:zh-en</li> <li>lighteval|wsc273</li> <li>lighteval|xcopa:en</li> <li>lighteval|xcopa:et</li> <li>lighteval|xcopa:ht</li> <li>lighteval|xcopa:id</li> <li>lighteval|xcopa:it</li> <li>lighteval|xcopa:qu</li> <li>lighteval|xcopa:sw</li> <li>lighteval|xcopa:ta</li> <li>lighteval|xcopa:th</li> <li>lighteval|xcopa:tr</li> <li>lighteval|xcopa:vi</li> <li>lighteval|xcopa:zh</li> <li>lighteval|xstory_cloze:ar</li> <li>lighteval|xstory_cloze:en</li> <li>lighteval|xstory_cloze:es</li> <li>lighteval|xstory_cloze:eu</li> <li>lighteval|xstory_cloze:hi</li> <li>lighteval|xstory_cloze:id</li> <li>lighteval|xstory_cloze:my</li> <li>lighteval|xstory_cloze:ru</li> <li>lighteval|xstory_cloze:sw</li> <li>lighteval|xstory_cloze:te</li> <li>lighteval|xstory_cloze:zh</li> <li>lighteval|xwinograd:en</li> <li>lighteval|xwinograd:fr</li> <li>lighteval|xwinograd:jp</li> <li>lighteval|xwinograd:pt</li> <li>lighteval|xwinograd:ru</li> 
<li>lighteval|xwinograd:zh</li></ul></li> <li><p>original:</p> <ul><li>original|arc:c:letters</li> <li>original|arc:c:options</li> <li>original|arc:c:simple</li> <li>original|mmlu</li> <li>original|mmlu:abstract_algebra</li> <li>original|mmlu:anatomy</li> <li>original|mmlu:astronomy</li> <li>original|mmlu:business_ethics</li> <li>original|mmlu:clinical_knowledge</li> <li>original|mmlu:college_biology</li> <li>original|mmlu:college_chemistry</li> <li>original|mmlu:college_computer_science</li> <li>original|mmlu:college_mathematics</li> <li>original|mmlu:college_medicine</li> <li>original|mmlu:college_physics</li> <li>original|mmlu:computer_security</li> <li>original|mmlu:conceptual_physics</li> <li>original|mmlu:econometrics</li> <li>original|mmlu:electrical_engineering</li> <li>original|mmlu:elementary_mathematics</li> <li>original|mmlu:formal_logic</li> <li>original|mmlu:global_facts</li> <li>original|mmlu:high_school_biology</li> <li>original|mmlu:high_school_chemistry</li> <li>original|mmlu:high_school_computer_science</li> <li>original|mmlu:high_school_european_history</li> <li>original|mmlu:high_school_geography</li> <li>original|mmlu:high_school_government_and_politics</li> <li>original|mmlu:high_school_macroeconomics</li> <li>original|mmlu:high_school_mathematics</li> <li>original|mmlu:high_school_microeconomics</li> <li>original|mmlu:high_school_physics</li> <li>original|mmlu:high_school_psychology</li> <li>original|mmlu:high_school_statistics</li> <li>original|mmlu:high_school_us_history</li> <li>original|mmlu:high_school_world_history</li> <li>original|mmlu:human_aging</li> <li>original|mmlu:human_sexuality</li> <li>original|mmlu:international_law</li> <li>original|mmlu:jurisprudence</li> <li>original|mmlu:logical_fallacies</li> <li>original|mmlu:machine_learning</li> <li>original|mmlu:management</li> <li>original|mmlu:marketing</li> <li>original|mmlu:medical_genetics</li> <li>original|mmlu:miscellaneous</li> <li>original|mmlu:moral_disputes</li> 
<li>original|mmlu:moral_scenarios</li> <li>original|mmlu:nutrition</li> <li>original|mmlu:philosophy</li> <li>original|mmlu:prehistory</li> <li>original|mmlu:professional_accounting</li> <li>original|mmlu:professional_law</li> <li>original|mmlu:professional_medicine</li> <li>original|mmlu:professional_psychology</li> <li>original|mmlu:public_relations</li> <li>original|mmlu:security_studies</li> <li>original|mmlu:sociology</li> <li>original|mmlu:us_foreign_policy</li> <li>original|mmlu:virology</li> <li>original|mmlu:world_religions</li></ul></li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/lighteval/blob/main/docs/source/available-tasks.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
// Client bootstrap for this prerendered page. The enclosing braces keep the
// `const` bindings below block-scoped rather than adding them to the global scope.
{
	// Assigned without a declaration, so in this classic (non-module) script it
	// becomes a property of the global object — presumably so the imported entry
	// modules can read the asset/base paths; confirm against the bundle.
	__sveltekit_1kl62qe = {
		assets: "/docs/lighteval/pr_744/en",
		base: "/docs/lighteval/pr_744/en",
		env: {}
	};

	// Read synchronously: document.currentScript only refers to this <script>
	// while it is executing, so it cannot be looked up later inside the callback.
	const mountTarget = document.currentScript.parentElement;

	// Two null placeholders — presumably one slot per entry in `node_ids` below.
	const pageData = [null, null];

	// Both entry modules are requested up front so they load in parallel.
	const entryModules = [
		import("/docs/lighteval/pr_744/en/_app/immutable/entry/start.a615223c.js"),
		import("/docs/lighteval/pr_744/en/_app/immutable/entry/app.b0033d27.js")
	];

	Promise.all(entryModules).then((loaded) => {
		const kit = loaded[0];
		const app = loaded[1];

		// Hand the app module and the mount target to the start module's entry point.
		kit.start(app, mountTarget, {
			node_ids: [0, 4],
			data: pageData,
			form: null,
			error: null
		});
	});
}
</script>

Xet Storage Details

Size:
56.6 kB
·
Xet hash:
6d8bf601b8e7426d760d28c2d794cdf27be964debecb6b3b63125cb1a5a83db0

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.