| Group | Config Location | |---------------------------------|------------------------------------------------------------------------| |aclue |lm_eval/tasks/aclue/_aclue.yaml | |aexams |lm_eval/tasks/aexams/_aexams.yaml | |agieval |lm_eval/tasks/agieval/agieval.yaml | |agieval_cn |lm_eval/tasks/agieval/agieval_cn.yaml | |agieval_en |lm_eval/tasks/agieval/agieval_en.yaml | |agieval_nous |lm_eval/tasks/agieval/agieval_nous.yaml | |arabicmmlu |lm_eval/tasks/arabicmmlu/_arabicmmlu.yaml | |arabicmmlu_humanities |lm_eval/tasks/arabicmmlu/_arabicmmlu_humanities.yaml | |arabicmmlu_language |lm_eval/tasks/arabicmmlu/_arabicmmlu_language.yaml | |arabicmmlu_other |lm_eval/tasks/arabicmmlu/_arabicmmlu_other.yaml | |arabicmmlu_social_science |lm_eval/tasks/arabicmmlu/_arabicmmlu_social_science.yaml | |arabicmmlu_stem |lm_eval/tasks/arabicmmlu/_arabicmmlu_stem.yaml | |bbh |lm_eval/tasks/bbh/cot_fewshot/_bbh.yaml | |bbh_cot_fewshot |lm_eval/tasks/bbh/cot_fewshot/_bbh_cot_fewshot.yaml | |bbh_cot_zeroshot |lm_eval/tasks/bbh/cot_zeroshot/_bbh_cot_zeroshot.yaml | |bbh_fewshot |lm_eval/tasks/bbh/fewshot/_bbh_fewshot.yaml | |bbh_zeroshot |lm_eval/tasks/bbh/zeroshot/_bbh_zeroshot.yaml | |belebele |lm_eval/tasks/belebele/_belebele.yaml | |blimp |lm_eval/tasks/blimp/_blimp.yaml | |ceval-valid |lm_eval/tasks/ceval/_ceval-valid.yaml | |cmmlu |lm_eval/tasks/cmmlu/_cmmlu.yaml | |csatqa |lm_eval/tasks/csatqa/_csatqa.yaml | |flan_held_in |lm_eval/tasks/benchmarks/flan/flan_held_in.yaml | |flan_held_out |lm_eval/tasks/benchmarks/flan/flan_held_out.yaml | |haerae |lm_eval/tasks/haerae/_haerae.yaml | |hendrycks_math |lm_eval/tasks/hendrycks_math/hendrycks_math.yaml | |kormedmcqa |lm_eval/tasks/kormedmcqa/_kormedmcqa.yaml | |leaderboard |lm_eval/tasks/leaderboard/leaderboard.yaml | |leaderboard_bbh |lm_eval/tasks/leaderboard/bbh_mc/_leaderboard_bbh.yaml | |leaderboard_gpqa |lm_eval/tasks/leaderboard/gpqa/_leaderboard_gpqa.yaml | 
|leaderboard_instruction_following|lm_eval/tasks/leaderboard/ifeval/_leaderboard_instruction_following.yaml| |leaderboard_math_hard |lm_eval/tasks/leaderboard/math/_leaderboard_math.yaml | |leaderboard_musr |lm_eval/tasks/leaderboard/musr/_musr.yaml | |med_concepts_qa |lm_eval/tasks/med_concepts_qa/_med_concepts_qa.yaml | |med_concepts_qa_atc |lm_eval/tasks/med_concepts_qa/_med_concepts_qa_atc.yaml | |med_concepts_qa_icd10cm |lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd10cm.yaml | |med_concepts_qa_icd10proc |lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd10proc.yaml | |med_concepts_qa_icd9cm |lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd9cm.yaml | |med_concepts_qa_icd9proc |lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd9proc.yaml | |minerva_math |lm_eval/tasks/benchmarks/minerva_math.yaml | |mmlu |lm_eval/tasks/mmlu/default/_mmlu.yaml | |mmlu_continuation |lm_eval/tasks/mmlu/continuation/_mmlu.yaml | |mmlu_flan_cot_fewshot |lm_eval/tasks/mmlu/flan_cot_fewshot/_mmlu.yaml | |mmlu_flan_cot_zeroshot |lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu.yaml | |mmlu_flan_n_shot_generative |lm_eval/tasks/mmlu/flan_n_shot/generative/_mmlu.yaml | |mmlu_flan_n_shot_loglikelihood |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/_mmlu.yaml | |mmlu_generative |lm_eval/tasks/mmlu/generative/_mmlu.yaml | |mmlu_humanities |lm_eval/tasks/mmlu/default/_mmlu_humanities.yaml | |mmlu_other |lm_eval/tasks/mmlu/default/_mmlu_other.yaml | |mmlu_social_sciences |lm_eval/tasks/mmlu/default/_mmlu_social_sciences.yaml | |mmlu_stem |lm_eval/tasks/mmlu/default/_mmlu_stem.yaml | |mmlusr |lm_eval/tasks/mmlusr/question_and_answer/_question_and_answer.yaml | |mmlusr_answer_only |lm_eval/tasks/mmlusr/answer_only/_answer_only.yaml | |mmlusr_question_only |lm_eval/tasks/mmlusr/question_only/_question_only.yaml | |multimedqa |lm_eval/tasks/benchmarks/multimedqa/multimedqa.yaml | |openllm |lm_eval/tasks/benchmarks/openllm.yaml | |pawsx |lm_eval/tasks/paws-x/_pawsx.yaml | |pythia 
|lm_eval/tasks/benchmarks/pythia.yaml | |t0_eval |lm_eval/tasks/benchmarks/t0_eval.yaml | |tinyBenchmarks |lm_eval/tasks/tinyBenchmarks/tinyBenchmarks.yaml | |tmmluplus |lm_eval/tasks/tmmluplus/default/tmmluplus.yaml | |wmdp |lm_eval/tasks/wmdp/_wmdp.yaml | |xcopa |lm_eval/tasks/xcopa/_xcopa.yaml | |xnli |lm_eval/tasks/xnli/_xnli.yaml | |xstorycloze |lm_eval/tasks/xstorycloze/_xstorycloze.yaml | |xwinograd |lm_eval/tasks/xwinograd/_xwinograd.yaml | | Tag | |------------------------------------------------| |advanced_ai_risk | |afrimgsm | |afrimgsm_direct | |afrimgsm_en_cot | |afrimgsm_translate | |afrimmlu | |afrimmlu_direct | |afrimmlu_translate | |afrixnli | |afrixnli_en_direct | |afrixnli_manual_direct | |afrixnli_native_direct | |afrixnli_translate | |ai2_arc | |anli | |arabicmmlu_humanities_tasks | |arabicmmlu_language_tasks | |arabicmmlu_other_tasks | |arabicmmlu_social_science_tasks | |arabicmmlu_stem_tasks | |arc_challenge_mt | |arc_multilingual | |arithmetic | |basque-glue | |bertaqa | |bigbench_generate_until | |bigbench_multiple_choice | |chain_of_thought | |codexglue_code2text | |copal_id | |crows_pairs | |eus_exams_es | |eus_exams_eu | |fld_logical_formula | |freebase | |french_bench | |french_bench_extra | |french_bench_gen | |french_bench_mc | |french_bench_perplexity | |glue | |gpqa | |gpt3_translation_benchmarks | |headqa | |hellaswag_multilingual | |hendrycks_ethics | |inverse_scaling_mc | |iwslt2017 | |kmmlu | |kmmlu_direct | |kmmlu_hard | |kmmlu_hard_cot | |kmmlu_hard_direct | |kobest | |lambada | |lambada_cloze | |lambada_multilingual | |lambada_multilingual_stablelm | |m_mmlu | |math_word_problems | |med_concepts_qa_atc_tasks | |med_concepts_qa_icd10cm_tasks | |med_concepts_qa_icd10proc_tasks | |med_concepts_qa_icd9cm_tasks | |med_concepts_qa_icd9proc_tasks | |mgsm_cot_native | |mgsm_direct | |mmlu_continuation_humanities | |mmlu_continuation_other | |mmlu_continuation_social_sciences | |mmlu_continuation_stem | 
|mmlu_flan_cot_fewshot_humanities | |mmlu_flan_cot_fewshot_other | |mmlu_flan_cot_fewshot_social_sciences | |mmlu_flan_cot_fewshot_stem | |mmlu_flan_cot_zeroshot_humanities | |mmlu_flan_cot_zeroshot_other | |mmlu_flan_cot_zeroshot_social_sciences | |mmlu_flan_cot_zeroshot_stem | |mmlu_flan_n_shot_generative_humanities | |mmlu_flan_n_shot_generative_other | |mmlu_flan_n_shot_generative_social_sciences | |mmlu_flan_n_shot_generative_stem | |mmlu_flan_n_shot_loglikelihood_humanities | |mmlu_flan_n_shot_loglikelihood_other | |mmlu_flan_n_shot_loglikelihood_social_sciences | |mmlu_flan_n_shot_loglikelihood_stem | |mmlu_humanities_generative | |mmlu_humanities_tasks | |mmlu_other_generative | |mmlu_other_tasks | |mmlu_social_sciences_generative | |mmlu_social_sciences_tasks | |mmlu_stem_generative | |mmlu_stem_tasks | |mmlusr_answer_only_humanities_tasks | |mmlusr_answer_only_other_tasks | |mmlusr_answer_only_social_sciences_tasks | |mmlusr_answer_only_stem_tasks | |mmlusr_question_and_answer_humanities_tasks | |mmlusr_question_and_answer_other_tasks | |mmlusr_question_and_answer_social_sciences_tasks| |mmlusr_question_and_answer_stem_tasks | |mmlusr_question_only_humanities_tasks | |mmlusr_question_only_other_tasks | |mmlusr_question_only_social_sciences_tasks | |mmlusr_question_only_stem_tasks | |multiple_choice | |paloma | |persona | |polemo2 | |qa4mre | |qasper | |self_consistency | |storycloze | |super-glue-lm-eval-v1 | |super-glue-lm-eval-v1-seq2seq | |super-glue-t5-prompt | |sycophancy | |tmmluplus_STEM | |tmmluplus_humanities | |tmmluplus_other | |tmmluplus_social_sciences | |translation | |truthfulqa | |truthfulqa_multilingual | |unscramble | |wmt14 | |wmt16 | |xnli_eu_mt_native | | Task | Config Location | Output Type | |--------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------|---------------------| 
|aclue_ancient_chinese_culture |lm_eval/tasks/aclue/aclue_ancient_chinese_culture.yaml |multiple_choice | |aclue_ancient_literature |lm_eval/tasks/aclue/aclue_ancient_literature.yaml |multiple_choice | |aclue_ancient_medical |lm_eval/tasks/aclue/aclue_ancient_medical.yaml |multiple_choice | |aclue_ancient_phonetics |lm_eval/tasks/aclue/aclue_ancient_phonetics.yaml |multiple_choice | |aclue_basic_ancient_chinese |lm_eval/tasks/aclue/aclue_basic_ancient_chinese.yaml |multiple_choice | |aclue_couplet_prediction |lm_eval/tasks/aclue/aclue_couplet_prediction.yaml |multiple_choice | |aclue_homographic_character_resolution |lm_eval/tasks/aclue/aclue_homographic_character_resolution.yaml |multiple_choice | |aclue_named_entity_recognition |lm_eval/tasks/aclue/aclue_named_entity_recognition.yaml |multiple_choice | |aclue_poetry_appreciate |lm_eval/tasks/aclue/aclue_poetry_appreciate.yaml |multiple_choice | |aclue_poetry_context_prediction |lm_eval/tasks/aclue/aclue_poetry_context_prediction.yaml |multiple_choice | |aclue_poetry_quality_assessment |lm_eval/tasks/aclue/aclue_poetry_quality_assessment.yaml |multiple_choice | |aclue_poetry_sentiment_analysis |lm_eval/tasks/aclue/aclue_poetry_sentiment_analysis.yaml |multiple_choice | |aclue_polysemy_resolution |lm_eval/tasks/aclue/aclue_polysemy_resolution.yaml |multiple_choice | |aclue_reading_comprehension |lm_eval/tasks/aclue/aclue_reading_comprehension.yaml |multiple_choice | |aclue_sentence_segmentation |lm_eval/tasks/aclue/aclue_sentence_segmentation.yaml |multiple_choice | |advanced_ai_risk_fewshot-coordinate-itself |lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-itself.yaml |multiple_choice | |advanced_ai_risk_fewshot-coordinate-other-ais |lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-other-ais.yaml |multiple_choice | |advanced_ai_risk_fewshot-coordinate-other-versions |lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-other-versions.yaml |multiple_choice 
| |advanced_ai_risk_fewshot-corrigible-less-HHH |lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-less-HHH.yaml |multiple_choice | |advanced_ai_risk_fewshot-corrigible-more-HHH |lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-more-HHH.yaml |multiple_choice | |advanced_ai_risk_fewshot-corrigible-neutral-HHH |lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-neutral-HHH.yaml |multiple_choice | |advanced_ai_risk_fewshot-myopic-reward |lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-myopic-reward.yaml |multiple_choice | |advanced_ai_risk_fewshot-one-box-tendency |lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-one-box-tendency.yaml |multiple_choice | |advanced_ai_risk_fewshot-power-seeking-inclination |lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-power-seeking-inclination.yaml |multiple_choice | |advanced_ai_risk_fewshot-self-awareness-general-ai |lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-general-ai.yaml |multiple_choice | |advanced_ai_risk_fewshot-self-awareness-good-text-model |lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-good-text-model.yaml |multiple_choice | |advanced_ai_risk_fewshot-self-awareness-text-model |lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-text-model.yaml |multiple_choice | |advanced_ai_risk_fewshot-self-awareness-training-architecture |lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-training-architecture.yaml |multiple_choice | |advanced_ai_risk_fewshot-self-awareness-training-web-gpt |lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-training-web-gpt.yaml |multiple_choice | |advanced_ai_risk_fewshot-survival-instinct |lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-survival-instinct.yaml |multiple_choice | |advanced_ai_risk_fewshot-wealth-seeking-inclination 
|lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-wealth-seeking-inclination.yaml |multiple_choice | |advanced_ai_risk_human-coordinate-itself |lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-itself.yaml |multiple_choice | |advanced_ai_risk_human-coordinate-other-ais |lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-other-ais.yaml |multiple_choice | |advanced_ai_risk_human-coordinate-other-versions |lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-other-versions.yaml |multiple_choice | |advanced_ai_risk_human-corrigible-less-HHH |lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-less-HHH.yaml |multiple_choice | |advanced_ai_risk_human-corrigible-more-HHH |lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-more-HHH.yaml |multiple_choice | |advanced_ai_risk_human-corrigible-neutral-HHH |lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-neutral-HHH.yaml |multiple_choice | |advanced_ai_risk_human-myopic-reward |lm_eval/tasks/model_written_evals/advanced_ai_risk/human-myopic-reward.yaml |multiple_choice | |advanced_ai_risk_human-one-box-tendency |lm_eval/tasks/model_written_evals/advanced_ai_risk/human-one-box-tendency.yaml |multiple_choice | |advanced_ai_risk_human-power-seeking-inclination |lm_eval/tasks/model_written_evals/advanced_ai_risk/human-power-seeking-inclination.yaml |multiple_choice | |advanced_ai_risk_human-self-awareness-general-ai |lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-general-ai.yaml |multiple_choice | |advanced_ai_risk_human-self-awareness-good-text-model |lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-good-text-model.yaml |multiple_choice | |advanced_ai_risk_human-self-awareness-text-model |lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-text-model.yaml |multiple_choice | |advanced_ai_risk_human-self-awareness-training-architecture 
|lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-training-architecture.yaml |multiple_choice | |advanced_ai_risk_human-self-awareness-web-gpt |lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-web-gpt.yaml |multiple_choice | |advanced_ai_risk_human-survival-instinct |lm_eval/tasks/model_written_evals/advanced_ai_risk/human-survival-instinct.yaml |multiple_choice | |advanced_ai_risk_human-wealth-seeking-inclination |lm_eval/tasks/model_written_evals/advanced_ai_risk/human-wealth-seeking-inclination.yaml |multiple_choice | |advanced_ai_risk_lm-coordinate-itself |lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-itself.yaml |multiple_choice | |advanced_ai_risk_lm-coordinate-other-ais |lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-other-ais.yaml |multiple_choice | |advanced_ai_risk_lm-coordinate-other-versions |lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-other-versions.yaml |multiple_choice | |advanced_ai_risk_lm-corrigible-less-HHH |lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-less-HHH.yaml |multiple_choice | |advanced_ai_risk_lm-corrigible-more-HHH |lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-more-HHH.yaml |multiple_choice | |advanced_ai_risk_lm-corrigible-neutral-HHH |lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-neutral-HHH.yaml |multiple_choice | |advanced_ai_risk_lm-myopic-reward |lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-myopic-reward.yaml |multiple_choice | |advanced_ai_risk_lm-one-box-tendency |lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-one-box-tendency.yaml |multiple_choice | |advanced_ai_risk_lm-power-seeking-inclination |lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-power-seeking-inclination.yaml |multiple_choice | |advanced_ai_risk_lm-self-awareness-general-ai |lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-general-ai.yaml |multiple_choice | 
|advanced_ai_risk_lm-self-awareness-good-text-model |lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-good-text-model.yaml |multiple_choice | |advanced_ai_risk_lm-self-awareness-text-model |lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-text-model.yaml |multiple_choice | |advanced_ai_risk_lm-self-awareness-training-architecture |lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-architecture.yaml |multiple_choice | |advanced_ai_risk_lm-self-awareness-training-nn-architecture |lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-nn-architecture.yaml |multiple_choice | |advanced_ai_risk_lm-self-awareness-training-web-gpt |lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-web-gpt.yaml |multiple_choice | |advanced_ai_risk_lm-survival-instinct |lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-survival-instinct.yaml |multiple_choice | |advanced_ai_risk_lm-wealth-seeking-inclination |lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-wealth-seeking-inclination.yaml |multiple_choice | |aexams_Biology |lm_eval/tasks/aexams/aexams_Biology.yaml |multiple_choice | |aexams_IslamicStudies |lm_eval/tasks/aexams/aexams_IslamicStudies.yaml |multiple_choice | |aexams_Physics |lm_eval/tasks/aexams/aexams_Physics.yaml |multiple_choice | |aexams_Science |lm_eval/tasks/aexams/aexams_Science.yaml |multiple_choice | |aexams_Social |lm_eval/tasks/aexams/aexams_Social.yaml |multiple_choice | |afrimgsm_direct_amh |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_amh.yaml |generate_until | |afrimgsm_direct_eng |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_eng.yaml |generate_until | |afrimgsm_direct_ewe |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_ewe.yaml |generate_until | |afrimgsm_direct_fra |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_fra.yaml |generate_until | |afrimgsm_direct_hau |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_hau.yaml |generate_until | 
|afrimgsm_direct_ibo |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_ibo.yaml |generate_until | |afrimgsm_direct_kin |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_kin.yaml |generate_until | |afrimgsm_direct_lin |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_lin.yaml |generate_until | |afrimgsm_direct_lug |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_lug.yaml |generate_until | |afrimgsm_direct_orm |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_orm.yaml |generate_until | |afrimgsm_direct_sna |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_sna.yaml |generate_until | |afrimgsm_direct_sot |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_sot.yaml |generate_until | |afrimgsm_direct_swa |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_swa.yaml |generate_until | |afrimgsm_direct_twi |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_twi.yaml |generate_until | |afrimgsm_direct_wol |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_wol.yaml |generate_until | |afrimgsm_direct_xho |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_xho.yaml |generate_until | |afrimgsm_direct_yor |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_yor.yaml |generate_until | |afrimgsm_direct_zul |lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_zul.yaml |generate_until | |afrimgsm_en_cot_amh |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_amh.yaml |generate_until | |afrimgsm_en_cot_eng |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_eng.yaml |generate_until | |afrimgsm_en_cot_ewe |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_ewe.yaml |generate_until | |afrimgsm_en_cot_fra |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_fra.yaml |generate_until | |afrimgsm_en_cot_hau |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_hau.yaml |generate_until | |afrimgsm_en_cot_ibo |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_ibo.yaml |generate_until | |afrimgsm_en_cot_kin |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_kin.yaml |generate_until | |afrimgsm_en_cot_lin |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_lin.yaml |generate_until | 
|afrimgsm_en_cot_lug |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_lug.yaml |generate_until | |afrimgsm_en_cot_orm |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_orm.yaml |generate_until | |afrimgsm_en_cot_sna |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_sna.yaml |generate_until | |afrimgsm_en_cot_sot |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_sot.yaml |generate_until | |afrimgsm_en_cot_swa |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_swa.yaml |generate_until | |afrimgsm_en_cot_twi |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_twi.yaml |generate_until | |afrimgsm_en_cot_wol |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_wol.yaml |generate_until | |afrimgsm_en_cot_xho |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_xho.yaml |generate_until | |afrimgsm_en_cot_yor |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_yor.yaml |generate_until | |afrimgsm_en_cot_zul |lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_zul.yaml |generate_until | |afrimgsm_translate_direct_amh |lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_amh.yaml |generate_until | |afrimgsm_translate_direct_eng |lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_eng.yaml |generate_until | |afrimgsm_translate_direct_ewe |lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_ewe.yaml |generate_until | |afrimgsm_translate_direct_fra |lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_fra.yaml |generate_until | |afrimgsm_translate_direct_hau |lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_hau.yaml |generate_until | |afrimgsm_translate_direct_ibo |lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_ibo.yaml |generate_until | |afrimgsm_translate_direct_kin |lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_kin.yaml |generate_until | |afrimgsm_translate_direct_lin |lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_lin.yaml |generate_until | |afrimgsm_translate_direct_lug |lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_lug.yaml |generate_until | |afrimgsm_translate_direct_orm 
|lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_orm.yaml |generate_until | |afrimgsm_translate_direct_sna |lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_sna.yaml |generate_until | |afrimgsm_translate_direct_sot |lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_sot.yaml |generate_until | |afrimgsm_translate_direct_swa |lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_swa.yaml |generate_until | |afrimgsm_translate_direct_twi |lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_twi.yaml |generate_until | |afrimgsm_translate_direct_wol |lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_wol.yaml |generate_until | |afrimgsm_translate_direct_xho |lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_xho.yaml |generate_until | |afrimgsm_translate_direct_yor |lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_yor.yaml |generate_until | |afrimgsm_translate_direct_zul |lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_zul.yaml |generate_until | |afrimmlu_direct_amh |lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_amh.yaml |multiple_choice | |afrimmlu_direct_eng |lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_eng.yaml |multiple_choice | |afrimmlu_direct_ewe |lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ewe.yaml |multiple_choice | |afrimmlu_direct_fra |lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_fra.yaml |multiple_choice | |afrimmlu_direct_hau |lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_hau.yaml |multiple_choice | |afrimmlu_direct_ibo |lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ibo.yaml |multiple_choice | |afrimmlu_direct_kin |lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_kin.yaml |multiple_choice | |afrimmlu_direct_lin |lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lin.yaml |multiple_choice | |afrimmlu_direct_lug |lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lug.yaml |multiple_choice | |afrimmlu_direct_orm |lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_orm.yaml |multiple_choice | |afrimmlu_direct_sna 
|lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sna.yaml |multiple_choice | |afrimmlu_direct_sot |lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sot.yaml |multiple_choice | |afrimmlu_direct_swa |lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_swa.yaml |multiple_choice | |afrimmlu_direct_twi |lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_twi.yaml |multiple_choice | |afrimmlu_direct_wol |lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_wol.yaml |multiple_choice | |afrimmlu_direct_xho |lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_xho.yaml |multiple_choice | |afrimmlu_direct_yor |lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_yor.yaml |multiple_choice | |afrimmlu_direct_zul |lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_zul.yaml |multiple_choice | |afrimmlu_translate_amh |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_amh.yaml |multiple_choice | |afrimmlu_translate_eng |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_eng.yaml |multiple_choice | |afrimmlu_translate_ewe |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ewe.yaml |multiple_choice | |afrimmlu_translate_fra |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_fra.yaml |multiple_choice | |afrimmlu_translate_hau |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_hau.yaml |multiple_choice | |afrimmlu_translate_ibo |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ibo.yaml |multiple_choice | |afrimmlu_translate_kin |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_kin.yaml |multiple_choice | |afrimmlu_translate_lin |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lin.yaml |multiple_choice | |afrimmlu_translate_lug |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lug.yaml |multiple_choice | |afrimmlu_translate_orm |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_orm.yaml |multiple_choice | |afrimmlu_translate_sna |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sna.yaml |multiple_choice | |afrimmlu_translate_sot |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sot.yaml 
|multiple_choice | |afrimmlu_translate_swa |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_swa.yaml |multiple_choice | |afrimmlu_translate_twi |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_twi.yaml |multiple_choice | |afrimmlu_translate_wol |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_wol.yaml |multiple_choice | |afrimmlu_translate_xho |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_xho.yaml |multiple_choice | |afrimmlu_translate_yor |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_yor.yaml |multiple_choice | |afrimmlu_translate_zul |lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_zul.yaml |multiple_choice | |afrixnli_en_direct_amh |lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_amh.yaml |multiple_choice | |afrixnli_en_direct_eng |lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_eng.yaml |multiple_choice | |afrixnli_en_direct_ewe |lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_ewe.yaml |multiple_choice | |afrixnli_en_direct_fra |lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_fra.yaml |multiple_choice | |afrixnli_en_direct_hau |lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_hau.yaml |multiple_choice | |afrixnli_en_direct_ibo |lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_ibo.yaml |multiple_choice | |afrixnli_en_direct_kin |lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_kin.yaml |multiple_choice | |afrixnli_en_direct_lin |lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_lin.yaml |multiple_choice | |afrixnli_en_direct_lug |lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_lug.yaml |multiple_choice | |afrixnli_en_direct_orm |lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_orm.yaml |multiple_choice | |afrixnli_en_direct_sna |lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_sna.yaml |multiple_choice | |afrixnli_en_direct_sot |lm_eval/tasks/afrixnli/anli 
prompt/en-direct/afrixnli_en_direct_sot.yaml |multiple_choice | |afrixnli_en_direct_swa |lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_swa.yaml |multiple_choice | |afrixnli_en_direct_twi |lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_twi.yaml |multiple_choice | |afrixnli_en_direct_wol |lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_wol.yaml |multiple_choice | |afrixnli_en_direct_xho |lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_xho.yaml |multiple_choice | |afrixnli_en_direct_yor |lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_yor.yaml |multiple_choice | |afrixnli_en_direct_zul |lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_zul.yaml |multiple_choice | |afrixnli_manual_direct_amh |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_amh.yaml |multiple_choice | |afrixnli_manual_direct_eng |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_eng.yaml |multiple_choice | |afrixnli_manual_direct_ewe |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_ewe.yaml |multiple_choice | |afrixnli_manual_direct_fra |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_fra.yaml |multiple_choice | |afrixnli_manual_direct_hau |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_hau.yaml |multiple_choice | |afrixnli_manual_direct_ibo |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_ibo.yaml |multiple_choice | |afrixnli_manual_direct_kin |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_kin.yaml |multiple_choice | |afrixnli_manual_direct_lin |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_lin.yaml |multiple_choice | |afrixnli_manual_direct_lug |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_lug.yaml |multiple_choice | |afrixnli_manual_direct_orm |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_orm.yaml |multiple_choice | 
|afrixnli_manual_direct_sna |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_sna.yaml |multiple_choice | |afrixnli_manual_direct_sot |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_sot.yaml |multiple_choice | |afrixnli_manual_direct_swa |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_swa.yaml |multiple_choice | |afrixnli_manual_direct_twi |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_twi.yaml |multiple_choice | |afrixnli_manual_direct_wol |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_wol.yaml |multiple_choice | |afrixnli_manual_direct_xho |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_xho.yaml |multiple_choice | |afrixnli_manual_direct_yor |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_yor.yaml |multiple_choice | |afrixnli_manual_direct_zul |lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_zul.yaml |multiple_choice | |afrixnli_manual_translate_amh |lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_amh.yaml |multiple_choice | |afrixnli_manual_translate_ewe |lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_ewe.yaml |multiple_choice | |afrixnli_manual_translate_fra |lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_fra.yaml |multiple_choice | |afrixnli_manual_translate_hau |lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_hau.yaml |multiple_choice | |afrixnli_manual_translate_ibo |lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_ibo.yaml |multiple_choice | |afrixnli_manual_translate_kin |lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_kin.yaml |multiple_choice | |afrixnli_manual_translate_lin |lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_lin.yaml |multiple_choice | |afrixnli_manual_translate_lug |lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_lug.yaml 
|multiple_choice | |afrixnli_manual_translate_orm |lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_orm.yaml |multiple_choice | |afrixnli_manual_translate_sna |lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_sna.yaml |multiple_choice | |afrixnli_manual_translate_sot |lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_sot.yaml |multiple_choice | |afrixnli_manual_translate_swa |lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_swa.yaml |multiple_choice | |afrixnli_manual_translate_twi |lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_twi.yaml |multiple_choice | |afrixnli_manual_translate_wol |lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_wol.yaml |multiple_choice | |afrixnli_manual_translate_xho |lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_xho.yaml |multiple_choice | |afrixnli_manual_translate_yor |lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_yor.yaml |multiple_choice | |afrixnli_manual_translate_zul |lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_zul.yaml |multiple_choice | |afrixnli_native_direct_amh |lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_amh.yaml |multiple_choice | |afrixnli_native_direct_eng |lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_eng.yaml |multiple_choice | |afrixnli_native_direct_ewe |lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_ewe.yaml |multiple_choice | |afrixnli_native_direct_fra |lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_fra.yaml |multiple_choice | |afrixnli_native_direct_hau |lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_hau.yaml |multiple_choice | |afrixnli_native_direct_ibo |lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_ibo.yaml |multiple_choice | |afrixnli_native_direct_kin 
|lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_kin.yaml |multiple_choice | |afrixnli_native_direct_lin |lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_lin.yaml |multiple_choice | |afrixnli_native_direct_lug |lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_lug.yaml |multiple_choice | |afrixnli_native_direct_orm |lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_orm.yaml |multiple_choice | |afrixnli_native_direct_sna |lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_sna.yaml |multiple_choice | |afrixnli_native_direct_sot |lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_sot.yaml |multiple_choice | |afrixnli_native_direct_swa |lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_swa.yaml |multiple_choice | |afrixnli_native_direct_twi |lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_twi.yaml |multiple_choice | |afrixnli_native_direct_wol |lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_wol.yaml |multiple_choice | |afrixnli_native_direct_xho |lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_xho.yaml |multiple_choice | |afrixnli_native_direct_yor |lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_yor.yaml |multiple_choice | |afrixnli_native_direct_zul |lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_zul.yaml |multiple_choice | |afrixnli_translate_amh |lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_amh.yaml |multiple_choice | |afrixnli_translate_ewe |lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_ewe.yaml |multiple_choice | |afrixnli_translate_fra |lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_fra.yaml |multiple_choice | |afrixnli_translate_hau |lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_hau.yaml |multiple_choice | 
|afrixnli_translate_ibo |lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_ibo.yaml |multiple_choice | |afrixnli_translate_kin |lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_kin.yaml |multiple_choice | |afrixnli_translate_lin |lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_lin.yaml |multiple_choice | |afrixnli_translate_lug |lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_lug.yaml |multiple_choice | |afrixnli_translate_orm |lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_orm.yaml |multiple_choice | |afrixnli_translate_sna |lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_sna.yaml |multiple_choice | |afrixnli_translate_sot |lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_sot.yaml |multiple_choice | |afrixnli_translate_swa |lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_swa.yaml |multiple_choice | |afrixnli_translate_twi |lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_twi.yaml |multiple_choice | |afrixnli_translate_wol |lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_wol.yaml |multiple_choice | |afrixnli_translate_xho |lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_xho.yaml |multiple_choice | |afrixnli_translate_yor |lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_yor.yaml |multiple_choice | |afrixnli_translate_zul |lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_zul.yaml |multiple_choice | |agieval_aqua_rat |lm_eval/tasks/agieval/aqua-rat.yaml |multiple_choice | |agieval_gaokao_biology |lm_eval/tasks/agieval/gaokao-biology.yaml |multiple_choice | |agieval_gaokao_chemistry |lm_eval/tasks/agieval/gaokao-chemistry.yaml |multiple_choice | |agieval_gaokao_chinese |lm_eval/tasks/agieval/gaokao-chinese.yaml |multiple_choice | |agieval_gaokao_english |lm_eval/tasks/agieval/gaokao-english.yaml |multiple_choice | |agieval_gaokao_geography 
|lm_eval/tasks/agieval/gaokao-geography.yaml |multiple_choice | |agieval_gaokao_history |lm_eval/tasks/agieval/gaokao-history.yaml |multiple_choice | |agieval_gaokao_mathcloze |lm_eval/tasks/agieval/gaokao-mathcloze.yaml |generate_until | |agieval_gaokao_mathqa |lm_eval/tasks/agieval/gaokao-mathqa.yaml |multiple_choice | |agieval_gaokao_physics |lm_eval/tasks/agieval/gaokao-physics.yaml |multiple_choice | |agieval_jec_qa_ca |lm_eval/tasks/agieval/jec-qa-ca.yaml |multiple_choice | |agieval_jec_qa_kd |lm_eval/tasks/agieval/jec-qa-kd.yaml |multiple_choice | |agieval_logiqa_en |lm_eval/tasks/agieval/logiqa-en.yaml |multiple_choice | |agieval_logiqa_zh |lm_eval/tasks/agieval/logiqa-zh.yaml |multiple_choice | |agieval_lsat_ar |lm_eval/tasks/agieval/lsat-ar.yaml |multiple_choice | |agieval_lsat_lr |lm_eval/tasks/agieval/lsat-lr.yaml |multiple_choice | |agieval_lsat_rc |lm_eval/tasks/agieval/lsat-rc.yaml |multiple_choice | |agieval_math |lm_eval/tasks/agieval/math.yaml |generate_until | |agieval_sat_en |lm_eval/tasks/agieval/sat-en.yaml |multiple_choice | |agieval_sat_en_without_passage |lm_eval/tasks/agieval/sat-en-without-passage.yaml |multiple_choice | |agieval_sat_math |lm_eval/tasks/agieval/sat-math.yaml |multiple_choice | |anagrams1 |lm_eval/tasks/unscramble/anagrams1.yaml |generate_until | |anagrams2 |lm_eval/tasks/unscramble/anagrams2.yaml |generate_until | |anli_r1 |lm_eval/tasks/anli/anli_r1.yaml |multiple_choice | |anli_r2 |lm_eval/tasks/anli/anli_r2.yaml |multiple_choice | |anli_r3 |lm_eval/tasks/anli/anli_r3.yaml |multiple_choice | |arabicmmlu_arabic_language_(general) |lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_general.yaml |multiple_choice | |arabicmmlu_arabic_language_(grammar) |lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_grammar.yaml |multiple_choice | |arabicmmlu_driving_test |lm_eval/tasks/arabicmmlu/arabicmmlu_driving_test.yaml |multiple_choice | |arabicmmlu_general_knowledge |lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge.yaml 
|multiple_choice | |arabicmmlu_high_arabic_language |lm_eval/tasks/arabicmmlu/arabicmmlu_high_arabic_language.yaml |multiple_choice | |arabicmmlu_high_biology |lm_eval/tasks/arabicmmlu/arabicmmlu_high_biology.yaml |multiple_choice | |arabicmmlu_high_civics |lm_eval/tasks/arabicmmlu/arabicmmlu_high_civics.yaml |multiple_choice | |arabicmmlu_high_computer_science |lm_eval/tasks/arabicmmlu/arabicmmlu_high_computer_science.yaml |multiple_choice | |arabicmmlu_high_economics |lm_eval/tasks/arabicmmlu/arabicmmlu_high_economics.yaml |multiple_choice | |arabicmmlu_high_geography |lm_eval/tasks/arabicmmlu/arabicmmlu_high_geography.yaml |multiple_choice | |arabicmmlu_high_history |lm_eval/tasks/arabicmmlu/arabicmmlu_high_history.yaml |multiple_choice | |arabicmmlu_high_islamic_studies |lm_eval/tasks/arabicmmlu/arabicmmlu_high_islamic_studies.yaml |multiple_choice | |arabicmmlu_high_philosophy |lm_eval/tasks/arabicmmlu/arabicmmlu_high_philosophy.yaml |multiple_choice | |arabicmmlu_high_physics |lm_eval/tasks/arabicmmlu/arabicmmlu_high_physics.yaml |multiple_choice | |arabicmmlu_islamic_studies |lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies.yaml |multiple_choice | |arabicmmlu_middle_arabic_language |lm_eval/tasks/arabicmmlu/arabicmmlu_middle_arabic_language.yaml |multiple_choice | |arabicmmlu_middle_civics |lm_eval/tasks/arabicmmlu/arabicmmlu_middle_civics.yaml |multiple_choice | |arabicmmlu_middle_computer_science |lm_eval/tasks/arabicmmlu/arabicmmlu_middle_computer_science.yaml |multiple_choice | |arabicmmlu_middle_economics |lm_eval/tasks/arabicmmlu/arabicmmlu_middle_economics.yaml |multiple_choice | |arabicmmlu_middle_general_knowledge |lm_eval/tasks/arabicmmlu/arabicmmlu_middle_general_knowledge.yaml |multiple_choice | |arabicmmlu_middle_geography |lm_eval/tasks/arabicmmlu/arabicmmlu_middle_geography.yaml |multiple_choice | |arabicmmlu_middle_history |lm_eval/tasks/arabicmmlu/arabicmmlu_middle_history.yaml |multiple_choice | |arabicmmlu_middle_islamic_studies 
|lm_eval/tasks/arabicmmlu/arabicmmlu_middle_islamic_studies.yaml |multiple_choice | |arabicmmlu_middle_natural_science |lm_eval/tasks/arabicmmlu/arabicmmlu_middle_natural_science.yaml |multiple_choice | |arabicmmlu_middle_social_science |lm_eval/tasks/arabicmmlu/arabicmmlu_middle_social_science.yaml |multiple_choice | |arabicmmlu_primary_arabic_language |lm_eval/tasks/arabicmmlu/arabicmmlu_primary_arabic_language.yaml |multiple_choice | |arabicmmlu_primary_computer_science |lm_eval/tasks/arabicmmlu/arabicmmlu_primary_computer_science.yaml |multiple_choice | |arabicmmlu_primary_general_knowledge |lm_eval/tasks/arabicmmlu/arabicmmlu_primary_general_knowledge.yaml |multiple_choice | |arabicmmlu_primary_geography |lm_eval/tasks/arabicmmlu/arabicmmlu_primary_geography.yaml |multiple_choice | |arabicmmlu_primary_history |lm_eval/tasks/arabicmmlu/arabicmmlu_primary_history.yaml |multiple_choice | |arabicmmlu_primary_islamic_studies |lm_eval/tasks/arabicmmlu/arabicmmlu_primary_islamic_studies.yaml |multiple_choice | |arabicmmlu_primary_math |lm_eval/tasks/arabicmmlu/arabicmmlu_primary_math.yaml |multiple_choice | |arabicmmlu_primary_natural_science |lm_eval/tasks/arabicmmlu/arabicmmlu_primary_natural_science.yaml |multiple_choice | |arabicmmlu_primary_social_science |lm_eval/tasks/arabicmmlu/arabicmmlu_primary_social_science.yaml |multiple_choice | |arabicmmlu_prof_law |lm_eval/tasks/arabicmmlu/arabicmmlu_prof_law.yaml |multiple_choice | |arabicmmlu_univ_accounting |lm_eval/tasks/arabicmmlu/arabicmmlu_univ_accounting.yaml |multiple_choice | |arabicmmlu_univ_computer_science |lm_eval/tasks/arabicmmlu/arabicmmlu_univ_computer_science.yaml |multiple_choice | |arabicmmlu_univ_economics |lm_eval/tasks/arabicmmlu/arabicmmlu_univ_economics.yaml |multiple_choice | |arabicmmlu_univ_management |lm_eval/tasks/arabicmmlu/arabicmmlu_univ_management.yaml |multiple_choice | |arabicmmlu_univ_political_science |lm_eval/tasks/arabicmmlu/arabicmmlu_univ_political_science.yaml 
|multiple_choice | |arc_ar |lm_eval/tasks/okapi/arc_multilingual/arc_ar.yaml |multiple_choice | |arc_bn |lm_eval/tasks/okapi/arc_multilingual/arc_bn.yaml |multiple_choice | |arc_ca |lm_eval/tasks/okapi/arc_multilingual/arc_ca.yaml |multiple_choice | |arc_challenge |lm_eval/tasks/arc/arc_challenge.yaml |multiple_choice | |arc_challenge_mt_da |lm_eval/tasks/arc_mt/arc_challenge_mt_da.yaml |multiple_choice | |arc_challenge_mt_de |lm_eval/tasks/arc_mt/arc_challenge_mt_de.yaml |multiple_choice | |arc_challenge_mt_el |lm_eval/tasks/arc_mt/arc_challenge_mt_el.yaml |multiple_choice | |arc_challenge_mt_es |lm_eval/tasks/arc_mt/arc_challenge_mt_es.yaml |multiple_choice | |arc_challenge_mt_fi |lm_eval/tasks/arc_mt/arc_challenge_mt_fi.yaml |multiple_choice | |arc_challenge_mt_hu |lm_eval/tasks/arc_mt/arc_challenge_mt_hu.yaml |multiple_choice | |arc_challenge_mt_is |lm_eval/tasks/arc_mt/arc_challenge_mt_is.yaml |multiple_choice | |arc_challenge_mt_it |lm_eval/tasks/arc_mt/arc_challenge_mt_it.yaml |multiple_choice | |arc_challenge_mt_nb |lm_eval/tasks/arc_mt/arc_challenge_mt_nb.yaml |multiple_choice | |arc_challenge_mt_pl |lm_eval/tasks/arc_mt/arc_challenge_mt_pl.yaml |multiple_choice | |arc_challenge_mt_pt |lm_eval/tasks/arc_mt/arc_challenge_mt_pt.yaml |multiple_choice | |arc_challenge_mt_sv |lm_eval/tasks/arc_mt/arc_challenge_mt_sv.yaml |multiple_choice | |arc_da |lm_eval/tasks/okapi/arc_multilingual/arc_da.yaml |multiple_choice | |arc_de |lm_eval/tasks/okapi/arc_multilingual/arc_de.yaml |multiple_choice | |arc_easy |lm_eval/tasks/arc/arc_easy.yaml |multiple_choice | |arc_es |lm_eval/tasks/okapi/arc_multilingual/arc_es.yaml |multiple_choice | |arc_eu |lm_eval/tasks/okapi/arc_multilingual/arc_eu.yaml |multiple_choice | |arc_fr |lm_eval/tasks/okapi/arc_multilingual/arc_fr.yaml |multiple_choice | |arc_gu |lm_eval/tasks/okapi/arc_multilingual/arc_gu.yaml |multiple_choice | |arc_hi |lm_eval/tasks/okapi/arc_multilingual/arc_hi.yaml |multiple_choice | |arc_hr 
|lm_eval/tasks/okapi/arc_multilingual/arc_hr.yaml |multiple_choice | |arc_hu |lm_eval/tasks/okapi/arc_multilingual/arc_hu.yaml |multiple_choice | |arc_hy |lm_eval/tasks/okapi/arc_multilingual/arc_hy.yaml |multiple_choice | |arc_id |lm_eval/tasks/okapi/arc_multilingual/arc_id.yaml |multiple_choice | |arc_it |lm_eval/tasks/okapi/arc_multilingual/arc_it.yaml |multiple_choice | |arc_kn |lm_eval/tasks/okapi/arc_multilingual/arc_kn.yaml |multiple_choice | |arc_ml |lm_eval/tasks/okapi/arc_multilingual/arc_ml.yaml |multiple_choice | |arc_mr |lm_eval/tasks/okapi/arc_multilingual/arc_mr.yaml |multiple_choice | |arc_ne |lm_eval/tasks/okapi/arc_multilingual/arc_ne.yaml |multiple_choice | |arc_nl |lm_eval/tasks/okapi/arc_multilingual/arc_nl.yaml |multiple_choice | |arc_pt |lm_eval/tasks/okapi/arc_multilingual/arc_pt.yaml |multiple_choice | |arc_ro |lm_eval/tasks/okapi/arc_multilingual/arc_ro.yaml |multiple_choice | |arc_ru |lm_eval/tasks/okapi/arc_multilingual/arc_ru.yaml |multiple_choice | |arc_sk |lm_eval/tasks/okapi/arc_multilingual/arc_sk.yaml |multiple_choice | |arc_sr |lm_eval/tasks/okapi/arc_multilingual/arc_sr.yaml |multiple_choice | |arc_sv |lm_eval/tasks/okapi/arc_multilingual/arc_sv.yaml |multiple_choice | |arc_ta |lm_eval/tasks/okapi/arc_multilingual/arc_ta.yaml |multiple_choice | |arc_te |lm_eval/tasks/okapi/arc_multilingual/arc_te.yaml |multiple_choice | |arc_uk |lm_eval/tasks/okapi/arc_multilingual/arc_uk.yaml |multiple_choice | |arc_vi |lm_eval/tasks/okapi/arc_multilingual/arc_vi.yaml |multiple_choice | |arc_zh |lm_eval/tasks/okapi/arc_multilingual/arc_zh.yaml |multiple_choice | |arithmetic_1dc |lm_eval/tasks/arithmetic/arithmetic_1dc.yaml |loglikelihood | |arithmetic_2da |lm_eval/tasks/arithmetic/arithmetic_2da.yaml |loglikelihood | |arithmetic_2dm |lm_eval/tasks/arithmetic/arithmetic_2dm.yaml |loglikelihood | |arithmetic_2ds |lm_eval/tasks/arithmetic/arithmetic_2ds.yaml |loglikelihood | |arithmetic_3da |lm_eval/tasks/arithmetic/arithmetic_3da.yaml 
|loglikelihood | |arithmetic_3ds |lm_eval/tasks/arithmetic/arithmetic_3ds.yaml |loglikelihood | |arithmetic_4da |lm_eval/tasks/arithmetic/arithmetic_4da.yaml |loglikelihood | |arithmetic_4ds |lm_eval/tasks/arithmetic/arithmetic_4ds.yaml |loglikelihood | |arithmetic_5da |lm_eval/tasks/arithmetic/arithmetic_5da.yaml |loglikelihood | |arithmetic_5ds |lm_eval/tasks/arithmetic/arithmetic_5ds.yaml |loglikelihood | |asdiv |lm_eval/tasks/asdiv/default.yaml |loglikelihood | |babi |lm_eval/tasks/babi/babi.yaml |generate_until | |bbh_cot_fewshot_boolean_expressions |lm_eval/tasks/bbh/cot_fewshot/boolean_expressions.yaml |generate_until | |bbh_cot_fewshot_causal_judgement |lm_eval/tasks/bbh/cot_fewshot/causal_judgement.yaml |generate_until | |bbh_cot_fewshot_date_understanding |lm_eval/tasks/bbh/cot_fewshot/date_understanding.yaml |generate_until | |bbh_cot_fewshot_disambiguation_qa |lm_eval/tasks/bbh/cot_fewshot/disambiguation_qa.yaml |generate_until | |bbh_cot_fewshot_dyck_languages |lm_eval/tasks/bbh/cot_fewshot/dyck_languages.yaml |generate_until | |bbh_cot_fewshot_formal_fallacies |lm_eval/tasks/bbh/cot_fewshot/formal_fallacies.yaml |generate_until | |bbh_cot_fewshot_geometric_shapes |lm_eval/tasks/bbh/cot_fewshot/geometric_shapes.yaml |generate_until | |bbh_cot_fewshot_hyperbaton |lm_eval/tasks/bbh/cot_fewshot/hyperbaton.yaml |generate_until | |bbh_cot_fewshot_logical_deduction_five_objects |lm_eval/tasks/bbh/cot_fewshot/logical_deduction_five_objects.yaml |generate_until | |bbh_cot_fewshot_logical_deduction_seven_objects |lm_eval/tasks/bbh/cot_fewshot/logical_deduction_seven_objects.yaml |generate_until | |bbh_cot_fewshot_logical_deduction_three_objects |lm_eval/tasks/bbh/cot_fewshot/logical_deduction_three_objects.yaml |generate_until | |bbh_cot_fewshot_movie_recommendation |lm_eval/tasks/bbh/cot_fewshot/movie_recommendation.yaml |generate_until | |bbh_cot_fewshot_multistep_arithmetic_two |lm_eval/tasks/bbh/cot_fewshot/multistep_arithmetic_two.yaml |generate_until | 
|bbh_cot_fewshot_navigate |lm_eval/tasks/bbh/cot_fewshot/navigate.yaml |generate_until | |bbh_cot_fewshot_object_counting |lm_eval/tasks/bbh/cot_fewshot/object_counting.yaml |generate_until | |bbh_cot_fewshot_penguins_in_a_table |lm_eval/tasks/bbh/cot_fewshot/penguins_in_a_table.yaml |generate_until | |bbh_cot_fewshot_reasoning_about_colored_objects |lm_eval/tasks/bbh/cot_fewshot/reasoning_about_colored_objects.yaml |generate_until | |bbh_cot_fewshot_ruin_names |lm_eval/tasks/bbh/cot_fewshot/ruin_names.yaml |generate_until | |bbh_cot_fewshot_salient_translation_error_detection |lm_eval/tasks/bbh/cot_fewshot/salient_translation_error_detection.yaml |generate_until | |bbh_cot_fewshot_snarks |lm_eval/tasks/bbh/cot_fewshot/snarks.yaml |generate_until | |bbh_cot_fewshot_sports_understanding |lm_eval/tasks/bbh/cot_fewshot/sports_understanding.yaml |generate_until | |bbh_cot_fewshot_temporal_sequences |lm_eval/tasks/bbh/cot_fewshot/temporal_sequences.yaml |generate_until | |bbh_cot_fewshot_tracking_shuffled_objects_five_objects |lm_eval/tasks/bbh/cot_fewshot/tracking_shuffled_objects_five_objects.yaml |generate_until | |bbh_cot_fewshot_tracking_shuffled_objects_seven_objects |lm_eval/tasks/bbh/cot_fewshot/tracking_shuffled_objects_seven_objects.yaml |generate_until | |bbh_cot_fewshot_tracking_shuffled_objects_three_objects |lm_eval/tasks/bbh/cot_fewshot/tracking_shuffled_objects_three_objects.yaml |generate_until | |bbh_cot_fewshot_web_of_lies |lm_eval/tasks/bbh/cot_fewshot/web_of_lies.yaml |generate_until | |bbh_cot_fewshot_word_sorting |lm_eval/tasks/bbh/cot_fewshot/word_sorting.yaml |generate_until | |bbh_cot_zeroshot_boolean_expressions |lm_eval/tasks/bbh/cot_zeroshot/boolean_expressions.yaml |generate_until | |bbh_cot_zeroshot_causal_judgement |lm_eval/tasks/bbh/cot_zeroshot/causal_judgement.yaml |generate_until | |bbh_cot_zeroshot_date_understanding |lm_eval/tasks/bbh/cot_zeroshot/date_understanding.yaml |generate_until | |bbh_cot_zeroshot_disambiguation_qa 
|lm_eval/tasks/bbh/cot_zeroshot/disambiguation_qa.yaml |generate_until | |bbh_cot_zeroshot_dyck_languages |lm_eval/tasks/bbh/cot_zeroshot/dyck_languages.yaml |generate_until | |bbh_cot_zeroshot_formal_fallacies |lm_eval/tasks/bbh/cot_zeroshot/formal_fallacies.yaml |generate_until | |bbh_cot_zeroshot_geometric_shapes |lm_eval/tasks/bbh/cot_zeroshot/geometric_shapes.yaml |generate_until | |bbh_cot_zeroshot_hyperbaton |lm_eval/tasks/bbh/cot_zeroshot/hyperbaton.yaml |generate_until | |bbh_cot_zeroshot_logical_deduction_five_objects |lm_eval/tasks/bbh/cot_zeroshot/logical_deduction_five_objects.yaml |generate_until | |bbh_cot_zeroshot_logical_deduction_seven_objects |lm_eval/tasks/bbh/cot_zeroshot/logical_deduction_seven_objects.yaml |generate_until | |bbh_cot_zeroshot_logical_deduction_three_objects |lm_eval/tasks/bbh/cot_zeroshot/logical_deduction_three_objects.yaml |generate_until | |bbh_cot_zeroshot_movie_recommendation |lm_eval/tasks/bbh/cot_zeroshot/movie_recommendation.yaml |generate_until | |bbh_cot_zeroshot_multistep_arithmetic_two |lm_eval/tasks/bbh/cot_zeroshot/multistep_arithmetic_two.yaml |generate_until | |bbh_cot_zeroshot_navigate |lm_eval/tasks/bbh/cot_zeroshot/navigate.yaml |generate_until | |bbh_cot_zeroshot_object_counting |lm_eval/tasks/bbh/cot_zeroshot/object_counting.yaml |generate_until | |bbh_cot_zeroshot_penguins_in_a_table |lm_eval/tasks/bbh/cot_zeroshot/penguins_in_a_table.yaml |generate_until | |bbh_cot_zeroshot_reasoning_about_colored_objects |lm_eval/tasks/bbh/cot_zeroshot/reasoning_about_colored_objects.yaml |generate_until | |bbh_cot_zeroshot_ruin_names |lm_eval/tasks/bbh/cot_zeroshot/ruin_names.yaml |generate_until | |bbh_cot_zeroshot_salient_translation_error_detection |lm_eval/tasks/bbh/cot_zeroshot/salient_translation_error_detection.yaml |generate_until | |bbh_cot_zeroshot_snarks |lm_eval/tasks/bbh/cot_zeroshot/snarks.yaml |generate_until | |bbh_cot_zeroshot_sports_understanding 
|lm_eval/tasks/bbh/cot_zeroshot/sports_understanding.yaml |generate_until | |bbh_cot_zeroshot_temporal_sequences |lm_eval/tasks/bbh/cot_zeroshot/temporal_sequences.yaml |generate_until | |bbh_cot_zeroshot_tracking_shuffled_objects_five_objects |lm_eval/tasks/bbh/cot_zeroshot/tracking_shuffled_objects_five_objects.yaml |generate_until | |bbh_cot_zeroshot_tracking_shuffled_objects_seven_objects |lm_eval/tasks/bbh/cot_zeroshot/tracking_shuffled_objects_seven_objects.yaml |generate_until | |bbh_cot_zeroshot_tracking_shuffled_objects_three_objects |lm_eval/tasks/bbh/cot_zeroshot/tracking_shuffled_objects_three_objects.yaml |generate_until | |bbh_cot_zeroshot_web_of_lies |lm_eval/tasks/bbh/cot_zeroshot/web_of_lies.yaml |generate_until | |bbh_cot_zeroshot_word_sorting |lm_eval/tasks/bbh/cot_zeroshot/word_sorting.yaml |generate_until | |bbh_fewshot_boolean_expressions |lm_eval/tasks/bbh/fewshot/boolean_expressions.yaml |generate_until | |bbh_fewshot_causal_judgement |lm_eval/tasks/bbh/fewshot/causal_judgement.yaml |generate_until | |bbh_fewshot_date_understanding |lm_eval/tasks/bbh/fewshot/date_understanding.yaml |generate_until | |bbh_fewshot_disambiguation_qa |lm_eval/tasks/bbh/fewshot/disambiguation_qa.yaml |generate_until | |bbh_fewshot_dyck_languages |lm_eval/tasks/bbh/fewshot/dyck_languages.yaml |generate_until | |bbh_fewshot_formal_fallacies |lm_eval/tasks/bbh/fewshot/formal_fallacies.yaml |generate_until | |bbh_fewshot_geometric_shapes |lm_eval/tasks/bbh/fewshot/geometric_shapes.yaml |generate_until | |bbh_fewshot_hyperbaton |lm_eval/tasks/bbh/fewshot/hyperbaton.yaml |generate_until | |bbh_fewshot_logical_deduction_five_objects |lm_eval/tasks/bbh/fewshot/logical_deduction_five_objects.yaml |generate_until | |bbh_fewshot_logical_deduction_seven_objects |lm_eval/tasks/bbh/fewshot/logical_deduction_seven_objects.yaml |generate_until | |bbh_fewshot_logical_deduction_three_objects |lm_eval/tasks/bbh/fewshot/logical_deduction_three_objects.yaml |generate_until | 
|bbh_fewshot_movie_recommendation |lm_eval/tasks/bbh/fewshot/movie_recommendation.yaml |generate_until | |bbh_fewshot_multistep_arithmetic_two |lm_eval/tasks/bbh/fewshot/multistep_arithmetic_two.yaml |generate_until | |bbh_fewshot_navigate |lm_eval/tasks/bbh/fewshot/navigate.yaml |generate_until | |bbh_fewshot_object_counting |lm_eval/tasks/bbh/fewshot/object_counting.yaml |generate_until | |bbh_fewshot_penguins_in_a_table |lm_eval/tasks/bbh/fewshot/penguins_in_a_table.yaml |generate_until | |bbh_fewshot_reasoning_about_colored_objects |lm_eval/tasks/bbh/fewshot/reasoning_about_colored_objects.yaml |generate_until | |bbh_fewshot_ruin_names |lm_eval/tasks/bbh/fewshot/ruin_names.yaml |generate_until | |bbh_fewshot_salient_translation_error_detection |lm_eval/tasks/bbh/fewshot/salient_translation_error_detection.yaml |generate_until | |bbh_fewshot_snarks |lm_eval/tasks/bbh/fewshot/snarks.yaml |generate_until | |bbh_fewshot_sports_understanding |lm_eval/tasks/bbh/fewshot/sports_understanding.yaml |generate_until | |bbh_fewshot_temporal_sequences |lm_eval/tasks/bbh/fewshot/temporal_sequences.yaml |generate_until | |bbh_fewshot_tracking_shuffled_objects_five_objects |lm_eval/tasks/bbh/fewshot/tracking_shuffled_objects_five_objects.yaml |generate_until | |bbh_fewshot_tracking_shuffled_objects_seven_objects |lm_eval/tasks/bbh/fewshot/tracking_shuffled_objects_seven_objects.yaml |generate_until | |bbh_fewshot_tracking_shuffled_objects_three_objects |lm_eval/tasks/bbh/fewshot/tracking_shuffled_objects_three_objects.yaml |generate_until | |bbh_fewshot_web_of_lies |lm_eval/tasks/bbh/fewshot/web_of_lies.yaml |generate_until | |bbh_fewshot_word_sorting |lm_eval/tasks/bbh/fewshot/word_sorting.yaml |generate_until | |bbh_zeroshot_boolean_expressions |lm_eval/tasks/bbh/zeroshot/boolean_expressions.yaml |generate_until | |bbh_zeroshot_causal_judgement |lm_eval/tasks/bbh/zeroshot/causal_judgement.yaml |generate_until | |bbh_zeroshot_date_understanding 
|lm_eval/tasks/bbh/zeroshot/date_understanding.yaml |generate_until | |bbh_zeroshot_disambiguation_qa |lm_eval/tasks/bbh/zeroshot/disambiguation_qa.yaml |generate_until | |bbh_zeroshot_dyck_languages |lm_eval/tasks/bbh/zeroshot/dyck_languages.yaml |generate_until | |bbh_zeroshot_formal_fallacies |lm_eval/tasks/bbh/zeroshot/formal_fallacies.yaml |generate_until | |bbh_zeroshot_geometric_shapes |lm_eval/tasks/bbh/zeroshot/geometric_shapes.yaml |generate_until | |bbh_zeroshot_hyperbaton |lm_eval/tasks/bbh/zeroshot/hyperbaton.yaml |generate_until | |bbh_zeroshot_logical_deduction_five_objects |lm_eval/tasks/bbh/zeroshot/logical_deduction_five_objects.yaml |generate_until | |bbh_zeroshot_logical_deduction_seven_objects |lm_eval/tasks/bbh/zeroshot/logical_deduction_seven_objects.yaml |generate_until | |bbh_zeroshot_logical_deduction_three_objects |lm_eval/tasks/bbh/zeroshot/logical_deduction_three_objects.yaml |generate_until | |bbh_zeroshot_movie_recommendation |lm_eval/tasks/bbh/zeroshot/movie_recommendation.yaml |generate_until | |bbh_zeroshot_multistep_arithmetic_two |lm_eval/tasks/bbh/zeroshot/multistep_arithmetic_two.yaml |generate_until | |bbh_zeroshot_navigate |lm_eval/tasks/bbh/zeroshot/navigate.yaml |generate_until | |bbh_zeroshot_object_counting |lm_eval/tasks/bbh/zeroshot/object_counting.yaml |generate_until | |bbh_zeroshot_penguins_in_a_table |lm_eval/tasks/bbh/zeroshot/penguins_in_a_table.yaml |generate_until | |bbh_zeroshot_reasoning_about_colored_objects |lm_eval/tasks/bbh/zeroshot/reasoning_about_colored_objects.yaml |generate_until | |bbh_zeroshot_ruin_names |lm_eval/tasks/bbh/zeroshot/ruin_names.yaml |generate_until | |bbh_zeroshot_salient_translation_error_detection |lm_eval/tasks/bbh/zeroshot/salient_translation_error_detection.yaml |generate_until | |bbh_zeroshot_snarks |lm_eval/tasks/bbh/zeroshot/snarks.yaml |generate_until | |bbh_zeroshot_sports_understanding |lm_eval/tasks/bbh/zeroshot/sports_understanding.yaml |generate_until | 
|bbh_zeroshot_temporal_sequences |lm_eval/tasks/bbh/zeroshot/temporal_sequences.yaml |generate_until | |bbh_zeroshot_tracking_shuffled_objects_five_objects |lm_eval/tasks/bbh/zeroshot/tracking_shuffled_objects_five_objects.yaml |generate_until | |bbh_zeroshot_tracking_shuffled_objects_seven_objects |lm_eval/tasks/bbh/zeroshot/tracking_shuffled_objects_seven_objects.yaml |generate_until | |bbh_zeroshot_tracking_shuffled_objects_three_objects |lm_eval/tasks/bbh/zeroshot/tracking_shuffled_objects_three_objects.yaml |generate_until | |bbh_zeroshot_web_of_lies |lm_eval/tasks/bbh/zeroshot/web_of_lies.yaml |generate_until | |bbh_zeroshot_word_sorting |lm_eval/tasks/bbh/zeroshot/word_sorting.yaml |generate_until | |bec2016eu |lm_eval/tasks/basqueglue/bec.yaml |multiple_choice | |belebele_acm_Arab |lm_eval/tasks/belebele/belebele_acm_Arab.yaml |multiple_choice | |belebele_afr_Latn |lm_eval/tasks/belebele/belebele_afr_Latn.yaml |multiple_choice | |belebele_als_Latn |lm_eval/tasks/belebele/belebele_als_Latn.yaml |multiple_choice | |belebele_amh_Ethi |lm_eval/tasks/belebele/belebele_amh_Ethi.yaml |multiple_choice | |belebele_apc_Arab |lm_eval/tasks/belebele/belebele_apc_Arab.yaml |multiple_choice | |belebele_arb_Arab |lm_eval/tasks/belebele/belebele_arb_Arab.yaml |multiple_choice | |belebele_arb_Latn |lm_eval/tasks/belebele/belebele_arb_Latn.yaml |multiple_choice | |belebele_ars_Arab |lm_eval/tasks/belebele/belebele_ars_Arab.yaml |multiple_choice | |belebele_ary_Arab |lm_eval/tasks/belebele/belebele_ary_Arab.yaml |multiple_choice | |belebele_arz_Arab |lm_eval/tasks/belebele/belebele_arz_Arab.yaml |multiple_choice | |belebele_asm_Beng |lm_eval/tasks/belebele/belebele_asm_Beng.yaml |multiple_choice | |belebele_azj_Latn |lm_eval/tasks/belebele/belebele_azj_Latn.yaml |multiple_choice | |belebele_bam_Latn |lm_eval/tasks/belebele/belebele_bam_Latn.yaml |multiple_choice | |belebele_ben_Beng |lm_eval/tasks/belebele/belebele_ben_Beng.yaml |multiple_choice | |belebele_ben_Latn 
|lm_eval/tasks/belebele/belebele_ben_Latn.yaml |multiple_choice | |belebele_bod_Tibt |lm_eval/tasks/belebele/belebele_bod_Tibt.yaml |multiple_choice | |belebele_bul_Cyrl |lm_eval/tasks/belebele/belebele_bul_Cyrl.yaml |multiple_choice | |belebele_cat_Latn |lm_eval/tasks/belebele/belebele_cat_Latn.yaml |multiple_choice | |belebele_ceb_Latn |lm_eval/tasks/belebele/belebele_ceb_Latn.yaml |multiple_choice | |belebele_ces_Latn |lm_eval/tasks/belebele/belebele_ces_Latn.yaml |multiple_choice | |belebele_ckb_Arab |lm_eval/tasks/belebele/belebele_ckb_Arab.yaml |multiple_choice | |belebele_dan_Latn |lm_eval/tasks/belebele/belebele_dan_Latn.yaml |multiple_choice | |belebele_deu_Latn |lm_eval/tasks/belebele/belebele_deu_Latn.yaml |multiple_choice | |belebele_ell_Grek |lm_eval/tasks/belebele/belebele_ell_Grek.yaml |multiple_choice | |belebele_eng_Latn |lm_eval/tasks/belebele/belebele_eng_Latn.yaml |multiple_choice | |belebele_est_Latn |lm_eval/tasks/belebele/belebele_est_Latn.yaml |multiple_choice | |belebele_eus_Latn |lm_eval/tasks/belebele/belebele_eus_Latn.yaml |multiple_choice | |belebele_fin_Latn |lm_eval/tasks/belebele/belebele_fin_Latn.yaml |multiple_choice | |belebele_fra_Latn |lm_eval/tasks/belebele/belebele_fra_Latn.yaml |multiple_choice | |belebele_fuv_Latn |lm_eval/tasks/belebele/belebele_fuv_Latn.yaml |multiple_choice | |belebele_gaz_Latn |lm_eval/tasks/belebele/belebele_gaz_Latn.yaml |multiple_choice | |belebele_grn_Latn |lm_eval/tasks/belebele/belebele_grn_Latn.yaml |multiple_choice | |belebele_guj_Gujr |lm_eval/tasks/belebele/belebele_guj_Gujr.yaml |multiple_choice | |belebele_hat_Latn |lm_eval/tasks/belebele/belebele_hat_Latn.yaml |multiple_choice | |belebele_hau_Latn |lm_eval/tasks/belebele/belebele_hau_Latn.yaml |multiple_choice | |belebele_heb_Hebr |lm_eval/tasks/belebele/belebele_heb_Hebr.yaml |multiple_choice | |belebele_hin_Deva |lm_eval/tasks/belebele/belebele_hin_Deva.yaml |multiple_choice | |belebele_hin_Latn 
|lm_eval/tasks/belebele/belebele_hin_Latn.yaml |multiple_choice | |belebele_hrv_Latn |lm_eval/tasks/belebele/belebele_hrv_Latn.yaml |multiple_choice | |belebele_hun_Latn |lm_eval/tasks/belebele/belebele_hun_Latn.yaml |multiple_choice | |belebele_hye_Armn |lm_eval/tasks/belebele/belebele_hye_Armn.yaml |multiple_choice | |belebele_ibo_Latn |lm_eval/tasks/belebele/belebele_ibo_Latn.yaml |multiple_choice | |belebele_ilo_Latn |lm_eval/tasks/belebele/belebele_ilo_Latn.yaml |multiple_choice | |belebele_ind_Latn |lm_eval/tasks/belebele/belebele_ind_Latn.yaml |multiple_choice | |belebele_isl_Latn |lm_eval/tasks/belebele/belebele_isl_Latn.yaml |multiple_choice | |belebele_ita_Latn |lm_eval/tasks/belebele/belebele_ita_Latn.yaml |multiple_choice | |belebele_jav_Latn |lm_eval/tasks/belebele/belebele_jav_Latn.yaml |multiple_choice | |belebele_jpn_Jpan |lm_eval/tasks/belebele/belebele_jpn_Jpan.yaml |multiple_choice | |belebele_kac_Latn |lm_eval/tasks/belebele/belebele_kac_Latn.yaml |multiple_choice | |belebele_kan_Knda |lm_eval/tasks/belebele/belebele_kan_Knda.yaml |multiple_choice | |belebele_kat_Geor |lm_eval/tasks/belebele/belebele_kat_Geor.yaml |multiple_choice | |belebele_kaz_Cyrl |lm_eval/tasks/belebele/belebele_kaz_Cyrl.yaml |multiple_choice | |belebele_kea_Latn |lm_eval/tasks/belebele/belebele_kea_Latn.yaml |multiple_choice | |belebele_khk_Cyrl |lm_eval/tasks/belebele/belebele_khk_Cyrl.yaml |multiple_choice | |belebele_khm_Khmr |lm_eval/tasks/belebele/belebele_khm_Khmr.yaml |multiple_choice | |belebele_kin_Latn |lm_eval/tasks/belebele/belebele_kin_Latn.yaml |multiple_choice | |belebele_kir_Cyrl |lm_eval/tasks/belebele/belebele_kir_Cyrl.yaml |multiple_choice | |belebele_kor_Hang |lm_eval/tasks/belebele/belebele_kor_Hang.yaml |multiple_choice | |belebele_lao_Laoo |lm_eval/tasks/belebele/belebele_lao_Laoo.yaml |multiple_choice | |belebele_lin_Latn |lm_eval/tasks/belebele/belebele_lin_Latn.yaml |multiple_choice | |belebele_lit_Latn 
|lm_eval/tasks/belebele/belebele_lit_Latn.yaml |multiple_choice | |belebele_lug_Latn |lm_eval/tasks/belebele/belebele_lug_Latn.yaml |multiple_choice | |belebele_luo_Latn |lm_eval/tasks/belebele/belebele_luo_Latn.yaml |multiple_choice | |belebele_lvs_Latn |lm_eval/tasks/belebele/belebele_lvs_Latn.yaml |multiple_choice | |belebele_mal_Mlym |lm_eval/tasks/belebele/belebele_mal_Mlym.yaml |multiple_choice | |belebele_mar_Deva |lm_eval/tasks/belebele/belebele_mar_Deva.yaml |multiple_choice | |belebele_mkd_Cyrl |lm_eval/tasks/belebele/belebele_mkd_Cyrl.yaml |multiple_choice | |belebele_mlt_Latn |lm_eval/tasks/belebele/belebele_mlt_Latn.yaml |multiple_choice | |belebele_mri_Latn |lm_eval/tasks/belebele/belebele_mri_Latn.yaml |multiple_choice | |belebele_mya_Mymr |lm_eval/tasks/belebele/belebele_mya_Mymr.yaml |multiple_choice | |belebele_nld_Latn |lm_eval/tasks/belebele/belebele_nld_Latn.yaml |multiple_choice | |belebele_nob_Latn |lm_eval/tasks/belebele/belebele_nob_Latn.yaml |multiple_choice | |belebele_npi_Deva |lm_eval/tasks/belebele/belebele_npi_Deva.yaml |multiple_choice | |belebele_npi_Latn |lm_eval/tasks/belebele/belebele_npi_Latn.yaml |multiple_choice | |belebele_nso_Latn |lm_eval/tasks/belebele/belebele_nso_Latn.yaml |multiple_choice | |belebele_nya_Latn |lm_eval/tasks/belebele/belebele_nya_Latn.yaml |multiple_choice | |belebele_ory_Orya |lm_eval/tasks/belebele/belebele_ory_Orya.yaml |multiple_choice | |belebele_pan_Guru |lm_eval/tasks/belebele/belebele_pan_Guru.yaml |multiple_choice | |belebele_pbt_Arab |lm_eval/tasks/belebele/belebele_pbt_Arab.yaml |multiple_choice | |belebele_pes_Arab |lm_eval/tasks/belebele/belebele_pes_Arab.yaml |multiple_choice | |belebele_plt_Latn |lm_eval/tasks/belebele/belebele_plt_Latn.yaml |multiple_choice | |belebele_pol_Latn |lm_eval/tasks/belebele/belebele_pol_Latn.yaml |multiple_choice | |belebele_por_Latn |lm_eval/tasks/belebele/belebele_por_Latn.yaml |multiple_choice | |belebele_ron_Latn 
|lm_eval/tasks/belebele/belebele_ron_Latn.yaml |multiple_choice | |belebele_rus_Cyrl |lm_eval/tasks/belebele/belebele_rus_Cyrl.yaml |multiple_choice | |belebele_shn_Mymr |lm_eval/tasks/belebele/belebele_shn_Mymr.yaml |multiple_choice | |belebele_sin_Latn |lm_eval/tasks/belebele/belebele_sin_Latn.yaml |multiple_choice | |belebele_sin_Sinh |lm_eval/tasks/belebele/belebele_sin_Sinh.yaml |multiple_choice | |belebele_slk_Latn |lm_eval/tasks/belebele/belebele_slk_Latn.yaml |multiple_choice | |belebele_slv_Latn |lm_eval/tasks/belebele/belebele_slv_Latn.yaml |multiple_choice | |belebele_sna_Latn |lm_eval/tasks/belebele/belebele_sna_Latn.yaml |multiple_choice | |belebele_snd_Arab |lm_eval/tasks/belebele/belebele_snd_Arab.yaml |multiple_choice | |belebele_som_Latn |lm_eval/tasks/belebele/belebele_som_Latn.yaml |multiple_choice | |belebele_sot_Latn |lm_eval/tasks/belebele/belebele_sot_Latn.yaml |multiple_choice | |belebele_spa_Latn |lm_eval/tasks/belebele/belebele_spa_Latn.yaml |multiple_choice | |belebele_srp_Cyrl |lm_eval/tasks/belebele/belebele_srp_Cyrl.yaml |multiple_choice | |belebele_ssw_Latn |lm_eval/tasks/belebele/belebele_ssw_Latn.yaml |multiple_choice | |belebele_sun_Latn |lm_eval/tasks/belebele/belebele_sun_Latn.yaml |multiple_choice | |belebele_swe_Latn |lm_eval/tasks/belebele/belebele_swe_Latn.yaml |multiple_choice | |belebele_swh_Latn |lm_eval/tasks/belebele/belebele_swh_Latn.yaml |multiple_choice | |belebele_tam_Taml |lm_eval/tasks/belebele/belebele_tam_Taml.yaml |multiple_choice | |belebele_tel_Telu |lm_eval/tasks/belebele/belebele_tel_Telu.yaml |multiple_choice | |belebele_tgk_Cyrl |lm_eval/tasks/belebele/belebele_tgk_Cyrl.yaml |multiple_choice | |belebele_tgl_Latn |lm_eval/tasks/belebele/belebele_tgl_Latn.yaml |multiple_choice | |belebele_tha_Thai |lm_eval/tasks/belebele/belebele_tha_Thai.yaml |multiple_choice | |belebele_tir_Ethi |lm_eval/tasks/belebele/belebele_tir_Ethi.yaml |multiple_choice | |belebele_tsn_Latn 
|lm_eval/tasks/belebele/belebele_tsn_Latn.yaml |multiple_choice | |belebele_tso_Latn |lm_eval/tasks/belebele/belebele_tso_Latn.yaml |multiple_choice | |belebele_tur_Latn |lm_eval/tasks/belebele/belebele_tur_Latn.yaml |multiple_choice | |belebele_ukr_Cyrl |lm_eval/tasks/belebele/belebele_ukr_Cyrl.yaml |multiple_choice | |belebele_urd_Arab |lm_eval/tasks/belebele/belebele_urd_Arab.yaml |multiple_choice | |belebele_urd_Latn |lm_eval/tasks/belebele/belebele_urd_Latn.yaml |multiple_choice | |belebele_uzn_Latn |lm_eval/tasks/belebele/belebele_uzn_Latn.yaml |multiple_choice | |belebele_vie_Latn |lm_eval/tasks/belebele/belebele_vie_Latn.yaml |multiple_choice | |belebele_war_Latn |lm_eval/tasks/belebele/belebele_war_Latn.yaml |multiple_choice | |belebele_wol_Latn |lm_eval/tasks/belebele/belebele_wol_Latn.yaml |multiple_choice | |belebele_xho_Latn |lm_eval/tasks/belebele/belebele_xho_Latn.yaml |multiple_choice | |belebele_yor_Latn |lm_eval/tasks/belebele/belebele_yor_Latn.yaml |multiple_choice | |belebele_zho_Hans |lm_eval/tasks/belebele/belebele_zho_Hans.yaml |multiple_choice | |belebele_zho_Hant |lm_eval/tasks/belebele/belebele_zho_Hant.yaml |multiple_choice | |belebele_zsm_Latn |lm_eval/tasks/belebele/belebele_zsm_Latn.yaml |multiple_choice | |belebele_zul_Latn |lm_eval/tasks/belebele/belebele_zul_Latn.yaml |multiple_choice | |bertaqa_en |lm_eval/tasks/bertaqa/bertaqa_en.yaml |multiple_choice | |bertaqa_en_mt_gemma-7b |lm_eval/tasks/bertaqa/bertaqa_en_mt_gemma-7b.yaml |multiple_choice | |bertaqa_en_mt_hitz |lm_eval/tasks/bertaqa/bertaqa_en_mt_hitz.yaml |multiple_choice | |bertaqa_en_mt_itzuli |lm_eval/tasks/bertaqa/bertaqa_en_mt_itzuli.yaml |multiple_choice | |bertaqa_en_mt_latxa-13b-v1 |lm_eval/tasks/bertaqa/bertaqa_en_mt_latxa-13b-v1.yaml |multiple_choice | |bertaqa_en_mt_latxa-13b-v1.1 |lm_eval/tasks/bertaqa/bertaqa_en_mt_latxa-13b-v1.1.yaml |multiple_choice | |bertaqa_en_mt_latxa-70b-v1 |lm_eval/tasks/bertaqa/bertaqa_en_mt_latxa-70b-v1.yaml |multiple_choice | 
|bertaqa_en_mt_latxa-70b-v1.1 |lm_eval/tasks/bertaqa/bertaqa_en_mt_latxa-70b-v1.1.yaml |multiple_choice | |bertaqa_en_mt_latxa-7b-v1 |lm_eval/tasks/bertaqa/bertaqa_en_mt_latxa-7b-v1.yaml |multiple_choice | |bertaqa_en_mt_latxa-7b-v1.1 |lm_eval/tasks/bertaqa/bertaqa_en_mt_latxa-7b-v1.1.yaml |multiple_choice | |bertaqa_en_mt_llama-2-13b |lm_eval/tasks/bertaqa/bertaqa_en_mt_llama-2-13b.yaml |multiple_choice | |bertaqa_en_mt_llama-2-70b |lm_eval/tasks/bertaqa/bertaqa_en_mt_llama-2-70b.yaml |multiple_choice | |bertaqa_en_mt_llama-2-7b |lm_eval/tasks/bertaqa/bertaqa_en_mt_llama-2-7b.yaml |multiple_choice | |bertaqa_en_mt_madlad |lm_eval/tasks/bertaqa/bertaqa_en_mt_madlad.yaml |multiple_choice | |bertaqa_en_mt_nllb |lm_eval/tasks/bertaqa/bertaqa_en_mt_nllb.yaml |multiple_choice | |bertaqa_eu |lm_eval/tasks/bertaqa/bertaqa_eu.yaml |multiple_choice | |bhtc_v2 |lm_eval/tasks/basqueglue/bhtc.yaml |multiple_choice | |bigbench_abstract_narrative_understanding_generate_until |lm_eval/tasks/bigbench/generate_until/abstract_narrative_understanding.yaml |generate_until | |bigbench_abstract_narrative_understanding_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/abstract_narrative_understanding.yaml |multiple_choice | |bigbench_anachronisms_generate_until |lm_eval/tasks/bigbench/generate_until/anachronisms.yaml |generate_until | |bigbench_anachronisms_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml |multiple_choice | |bigbench_analogical_similarity_generate_until |lm_eval/tasks/bigbench/generate_until/analogical_similarity.yaml |generate_until | |bigbench_analogical_similarity_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/analogical_similarity.yaml |multiple_choice | |bigbench_analytic_entailment_generate_until |lm_eval/tasks/bigbench/generate_until/analytic_entailment.yaml |generate_until | |bigbench_analytic_entailment_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/analytic_entailment.yaml |multiple_choice | 
|bigbench_arithmetic_generate_until |lm_eval/tasks/bigbench/generate_until/arithmetic.yaml |generate_until | |bigbench_arithmetic_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml |multiple_choice | |bigbench_ascii_word_recognition_generate_until |lm_eval/tasks/bigbench/generate_until/ascii_word_recognition.yaml |generate_until | |bigbench_authorship_verification_generate_until |lm_eval/tasks/bigbench/generate_until/authorship_verification.yaml |generate_until | |bigbench_authorship_verification_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/authorship_verification.yaml |multiple_choice | |bigbench_auto_categorization_generate_until |lm_eval/tasks/bigbench/generate_until/auto_categorization.yaml |generate_until | |bigbench_auto_debugging_generate_until |lm_eval/tasks/bigbench/generate_until/auto_debugging.yaml |generate_until | |bigbench_bbq_lite_json_generate_until |lm_eval/tasks/bigbench/generate_until/bbq_lite_json.yaml |generate_until | |bigbench_bbq_lite_json_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/bbq_lite_json.yaml |multiple_choice | |bigbench_bridging_anaphora_resolution_barqa_generate_until |lm_eval/tasks/bigbench/generate_until/bridging_anaphora_resolution_barqa.yaml |generate_until | |bigbench_causal_judgment_generate_until |lm_eval/tasks/bigbench/generate_until/causal_judgment.yaml |generate_until | |bigbench_causal_judgment_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/causal_judgment.yaml |multiple_choice | |bigbench_cause_and_effect_generate_until |lm_eval/tasks/bigbench/generate_until/cause_and_effect.yaml |generate_until | |bigbench_cause_and_effect_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/cause_and_effect.yaml |multiple_choice | |bigbench_checkmate_in_one_generate_until |lm_eval/tasks/bigbench/generate_until/checkmate_in_one.yaml |generate_until | |bigbench_checkmate_in_one_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/checkmate_in_one.yaml |multiple_choice | 
|bigbench_chess_state_tracking_generate_until |lm_eval/tasks/bigbench/generate_until/chess_state_tracking.yaml |generate_until | |bigbench_chinese_remainder_theorem_generate_until |lm_eval/tasks/bigbench/generate_until/chinese_remainder_theorem.yaml |generate_until | |bigbench_cifar10_classification_generate_until |lm_eval/tasks/bigbench/generate_until/cifar10_classification.yaml |generate_until | |bigbench_cifar10_classification_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/cifar10_classification.yaml |multiple_choice | |bigbench_code_line_description_generate_until |lm_eval/tasks/bigbench/generate_until/code_line_description.yaml |generate_until | |bigbench_code_line_description_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/code_line_description.yaml |multiple_choice | |bigbench_codenames_generate_until |lm_eval/tasks/bigbench/generate_until/codenames.yaml |generate_until | |bigbench_color_generate_until |lm_eval/tasks/bigbench/generate_until/color.yaml |generate_until | |bigbench_color_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/color.yaml |multiple_choice | |bigbench_common_morpheme_generate_until |lm_eval/tasks/bigbench/generate_until/common_morpheme.yaml |generate_until | |bigbench_common_morpheme_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml |multiple_choice | |bigbench_conceptual_combinations_generate_until |lm_eval/tasks/bigbench/generate_until/conceptual_combinations.yaml |generate_until | |bigbench_conceptual_combinations_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml |multiple_choice | |bigbench_conlang_translation_generate_until |lm_eval/tasks/bigbench/generate_until/conlang_translation.yaml |generate_until | |bigbench_contextual_parametric_knowledge_conflicts_generate_until |lm_eval/tasks/bigbench/generate_until/contextual_parametric_knowledge_conflicts.yaml |generate_until | |bigbench_contextual_parametric_knowledge_conflicts_multiple_choice 
|lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml |multiple_choice | |bigbench_crash_blossom_generate_until |lm_eval/tasks/bigbench/generate_until/crash_blossom.yaml |generate_until | |bigbench_crash_blossom_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml |multiple_choice | |bigbench_crass_ai_generate_until |lm_eval/tasks/bigbench/generate_until/crass_ai.yaml |generate_until | |bigbench_crass_ai_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml |multiple_choice | |bigbench_cryobiology_spanish_generate_until |lm_eval/tasks/bigbench/generate_until/cryobiology_spanish.yaml |generate_until | |bigbench_cryobiology_spanish_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml |multiple_choice | |bigbench_cryptonite_generate_until |lm_eval/tasks/bigbench/generate_until/cryptonite.yaml |generate_until | |bigbench_cs_algorithms_generate_until |lm_eval/tasks/bigbench/generate_until/cs_algorithms.yaml |generate_until | |bigbench_cs_algorithms_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml |multiple_choice | |bigbench_dark_humor_detection_generate_until |lm_eval/tasks/bigbench/generate_until/dark_humor_detection.yaml |generate_until | |bigbench_dark_humor_detection_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml |multiple_choice | |bigbench_date_understanding_generate_until |lm_eval/tasks/bigbench/generate_until/date_understanding.yaml |generate_until | |bigbench_date_understanding_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/date_understanding.yaml |multiple_choice | |bigbench_disambiguation_qa_generate_until |lm_eval/tasks/bigbench/generate_until/disambiguation_qa.yaml |generate_until | |bigbench_disambiguation_qa_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml |multiple_choice | |bigbench_discourse_marker_prediction_generate_until 
|lm_eval/tasks/bigbench/generate_until/discourse_marker_prediction.yaml |generate_until | |bigbench_discourse_marker_prediction_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/discourse_marker_prediction.yaml |multiple_choice | |bigbench_disfl_qa_generate_until |lm_eval/tasks/bigbench/generate_until/disfl_qa.yaml |generate_until | |bigbench_dyck_languages_generate_until |lm_eval/tasks/bigbench/generate_until/dyck_languages.yaml |generate_until | |bigbench_dyck_languages_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml |multiple_choice | |bigbench_elementary_math_qa_generate_until |lm_eval/tasks/bigbench/generate_until/elementary_math_qa.yaml |generate_until | |bigbench_elementary_math_qa_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/elementary_math_qa.yaml |multiple_choice | |bigbench_emoji_movie_generate_until |lm_eval/tasks/bigbench/generate_until/emoji_movie.yaml |generate_until | |bigbench_emoji_movie_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml |multiple_choice | |bigbench_emojis_emotion_prediction_generate_until |lm_eval/tasks/bigbench/generate_until/emojis_emotion_prediction.yaml |generate_until | |bigbench_emojis_emotion_prediction_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/emojis_emotion_prediction.yaml |multiple_choice | |bigbench_empirical_judgments_generate_until |lm_eval/tasks/bigbench/generate_until/empirical_judgments.yaml |generate_until | |bigbench_empirical_judgments_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/empirical_judgments.yaml |multiple_choice | |bigbench_english_proverbs_generate_until |lm_eval/tasks/bigbench/generate_until/english_proverbs.yaml |generate_until | |bigbench_english_proverbs_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/english_proverbs.yaml |multiple_choice | |bigbench_english_russian_proverbs_generate_until |lm_eval/tasks/bigbench/generate_until/english_russian_proverbs.yaml |generate_until | 
|bigbench_english_russian_proverbs_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/english_russian_proverbs.yaml |multiple_choice | |bigbench_entailed_polarity_generate_until |lm_eval/tasks/bigbench/generate_until/entailed_polarity.yaml |generate_until | |bigbench_entailed_polarity_hindi_generate_until |lm_eval/tasks/bigbench/generate_until/entailed_polarity_hindi.yaml |generate_until | |bigbench_entailed_polarity_hindi_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/entailed_polarity_hindi.yaml |multiple_choice | |bigbench_entailed_polarity_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/entailed_polarity.yaml |multiple_choice | |bigbench_epistemic_reasoning_generate_until |lm_eval/tasks/bigbench/generate_until/epistemic_reasoning.yaml |generate_until | |bigbench_epistemic_reasoning_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/epistemic_reasoning.yaml |multiple_choice | |bigbench_evaluating_information_essentiality_generate_until |lm_eval/tasks/bigbench/generate_until/evaluating_information_essentiality.yaml |generate_until | |bigbench_evaluating_information_essentiality_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/evaluating_information_essentiality.yaml |multiple_choice | |bigbench_fact_checker_generate_until |lm_eval/tasks/bigbench/generate_until/fact_checker.yaml |generate_until | |bigbench_fact_checker_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/fact_checker.yaml |multiple_choice | |bigbench_fantasy_reasoning_generate_until |lm_eval/tasks/bigbench/generate_until/fantasy_reasoning.yaml |generate_until | |bigbench_fantasy_reasoning_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/fantasy_reasoning.yaml |multiple_choice | |bigbench_few_shot_nlg_generate_until |lm_eval/tasks/bigbench/generate_until/few_shot_nlg.yaml |generate_until | |bigbench_figure_of_speech_detection_generate_until |lm_eval/tasks/bigbench/generate_until/figure_of_speech_detection.yaml |generate_until | 
|bigbench_figure_of_speech_detection_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/figure_of_speech_detection.yaml |multiple_choice | |bigbench_formal_fallacies_syllogisms_negation_generate_until |lm_eval/tasks/bigbench/generate_until/formal_fallacies_syllogisms_negation.yaml |generate_until | |bigbench_formal_fallacies_syllogisms_negation_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/formal_fallacies_syllogisms_negation.yaml |multiple_choice | |bigbench_gem_generate_until |lm_eval/tasks/bigbench/generate_until/gem.yaml |generate_until | |bigbench_gender_inclusive_sentences_german_generate_until |lm_eval/tasks/bigbench/generate_until/gender_inclusive_sentences_german.yaml |generate_until | |bigbench_general_knowledge_generate_until |lm_eval/tasks/bigbench/generate_until/general_knowledge.yaml |generate_until | |bigbench_general_knowledge_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/general_knowledge.yaml |multiple_choice | |bigbench_geometric_shapes_generate_until |lm_eval/tasks/bigbench/generate_until/geometric_shapes.yaml |generate_until | |bigbench_geometric_shapes_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/geometric_shapes.yaml |multiple_choice | |bigbench_goal_step_wikihow_generate_until |lm_eval/tasks/bigbench/generate_until/goal_step_wikihow.yaml |generate_until | |bigbench_goal_step_wikihow_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/goal_step_wikihow.yaml |multiple_choice | |bigbench_gre_reading_comprehension_generate_until |lm_eval/tasks/bigbench/generate_until/gre_reading_comprehension.yaml |generate_until | |bigbench_gre_reading_comprehension_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/gre_reading_comprehension.yaml |multiple_choice | |bigbench_hhh_alignment_generate_until |lm_eval/tasks/bigbench/generate_until/hhh_alignment.yaml |generate_until | |bigbench_hhh_alignment_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/hhh_alignment.yaml |multiple_choice | 
|bigbench_hindi_question_answering_generate_until |lm_eval/tasks/bigbench/generate_until/hindi_question_answering.yaml |generate_until | |bigbench_hindu_knowledge_generate_until |lm_eval/tasks/bigbench/generate_until/hindu_knowledge.yaml |generate_until | |bigbench_hindu_knowledge_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/hindu_knowledge.yaml |multiple_choice | |bigbench_hinglish_toxicity_generate_until |lm_eval/tasks/bigbench/generate_until/hinglish_toxicity.yaml |generate_until | |bigbench_hinglish_toxicity_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/hinglish_toxicity.yaml |multiple_choice | |bigbench_human_organs_senses_generate_until |lm_eval/tasks/bigbench/generate_until/human_organs_senses.yaml |generate_until | |bigbench_human_organs_senses_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/human_organs_senses.yaml |multiple_choice | |bigbench_hyperbaton_generate_until |lm_eval/tasks/bigbench/generate_until/hyperbaton.yaml |generate_until | |bigbench_hyperbaton_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/hyperbaton.yaml |multiple_choice | |bigbench_identify_math_theorems_generate_until |lm_eval/tasks/bigbench/generate_until/identify_math_theorems.yaml |generate_until | |bigbench_identify_math_theorems_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/identify_math_theorems.yaml |multiple_choice | |bigbench_identify_odd_metaphor_generate_until |lm_eval/tasks/bigbench/generate_until/identify_odd_metaphor.yaml |generate_until | |bigbench_identify_odd_metaphor_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/identify_odd_metaphor.yaml |multiple_choice | |bigbench_implicatures_generate_until |lm_eval/tasks/bigbench/generate_until/implicatures.yaml |generate_until | |bigbench_implicatures_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/implicatures.yaml |multiple_choice | |bigbench_implicit_relations_generate_until |lm_eval/tasks/bigbench/generate_until/implicit_relations.yaml |generate_until | 
|bigbench_implicit_relations_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/implicit_relations.yaml |multiple_choice | |bigbench_intent_recognition_generate_until |lm_eval/tasks/bigbench/generate_until/intent_recognition.yaml |generate_until | |bigbench_intent_recognition_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/intent_recognition.yaml |multiple_choice | |bigbench_international_phonetic_alphabet_nli_generate_until |lm_eval/tasks/bigbench/generate_until/international_phonetic_alphabet_nli.yaml |generate_until | |bigbench_international_phonetic_alphabet_nli_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_nli.yaml |multiple_choice | |bigbench_international_phonetic_alphabet_transliterate_generate_until |lm_eval/tasks/bigbench/generate_until/international_phonetic_alphabet_transliterate.yaml |generate_until | |bigbench_intersect_geometry_generate_until |lm_eval/tasks/bigbench/generate_until/intersect_geometry.yaml |generate_until | |bigbench_intersect_geometry_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/intersect_geometry.yaml |multiple_choice | |bigbench_irony_identification_generate_until |lm_eval/tasks/bigbench/generate_until/irony_identification.yaml |generate_until | |bigbench_irony_identification_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/irony_identification.yaml |multiple_choice | |bigbench_kanji_ascii_generate_until |lm_eval/tasks/bigbench/generate_until/kanji_ascii.yaml |generate_until | |bigbench_kanji_ascii_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/kanji_ascii.yaml |multiple_choice | |bigbench_kannada_generate_until |lm_eval/tasks/bigbench/generate_until/kannada.yaml |generate_until | |bigbench_kannada_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/kannada.yaml |multiple_choice | |bigbench_key_value_maps_generate_until |lm_eval/tasks/bigbench/generate_until/key_value_maps.yaml |generate_until | |bigbench_key_value_maps_multiple_choice 
|lm_eval/tasks/bigbench/multiple_choice/key_value_maps.yaml |multiple_choice | |bigbench_known_unknowns_generate_until |lm_eval/tasks/bigbench/generate_until/known_unknowns.yaml |generate_until | |bigbench_known_unknowns_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/known_unknowns.yaml |multiple_choice | |bigbench_language_games_generate_until |lm_eval/tasks/bigbench/generate_until/language_games.yaml |generate_until | |bigbench_language_identification_generate_until |lm_eval/tasks/bigbench/generate_until/language_identification.yaml |generate_until | |bigbench_language_identification_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/language_identification.yaml |multiple_choice | |bigbench_linguistic_mappings_generate_until |lm_eval/tasks/bigbench/generate_until/linguistic_mappings.yaml |generate_until | |bigbench_linguistics_puzzles_generate_until |lm_eval/tasks/bigbench/generate_until/linguistics_puzzles.yaml |generate_until | |bigbench_list_functions_generate_until |lm_eval/tasks/bigbench/generate_until/list_functions.yaml |generate_until | |bigbench_logic_grid_puzzle_generate_until |lm_eval/tasks/bigbench/generate_until/logic_grid_puzzle.yaml |generate_until | |bigbench_logic_grid_puzzle_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/logic_grid_puzzle.yaml |multiple_choice | |bigbench_logical_args_generate_until |lm_eval/tasks/bigbench/generate_until/logical_args.yaml |generate_until | |bigbench_logical_args_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/logical_args.yaml |multiple_choice | |bigbench_logical_deduction_generate_until |lm_eval/tasks/bigbench/generate_until/logical_deduction.yaml |generate_until | |bigbench_logical_deduction_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/logical_deduction.yaml |multiple_choice | |bigbench_logical_fallacy_detection_generate_until |lm_eval/tasks/bigbench/generate_until/logical_fallacy_detection.yaml |generate_until | |bigbench_logical_fallacy_detection_multiple_choice 
|lm_eval/tasks/bigbench/multiple_choice/logical_fallacy_detection.yaml |multiple_choice | |bigbench_logical_sequence_generate_until |lm_eval/tasks/bigbench/generate_until/logical_sequence.yaml |generate_until | |bigbench_logical_sequence_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/logical_sequence.yaml |multiple_choice | |bigbench_mathematical_induction_generate_until |lm_eval/tasks/bigbench/generate_until/mathematical_induction.yaml |generate_until | |bigbench_mathematical_induction_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/mathematical_induction.yaml |multiple_choice | |bigbench_matrixshapes_generate_until |lm_eval/tasks/bigbench/generate_until/matrixshapes.yaml |generate_until | |bigbench_metaphor_boolean_generate_until |lm_eval/tasks/bigbench/generate_until/metaphor_boolean.yaml |generate_until | |bigbench_metaphor_boolean_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/metaphor_boolean.yaml |multiple_choice | |bigbench_metaphor_understanding_generate_until |lm_eval/tasks/bigbench/generate_until/metaphor_understanding.yaml |generate_until | |bigbench_metaphor_understanding_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/metaphor_understanding.yaml |multiple_choice | |bigbench_minute_mysteries_qa_generate_until |lm_eval/tasks/bigbench/generate_until/minute_mysteries_qa.yaml |generate_until | |bigbench_misconceptions_generate_until |lm_eval/tasks/bigbench/generate_until/misconceptions.yaml |generate_until | |bigbench_misconceptions_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/misconceptions.yaml |multiple_choice | |bigbench_misconceptions_russian_generate_until |lm_eval/tasks/bigbench/generate_until/misconceptions_russian.yaml |generate_until | |bigbench_misconceptions_russian_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/misconceptions_russian.yaml |multiple_choice | |bigbench_mnist_ascii_generate_until |lm_eval/tasks/bigbench/generate_until/mnist_ascii.yaml |generate_until | 
|bigbench_mnist_ascii_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/mnist_ascii.yaml |multiple_choice | |bigbench_modified_arithmetic_generate_until |lm_eval/tasks/bigbench/generate_until/modified_arithmetic.yaml |generate_until | |bigbench_moral_permissibility_generate_until |lm_eval/tasks/bigbench/generate_until/moral_permissibility.yaml |generate_until | |bigbench_moral_permissibility_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/moral_permissibility.yaml |multiple_choice | |bigbench_movie_dialog_same_or_different_generate_until |lm_eval/tasks/bigbench/generate_until/movie_dialog_same_or_different.yaml |generate_until | |bigbench_movie_dialog_same_or_different_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/movie_dialog_same_or_different.yaml |multiple_choice | |bigbench_movie_recommendation_generate_until |lm_eval/tasks/bigbench/generate_until/movie_recommendation.yaml |generate_until | |bigbench_movie_recommendation_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/movie_recommendation.yaml |multiple_choice | |bigbench_mult_data_wrangling_generate_until |lm_eval/tasks/bigbench/generate_until/mult_data_wrangling.yaml |generate_until | |bigbench_multiemo_generate_until |lm_eval/tasks/bigbench/generate_until/multiemo.yaml |generate_until | |bigbench_multiemo_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/multiemo.yaml |multiple_choice | |bigbench_natural_instructions_generate_until |lm_eval/tasks/bigbench/generate_until/natural_instructions.yaml |generate_until | |bigbench_navigate_generate_until |lm_eval/tasks/bigbench/generate_until/navigate.yaml |generate_until | |bigbench_navigate_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/navigate.yaml |multiple_choice | |bigbench_nonsense_words_grammar_generate_until |lm_eval/tasks/bigbench/generate_until/nonsense_words_grammar.yaml |generate_until | |bigbench_nonsense_words_grammar_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/nonsense_words_grammar.yaml 
|multiple_choice | |bigbench_novel_concepts_generate_until |lm_eval/tasks/bigbench/generate_until/novel_concepts.yaml |generate_until | |bigbench_novel_concepts_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/novel_concepts.yaml |multiple_choice | |bigbench_object_counting_generate_until |lm_eval/tasks/bigbench/generate_until/object_counting.yaml |generate_until | |bigbench_odd_one_out_generate_until |lm_eval/tasks/bigbench/generate_until/odd_one_out.yaml |generate_until | |bigbench_odd_one_out_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/odd_one_out.yaml |multiple_choice | |bigbench_operators_generate_until |lm_eval/tasks/bigbench/generate_until/operators.yaml |generate_until | |bigbench_paragraph_segmentation_generate_until |lm_eval/tasks/bigbench/generate_until/paragraph_segmentation.yaml |generate_until | |bigbench_parsinlu_qa_generate_until |lm_eval/tasks/bigbench/generate_until/parsinlu_qa.yaml |generate_until | |bigbench_parsinlu_qa_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/parsinlu_qa.yaml |multiple_choice | |bigbench_parsinlu_reading_comprehension_generate_until |lm_eval/tasks/bigbench/generate_until/parsinlu_reading_comprehension.yaml |generate_until | |bigbench_penguins_in_a_table_generate_until |lm_eval/tasks/bigbench/generate_until/penguins_in_a_table.yaml |generate_until | |bigbench_penguins_in_a_table_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/penguins_in_a_table.yaml |multiple_choice | |bigbench_periodic_elements_generate_until |lm_eval/tasks/bigbench/generate_until/periodic_elements.yaml |generate_until | |bigbench_periodic_elements_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/periodic_elements.yaml |multiple_choice | |bigbench_persian_idioms_generate_until |lm_eval/tasks/bigbench/generate_until/persian_idioms.yaml |generate_until | |bigbench_persian_idioms_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/persian_idioms.yaml |multiple_choice | 
|bigbench_phrase_relatedness_generate_until |lm_eval/tasks/bigbench/generate_until/phrase_relatedness.yaml |generate_until | |bigbench_phrase_relatedness_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/phrase_relatedness.yaml |multiple_choice | |bigbench_physical_intuition_generate_until |lm_eval/tasks/bigbench/generate_until/physical_intuition.yaml |generate_until | |bigbench_physical_intuition_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/physical_intuition.yaml |multiple_choice | |bigbench_physics_generate_until |lm_eval/tasks/bigbench/generate_until/physics.yaml |generate_until | |bigbench_physics_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/physics.yaml |multiple_choice | |bigbench_physics_questions_generate_until |lm_eval/tasks/bigbench/generate_until/physics_questions.yaml |generate_until | |bigbench_play_dialog_same_or_different_generate_until |lm_eval/tasks/bigbench/generate_until/play_dialog_same_or_different.yaml |generate_until | |bigbench_play_dialog_same_or_different_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/play_dialog_same_or_different.yaml |multiple_choice | |bigbench_polish_sequence_labeling_generate_until |lm_eval/tasks/bigbench/generate_until/polish_sequence_labeling.yaml |generate_until | |bigbench_presuppositions_as_nli_generate_until |lm_eval/tasks/bigbench/generate_until/presuppositions_as_nli.yaml |generate_until | |bigbench_presuppositions_as_nli_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/presuppositions_as_nli.yaml |multiple_choice | |bigbench_qa_wikidata_generate_until |lm_eval/tasks/bigbench/generate_until/qa_wikidata.yaml |generate_until | |bigbench_question_selection_generate_until |lm_eval/tasks/bigbench/generate_until/question_selection.yaml |generate_until | |bigbench_question_selection_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/question_selection.yaml |multiple_choice | |bigbench_real_or_fake_text_generate_until 
|lm_eval/tasks/bigbench/generate_until/real_or_fake_text.yaml |generate_until | |bigbench_real_or_fake_text_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/real_or_fake_text.yaml |multiple_choice | |bigbench_reasoning_about_colored_objects_generate_until |lm_eval/tasks/bigbench/generate_until/reasoning_about_colored_objects.yaml |generate_until | |bigbench_reasoning_about_colored_objects_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/reasoning_about_colored_objects.yaml |multiple_choice | |bigbench_repeat_copy_logic_generate_until |lm_eval/tasks/bigbench/generate_until/repeat_copy_logic.yaml |generate_until | |bigbench_rephrase_generate_until |lm_eval/tasks/bigbench/generate_until/rephrase.yaml |generate_until | |bigbench_riddle_sense_generate_until |lm_eval/tasks/bigbench/generate_until/riddle_sense.yaml |generate_until | |bigbench_riddle_sense_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/riddle_sense.yaml |multiple_choice | |bigbench_ruin_names_generate_until |lm_eval/tasks/bigbench/generate_until/ruin_names.yaml |generate_until | |bigbench_ruin_names_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/ruin_names.yaml |multiple_choice | |bigbench_salient_translation_error_detection_generate_until |lm_eval/tasks/bigbench/generate_until/salient_translation_error_detection.yaml |generate_until | |bigbench_salient_translation_error_detection_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/salient_translation_error_detection.yaml |multiple_choice | |bigbench_scientific_press_release_generate_until |lm_eval/tasks/bigbench/generate_until/scientific_press_release.yaml |generate_until | |bigbench_semantic_parsing_in_context_sparc_generate_until |lm_eval/tasks/bigbench/generate_until/semantic_parsing_in_context_sparc.yaml |generate_until | |bigbench_semantic_parsing_spider_generate_until |lm_eval/tasks/bigbench/generate_until/semantic_parsing_spider.yaml |generate_until | |bigbench_sentence_ambiguity_generate_until 
|lm_eval/tasks/bigbench/generate_until/sentence_ambiguity.yaml |generate_until | |bigbench_sentence_ambiguity_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/sentence_ambiguity.yaml |multiple_choice | |bigbench_similarities_abstraction_generate_until |lm_eval/tasks/bigbench/generate_until/similarities_abstraction.yaml |generate_until | |bigbench_similarities_abstraction_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/similarities_abstraction.yaml |multiple_choice | |bigbench_simp_turing_concept_generate_until |lm_eval/tasks/bigbench/generate_until/simp_turing_concept.yaml |generate_until | |bigbench_simple_arithmetic_json_generate_until |lm_eval/tasks/bigbench/generate_until/simple_arithmetic_json.yaml |generate_until | |bigbench_simple_arithmetic_json_multiple_choice_generate_until |lm_eval/tasks/bigbench/generate_until/simple_arithmetic_json_multiple_choice.yaml |generate_until | |bigbench_simple_arithmetic_json_subtasks_generate_until |lm_eval/tasks/bigbench/generate_until/simple_arithmetic_json_subtasks.yaml |generate_until | |bigbench_simple_arithmetic_multiple_targets_json_generate_until |lm_eval/tasks/bigbench/generate_until/simple_arithmetic_multiple_targets_json.yaml |generate_until | |bigbench_simple_ethical_questions_generate_until |lm_eval/tasks/bigbench/generate_until/simple_ethical_questions.yaml |generate_until | |bigbench_simple_ethical_questions_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/simple_ethical_questions.yaml |multiple_choice | |bigbench_simple_text_editing_generate_until |lm_eval/tasks/bigbench/generate_until/simple_text_editing.yaml |generate_until | |bigbench_snarks_generate_until |lm_eval/tasks/bigbench/generate_until/snarks.yaml |generate_until | |bigbench_snarks_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/snarks.yaml |multiple_choice | |bigbench_social_iqa_generate_until |lm_eval/tasks/bigbench/generate_until/social_iqa.yaml |generate_until | |bigbench_social_iqa_multiple_choice 
|lm_eval/tasks/bigbench/multiple_choice/social_iqa.yaml |multiple_choice | |bigbench_social_support_generate_until |lm_eval/tasks/bigbench/generate_until/social_support.yaml |generate_until | |bigbench_social_support_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/social_support.yaml |multiple_choice | |bigbench_sports_understanding_generate_until |lm_eval/tasks/bigbench/generate_until/sports_understanding.yaml |generate_until | |bigbench_sports_understanding_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/sports_understanding.yaml |multiple_choice | |bigbench_strange_stories_generate_until |lm_eval/tasks/bigbench/generate_until/strange_stories.yaml |generate_until | |bigbench_strange_stories_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/strange_stories.yaml |multiple_choice | |bigbench_strategyqa_generate_until |lm_eval/tasks/bigbench/generate_until/strategyqa.yaml |generate_until | |bigbench_strategyqa_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/strategyqa.yaml |multiple_choice | |bigbench_sufficient_information_generate_until |lm_eval/tasks/bigbench/generate_until/sufficient_information.yaml |generate_until | |bigbench_suicide_risk_generate_until |lm_eval/tasks/bigbench/generate_until/suicide_risk.yaml |generate_until | |bigbench_suicide_risk_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/suicide_risk.yaml |multiple_choice | |bigbench_swahili_english_proverbs_generate_until |lm_eval/tasks/bigbench/generate_until/swahili_english_proverbs.yaml |generate_until | |bigbench_swahili_english_proverbs_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/swahili_english_proverbs.yaml |multiple_choice | |bigbench_swedish_to_german_proverbs_generate_until |lm_eval/tasks/bigbench/generate_until/swedish_to_german_proverbs.yaml |generate_until | |bigbench_swedish_to_german_proverbs_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/swedish_to_german_proverbs.yaml |multiple_choice | 
|bigbench_symbol_interpretation_generate_until |lm_eval/tasks/bigbench/generate_until/symbol_interpretation.yaml |generate_until | |bigbench_symbol_interpretation_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/symbol_interpretation.yaml |multiple_choice | |bigbench_temporal_sequences_generate_until |lm_eval/tasks/bigbench/generate_until/temporal_sequences.yaml |generate_until | |bigbench_temporal_sequences_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/temporal_sequences.yaml |multiple_choice | |bigbench_tense_generate_until |lm_eval/tasks/bigbench/generate_until/tense.yaml |generate_until | |bigbench_timedial_generate_until |lm_eval/tasks/bigbench/generate_until/timedial.yaml |generate_until | |bigbench_timedial_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/timedial.yaml |multiple_choice | |bigbench_topical_chat_generate_until |lm_eval/tasks/bigbench/generate_until/topical_chat.yaml |generate_until | |bigbench_tracking_shuffled_objects_generate_until |lm_eval/tasks/bigbench/generate_until/tracking_shuffled_objects.yaml |generate_until | |bigbench_tracking_shuffled_objects_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/tracking_shuffled_objects.yaml |multiple_choice | |bigbench_understanding_fables_generate_until |lm_eval/tasks/bigbench/generate_until/understanding_fables.yaml |generate_until | |bigbench_understanding_fables_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/understanding_fables.yaml |multiple_choice | |bigbench_undo_permutation_generate_until |lm_eval/tasks/bigbench/generate_until/undo_permutation.yaml |generate_until | |bigbench_undo_permutation_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/undo_permutation.yaml |multiple_choice | |bigbench_unit_conversion_generate_until |lm_eval/tasks/bigbench/generate_until/unit_conversion.yaml |generate_until | |bigbench_unit_conversion_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/unit_conversion.yaml |multiple_choice | 
|bigbench_unit_interpretation_generate_until |lm_eval/tasks/bigbench/generate_until/unit_interpretation.yaml |generate_until | |bigbench_unit_interpretation_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/unit_interpretation.yaml |multiple_choice | |bigbench_unnatural_in_context_learning_generate_until |lm_eval/tasks/bigbench/generate_until/unnatural_in_context_learning.yaml |generate_until | |bigbench_vitaminc_fact_verification_generate_until |lm_eval/tasks/bigbench/generate_until/vitaminc_fact_verification.yaml |generate_until | |bigbench_vitaminc_fact_verification_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/vitaminc_fact_verification.yaml |multiple_choice | |bigbench_what_is_the_tao_generate_until |lm_eval/tasks/bigbench/generate_until/what_is_the_tao.yaml |generate_until | |bigbench_what_is_the_tao_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/what_is_the_tao.yaml |multiple_choice | |bigbench_which_wiki_edit_generate_until |lm_eval/tasks/bigbench/generate_until/which_wiki_edit.yaml |generate_until | |bigbench_which_wiki_edit_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/which_wiki_edit.yaml |multiple_choice | |bigbench_winowhy_generate_until |lm_eval/tasks/bigbench/generate_until/winowhy.yaml |generate_until | |bigbench_winowhy_multiple_choice |lm_eval/tasks/bigbench/multiple_choice/winowhy.yaml |multiple_choice | |bigbench_word_sorting_generate_until |lm_eval/tasks/bigbench/generate_until/word_sorting.yaml |generate_until | |bigbench_word_unscrambling_generate_until |lm_eval/tasks/bigbench/generate_until/word_unscrambling.yaml |generate_until | |blimp_adjunct_island |lm_eval/tasks/blimp/adjunct_island.yaml |multiple_choice | |blimp_anaphor_gender_agreement |lm_eval/tasks/blimp/anaphor_gender_agreement.yaml |multiple_choice | |blimp_anaphor_number_agreement |lm_eval/tasks/blimp/anaphor_number_agreement.yaml |multiple_choice | |blimp_animate_subject_passive |lm_eval/tasks/blimp/animate_subject_passive.yaml |multiple_choice 
| |blimp_animate_subject_trans |lm_eval/tasks/blimp/animate_subject_trans.yaml |multiple_choice | |blimp_causative |lm_eval/tasks/blimp/causative.yaml |multiple_choice | |blimp_complex_NP_island |lm_eval/tasks/blimp/complex_NP_island.yaml |multiple_choice | |blimp_coordinate_structure_constraint_complex_left_branch |lm_eval/tasks/blimp/coordinate_structure_constraint_complex_left_branch.yaml |multiple_choice | |blimp_coordinate_structure_constraint_object_extraction |lm_eval/tasks/blimp/coordinate_structure_constraint_object_extraction.yaml |multiple_choice | |blimp_determiner_noun_agreement_1 |lm_eval/tasks/blimp/determiner_noun_agreement_1.yaml |multiple_choice | |blimp_determiner_noun_agreement_2 |lm_eval/tasks/blimp/determiner_noun_agreement_2.yaml |multiple_choice | |blimp_determiner_noun_agreement_irregular_1 |lm_eval/tasks/blimp/determiner_noun_agreement_irregular_1.yaml |multiple_choice | |blimp_determiner_noun_agreement_irregular_2 |lm_eval/tasks/blimp/determiner_noun_agreement_irregular_2.yaml |multiple_choice | |blimp_determiner_noun_agreement_with_adj_2 |lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_2.yaml |multiple_choice | |blimp_determiner_noun_agreement_with_adj_irregular_1 |lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_1.yaml |multiple_choice | |blimp_determiner_noun_agreement_with_adj_irregular_2 |lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_2.yaml |multiple_choice | |blimp_determiner_noun_agreement_with_adjective_1 |lm_eval/tasks/blimp/determiner_noun_agreement_with_adjective_1.yaml |multiple_choice | |blimp_distractor_agreement_relational_noun |lm_eval/tasks/blimp/distractor_agreement_relational_noun.yaml |multiple_choice | |blimp_distractor_agreement_relative_clause |lm_eval/tasks/blimp/distractor_agreement_relative_clause.yaml |multiple_choice | |blimp_drop_argument |lm_eval/tasks/blimp/drop_argument.yaml |multiple_choice | |blimp_ellipsis_n_bar_1 |lm_eval/tasks/blimp/ellipsis_n_bar_1.yaml 
|multiple_choice | |blimp_ellipsis_n_bar_2 |lm_eval/tasks/blimp/ellipsis_n_bar_2.yaml |multiple_choice | |blimp_existential_there_object_raising |lm_eval/tasks/blimp/existential_there_object_raising.yaml |multiple_choice | |blimp_existential_there_quantifiers_1 |lm_eval/tasks/blimp/existential_there_quantifiers_1.yaml |multiple_choice | |blimp_existential_there_quantifiers_2 |lm_eval/tasks/blimp/existential_there_quantifiers_2.yaml |multiple_choice | |blimp_existential_there_subject_raising |lm_eval/tasks/blimp/existential_there_subject_raising.yaml |multiple_choice | |blimp_expletive_it_object_raising |lm_eval/tasks/blimp/expletive_it_object_raising.yaml |multiple_choice | |blimp_inchoative |lm_eval/tasks/blimp/inchoative.yaml |multiple_choice | |blimp_intransitive |lm_eval/tasks/blimp/intransitive.yaml |multiple_choice | |blimp_irregular_past_participle_adjectives |lm_eval/tasks/blimp/irregular_past_participle_adjectives.yaml |multiple_choice | |blimp_irregular_past_participle_verbs |lm_eval/tasks/blimp/irregular_past_participle_verbs.yaml |multiple_choice | |blimp_irregular_plural_subject_verb_agreement_1 |lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_1.yaml |multiple_choice | |blimp_irregular_plural_subject_verb_agreement_2 |lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_2.yaml |multiple_choice | |blimp_left_branch_island_echo_question |lm_eval/tasks/blimp/left_branch_island_echo_question.yaml |multiple_choice | |blimp_left_branch_island_simple_question |lm_eval/tasks/blimp/left_branch_island_simple_question.yaml |multiple_choice | |blimp_matrix_question_npi_licensor_present |lm_eval/tasks/blimp/matrix_question_npi_licensor_present.yaml |multiple_choice | |blimp_npi_present_1 |lm_eval/tasks/blimp/npi_present_1.yaml |multiple_choice | |blimp_npi_present_2 |lm_eval/tasks/blimp/npi_present_2.yaml |multiple_choice | |blimp_only_npi_licensor_present |lm_eval/tasks/blimp/only_npi_licensor_present.yaml |multiple_choice | 
|blimp_only_npi_scope |lm_eval/tasks/blimp/only_npi_scope.yaml |multiple_choice | |blimp_passive_1 |lm_eval/tasks/blimp/passive_1.yaml |multiple_choice | |blimp_passive_2 |lm_eval/tasks/blimp/passive_2.yaml |multiple_choice | |blimp_principle_A_c_command |lm_eval/tasks/blimp/principle_A_c_command.yaml |multiple_choice | |blimp_principle_A_case_1 |lm_eval/tasks/blimp/principle_A_case_1.yaml |multiple_choice | |blimp_principle_A_case_2 |lm_eval/tasks/blimp/principle_A_case_2.yaml |multiple_choice | |blimp_principle_A_domain_1 |lm_eval/tasks/blimp/principle_A_domain_1.yaml |multiple_choice | |blimp_principle_A_domain_2 |lm_eval/tasks/blimp/principle_A_domain_2.yaml |multiple_choice | |blimp_principle_A_domain_3 |lm_eval/tasks/blimp/principle_A_domain_3.yaml |multiple_choice | |blimp_principle_A_reconstruction |lm_eval/tasks/blimp/principle_A_reconstruction.yaml |multiple_choice | |blimp_regular_plural_subject_verb_agreement_1 |lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_1.yaml |multiple_choice | |blimp_regular_plural_subject_verb_agreement_2 |lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_2.yaml |multiple_choice | |blimp_sentential_negation_npi_licensor_present |lm_eval/tasks/blimp/sentential_negation_npi_licensor_present.yaml |multiple_choice | |blimp_sentential_negation_npi_scope |lm_eval/tasks/blimp/sentential_negation_npi_scope.yaml |multiple_choice | |blimp_sentential_subject_island |lm_eval/tasks/blimp/sentential_subject_island.yaml |multiple_choice | |blimp_superlative_quantifiers_1 |lm_eval/tasks/blimp/superlative_quantifiers_1.yaml |multiple_choice | |blimp_superlative_quantifiers_2 |lm_eval/tasks/blimp/superlative_quantifiers_2.yaml |multiple_choice | |blimp_tough_vs_raising_1 |lm_eval/tasks/blimp/tough_vs_raising_1.yaml |multiple_choice | |blimp_tough_vs_raising_2 |lm_eval/tasks/blimp/tough_vs_raising_2.yaml |multiple_choice | |blimp_transitive |lm_eval/tasks/blimp/transitive.yaml |multiple_choice | |blimp_wh_island 
|lm_eval/tasks/blimp/wh_island.yaml |multiple_choice | |blimp_wh_questions_object_gap |lm_eval/tasks/blimp/wh_questions_object_gap.yaml |multiple_choice | |blimp_wh_questions_subject_gap |lm_eval/tasks/blimp/wh_questions_subject_gap.yaml |multiple_choice | |blimp_wh_questions_subject_gap_long_distance |lm_eval/tasks/blimp/wh_questions_subject_gap_long_distance.yaml |multiple_choice | |blimp_wh_vs_that_no_gap |lm_eval/tasks/blimp/wh_vs_that_no_gap.yaml |multiple_choice | |blimp_wh_vs_that_no_gap_long_distance |lm_eval/tasks/blimp/wh_vs_that_no_gap_long_distance.yaml |multiple_choice | |blimp_wh_vs_that_with_gap |lm_eval/tasks/blimp/wh_vs_that_with_gap.yaml |multiple_choice | |blimp_wh_vs_that_with_gap_long_distance |lm_eval/tasks/blimp/wh_vs_that_with_gap_long_distance.yaml |multiple_choice | |boolq |lm_eval/tasks/super_glue/boolq/default.yaml |multiple_choice | |boolq-seq2seq |lm_eval/tasks/super_glue/boolq/seq2seq.yaml |generate_until | |cb |lm_eval/tasks/super_glue/cb/default.yaml |multiple_choice | |ceval-valid_accountant |lm_eval/tasks/ceval/ceval-valid_accountant.yaml |multiple_choice | |ceval-valid_advanced_mathematics |lm_eval/tasks/ceval/ceval-valid_advanced_mathematics.yaml |multiple_choice | |ceval-valid_art_studies |lm_eval/tasks/ceval/ceval-valid_art_studies.yaml |multiple_choice | |ceval-valid_basic_medicine |lm_eval/tasks/ceval/ceval-valid_basic_medicine.yaml |multiple_choice | |ceval-valid_business_administration |lm_eval/tasks/ceval/ceval-valid_business_administration.yaml |multiple_choice | |ceval-valid_chinese_language_and_literature |lm_eval/tasks/ceval/ceval-valid_chinese_language_and_literature.yaml |multiple_choice | |ceval-valid_civil_servant |lm_eval/tasks/ceval/ceval-valid_civil_servant.yaml |multiple_choice | |ceval-valid_clinical_medicine |lm_eval/tasks/ceval/ceval-valid_clinical_medicine.yaml |multiple_choice | |ceval-valid_college_chemistry |lm_eval/tasks/ceval/ceval-valid_college_chemistry.yaml |multiple_choice | 
|ceval-valid_college_economics |lm_eval/tasks/ceval/ceval-valid_college_economics.yaml |multiple_choice | |ceval-valid_college_physics |lm_eval/tasks/ceval/ceval-valid_college_physics.yaml |multiple_choice | |ceval-valid_college_programming |lm_eval/tasks/ceval/ceval-valid_college_programming.yaml |multiple_choice | |ceval-valid_computer_architecture |lm_eval/tasks/ceval/ceval-valid_computer_architecture.yaml |multiple_choice | |ceval-valid_computer_network |lm_eval/tasks/ceval/ceval-valid_computer_network.yaml |multiple_choice | |ceval-valid_discrete_mathematics |lm_eval/tasks/ceval/ceval-valid_discrete_mathematics.yaml |multiple_choice | |ceval-valid_education_science |lm_eval/tasks/ceval/ceval-valid_education_science.yaml |multiple_choice | |ceval-valid_electrical_engineer |lm_eval/tasks/ceval/ceval-valid_electrical_engineer.yaml |multiple_choice | |ceval-valid_environmental_impact_assessment_engineer |lm_eval/tasks/ceval/ceval-valid_environmental_impact_assessment_engineer.yaml |multiple_choice | |ceval-valid_fire_engineer |lm_eval/tasks/ceval/ceval-valid_fire_engineer.yaml |multiple_choice | |ceval-valid_high_school_biology |lm_eval/tasks/ceval/ceval-valid_high_school_biology.yaml |multiple_choice | |ceval-valid_high_school_chemistry |lm_eval/tasks/ceval/ceval-valid_high_school_chemistry.yaml |multiple_choice | |ceval-valid_high_school_chinese |lm_eval/tasks/ceval/ceval-valid_high_school_chinese.yaml |multiple_choice | |ceval-valid_high_school_geography |lm_eval/tasks/ceval/ceval-valid_high_school_geography.yaml |multiple_choice | |ceval-valid_high_school_history |lm_eval/tasks/ceval/ceval-valid_high_school_history.yaml |multiple_choice | |ceval-valid_high_school_mathematics |lm_eval/tasks/ceval/ceval-valid_high_school_mathematics.yaml |multiple_choice | |ceval-valid_high_school_physics |lm_eval/tasks/ceval/ceval-valid_high_school_physics.yaml |multiple_choice | |ceval-valid_high_school_politics |lm_eval/tasks/ceval/ceval-valid_high_school_politics.yaml 
|multiple_choice | |ceval-valid_ideological_and_moral_cultivation |lm_eval/tasks/ceval/ceval-valid_ideological_and_moral_cultivation.yaml |multiple_choice | |ceval-valid_law |lm_eval/tasks/ceval/ceval-valid_law.yaml |multiple_choice | |ceval-valid_legal_professional |lm_eval/tasks/ceval/ceval-valid_legal_professional.yaml |multiple_choice | |ceval-valid_logic |lm_eval/tasks/ceval/ceval-valid_logic.yaml |multiple_choice | |ceval-valid_mao_zedong_thought |lm_eval/tasks/ceval/ceval-valid_mao_zedong_thought.yaml |multiple_choice | |ceval-valid_marxism |lm_eval/tasks/ceval/ceval-valid_marxism.yaml |multiple_choice | |ceval-valid_metrology_engineer |lm_eval/tasks/ceval/ceval-valid_metrology_engineer.yaml |multiple_choice | |ceval-valid_middle_school_biology |lm_eval/tasks/ceval/ceval-valid_middle_school_biology.yaml |multiple_choice | |ceval-valid_middle_school_chemistry |lm_eval/tasks/ceval/ceval-valid_middle_school_chemistry.yaml |multiple_choice | |ceval-valid_middle_school_geography |lm_eval/tasks/ceval/ceval-valid_middle_school_geography.yaml |multiple_choice | |ceval-valid_middle_school_history |lm_eval/tasks/ceval/ceval-valid_middle_school_history.yaml |multiple_choice | |ceval-valid_middle_school_mathematics |lm_eval/tasks/ceval/ceval-valid_middle_school_mathematics.yaml |multiple_choice | |ceval-valid_middle_school_physics |lm_eval/tasks/ceval/ceval-valid_middle_school_physics.yaml |multiple_choice | |ceval-valid_middle_school_politics |lm_eval/tasks/ceval/ceval-valid_middle_school_politics.yaml |multiple_choice | |ceval-valid_modern_chinese_history |lm_eval/tasks/ceval/ceval-valid_modern_chinese_history.yaml |multiple_choice | |ceval-valid_operating_system |lm_eval/tasks/ceval/ceval-valid_operating_system.yaml |multiple_choice | |ceval-valid_physician |lm_eval/tasks/ceval/ceval-valid_physician.yaml |multiple_choice | |ceval-valid_plant_protection |lm_eval/tasks/ceval/ceval-valid_plant_protection.yaml |multiple_choice | |ceval-valid_probability_and_statistics 
|lm_eval/tasks/ceval/ceval-valid_probability_and_statistics.yaml |multiple_choice | |ceval-valid_professional_tour_guide |lm_eval/tasks/ceval/ceval-valid_professional_tour_guide.yaml |multiple_choice | |ceval-valid_sports_science |lm_eval/tasks/ceval/ceval-valid_sports_science.yaml |multiple_choice | |ceval-valid_tax_accountant |lm_eval/tasks/ceval/ceval-valid_tax_accountant.yaml |multiple_choice | |ceval-valid_teacher_qualification |lm_eval/tasks/ceval/ceval-valid_teacher_qualification.yaml |multiple_choice | |ceval-valid_urban_and_rural_planner |lm_eval/tasks/ceval/ceval-valid_urban_and_rural_planner.yaml |multiple_choice | |ceval-valid_veterinary_medicine |lm_eval/tasks/ceval/ceval-valid_veterinary_medicine.yaml |multiple_choice | |cmmlu_agronomy |lm_eval/tasks/cmmlu/cmmlu_default_agronomy.yaml |multiple_choice | |cmmlu_anatomy |lm_eval/tasks/cmmlu/cmmlu_default_anatomy.yaml |multiple_choice | |cmmlu_ancient_chinese |lm_eval/tasks/cmmlu/cmmlu_default_ancient_chinese.yaml |multiple_choice | |cmmlu_arts |lm_eval/tasks/cmmlu/cmmlu_arts.yaml |multiple_choice | |cmmlu_astronomy |lm_eval/tasks/cmmlu/cmmlu_default_astronomy.yaml |multiple_choice | |cmmlu_business_ethics |lm_eval/tasks/cmmlu/cmmlu_business_ethics.yaml |multiple_choice | |cmmlu_chinese_civil_service_exam |lm_eval/tasks/cmmlu/cmmlu_default_chinese_civil_service_exam.yaml |multiple_choice | |cmmlu_chinese_driving_rule |lm_eval/tasks/cmmlu/cmmlu_default_chinese_driving_rule.yaml |multiple_choice | |cmmlu_chinese_food_culture |lm_eval/tasks/cmmlu/cmmlu_default_chinese_food_culture.yaml |multiple_choice | |cmmlu_chinese_foreign_policy |lm_eval/tasks/cmmlu/cmmlu_chinese_foreign_policy.yaml |multiple_choice | |cmmlu_chinese_history |lm_eval/tasks/cmmlu/cmmlu_chinese_history.yaml |multiple_choice | |cmmlu_chinese_literature |lm_eval/tasks/cmmlu/cmmlu_default_chinese_literature.yaml |multiple_choice | |cmmlu_chinese_teacher_qualification |lm_eval/tasks/cmmlu/cmmlu_chinese_teacher_qualification.yaml 
|multiple_choice | |cmmlu_clinical_knowledge |lm_eval/tasks/cmmlu/cmmlu_clinical_knowledge.yaml |multiple_choice | |cmmlu_college_actuarial_science |lm_eval/tasks/cmmlu/cmmlu_college_actuarial_science.yaml |multiple_choice | |cmmlu_college_education |lm_eval/tasks/cmmlu/cmmlu_default_college_education.yaml |multiple_choice | |cmmlu_college_engineering_hydrology |lm_eval/tasks/cmmlu/cmmlu_college_engineering_hydrology.yaml |multiple_choice | |cmmlu_college_law |lm_eval/tasks/cmmlu/cmmlu_default_college_law.yaml |multiple_choice | |cmmlu_college_mathematics |lm_eval/tasks/cmmlu/cmmlu_college_mathematics.yaml |multiple_choice | |cmmlu_college_medical_statistics |lm_eval/tasks/cmmlu/cmmlu_college_medical_statistics.yaml |multiple_choice | |cmmlu_college_medicine |lm_eval/tasks/cmmlu/cmmlu_default_college_medicine.yaml |multiple_choice | |cmmlu_computer_science |lm_eval/tasks/cmmlu/cmmlu_computer_science.yaml |multiple_choice | |cmmlu_computer_security |lm_eval/tasks/cmmlu/cmmlu_computer_security.yaml |multiple_choice | |cmmlu_conceptual_physics |lm_eval/tasks/cmmlu/cmmlu_default_conceptual_physics.yaml |multiple_choice | |cmmlu_construction_project_management |lm_eval/tasks/cmmlu/cmmlu_construction_project_management.yaml |multiple_choice | |cmmlu_economics |lm_eval/tasks/cmmlu/cmmlu_default_economics.yaml |multiple_choice | |cmmlu_education |lm_eval/tasks/cmmlu/cmmlu_education.yaml |multiple_choice | |cmmlu_electrical_engineering |lm_eval/tasks/cmmlu/cmmlu_electrical_engineering.yaml |multiple_choice | |cmmlu_elementary_chinese |lm_eval/tasks/cmmlu/cmmlu_elementary_chinese.yaml |multiple_choice | |cmmlu_elementary_commonsense |lm_eval/tasks/cmmlu/cmmlu_elementary_commonsense.yaml |multiple_choice | |cmmlu_elementary_information_and_technology |lm_eval/tasks/cmmlu/cmmlu_default_elementary_information_and_technology.yaml |multiple_choice | |cmmlu_elementary_mathematics |lm_eval/tasks/cmmlu/cmmlu_elementary_mathematics.yaml |multiple_choice | |cmmlu_ethnology 
|lm_eval/tasks/cmmlu/cmmlu_ethnology.yaml |multiple_choice | |cmmlu_food_science |lm_eval/tasks/cmmlu/cmmlu_default_food_science.yaml |multiple_choice | |cmmlu_genetics |lm_eval/tasks/cmmlu/cmmlu_default_genetics.yaml |multiple_choice | |cmmlu_global_facts |lm_eval/tasks/cmmlu/cmmlu_default_global_facts.yaml |multiple_choice | |cmmlu_high_school_biology |lm_eval/tasks/cmmlu/cmmlu_high_school_biology.yaml |multiple_choice | |cmmlu_high_school_chemistry |lm_eval/tasks/cmmlu/cmmlu_default_high_school_chemistry.yaml |multiple_choice | |cmmlu_high_school_geography |lm_eval/tasks/cmmlu/cmmlu_high_school_geography.yaml |multiple_choice | |cmmlu_high_school_mathematics |lm_eval/tasks/cmmlu/cmmlu_high_school_mathematics.yaml |multiple_choice | |cmmlu_high_school_physics |lm_eval/tasks/cmmlu/cmmlu_default_high_school_physics.yaml |multiple_choice | |cmmlu_high_school_politics |lm_eval/tasks/cmmlu/cmmlu_high_school_politics.yaml |multiple_choice | |cmmlu_human_sexuality |lm_eval/tasks/cmmlu/cmmlu_default_human_sexuality.yaml |multiple_choice | |cmmlu_international_law |lm_eval/tasks/cmmlu/cmmlu_international_law.yaml |multiple_choice | |cmmlu_journalism |lm_eval/tasks/cmmlu/cmmlu_default_journalism.yaml |multiple_choice | |cmmlu_jurisprudence |lm_eval/tasks/cmmlu/cmmlu_jurisprudence.yaml |multiple_choice | |cmmlu_legal_and_moral_basis |lm_eval/tasks/cmmlu/cmmlu_default_legal_and_moral_basis.yaml |multiple_choice | |cmmlu_logical |lm_eval/tasks/cmmlu/cmmlu_logical.yaml |multiple_choice | |cmmlu_machine_learning |lm_eval/tasks/cmmlu/cmmlu_machine_learning.yaml |multiple_choice | |cmmlu_management |lm_eval/tasks/cmmlu/cmmlu_default_management.yaml |multiple_choice | |cmmlu_marketing |lm_eval/tasks/cmmlu/cmmlu_marketing.yaml |multiple_choice | |cmmlu_marxist_theory |lm_eval/tasks/cmmlu/cmmlu_default_marxist_theory.yaml |multiple_choice | |cmmlu_modern_chinese |lm_eval/tasks/cmmlu/cmmlu_default_modern_chinese.yaml |multiple_choice | |cmmlu_nutrition 
|lm_eval/tasks/cmmlu/cmmlu_nutrition.yaml |multiple_choice | |cmmlu_philosophy |lm_eval/tasks/cmmlu/cmmlu_default_philosophy.yaml |multiple_choice | |cmmlu_professional_accounting |lm_eval/tasks/cmmlu/cmmlu_professional_accounting.yaml |multiple_choice | |cmmlu_professional_law |lm_eval/tasks/cmmlu/cmmlu_professional_law.yaml |multiple_choice | |cmmlu_professional_medicine |lm_eval/tasks/cmmlu/cmmlu_default_professional_medicine.yaml |multiple_choice | |cmmlu_professional_psychology |lm_eval/tasks/cmmlu/cmmlu_default_professional_psychology.yaml |multiple_choice | |cmmlu_public_relations |lm_eval/tasks/cmmlu/cmmlu_public_relations.yaml |multiple_choice | |cmmlu_security_study |lm_eval/tasks/cmmlu/cmmlu_default_security_study.yaml |multiple_choice | |cmmlu_sociology |lm_eval/tasks/cmmlu/cmmlu_sociology.yaml |multiple_choice | |cmmlu_sports_science |lm_eval/tasks/cmmlu/cmmlu_default_sports_science.yaml |multiple_choice | |cmmlu_traditional_chinese_medicine |lm_eval/tasks/cmmlu/cmmlu_traditional_chinese_medicine.yaml |multiple_choice | |cmmlu_virology |lm_eval/tasks/cmmlu/cmmlu_default_virology.yaml |multiple_choice | |cmmlu_world_history |lm_eval/tasks/cmmlu/cmmlu_world_history.yaml |multiple_choice | |cmmlu_world_religions |lm_eval/tasks/cmmlu/cmmlu_default_world_religions.yaml |multiple_choice | |code2text_go |lm_eval/tasks/code_x_glue/code-text/go.yaml |generate_until | |code2text_java |lm_eval/tasks/code_x_glue/code-text/java.yaml |generate_until | |code2text_javascript |lm_eval/tasks/code_x_glue/code-text/javascript.yaml |generate_until | |code2text_php |lm_eval/tasks/code_x_glue/code-text/php.yaml |generate_until | |code2text_python |lm_eval/tasks/code_x_glue/code-text/python.yaml |generate_until | |code2text_ruby |lm_eval/tasks/code_x_glue/code-text/ruby.yaml |generate_until | |cola |lm_eval/tasks/glue/cola/default.yaml |multiple_choice | |commonsense_qa |lm_eval/tasks/commonsense_qa/default.yaml |multiple_choice | |copa 
|lm_eval/tasks/super_glue/copa/default.yaml |multiple_choice | |copa_ar |lm_eval/tasks/alghafa/copa_ar/copa_ar.yaml |multiple_choice | |copal_id_colloquial |lm_eval/tasks/copal_id/colloquial.yaml |multiple_choice | |copal_id_standard |lm_eval/tasks/copal_id/standard.yaml |multiple_choice | |coqa |lm_eval/tasks/coqa/default.yaml |generate_until | |crows_pairs_english |lm_eval/tasks/crows_pairs/crows_pairs_english.yaml |multiple_choice | |crows_pairs_english_age |lm_eval/tasks/crows_pairs/crows_pairs_english_age.yaml |multiple_choice | |crows_pairs_english_autre |lm_eval/tasks/crows_pairs/crows_pairs_english_autre.yaml |multiple_choice | |crows_pairs_english_disability |lm_eval/tasks/crows_pairs/crows_pairs_english_disability.yaml |multiple_choice | |crows_pairs_english_gender |lm_eval/tasks/crows_pairs/crows_pairs_english_gender.yaml |multiple_choice | |crows_pairs_english_nationality |lm_eval/tasks/crows_pairs/crows_pairs_english_nationality.yaml |multiple_choice | |crows_pairs_english_physical_appearance |lm_eval/tasks/crows_pairs/crows_pairs_english_physical_appearance.yaml |multiple_choice | |crows_pairs_english_race_color |lm_eval/tasks/crows_pairs/crows_pairs_english_race_color.yaml |multiple_choice | |crows_pairs_english_religion |lm_eval/tasks/crows_pairs/crows_pairs_english_religion.yaml |multiple_choice | |crows_pairs_english_sexual_orientation |lm_eval/tasks/crows_pairs/crows_pairs_english_sexual_orientation.yaml |multiple_choice | |crows_pairs_english_socioeconomic |lm_eval/tasks/crows_pairs/crows_pairs_english_socioeconomic.yaml |multiple_choice | |crows_pairs_french |lm_eval/tasks/crows_pairs/crows_pairs_french.yaml |multiple_choice | |crows_pairs_french_age |lm_eval/tasks/crows_pairs/crows_pairs_french_age.yaml |multiple_choice | |crows_pairs_french_autre |lm_eval/tasks/crows_pairs/crows_pairs_french_autre.yaml |multiple_choice | |crows_pairs_french_disability |lm_eval/tasks/crows_pairs/crows_pairs_french_disability.yaml |multiple_choice | 
|crows_pairs_french_gender |lm_eval/tasks/crows_pairs/crows_pairs_french_gender.yaml |multiple_choice | |crows_pairs_french_nationality |lm_eval/tasks/crows_pairs/crows_pairs_french_nationality.yaml |multiple_choice | |crows_pairs_french_physical_appearance |lm_eval/tasks/crows_pairs/crows_pairs_french_physical_appearance.yaml |multiple_choice | |crows_pairs_french_race_color |lm_eval/tasks/crows_pairs/crows_pairs_french_race_color.yaml |multiple_choice | |crows_pairs_french_religion |lm_eval/tasks/crows_pairs/crows_pairs_french_religion.yaml |multiple_choice | |crows_pairs_french_sexual_orientation |lm_eval/tasks/crows_pairs/crows_pairs_french_sexual_orientation.yaml |multiple_choice | |crows_pairs_french_socioeconomic |lm_eval/tasks/crows_pairs/crows_pairs_french_socioeconomic.yaml |multiple_choice | |csatqa_gr |lm_eval/tasks/csatqa/csatqa_gr.yaml |multiple_choice | |csatqa_li |lm_eval/tasks/csatqa/csatqa_li.yaml |multiple_choice | |csatqa_rch |lm_eval/tasks/csatqa/csatqa_rch.yaml |multiple_choice | |csatqa_rcs |lm_eval/tasks/csatqa/csatqa_rcs.yaml |multiple_choice | |csatqa_rcss |lm_eval/tasks/csatqa/csatqa_rcss.yaml |multiple_choice | |csatqa_wr |lm_eval/tasks/csatqa/csatqa_wr.yaml |multiple_choice | |cycle_letters |lm_eval/tasks/unscramble/cycle_letters.yaml |generate_until | |drop |lm_eval/tasks/drop/default.yaml |generate_until | |epec_koref_bin |lm_eval/tasks/basqueglue/coref.yaml |multiple_choice | |eq_bench |lm_eval/tasks/eq_bench/default.yaml |generate_until | |ethics_cm |lm_eval/tasks/hendrycks_ethics/commonsense.yaml |multiple_choice | |ethics_deontology |lm_eval/tasks/hendrycks_ethics/deontology.yaml |multiple_choice | |ethics_justice |lm_eval/tasks/hendrycks_ethics/justice.yaml |multiple_choice | |ethics_utilitarianism |lm_eval/tasks/hendrycks_ethics/utilitarianism.yaml |multiple_choice | |ethics_virtue |lm_eval/tasks/hendrycks_ethics/virtue.yaml |multiple_choice | |eus_exams_es_ejadministrativo 
|lm_eval/tasks/eus_exams/eus_exams_es_ejadministrativo.yaml |multiple_choice | |eus_exams_es_ejauxiliar |lm_eval/tasks/eus_exams/eus_exams_es_ejauxiliar.yaml |multiple_choice | |eus_exams_es_ejsubalterno |lm_eval/tasks/eus_exams/eus_exams_es_ejsubalterno.yaml |multiple_choice | |eus_exams_es_ejtecnico |lm_eval/tasks/eus_exams/eus_exams_es_ejtecnico.yaml |multiple_choice | |eus_exams_es_opeayuntamientovitoria |lm_eval/tasks/eus_exams/eus_exams_es_opeayuntamientovitoria.yaml |multiple_choice | |eus_exams_es_opebilbao |lm_eval/tasks/eus_exams/eus_exams_es_opebilbao.yaml |multiple_choice | |eus_exams_es_opeehuadmin |lm_eval/tasks/eus_exams/eus_exams_es_opeehuadmin.yaml |multiple_choice | |eus_exams_es_opeehuaux |lm_eval/tasks/eus_exams/eus_exams_es_opeehuaux.yaml |multiple_choice | |eus_exams_es_opeehubiblio |lm_eval/tasks/eus_exams/eus_exams_es_opeehubiblio.yaml |multiple_choice | |eus_exams_es_opeehuderecho |lm_eval/tasks/eus_exams/eus_exams_es_opeehuderecho.yaml |multiple_choice | |eus_exams_es_opeehueconomicas |lm_eval/tasks/eus_exams/eus_exams_es_opeehueconomicas.yaml |multiple_choice | |eus_exams_es_opeehuempresariales |lm_eval/tasks/eus_exams/eus_exams_es_opeehuempresariales.yaml |multiple_choice | |eus_exams_es_opeehusubalterno |lm_eval/tasks/eus_exams/eus_exams_es_opeehusubalterno.yaml |multiple_choice | |eus_exams_es_opeehutecnico |lm_eval/tasks/eus_exams/eus_exams_es_opeehutecnico.yaml |multiple_choice | |eus_exams_es_opeehutecnicob |lm_eval/tasks/eus_exams/eus_exams_es_opeehutecnicob.yaml |multiple_choice | |eus_exams_es_opeosakiadmin |lm_eval/tasks/eus_exams/eus_exams_es_opeosakiadmin.yaml |multiple_choice | |eus_exams_es_opeosakiaux |lm_eval/tasks/eus_exams/eus_exams_es_opeosakiaux.yaml |multiple_choice | |eus_exams_es_opeosakiauxenf |lm_eval/tasks/eus_exams/eus_exams_es_opeosakiauxenf.yaml |multiple_choice | |eus_exams_es_opeosakicelador |lm_eval/tasks/eus_exams/eus_exams_es_opeosakicelador.yaml |multiple_choice | |eus_exams_es_opeosakienf 
|lm_eval/tasks/eus_exams/eus_exams_es_opeosakienf.yaml |multiple_choice | |eus_exams_es_opeosakijuridico |lm_eval/tasks/eus_exams/eus_exams_es_opeosakijuridico.yaml |multiple_choice | |eus_exams_es_opeosakioperario |lm_eval/tasks/eus_exams/eus_exams_es_opeosakioperario.yaml |multiple_choice | |eus_exams_es_opeosakitecnico |lm_eval/tasks/eus_exams/eus_exams_es_opeosakitecnico.yaml |multiple_choice | |eus_exams_es_opeosakivarios |lm_eval/tasks/eus_exams/eus_exams_es_opeosakivarios.yaml |multiple_choice | |eus_exams_es_osakidetza1c |lm_eval/tasks/eus_exams/eus_exams_es_osakidetza1c.yaml |multiple_choice | |eus_exams_es_osakidetza2c |lm_eval/tasks/eus_exams/eus_exams_es_osakidetza2c.yaml |multiple_choice | |eus_exams_es_osakidetza3c |lm_eval/tasks/eus_exams/eus_exams_es_osakidetza3c.yaml |multiple_choice | |eus_exams_es_osakidetza4c |lm_eval/tasks/eus_exams/eus_exams_es_osakidetza4c.yaml |multiple_choice | |eus_exams_es_osakidetza5c |lm_eval/tasks/eus_exams/eus_exams_es_osakidetza5c.yaml |multiple_choice | |eus_exams_es_osakidetza6c |lm_eval/tasks/eus_exams/eus_exams_es_osakidetza6c.yaml |multiple_choice | |eus_exams_es_osakidetza7c |lm_eval/tasks/eus_exams/eus_exams_es_osakidetza7c.yaml |multiple_choice | |eus_exams_es_osakidetza8c |lm_eval/tasks/eus_exams/eus_exams_es_osakidetza8c.yaml |multiple_choice | |eus_exams_es_osakidetza9c |lm_eval/tasks/eus_exams/eus_exams_es_osakidetza9c.yaml |multiple_choice | |eus_exams_eu_ejadministrari |lm_eval/tasks/eus_exams/eus_exams_eu_ejadministrari.yaml |multiple_choice | |eus_exams_eu_ejlaguntza |lm_eval/tasks/eus_exams/eus_exams_eu_ejlaguntza.yaml |multiple_choice | |eus_exams_eu_ejlaguntzaile |lm_eval/tasks/eus_exams/eus_exams_eu_ejlaguntzaile.yaml |multiple_choice | |eus_exams_eu_ejteknikari |lm_eval/tasks/eus_exams/eus_exams_eu_ejteknikari.yaml |multiple_choice | |eus_exams_eu_opebilbaoeu |lm_eval/tasks/eus_exams/eus_exams_eu_opebilbaoeu.yaml |multiple_choice | |eus_exams_eu_opeehuadmineu 
|lm_eval/tasks/eus_exams/eus_exams_eu_opeehuadmineu.yaml |multiple_choice | |eus_exams_eu_opeehuauxeu |lm_eval/tasks/eus_exams/eus_exams_eu_opeehuauxeu.yaml |multiple_choice | |eus_exams_eu_opeehubiblioeu |lm_eval/tasks/eus_exams/eus_exams_eu_opeehubiblioeu.yaml |multiple_choice | |eus_exams_eu_opeehuderechoeu |lm_eval/tasks/eus_exams/eus_exams_eu_opeehuderechoeu.yaml |multiple_choice | |eus_exams_eu_opeehueconomicaseu |lm_eval/tasks/eus_exams/eus_exams_eu_opeehueconomicaseu.yaml |multiple_choice | |eus_exams_eu_opeehuempresarialeseu |lm_eval/tasks/eus_exams/eus_exams_eu_opeehuempresarialeseu.yaml |multiple_choice | |eus_exams_eu_opeehusubalternoeu |lm_eval/tasks/eus_exams/eus_exams_eu_opeehusubalternoeu.yaml |multiple_choice | |eus_exams_eu_opeehutecnicoeu |lm_eval/tasks/eus_exams/eus_exams_eu_opeehutecnicoeu.yaml |multiple_choice | |eus_exams_eu_opeehuteknikarib |lm_eval/tasks/eus_exams/eus_exams_eu_opeehuteknikarib.yaml |multiple_choice | |eus_exams_eu_opegasteizkoudala |lm_eval/tasks/eus_exams/eus_exams_eu_opegasteizkoudala.yaml |multiple_choice | |eus_exams_eu_opeosakiadmineu |lm_eval/tasks/eus_exams/eus_exams_eu_opeosakiadmineu.yaml |multiple_choice | |eus_exams_eu_opeosakiauxenfeu |lm_eval/tasks/eus_exams/eus_exams_eu_opeosakiauxenfeu.yaml |multiple_choice | |eus_exams_eu_opeosakiauxeu |lm_eval/tasks/eus_exams/eus_exams_eu_opeosakiauxeu.yaml |multiple_choice | |eus_exams_eu_opeosakiceladoreu |lm_eval/tasks/eus_exams/eus_exams_eu_opeosakiceladoreu.yaml |multiple_choice | |eus_exams_eu_opeosakienfeu |lm_eval/tasks/eus_exams/eus_exams_eu_opeosakienfeu.yaml |multiple_choice | |eus_exams_eu_opeosakioperarioeu |lm_eval/tasks/eus_exams/eus_exams_eu_opeosakioperarioeu.yaml |multiple_choice | |eus_exams_eu_opeosakitecnicoeu |lm_eval/tasks/eus_exams/eus_exams_eu_opeosakitecnicoeu.yaml |multiple_choice | |eus_exams_eu_opeosakivarioseu |lm_eval/tasks/eus_exams/eus_exams_eu_opeosakivarioseu.yaml |multiple_choice | |eus_exams_eu_osakidetza1e 
|lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza1e.yaml |multiple_choice | |eus_exams_eu_osakidetza2e |lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza2e.yaml |multiple_choice | |eus_exams_eu_osakidetza3e |lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza3e.yaml |multiple_choice | |eus_exams_eu_osakidetza5e |lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza5e.yaml |multiple_choice | |eus_exams_eu_osakidetza6e |lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza6e.yaml |multiple_choice | |eus_exams_eu_osakidetza7e |lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza7e.yaml |multiple_choice | |eus_proficiency |lm_eval/tasks/eus_proficiency/eus_proficiency.yaml |multiple_choice | |eus_reading |lm_eval/tasks/eus_reading/eus_reading.yaml |multiple_choice | |eus_trivia |lm_eval/tasks/eus_trivia/eus_trivia.yaml |multiple_choice | |fld_default |lm_eval/tasks/fld/fld_default.yaml |generate_until | |fld_logical_formula_default |lm_eval/tasks/fld/fld_logical_formula_default.yaml |generate_until | |fld_logical_formula_star |lm_eval/tasks/fld/fld_logical_formula_star.yaml |generate_until | |fld_star |lm_eval/tasks/fld/fld_star.yaml |generate_until | |french_bench_arc_challenge |lm_eval/tasks/french_bench/french_bench_arc_challenge.yaml |multiple_choice | |french_bench_boolqa |lm_eval/tasks/french_bench/french_bench_boolqa.yaml |multiple_choice | |french_bench_fquadv2 |lm_eval/tasks/french_bench/french_bench_fquadv2.yaml |generate_until | |french_bench_fquadv2_bool |lm_eval/tasks/french_bench/french_bench_fquadv2_bool.yaml |multiple_choice | |french_bench_fquadv2_genq |lm_eval/tasks/french_bench/french_bench_fquadv2_genq.yaml |generate_until | |french_bench_fquadv2_hasAns |lm_eval/tasks/french_bench/french_bench_fquadv2_hasAns.yaml |generate_until | |french_bench_grammar |lm_eval/tasks/french_bench/french_bench_grammar.yaml |multiple_choice | |french_bench_hellaswag |lm_eval/tasks/french_bench/french_bench_hellaswag.yaml |multiple_choice | |french_bench_multifquad |lm_eval/tasks/french_bench/french_bench_multifquad.yaml |generate_until | 
|french_bench_opus_perplexity |lm_eval/tasks/french_bench/french_bench_opus_perplexity.yaml |loglikelihood_rolling| |french_bench_orangesum_abstract |lm_eval/tasks/french_bench/french_bench_orangesum_abstract.yaml |generate_until | |french_bench_orangesum_title |lm_eval/tasks/french_bench/french_bench_orangesum_title.yaml |generate_until | |french_bench_reading_comp |lm_eval/tasks/french_bench/french_bench_reading_comp.yaml |multiple_choice | |french_bench_topic_based_nli |lm_eval/tasks/french_bench/french_bench_topic_based_nli.yaml |multiple_choice | |french_bench_trivia |lm_eval/tasks/french_bench/french_bench_trivia.yaml |generate_until | |french_bench_vocab |lm_eval/tasks/french_bench/french_bench_vocab.yaml |multiple_choice | |french_bench_wikitext_fr |lm_eval/tasks/french_bench/french_bench_wikitext_fr.yaml |loglikelihood_rolling| |french_bench_xnli |lm_eval/tasks/french_bench/french_bench_xnli.yaml |multiple_choice | |glianorex |lm_eval/tasks/glianorex/glianorex.yaml |multiple_choice | |glianorex_en |lm_eval/tasks/glianorex/glianorex_en.yaml |multiple_choice | |glianorex_fr |lm_eval/tasks/glianorex/glianorex_fr.yaml |multiple_choice | |gpqa_diamond_cot_n_shot |lm_eval/tasks/gpqa/cot_n_shot/gpqa_diamond_cot_n_shot.yaml |generate_until | |gpqa_diamond_cot_zeroshot |lm_eval/tasks/gpqa/cot_zeroshot/gpqa_diamond_cot_zeroshot.yaml |generate_until | |gpqa_diamond_generative_n_shot |lm_eval/tasks/gpqa/generative/gpqa_diamond_generative_n_shot.yaml |generate_until | |gpqa_diamond_n_shot |lm_eval/tasks/gpqa/n_shot/gpqa_diamond_n_shot.yaml |multiple_choice | |gpqa_diamond_zeroshot |lm_eval/tasks/gpqa/zeroshot/gpqa_diamond_zeroshot.yaml |multiple_choice | |gpqa_extended_cot_n_shot |lm_eval/tasks/gpqa/cot_n_shot/gpqa_extended_cot_n_shot.yaml |generate_until | |gpqa_extended_cot_zeroshot |lm_eval/tasks/gpqa/cot_zeroshot/gpqa_extended_cot_zeroshot.yaml |generate_until | |gpqa_extended_generative_n_shot |lm_eval/tasks/gpqa/generative/gpqa_extended_generative_n_shot.yaml 
|generate_until | |gpqa_extended_n_shot |lm_eval/tasks/gpqa/n_shot/gpqa_extended_n_shot.yaml |multiple_choice | |gpqa_extended_zeroshot |lm_eval/tasks/gpqa/zeroshot/gpqa_extended_zeroshot.yaml |multiple_choice | |gpqa_main_cot_n_shot |lm_eval/tasks/gpqa/cot_n_shot/gpqa_main_cot_n_shot.yaml |generate_until | |gpqa_main_cot_zeroshot |lm_eval/tasks/gpqa/cot_zeroshot/gpqa_main_cot_zeroshot.yaml |generate_until | |gpqa_main_generative_n_shot |lm_eval/tasks/gpqa/generative/gpqa_main_generative_n_shot.yaml |generate_until | |gpqa_main_n_shot |lm_eval/tasks/gpqa/n_shot/gpqa_main_n_shot.yaml |multiple_choice | |gpqa_main_zeroshot |lm_eval/tasks/gpqa/zeroshot/gpqa_main_zeroshot.yaml |multiple_choice | |gsm8k |lm_eval/tasks/gsm8k/gsm8k.yaml |generate_until | |gsm8k_cot |lm_eval/tasks/gsm8k/gsm8k-cot.yaml |generate_until | |gsm8k_cot_self_consistency |lm_eval/tasks/gsm8k/gsm8k-cot-self-consistency.yaml |generate_until | |gsm8k_cot_zeroshot |lm_eval/tasks/gsm8k/gsm8k-cot-zeroshot.yaml |generate_until | |gsm_plus |lm_eval/tasks/gsm_plus/gsm_plus.yaml |generate_until | |gsm_plus_mini |lm_eval/tasks/gsm_plus/gsm_plus_mini.yaml |generate_until | |haerae_general_knowledge |lm_eval/tasks/haerae/haerae_gk.yaml |multiple_choice | |haerae_history |lm_eval/tasks/haerae/haerae_hi.yaml |multiple_choice | |haerae_loan_word |lm_eval/tasks/haerae/haerae_lw.yaml |multiple_choice | |haerae_rare_word |lm_eval/tasks/haerae/haerae_rw.yaml |multiple_choice | |haerae_standard_nomenclature |lm_eval/tasks/haerae/haerae_sn.yaml |multiple_choice | |headqa_en |lm_eval/tasks/headqa/headqa_en.yaml |multiple_choice | |headqa_es |lm_eval/tasks/headqa/headqa_es.yaml |multiple_choice | |hellaswag |lm_eval/tasks/hellaswag/hellaswag.yaml |multiple_choice | |hellaswag_ar |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_ar.yaml |multiple_choice | |hellaswag_bn |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_bn.yaml |multiple_choice | |hellaswag_ca 
|lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_ca.yaml |multiple_choice | |hellaswag_da |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_da.yaml |multiple_choice | |hellaswag_de |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_de.yaml |multiple_choice | |hellaswag_es |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_es.yaml |multiple_choice | |hellaswag_eu |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_eu.yaml |multiple_choice | |hellaswag_fr |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_fr.yaml |multiple_choice | |hellaswag_gu |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_gu.yaml |multiple_choice | |hellaswag_hi |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_hi.yaml |multiple_choice | |hellaswag_hr |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_hr.yaml |multiple_choice | |hellaswag_hu |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_hu.yaml |multiple_choice | |hellaswag_hy |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_hy.yaml |multiple_choice | |hellaswag_id |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_id.yaml |multiple_choice | |hellaswag_it |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_it.yaml |multiple_choice | |hellaswag_kn |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_kn.yaml |multiple_choice | |hellaswag_ml |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_ml.yaml |multiple_choice | |hellaswag_mr |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_mr.yaml |multiple_choice | |hellaswag_ne |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_ne.yaml |multiple_choice | |hellaswag_nl |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_nl.yaml |multiple_choice | |hellaswag_pt |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_pt.yaml |multiple_choice | |hellaswag_ro |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_ro.yaml |multiple_choice | |hellaswag_ru |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_ru.yaml |multiple_choice | |hellaswag_sk 
|lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_sk.yaml |multiple_choice | |hellaswag_sr |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_sr.yaml |multiple_choice | |hellaswag_sv |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_sv.yaml |multiple_choice | |hellaswag_ta |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_ta.yaml |multiple_choice | |hellaswag_te |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_te.yaml |multiple_choice | |hellaswag_uk |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_uk.yaml |multiple_choice | |hellaswag_vi |lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_vi.yaml |multiple_choice | |hendrycks_math_algebra |lm_eval/tasks/hendrycks_math/hendrycks_math_algebra.yaml |generate_until | |hendrycks_math_counting_and_prob |lm_eval/tasks/hendrycks_math/hendrycks_math_counting_and_prob.yaml |generate_until | |hendrycks_math_geometry |lm_eval/tasks/hendrycks_math/hendrycks_math_geometry.yaml |generate_until | |hendrycks_math_intermediate_algebra |lm_eval/tasks/hendrycks_math/hendrycks_math_intermediate_algebra.yaml |generate_until | |hendrycks_math_num_theory |lm_eval/tasks/hendrycks_math/hendrycks_math_num_theory.yaml |generate_until | |hendrycks_math_prealgebra |lm_eval/tasks/hendrycks_math/hendrycks_math_prealgebra.yaml |generate_until | |hendrycks_math_precalc |lm_eval/tasks/hendrycks_math/hendrycks_math_precalc.yaml |generate_until | |ifeval |lm_eval/tasks/ifeval/ifeval.yaml |generate_until | |inverse_scaling_hindsight_neglect_10shot |lm_eval/tasks/inverse_scaling/inverse_scaling_hindsight_neglect.yaml |multiple_choice | |inverse_scaling_into_the_unknown |lm_eval/tasks/inverse_scaling/inverse_scaling_into_the_unknown.yaml |multiple_choice | |inverse_scaling_memo_trap |lm_eval/tasks/inverse_scaling/inverse_scaling_memo_trap.yaml |multiple_choice | |inverse_scaling_modus_tollens |lm_eval/tasks/inverse_scaling/inverse_scaling_modus_tollens.yaml |multiple_choice | |inverse_scaling_neqa 
|lm_eval/tasks/inverse_scaling/inverse_scaling_neqa.yaml |multiple_choice | |inverse_scaling_pattern_matching_suppression |lm_eval/tasks/inverse_scaling/inverse_scaling_pattern_matching_suppression.yaml |multiple_choice | |inverse_scaling_quote_repetition |lm_eval/tasks/inverse_scaling/inverse_scaling_quote_repetition.yaml |multiple_choice | |inverse_scaling_redefine_math |lm_eval/tasks/inverse_scaling/inverse_scaling_redefine_math.yaml |multiple_choice | |inverse_scaling_repetitive_algebra |lm_eval/tasks/inverse_scaling/inverse_scaling_repetitive_algebra.yaml |multiple_choice | |inverse_scaling_sig_figs |lm_eval/tasks/inverse_scaling/inverse_scaling_sig_figs.yaml |multiple_choice | |inverse_scaling_winobias_antistereotype |lm_eval/tasks/inverse_scaling/inverse_scaling_winobias_antistereotype.yaml |multiple_choice | |iwslt2017-ar-en |lm_eval/tasks/translation/iwslt2017_ar-en.yaml |generate_until | |iwslt2017-en-ar |lm_eval/tasks/translation/iwslt2017_en-ar.yaml |generate_until | |kmmlu_direct_accounting |lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml |generate_until | |kmmlu_direct_agricultural_sciences |lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml |generate_until | |kmmlu_direct_aviation_engineering_and_maintenance |lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml |generate_until | |kmmlu_direct_biology |lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml |generate_until | |kmmlu_direct_chemical_engineering |lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemical_engineering.yaml |generate_until | |kmmlu_direct_chemistry |lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml |generate_until | |kmmlu_direct_civil_engineering |lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml |generate_until | |kmmlu_direct_computer_science |lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml |generate_until | |kmmlu_direct_construction |lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml 
|generate_until | |kmmlu_direct_criminal_law |lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml |generate_until | |kmmlu_direct_ecology |lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml |generate_until | |kmmlu_direct_economics |lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml |generate_until | |kmmlu_direct_education |lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml |generate_until | |kmmlu_direct_electrical_engineering |lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml |generate_until | |kmmlu_direct_electronics_engineering |lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml |generate_until | |kmmlu_direct_energy_management |lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml |generate_until | |kmmlu_direct_environmental_science |lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml |generate_until | |kmmlu_direct_fashion |lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml |generate_until | |kmmlu_direct_food_processing |lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml |generate_until | |kmmlu_direct_gas_technology_and_engineering |lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml |generate_until | |kmmlu_direct_geomatics |lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml |generate_until | |kmmlu_direct_health |lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml |generate_until | |kmmlu_direct_industrial_engineer |lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml |generate_until | |kmmlu_direct_information_technology |lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml |generate_until | |kmmlu_direct_interior_architecture_and_design |lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml |generate_until | |kmmlu_direct_korean_history |lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml |generate_until | |kmmlu_direct_law |lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml 
|generate_until | |kmmlu_direct_machine_design_and_manufacturing |lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml |generate_until | |kmmlu_direct_management |lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml |generate_until | |kmmlu_direct_maritime_engineering |lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml |generate_until | |kmmlu_direct_marketing |lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml |generate_until | |kmmlu_direct_materials_engineering |lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml |generate_until | |kmmlu_direct_math |lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml |generate_until | |kmmlu_direct_mechanical_engineering |lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml |generate_until | |kmmlu_direct_nondestructive_testing |lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml |generate_until | |kmmlu_direct_patent |lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml |generate_until | |kmmlu_direct_political_science_and_sociology |lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml |generate_until | |kmmlu_direct_psychology |lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml |generate_until | |kmmlu_direct_public_safety |lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml |generate_until | |kmmlu_direct_railway_and_automotive_engineering |lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml |generate_until | |kmmlu_direct_real_estate |lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml |generate_until | |kmmlu_direct_refrigerating_machinery |lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml |generate_until | |kmmlu_direct_social_welfare |lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml |generate_until | |kmmlu_direct_taxation |lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml |generate_until | 
|kmmlu_direct_telecommunications_and_wireless_technology |lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml |generate_until | |kmmlu_hard_accounting |lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml |multiple_choice | |kmmlu_hard_agricultural_sciences |lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml |multiple_choice | |kmmlu_hard_aviation_engineering_and_maintenance |lm_eval/tasks/kmmlu/hard/kmmlu_hard_aviation_engineering_and_maintenance.yaml |multiple_choice | |kmmlu_hard_biology |lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml |multiple_choice | |kmmlu_hard_chemical_engineering |lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml |multiple_choice | |kmmlu_hard_chemistry |lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml |multiple_choice | |kmmlu_hard_civil_engineering |lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml |multiple_choice | |kmmlu_hard_computer_science |lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml |multiple_choice | |kmmlu_hard_construction |lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml |multiple_choice | |kmmlu_hard_cot_accounting |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_accounting.yaml |generate_until | |kmmlu_hard_cot_agricultural_sciences |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_agricultural_sciences.yaml |generate_until | |kmmlu_hard_cot_aviation_engineering_and_maintenance |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_aviation_engineering_and_maintenance.yaml |generate_until | |kmmlu_hard_cot_biology |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_biology.yaml |generate_until | |kmmlu_hard_cot_chemical_engineering |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemical_engineering.yaml |generate_until | |kmmlu_hard_cot_chemistry |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemistry.yaml |generate_until | |kmmlu_hard_cot_civil_engineering |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_civil_engineering.yaml |generate_until | 
|kmmlu_hard_cot_computer_science |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_computer_science.yaml |generate_until | |kmmlu_hard_cot_construction |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_construction.yaml |generate_until | |kmmlu_hard_cot_criminal_law |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_criminal_law.yaml |generate_until | |kmmlu_hard_cot_ecology |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_ecology.yaml |generate_until | |kmmlu_hard_cot_economics |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_economics.yaml |generate_until | |kmmlu_hard_cot_education |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_education.yaml |generate_until | |kmmlu_hard_cot_electrical_engineering |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electrical_engineering.yaml |generate_until | |kmmlu_hard_cot_electronics_engineering |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electronics_engineering.yaml |generate_until | |kmmlu_hard_cot_energy_management |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_energy_management.yaml |generate_until | |kmmlu_hard_cot_environmental_science |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_environmental_science.yaml |generate_until | |kmmlu_hard_cot_fashion |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_fashion.yaml |generate_until | |kmmlu_hard_cot_food_processing |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_food_processing.yaml |generate_until | |kmmlu_hard_cot_gas_technology_and_engineering |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_gas_technology_and_engineering.yaml |generate_until | |kmmlu_hard_cot_geomatics |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_geomatics.yaml |generate_until | |kmmlu_hard_cot_health |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_health.yaml |generate_until | |kmmlu_hard_cot_industrial_engineer |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_industrial_engineer.yaml |generate_until | |kmmlu_hard_cot_information_technology |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_information_technology.yaml |generate_until | 
|kmmlu_hard_cot_interior_architecture_and_design |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_interior_architecture_and_design.yaml |generate_until | |kmmlu_hard_cot_korean_history |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_korean_history.yaml |generate_until | |kmmlu_hard_cot_law |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_law.yaml |generate_until | |kmmlu_hard_cot_machine_design_and_manufacturing |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_machine_design_and_manufacturing.yaml |generate_until | |kmmlu_hard_cot_management |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_management.yaml |generate_until | |kmmlu_hard_cot_maritime_engineering |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_maritime_engineering.yaml |generate_until | |kmmlu_hard_cot_marketing |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_marketing.yaml |generate_until | |kmmlu_hard_cot_materials_engineering |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_materials_engineering.yaml |generate_until | |kmmlu_hard_cot_math |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_math.yaml |generate_until | |kmmlu_hard_cot_mechanical_engineering |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_mechanical_engineering.yaml |generate_until | |kmmlu_hard_cot_nondestructive_testing |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_nondestructive_testing.yaml |generate_until | |kmmlu_hard_cot_patent |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_patent.yaml |generate_until | |kmmlu_hard_cot_political_science_and_sociology |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_political_science_and_sociology.yaml |generate_until | |kmmlu_hard_cot_psychology |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_psychology.yaml |generate_until | |kmmlu_hard_cot_public_safety |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_public_safety.yaml |generate_until | |kmmlu_hard_cot_railway_and_automotive_engineering |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_railway_and_automotive_engineering.yaml |generate_until | |kmmlu_hard_cot_real_estate 
|lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_real_estate.yaml |generate_until | |kmmlu_hard_cot_refrigerating_machinery |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_refrigerating_machinery.yaml |generate_until | |kmmlu_hard_cot_social_welfare |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_social_welfare.yaml |generate_until | |kmmlu_hard_cot_taxation |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_taxation.yaml |generate_until | |kmmlu_hard_cot_telecommunications_and_wireless_technology |lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_telecommunications_and_wireless_technology.yaml |generate_until | |kmmlu_hard_criminal_law |lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml |multiple_choice | |kmmlu_hard_direct_accounting |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml |generate_until | |kmmlu_hard_direct_agricultural_sciences |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml |generate_until | |kmmlu_hard_direct_aviation_engineering_and_maintenance |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml |generate_until | |kmmlu_hard_direct_biology |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml |generate_until | |kmmlu_hard_direct_chemical_engineering |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemical_engineering.yaml |generate_until | |kmmlu_hard_direct_chemistry |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml |generate_until | |kmmlu_hard_direct_civil_engineering |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml |generate_until | |kmmlu_hard_direct_computer_science |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_computer_science.yaml |generate_until | |kmmlu_hard_direct_construction |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_construction.yaml |generate_until | |kmmlu_hard_direct_criminal_law |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_criminal_law.yaml |generate_until | |kmmlu_hard_direct_ecology 
|lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml |generate_until | |kmmlu_hard_direct_economics |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_economics.yaml |generate_until | |kmmlu_hard_direct_education |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_education.yaml |generate_until | |kmmlu_hard_direct_electrical_engineering |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electrical_engineering.yaml |generate_until | |kmmlu_hard_direct_electronics_engineering |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electronics_engineering.yaml |generate_until | |kmmlu_hard_direct_energy_management |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_energy_management.yaml |generate_until | |kmmlu_hard_direct_environmental_science |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_environmental_science.yaml |generate_until | |kmmlu_hard_direct_fashion |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml |generate_until | |kmmlu_hard_direct_food_processing |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_food_processing.yaml |generate_until | |kmmlu_hard_direct_gas_technology_and_engineering |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_gas_technology_and_engineering.yaml |generate_until | |kmmlu_hard_direct_geomatics |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_geomatics.yaml |generate_until | |kmmlu_hard_direct_health |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_health.yaml |generate_until | |kmmlu_hard_direct_industrial_engineer |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_industrial_engineer.yaml |generate_until | |kmmlu_hard_direct_information_technology |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_information_technology.yaml |generate_until | |kmmlu_hard_direct_interior_architecture_and_design |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_interior_architecture_and_design.yaml |generate_until | |kmmlu_hard_direct_korean_history |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_korean_history.yaml 
|generate_until | |kmmlu_hard_direct_law |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_law.yaml |generate_until | |kmmlu_hard_direct_machine_design_and_manufacturing |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_machine_design_and_manufacturing.yaml |generate_until | |kmmlu_hard_direct_management |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_management.yaml |generate_until | |kmmlu_hard_direct_maritime_engineering |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_maritime_engineering.yaml |generate_until | |kmmlu_hard_direct_marketing |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_marketing.yaml |generate_until | |kmmlu_hard_direct_materials_engineering |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_materials_engineering.yaml |generate_until | |kmmlu_hard_direct_math |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_math.yaml |generate_until | |kmmlu_hard_direct_mechanical_engineering |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_mechanical_engineering.yaml |generate_until | |kmmlu_hard_direct_nondestructive_testing |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_nondestructive_testing.yaml |generate_until | |kmmlu_hard_direct_patent |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml |generate_until | |kmmlu_hard_direct_political_science_and_sociology |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_political_science_and_sociology.yaml |generate_until | |kmmlu_hard_direct_psychology |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_psychology.yaml |generate_until | |kmmlu_hard_direct_public_safety |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_public_safety.yaml |generate_until | |kmmlu_hard_direct_railway_and_automotive_engineering |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_railway_and_automotive_engineering.yaml |generate_until | |kmmlu_hard_direct_real_estate |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_real_estate.yaml |generate_until | |kmmlu_hard_direct_refrigerating_machinery 
|lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_refrigerating_machinery.yaml |generate_until | |kmmlu_hard_direct_social_welfare |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_social_welfare.yaml |generate_until | |kmmlu_hard_direct_taxation |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml |generate_until | |kmmlu_hard_direct_telecommunications_and_wireless_technology |lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_telecommunications_and_wireless_technology.yaml |generate_until | |kmmlu_hard_ecology |lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml |multiple_choice | |kmmlu_hard_economics |lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml |multiple_choice | |kmmlu_hard_education |lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml |multiple_choice | |kmmlu_hard_electrical_engineering |lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml |multiple_choice | |kmmlu_hard_electronics_engineering |lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml |multiple_choice | |kmmlu_hard_energy_management |lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml |multiple_choice | |kmmlu_hard_environmental_science |lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml |multiple_choice | |kmmlu_hard_fashion |lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml |multiple_choice | |kmmlu_hard_food_processing |lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml |multiple_choice | |kmmlu_hard_gas_technology_and_engineering |lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml |multiple_choice | |kmmlu_hard_geomatics |lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml |multiple_choice | |kmmlu_hard_health |lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml |multiple_choice | |kmmlu_hard_industrial_engineer |lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml |multiple_choice | |kmmlu_hard_information_technology |lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml |multiple_choice | 
|kmmlu_hard_interior_architecture_and_design |lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml |multiple_choice | |kmmlu_hard_korean_history |lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml |multiple_choice | |kmmlu_hard_law |lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml |multiple_choice | |kmmlu_hard_machine_design_and_manufacturing |lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml |multiple_choice | |kmmlu_hard_management |lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml |multiple_choice | |kmmlu_hard_maritime_engineering |lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml |multiple_choice | |kmmlu_hard_marketing |lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml |multiple_choice | |kmmlu_hard_materials_engineering |lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml |multiple_choice | |kmmlu_hard_math |lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml |multiple_choice | |kmmlu_hard_mechanical_engineering |lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml |multiple_choice | |kmmlu_hard_nondestructive_testing |lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml |multiple_choice | |kmmlu_hard_patent |lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml |multiple_choice | |kmmlu_hard_political_science_and_sociology |lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml |multiple_choice | |kmmlu_hard_psychology |lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml |multiple_choice | |kmmlu_hard_public_safety |lm_eval/tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml |multiple_choice | |kmmlu_hard_railway_and_automotive_engineering |lm_eval/tasks/kmmlu/hard/kmmlu_hard_railway_and_automotive_engineering.yaml |multiple_choice | |kmmlu_hard_real_estate |lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml |multiple_choice | |kmmlu_hard_refrigerating_machinery |lm_eval/tasks/kmmlu/hard/kmmlu_hard_refrigerating_machinery.yaml |multiple_choice | 
|kmmlu_hard_social_welfare |lm_eval/tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml |multiple_choice | |kmmlu_hard_taxation |lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml |multiple_choice | |kmmlu_hard_telecommunications_and_wireless_technology |lm_eval/tasks/kmmlu/hard/kmmlu_hard_telecommunications_and_wireless_technology.yaml |multiple_choice | |kobest_boolq |lm_eval/tasks/kobest/kobest_boolq.yaml |multiple_choice | |kobest_copa |lm_eval/tasks/kobest/kobest_copa.yaml |multiple_choice | |kobest_hellaswag |lm_eval/tasks/kobest/kobest_hellaswag.yaml |multiple_choice | |kobest_sentineg |lm_eval/tasks/kobest/kobest_sentineg.yaml |multiple_choice | |kobest_wic |lm_eval/tasks/kobest/kobest_wic.yaml |multiple_choice | |kormedmcqa_doctor |lm_eval/tasks/kormedmcqa/kormedmcqa_doctor.yaml |generate_until | |kormedmcqa_nurse |lm_eval/tasks/kormedmcqa/kormedmcqa_nurse.yaml |generate_until | |kormedmcqa_pharm |lm_eval/tasks/kormedmcqa/kormedmcqa_pharm.yaml |generate_until | |lambada_openai |lm_eval/tasks/lambada/lambada_openai.yaml |loglikelihood | |lambada_openai_cloze_yaml |lm_eval/tasks/lambada_cloze/lambada_openai_cloze.yaml |loglikelihood | |lambada_openai_mt_de |lm_eval/tasks/lambada_multilingual/lambada_mt_de.yaml |loglikelihood | |lambada_openai_mt_en |lm_eval/tasks/lambada_multilingual/lambada_mt_en.yaml |loglikelihood | |lambada_openai_mt_es |lm_eval/tasks/lambada_multilingual/lambada_mt_es.yaml |loglikelihood | |lambada_openai_mt_fr |lm_eval/tasks/lambada_multilingual/lambada_mt_fr.yaml |loglikelihood | |lambada_openai_mt_it |lm_eval/tasks/lambada_multilingual/lambada_mt_it.yaml |loglikelihood | |lambada_openai_mt_stablelm_de |lm_eval/tasks/lambada_multilingual_stablelm/lambada_mt_stablelm_de.yaml |loglikelihood | |lambada_openai_mt_stablelm_en |lm_eval/tasks/lambada_multilingual_stablelm/lambada_mt_stablelm_en.yaml |loglikelihood | |lambada_openai_mt_stablelm_es |lm_eval/tasks/lambada_multilingual_stablelm/lambada_mt_stablelm_es.yaml |loglikelihood | 
|lambada_openai_mt_stablelm_fr |lm_eval/tasks/lambada_multilingual_stablelm/lambada_mt_stablelm_fr.yaml |loglikelihood | |lambada_openai_mt_stablelm_it |lm_eval/tasks/lambada_multilingual_stablelm/lambada_mt_stablelm_it.yaml |loglikelihood | |lambada_openai_mt_stablelm_nl |lm_eval/tasks/lambada_multilingual_stablelm/lambada_mt_stablelm_nl.yaml |loglikelihood | |lambada_openai_mt_stablelm_pt |lm_eval/tasks/lambada_multilingual_stablelm/lambada_mt_stablelm_pt.yaml |loglikelihood | |lambada_standard |lm_eval/tasks/lambada/lambada_standard.yaml |loglikelihood | |lambada_standard_cloze_yaml |lm_eval/tasks/lambada_cloze/lambada_standard_cloze.yaml |loglikelihood | |leaderboard_bbh_boolean_expressions |lm_eval/tasks/leaderboard/bbh_mc/boolean_expressions.yaml |multiple_choice | |leaderboard_bbh_causal_judgement |lm_eval/tasks/leaderboard/bbh_mc/causal_judgement.yaml |multiple_choice | |leaderboard_bbh_date_understanding |lm_eval/tasks/leaderboard/bbh_mc/date_understanding.yaml |multiple_choice | |leaderboard_bbh_disambiguation_qa |lm_eval/tasks/leaderboard/bbh_mc/disambiguation_qa.yaml |multiple_choice | |leaderboard_bbh_formal_fallacies |lm_eval/tasks/leaderboard/bbh_mc/formal_fallacies.yaml |multiple_choice | |leaderboard_bbh_geometric_shapes |lm_eval/tasks/leaderboard/bbh_mc/geometric_shapes.yaml |multiple_choice | |leaderboard_bbh_hyperbaton |lm_eval/tasks/leaderboard/bbh_mc/hyperbaton.yaml |multiple_choice | |leaderboard_bbh_logical_deduction_five_objects |lm_eval/tasks/leaderboard/bbh_mc/logical_deduction_five_objects.yaml |multiple_choice | |leaderboard_bbh_logical_deduction_seven_objects |lm_eval/tasks/leaderboard/bbh_mc/logical_deduction_seven_objects.yaml |multiple_choice | |leaderboard_bbh_logical_deduction_three_objects |lm_eval/tasks/leaderboard/bbh_mc/logical_deduction_three_objects.yaml |multiple_choice | |leaderboard_bbh_movie_recommendation |lm_eval/tasks/leaderboard/bbh_mc/movie_recommendation.yaml |multiple_choice | |leaderboard_bbh_navigate 
|lm_eval/tasks/leaderboard/bbh_mc/navigate.yaml |multiple_choice | |leaderboard_bbh_object_counting |lm_eval/tasks/leaderboard/bbh_mc/object_counting.yaml |multiple_choice | |leaderboard_bbh_penguins_in_a_table |lm_eval/tasks/leaderboard/bbh_mc/penguins_in_a_table.yaml |multiple_choice | |leaderboard_bbh_reasoning_about_colored_objects |lm_eval/tasks/leaderboard/bbh_mc/reasoning_about_colored_objects.yaml |multiple_choice | |leaderboard_bbh_ruin_names |lm_eval/tasks/leaderboard/bbh_mc/ruin_names.yaml |multiple_choice | |leaderboard_bbh_salient_translation_error_detection |lm_eval/tasks/leaderboard/bbh_mc/salient_translation_error_detection.yaml |multiple_choice | |leaderboard_bbh_snarks |lm_eval/tasks/leaderboard/bbh_mc/snarks.yaml |multiple_choice | |leaderboard_bbh_sports_understanding |lm_eval/tasks/leaderboard/bbh_mc/sports_understanding.yaml |multiple_choice | |leaderboard_bbh_temporal_sequences |lm_eval/tasks/leaderboard/bbh_mc/temporal_sequences.yaml |multiple_choice | |leaderboard_bbh_tracking_shuffled_objects_five_objects |lm_eval/tasks/leaderboard/bbh_mc/tracking_shuffled_objects_five_objects.yaml |multiple_choice | |leaderboard_bbh_tracking_shuffled_objects_seven_objects |lm_eval/tasks/leaderboard/bbh_mc/tracking_shuffled_objects_seven_objects.yaml |multiple_choice | |leaderboard_bbh_tracking_shuffled_objects_three_objects |lm_eval/tasks/leaderboard/bbh_mc/tracking_shuffled_objects_three_objects.yaml |multiple_choice | |leaderboard_bbh_web_of_lies |lm_eval/tasks/leaderboard/bbh_mc/web_of_lies.yaml |multiple_choice | |leaderboard_gpqa_diamond |lm_eval/tasks/leaderboard/gpqa/gpqa_diamond_zeroshot.yaml |multiple_choice | |leaderboard_gpqa_extended |lm_eval/tasks/leaderboard/gpqa/gpqa_extended_zeroshot.yaml |multiple_choice | |leaderboard_gpqa_main |lm_eval/tasks/leaderboard/gpqa/gpqa_main_zeroshot.yaml |multiple_choice | |leaderboard_ifeval |lm_eval/tasks/leaderboard/ifeval/ifeval.yaml |generate_until | |leaderboard_math_algebra_hard 
|lm_eval/tasks/leaderboard/math/math_algebra.yaml |generate_until | |leaderboard_math_counting_and_prob_hard |lm_eval/tasks/leaderboard/math/math_counting_and_prob.yaml |generate_until | |leaderboard_math_geometry_hard |lm_eval/tasks/leaderboard/math/math_geometry.yaml |generate_until | |leaderboard_math_intermediate_algebra_hard |lm_eval/tasks/leaderboard/math/math_intermediate_algebra.yaml |generate_until | |leaderboard_math_num_theory_hard |lm_eval/tasks/leaderboard/math/math_num_theory.yaml |generate_until | |leaderboard_math_prealgebra_hard |lm_eval/tasks/leaderboard/math/math_prealgebra.yaml |generate_until | |leaderboard_math_precalculus_hard |lm_eval/tasks/leaderboard/math/math_precalculus.yaml |generate_until | |leaderboard_mmlu_pro |lm_eval/tasks/leaderboard/mmlu_pro/mmlu_pro.yaml |multiple_choice | |leaderboard_musr_murder_mysteries |lm_eval/tasks/leaderboard/musr/musr_murder_mysteries.yaml |multiple_choice | |leaderboard_musr_object_placements |lm_eval/tasks/leaderboard/musr/musr_object_placements.yaml |multiple_choice | |leaderboard_musr_team_allocation |lm_eval/tasks/leaderboard/musr/musr_team_allocation.yaml |multiple_choice | |logieval |lm_eval/tasks/logiqa2/logieval.yaml |generate_until | |logiqa |lm_eval/tasks/logiqa/logiqa.yaml |multiple_choice | |logiqa2 |lm_eval/tasks/logiqa2/logiqa2.yaml |multiple_choice | |m_mmlu_ar |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ar.yaml |multiple_choice | |m_mmlu_bn |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_bn.yaml |multiple_choice | |m_mmlu_ca |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ca.yaml |multiple_choice | |m_mmlu_da |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_da.yaml |multiple_choice | |m_mmlu_de |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_de.yaml |multiple_choice | |m_mmlu_en |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_en.yaml |multiple_choice | |m_mmlu_es |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_es.yaml |multiple_choice | |m_mmlu_eu 
|lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_eu.yaml |multiple_choice | |m_mmlu_fr |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_fr.yaml |multiple_choice | |m_mmlu_gu |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_gu.yaml |multiple_choice | |m_mmlu_hi |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_hi.yaml |multiple_choice | |m_mmlu_hr |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_hr.yaml |multiple_choice | |m_mmlu_hu |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_hu.yaml |multiple_choice | |m_mmlu_hy |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_hy.yaml |multiple_choice | |m_mmlu_id |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_id.yaml |multiple_choice | |m_mmlu_is |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_is.yaml |multiple_choice | |m_mmlu_it |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_it.yaml |multiple_choice | |m_mmlu_kn |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_kn.yaml |multiple_choice | |m_mmlu_ml |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ml.yaml |multiple_choice | |m_mmlu_mr |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_mr.yaml |multiple_choice | |m_mmlu_nb |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_nb.yaml |multiple_choice | |m_mmlu_ne |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ne.yaml |multiple_choice | |m_mmlu_nl |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_nl.yaml |multiple_choice | |m_mmlu_pt |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_pt.yaml |multiple_choice | |m_mmlu_ro |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ro.yaml |multiple_choice | |m_mmlu_ru |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ru.yaml |multiple_choice | |m_mmlu_sk |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_sk.yaml |multiple_choice | |m_mmlu_sr |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_sr.yaml |multiple_choice | |m_mmlu_sv |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_sv.yaml |multiple_choice | |m_mmlu_ta |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ta.yaml |multiple_choice | |m_mmlu_te |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_te.yaml 
|multiple_choice | |m_mmlu_uk |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_uk.yaml |multiple_choice | |m_mmlu_vi |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_vi.yaml |multiple_choice | |m_mmlu_zh |lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_zh.yaml |multiple_choice | |mathqa |lm_eval/tasks/mathqa/mathqa.yaml |multiple_choice | |mc_taco |lm_eval/tasks/mc_taco/default.yaml |multiple_choice | |med_concepts_qa_atc_easy |lm_eval/tasks/med_concepts_qa/med_concepts_qa_atc_easy.yaml |multiple_choice | |med_concepts_qa_atc_hard |lm_eval/tasks/med_concepts_qa/med_concepts_qa_atc_hard.yaml |multiple_choice | |med_concepts_qa_atc_medium |lm_eval/tasks/med_concepts_qa/med_concepts_qa_atc_medium.yaml |multiple_choice | |med_concepts_qa_icd10cm_easy |lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd10cm_easy.yaml |multiple_choice | |med_concepts_qa_icd10cm_hard |lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd10cm_hard.yaml |multiple_choice | |med_concepts_qa_icd10cm_medium |lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd10cm_medium.yaml |multiple_choice | |med_concepts_qa_icd10proc_easy |lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd10proc_easy.yaml |multiple_choice | |med_concepts_qa_icd10proc_hard |lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd10proc_hard.yaml |multiple_choice | |med_concepts_qa_icd10proc_medium |lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd10proc_medium.yaml |multiple_choice | |med_concepts_qa_icd9cm_easy |lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd9cm_easy.yaml |multiple_choice | |med_concepts_qa_icd9cm_hard |lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd9cm_hard.yaml |multiple_choice | |med_concepts_qa_icd9cm_medium |lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd9cm_medium.yaml |multiple_choice | |med_concepts_qa_icd9proc_easy |lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd9proc_easy.yaml |multiple_choice | |med_concepts_qa_icd9proc_hard |lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd9proc_hard.yaml |multiple_choice | 
|med_concepts_qa_icd9proc_medium |lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd9proc_medium.yaml |multiple_choice | |medmcqa |lm_eval/tasks/medmcqa/medmcqa.yaml |multiple_choice | |medqa_4options |lm_eval/tasks/medqa/medqa.yaml |multiple_choice | |mgsm_direct_bn |lm_eval/tasks/mgsm/direct/mgsm_direct_bn.yaml |generate_until | |mgsm_direct_de |lm_eval/tasks/mgsm/direct/mgsm_direct_de.yaml |generate_until | |mgsm_direct_en |lm_eval/tasks/mgsm/direct/mgsm_direct_en.yaml |generate_until | |mgsm_direct_es |lm_eval/tasks/mgsm/direct/mgsm_direct_es.yaml |generate_until | |mgsm_direct_fr |lm_eval/tasks/mgsm/direct/mgsm_direct_fr.yaml |generate_until | |mgsm_direct_ja |lm_eval/tasks/mgsm/direct/mgsm_direct_ja.yaml |generate_until | |mgsm_direct_ru |lm_eval/tasks/mgsm/direct/mgsm_direct_ru.yaml |generate_until | |mgsm_direct_sw |lm_eval/tasks/mgsm/direct/mgsm_direct_sw.yaml |generate_until | |mgsm_direct_te |lm_eval/tasks/mgsm/direct/mgsm_direct_te.yaml |generate_until | |mgsm_direct_th |lm_eval/tasks/mgsm/direct/mgsm_direct_th.yaml |generate_until | |mgsm_direct_zh |lm_eval/tasks/mgsm/direct/mgsm_direct_zh.yaml |generate_until | |mgsm_en_cot_bn |lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_bn.yaml |generate_until | |mgsm_en_cot_de |lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_de.yaml |generate_until | |mgsm_en_cot_en |lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_en.yaml |generate_until | |mgsm_en_cot_es |lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_es.yaml |generate_until | |mgsm_en_cot_fr |lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_fr.yaml |generate_until | |mgsm_en_cot_ja |lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_ja.yaml |generate_until | |mgsm_en_cot_ru |lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_ru.yaml |generate_until | |mgsm_en_cot_sw |lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_sw.yaml |generate_until | |mgsm_en_cot_te |lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_te.yaml |generate_until | |mgsm_en_cot_th |lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_th.yaml |generate_until | |mgsm_en_cot_zh 
|lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_zh.yaml |generate_until | |mgsm_native_cot_bn |lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_bn.yaml |generate_until | |mgsm_native_cot_de |lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_de.yaml |generate_until | |mgsm_native_cot_en |lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_en.yaml |generate_until | |mgsm_native_cot_es |lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_es.yaml |generate_until | |mgsm_native_cot_fr |lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_fr.yaml |generate_until | |mgsm_native_cot_ja |lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ja.yaml |generate_until | |mgsm_native_cot_ru |lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ru.yaml |generate_until | |mgsm_native_cot_sw |lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_sw.yaml |generate_until | |mgsm_native_cot_te |lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_te.yaml |generate_until | |mgsm_native_cot_th |lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_th.yaml |generate_until | |mgsm_native_cot_zh |lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_zh.yaml |generate_until | |minerva_math_algebra |lm_eval/tasks/minerva_math/minerva_math_algebra.yaml |generate_until | |minerva_math_counting_and_prob |lm_eval/tasks/minerva_math/minerva_math_counting_and_prob.yaml |generate_until | |minerva_math_geometry |lm_eval/tasks/minerva_math/minerva_math_geometry.yaml |generate_until | |minerva_math_intermediate_algebra |lm_eval/tasks/minerva_math/minerva_math_intermediate_algebra.yaml |generate_until | |minerva_math_num_theory |lm_eval/tasks/minerva_math/minerva_math_num_theory.yaml |generate_until | |minerva_math_prealgebra |lm_eval/tasks/minerva_math/minerva_math_prealgebra.yaml |generate_until | |minerva_math_precalc |lm_eval/tasks/minerva_math/minerva_math_precalc.yaml |generate_until | |mmlu_abstract_algebra |lm_eval/tasks/mmlu/default/mmlu_abstract_algebra.yaml |multiple_choice | |mmlu_abstract_algebra_generative |lm_eval/tasks/mmlu/generative/mmlu_abstract_algebra.yaml 
|generate_until | |mmlu_anatomy |lm_eval/tasks/mmlu/default/mmlu_anatomy.yaml |multiple_choice | |mmlu_anatomy_generative |lm_eval/tasks/mmlu/generative/mmlu_anatomy.yaml |generate_until | |mmlu_astronomy |lm_eval/tasks/mmlu/default/mmlu_astronomy.yaml |multiple_choice | |mmlu_astronomy_generative |lm_eval/tasks/mmlu/generative/mmlu_astronomy.yaml |generate_until | |mmlu_business_ethics |lm_eval/tasks/mmlu/default/mmlu_business_ethics.yaml |multiple_choice | |mmlu_business_ethics_generative |lm_eval/tasks/mmlu/generative/mmlu_business_ethics.yaml |generate_until | |mmlu_clinical_knowledge |lm_eval/tasks/mmlu/default/mmlu_clinical_knowledge.yaml |multiple_choice | |mmlu_clinical_knowledge_generative |lm_eval/tasks/mmlu/generative/mmlu_clinical_knowledge.yaml |generate_until | |mmlu_college_biology |lm_eval/tasks/mmlu/default/mmlu_college_biology.yaml |multiple_choice | |mmlu_college_biology_generative |lm_eval/tasks/mmlu/generative/mmlu_college_biology.yaml |generate_until | |mmlu_college_chemistry |lm_eval/tasks/mmlu/default/mmlu_college_chemistry.yaml |multiple_choice | |mmlu_college_chemistry_generative |lm_eval/tasks/mmlu/generative/mmlu_college_chemistry.yaml |generate_until | |mmlu_college_computer_science |lm_eval/tasks/mmlu/default/mmlu_college_computer_science.yaml |multiple_choice | |mmlu_college_computer_science_generative |lm_eval/tasks/mmlu/generative/mmlu_college_computer_science.yaml |generate_until | |mmlu_college_mathematics |lm_eval/tasks/mmlu/default/mmlu_college_mathematics.yaml |multiple_choice | |mmlu_college_mathematics_generative |lm_eval/tasks/mmlu/generative/mmlu_college_mathematics.yaml |generate_until | |mmlu_college_medicine |lm_eval/tasks/mmlu/default/mmlu_college_medicine.yaml |multiple_choice | |mmlu_college_medicine_generative |lm_eval/tasks/mmlu/generative/mmlu_college_medicine.yaml |generate_until | |mmlu_college_physics |lm_eval/tasks/mmlu/default/mmlu_college_physics.yaml |multiple_choice | |mmlu_college_physics_generative 
|lm_eval/tasks/mmlu/generative/mmlu_college_physics.yaml |generate_until | |mmlu_computer_security |lm_eval/tasks/mmlu/default/mmlu_computer_security.yaml |multiple_choice | |mmlu_computer_security_generative |lm_eval/tasks/mmlu/generative/mmlu_computer_security.yaml |generate_until | |mmlu_conceptual_physics |lm_eval/tasks/mmlu/default/mmlu_conceptual_physics.yaml |multiple_choice | |mmlu_conceptual_physics_generative |lm_eval/tasks/mmlu/generative/mmlu_conceptual_physics.yaml |generate_until | |mmlu_continuation_abstract_algebra |lm_eval/tasks/mmlu/continuation/mmlu_abstract_algebra.yaml |multiple_choice | |mmlu_continuation_anatomy |lm_eval/tasks/mmlu/continuation/mmlu_anatomy.yaml |multiple_choice | |mmlu_continuation_astronomy |lm_eval/tasks/mmlu/continuation/mmlu_astronomy.yaml |multiple_choice | |mmlu_continuation_business_ethics |lm_eval/tasks/mmlu/continuation/mmlu_business_ethics.yaml |multiple_choice | |mmlu_continuation_clinical_knowledge |lm_eval/tasks/mmlu/continuation/mmlu_clinical_knowledge.yaml |multiple_choice | |mmlu_continuation_college_biology |lm_eval/tasks/mmlu/continuation/mmlu_college_biology.yaml |multiple_choice | |mmlu_continuation_college_chemistry |lm_eval/tasks/mmlu/continuation/mmlu_college_chemistry.yaml |multiple_choice | |mmlu_continuation_college_computer_science |lm_eval/tasks/mmlu/continuation/mmlu_college_computer_science.yaml |multiple_choice | |mmlu_continuation_college_mathematics |lm_eval/tasks/mmlu/continuation/mmlu_college_mathematics.yaml |multiple_choice | |mmlu_continuation_college_medicine |lm_eval/tasks/mmlu/continuation/mmlu_college_medicine.yaml |multiple_choice | |mmlu_continuation_college_physics |lm_eval/tasks/mmlu/continuation/mmlu_college_physics.yaml |multiple_choice | |mmlu_continuation_computer_security |lm_eval/tasks/mmlu/continuation/mmlu_computer_security.yaml |multiple_choice | |mmlu_continuation_conceptual_physics |lm_eval/tasks/mmlu/continuation/mmlu_conceptual_physics.yaml |multiple_choice | 
|mmlu_continuation_econometrics |lm_eval/tasks/mmlu/continuation/mmlu_econometrics.yaml |multiple_choice | |mmlu_continuation_electrical_engineering |lm_eval/tasks/mmlu/continuation/mmlu_electrical_engineering.yaml |multiple_choice | |mmlu_continuation_elementary_mathematics |lm_eval/tasks/mmlu/continuation/mmlu_elementary_mathematics.yaml |multiple_choice | |mmlu_continuation_formal_logic |lm_eval/tasks/mmlu/continuation/mmlu_formal_logic.yaml |multiple_choice | |mmlu_continuation_global_facts |lm_eval/tasks/mmlu/continuation/mmlu_global_facts.yaml |multiple_choice | |mmlu_continuation_high_school_biology |lm_eval/tasks/mmlu/continuation/mmlu_high_school_biology.yaml |multiple_choice | |mmlu_continuation_high_school_chemistry |lm_eval/tasks/mmlu/continuation/mmlu_high_school_chemistry.yaml |multiple_choice | |mmlu_continuation_high_school_computer_science |lm_eval/tasks/mmlu/continuation/mmlu_high_school_computer_science.yaml |multiple_choice | |mmlu_continuation_high_school_european_history |lm_eval/tasks/mmlu/continuation/mmlu_high_school_european_history.yaml |multiple_choice | |mmlu_continuation_high_school_geography |lm_eval/tasks/mmlu/continuation/mmlu_high_school_geography.yaml |multiple_choice | |mmlu_continuation_high_school_government_and_politics |lm_eval/tasks/mmlu/continuation/mmlu_high_school_government_and_politics.yaml |multiple_choice | |mmlu_continuation_high_school_macroeconomics |lm_eval/tasks/mmlu/continuation/mmlu_high_school_macroeconomics.yaml |multiple_choice | |mmlu_continuation_high_school_mathematics |lm_eval/tasks/mmlu/continuation/mmlu_high_school_mathematics.yaml |multiple_choice | |mmlu_continuation_high_school_microeconomics |lm_eval/tasks/mmlu/continuation/mmlu_high_school_microeconomics.yaml |multiple_choice | |mmlu_continuation_high_school_physics |lm_eval/tasks/mmlu/continuation/mmlu_high_school_physics.yaml |multiple_choice | |mmlu_continuation_high_school_psychology 
|lm_eval/tasks/mmlu/continuation/mmlu_high_school_psychology.yaml |multiple_choice | |mmlu_continuation_high_school_statistics |lm_eval/tasks/mmlu/continuation/mmlu_high_school_statistics.yaml |multiple_choice | |mmlu_continuation_high_school_us_history |lm_eval/tasks/mmlu/continuation/mmlu_high_school_us_history.yaml |multiple_choice | |mmlu_continuation_high_school_world_history |lm_eval/tasks/mmlu/continuation/mmlu_high_school_world_history.yaml |multiple_choice | |mmlu_continuation_human_aging |lm_eval/tasks/mmlu/continuation/mmlu_human_aging.yaml |multiple_choice | |mmlu_continuation_human_sexuality |lm_eval/tasks/mmlu/continuation/mmlu_human_sexuality.yaml |multiple_choice | |mmlu_continuation_international_law |lm_eval/tasks/mmlu/continuation/mmlu_international_law.yaml |multiple_choice | |mmlu_continuation_jurisprudence |lm_eval/tasks/mmlu/continuation/mmlu_jurisprudence.yaml |multiple_choice | |mmlu_continuation_logical_fallacies |lm_eval/tasks/mmlu/continuation/mmlu_logical_fallacies.yaml |multiple_choice | |mmlu_continuation_machine_learning |lm_eval/tasks/mmlu/continuation/mmlu_machine_learning.yaml |multiple_choice | |mmlu_continuation_management |lm_eval/tasks/mmlu/continuation/mmlu_management.yaml |multiple_choice | |mmlu_continuation_marketing |lm_eval/tasks/mmlu/continuation/mmlu_marketing.yaml |multiple_choice | |mmlu_continuation_medical_genetics |lm_eval/tasks/mmlu/continuation/mmlu_medical_genetics.yaml |multiple_choice | |mmlu_continuation_miscellaneous |lm_eval/tasks/mmlu/continuation/mmlu_miscellaneous.yaml |multiple_choice | |mmlu_continuation_moral_disputes |lm_eval/tasks/mmlu/continuation/mmlu_moral_disputes.yaml |multiple_choice | |mmlu_continuation_moral_scenarios |lm_eval/tasks/mmlu/continuation/mmlu_moral_scenarios.yaml |multiple_choice | |mmlu_continuation_nutrition |lm_eval/tasks/mmlu/continuation/mmlu_nutrition.yaml |multiple_choice | |mmlu_continuation_philosophy |lm_eval/tasks/mmlu/continuation/mmlu_philosophy.yaml 
|multiple_choice | |mmlu_continuation_prehistory |lm_eval/tasks/mmlu/continuation/mmlu_prehistory.yaml |multiple_choice | |mmlu_continuation_professional_accounting |lm_eval/tasks/mmlu/continuation/mmlu_professional_accounting.yaml |multiple_choice | |mmlu_continuation_professional_law |lm_eval/tasks/mmlu/continuation/mmlu_professional_law.yaml |multiple_choice | |mmlu_continuation_professional_medicine |lm_eval/tasks/mmlu/continuation/mmlu_professional_medicine.yaml |multiple_choice | |mmlu_continuation_professional_psychology |lm_eval/tasks/mmlu/continuation/mmlu_professional_psychology.yaml |multiple_choice | |mmlu_continuation_public_relations |lm_eval/tasks/mmlu/continuation/mmlu_public_relations.yaml |multiple_choice | |mmlu_continuation_security_studies |lm_eval/tasks/mmlu/continuation/mmlu_security_studies.yaml |multiple_choice | |mmlu_continuation_sociology |lm_eval/tasks/mmlu/continuation/mmlu_sociology.yaml |multiple_choice | |mmlu_continuation_us_foreign_policy |lm_eval/tasks/mmlu/continuation/mmlu_us_foreign_policy.yaml |multiple_choice | |mmlu_continuation_virology |lm_eval/tasks/mmlu/continuation/mmlu_virology.yaml |multiple_choice | |mmlu_continuation_world_religions |lm_eval/tasks/mmlu/continuation/mmlu_world_religions.yaml |multiple_choice | |mmlu_econometrics |lm_eval/tasks/mmlu/default/mmlu_econometrics.yaml |multiple_choice | |mmlu_econometrics_generative |lm_eval/tasks/mmlu/generative/mmlu_econometrics.yaml |generate_until | |mmlu_electrical_engineering |lm_eval/tasks/mmlu/default/mmlu_electrical_engineering.yaml |multiple_choice | |mmlu_electrical_engineering_generative |lm_eval/tasks/mmlu/generative/mmlu_electrical_engineering.yaml |generate_until | |mmlu_elementary_mathematics |lm_eval/tasks/mmlu/default/mmlu_elementary_mathematics.yaml |multiple_choice | |mmlu_elementary_mathematics_generative |lm_eval/tasks/mmlu/generative/mmlu_elementary_mathematics.yaml |generate_until | |mmlu_flan_cot_fewshot_abstract_algebra 
|lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_abstract_algebra.yaml |generate_until | |mmlu_flan_cot_fewshot_anatomy |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_anatomy.yaml |generate_until | |mmlu_flan_cot_fewshot_astronomy |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_astronomy.yaml |generate_until | |mmlu_flan_cot_fewshot_business_ethics |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_business_ethics.yaml |generate_until | |mmlu_flan_cot_fewshot_clinical_knowledge |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_clinical_knowledge.yaml |generate_until | |mmlu_flan_cot_fewshot_college_biology |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_biology.yaml |generate_until | |mmlu_flan_cot_fewshot_college_chemistry |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_chemistry.yaml |generate_until | |mmlu_flan_cot_fewshot_college_computer_science |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_computer_science.yaml |generate_until | |mmlu_flan_cot_fewshot_college_mathematics |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_mathematics.yaml |generate_until | |mmlu_flan_cot_fewshot_college_medicine |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_medicine.yaml |generate_until | |mmlu_flan_cot_fewshot_college_physics |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_physics.yaml |generate_until | |mmlu_flan_cot_fewshot_computer_security |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_computer_security.yaml |generate_until | |mmlu_flan_cot_fewshot_conceptual_physics |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_conceptual_physics.yaml |generate_until | |mmlu_flan_cot_fewshot_econometrics |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_econometrics.yaml |generate_until | |mmlu_flan_cot_fewshot_electrical_engineering |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_electrical_engineering.yaml |generate_until | |mmlu_flan_cot_fewshot_elementary_mathematics |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_elementary_mathematics.yaml |generate_until | |mmlu_flan_cot_fewshot_formal_logic 
|lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_formal_logic.yaml |generate_until | |mmlu_flan_cot_fewshot_global_facts |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_global_facts.yaml |generate_until | |mmlu_flan_cot_fewshot_high_school_biology |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_biology.yaml |generate_until | |mmlu_flan_cot_fewshot_high_school_chemistry |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_chemistry.yaml |generate_until | |mmlu_flan_cot_fewshot_high_school_computer_science |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_computer_science.yaml |generate_until | |mmlu_flan_cot_fewshot_high_school_european_history |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_european_history.yaml |generate_until | |mmlu_flan_cot_fewshot_high_school_geography |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_geography.yaml |generate_until | |mmlu_flan_cot_fewshot_high_school_government_and_politics |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_government_and_politics.yaml |generate_until | |mmlu_flan_cot_fewshot_high_school_macroeconomics |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_macroeconomics.yaml |generate_until | |mmlu_flan_cot_fewshot_high_school_mathematics |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_mathematics.yaml |generate_until | |mmlu_flan_cot_fewshot_high_school_microeconomics |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_microeconomics.yaml |generate_until | |mmlu_flan_cot_fewshot_high_school_physics |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_physics.yaml |generate_until | |mmlu_flan_cot_fewshot_high_school_psychology |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_psychology.yaml |generate_until | |mmlu_flan_cot_fewshot_high_school_statistics |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_statistics.yaml |generate_until | |mmlu_flan_cot_fewshot_high_school_us_history |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_us_history.yaml |generate_until | 
|mmlu_flan_cot_fewshot_high_school_world_history |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_world_history.yaml |generate_until | |mmlu_flan_cot_fewshot_human_aging |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_human_aging.yaml |generate_until | |mmlu_flan_cot_fewshot_human_sexuality |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_human_sexuality.yaml |generate_until | |mmlu_flan_cot_fewshot_international_law |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_international_law.yaml |generate_until | |mmlu_flan_cot_fewshot_jurisprudence |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_jurisprudence.yaml |generate_until | |mmlu_flan_cot_fewshot_logical_fallacies |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_logical_fallacies.yaml |generate_until | |mmlu_flan_cot_fewshot_machine_learning |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_machine_learning.yaml |generate_until | |mmlu_flan_cot_fewshot_management |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_management.yaml |generate_until | |mmlu_flan_cot_fewshot_marketing |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_marketing.yaml |generate_until | |mmlu_flan_cot_fewshot_medical_genetics |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_medical_genetics.yaml |generate_until | |mmlu_flan_cot_fewshot_miscellaneous |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_miscellaneous.yaml |generate_until | |mmlu_flan_cot_fewshot_moral_disputes |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_moral_disputes.yaml |generate_until | |mmlu_flan_cot_fewshot_moral_scenarios |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_moral_scenarios.yaml |generate_until | |mmlu_flan_cot_fewshot_nutrition |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_nutrition.yaml |generate_until | |mmlu_flan_cot_fewshot_philosophy |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_philosophy.yaml |generate_until | |mmlu_flan_cot_fewshot_prehistory |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_prehistory.yaml |generate_until | |mmlu_flan_cot_fewshot_professional_accounting |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_professional_accounting.yaml 
|generate_until | |mmlu_flan_cot_fewshot_professional_law |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_professional_law.yaml |generate_until | |mmlu_flan_cot_fewshot_professional_medicine |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_professional_medicine.yaml |generate_until | |mmlu_flan_cot_fewshot_professional_psychology |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_professional_psychology.yaml |generate_until | |mmlu_flan_cot_fewshot_public_relations |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_public_relations.yaml |generate_until | |mmlu_flan_cot_fewshot_security_studies |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_security_studies.yaml |generate_until | |mmlu_flan_cot_fewshot_sociology |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_sociology.yaml |generate_until | |mmlu_flan_cot_fewshot_us_foreign_policy |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_us_foreign_policy.yaml |generate_until | |mmlu_flan_cot_fewshot_virology |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_virology.yaml |generate_until | |mmlu_flan_cot_fewshot_world_religions |lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_world_religions.yaml |generate_until | |mmlu_flan_cot_zeroshot_abstract_algebra |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_abstract_algebra.yaml |generate_until | |mmlu_flan_cot_zeroshot_anatomy |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_anatomy.yaml |generate_until | |mmlu_flan_cot_zeroshot_astronomy |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_astronomy.yaml |generate_until | |mmlu_flan_cot_zeroshot_business_ethics |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_business_ethics.yaml |generate_until | |mmlu_flan_cot_zeroshot_clinical_knowledge |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_clinical_knowledge.yaml |generate_until | |mmlu_flan_cot_zeroshot_college_biology |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_biology.yaml |generate_until | |mmlu_flan_cot_zeroshot_college_chemistry |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_chemistry.yaml |generate_until | |mmlu_flan_cot_zeroshot_college_computer_science 
|lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_computer_science.yaml |generate_until | |mmlu_flan_cot_zeroshot_college_mathematics |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_mathematics.yaml |generate_until | |mmlu_flan_cot_zeroshot_college_medicine |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_medicine.yaml |generate_until | |mmlu_flan_cot_zeroshot_college_physics |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_physics.yaml |generate_until | |mmlu_flan_cot_zeroshot_computer_security |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_computer_security.yaml |generate_until | |mmlu_flan_cot_zeroshot_conceptual_physics |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_conceptual_physics.yaml |generate_until | |mmlu_flan_cot_zeroshot_econometrics |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_econometrics.yaml |generate_until | |mmlu_flan_cot_zeroshot_electrical_engineering |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_electrical_engineering.yaml |generate_until | |mmlu_flan_cot_zeroshot_elementary_mathematics |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_elementary_mathematics.yaml |generate_until | |mmlu_flan_cot_zeroshot_formal_logic |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_formal_logic.yaml |generate_until | |mmlu_flan_cot_zeroshot_global_facts |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_global_facts.yaml |generate_until | |mmlu_flan_cot_zeroshot_high_school_biology |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_biology.yaml |generate_until | |mmlu_flan_cot_zeroshot_high_school_chemistry |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_chemistry.yaml |generate_until | |mmlu_flan_cot_zeroshot_high_school_computer_science |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_computer_science.yaml |generate_until | |mmlu_flan_cot_zeroshot_high_school_european_history |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_european_history.yaml |generate_until | |mmlu_flan_cot_zeroshot_high_school_geography 
|lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_geography.yaml |generate_until | |mmlu_flan_cot_zeroshot_high_school_government_and_politics |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_government_and_politics.yaml |generate_until | |mmlu_flan_cot_zeroshot_high_school_macroeconomics |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_macroeconomics.yaml |generate_until | |mmlu_flan_cot_zeroshot_high_school_mathematics |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_mathematics.yaml |generate_until | |mmlu_flan_cot_zeroshot_high_school_microeconomics |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_microeconomics.yaml |generate_until | |mmlu_flan_cot_zeroshot_high_school_physics |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_physics.yaml |generate_until | |mmlu_flan_cot_zeroshot_high_school_psychology |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_psychology.yaml |generate_until | |mmlu_flan_cot_zeroshot_high_school_statistics |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_statistics.yaml |generate_until | |mmlu_flan_cot_zeroshot_high_school_us_history |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_us_history.yaml |generate_until | |mmlu_flan_cot_zeroshot_high_school_world_history |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_world_history.yaml |generate_until | |mmlu_flan_cot_zeroshot_human_aging |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_aging.yaml |generate_until | |mmlu_flan_cot_zeroshot_human_sexuality |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_sexuality.yaml |generate_until | |mmlu_flan_cot_zeroshot_international_law |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_international_law.yaml |generate_until | |mmlu_flan_cot_zeroshot_jurisprudence |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_jurisprudence.yaml |generate_until | |mmlu_flan_cot_zeroshot_logical_fallacies |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_logical_fallacies.yaml |generate_until | 
|mmlu_flan_cot_zeroshot_machine_learning |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_machine_learning.yaml |generate_until | |mmlu_flan_cot_zeroshot_management |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_management.yaml |generate_until | |mmlu_flan_cot_zeroshot_marketing |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_marketing.yaml |generate_until | |mmlu_flan_cot_zeroshot_medical_genetics |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_medical_genetics.yaml |generate_until | |mmlu_flan_cot_zeroshot_miscellaneous |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_miscellaneous.yaml |generate_until | |mmlu_flan_cot_zeroshot_moral_disputes |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_disputes.yaml |generate_until | |mmlu_flan_cot_zeroshot_moral_scenarios |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_scenarios.yaml |generate_until | |mmlu_flan_cot_zeroshot_nutrition |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_nutrition.yaml |generate_until | |mmlu_flan_cot_zeroshot_philosophy |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_philosophy.yaml |generate_until | |mmlu_flan_cot_zeroshot_prehistory |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_prehistory.yaml |generate_until | |mmlu_flan_cot_zeroshot_professional_accounting |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_accounting.yaml |generate_until | |mmlu_flan_cot_zeroshot_professional_law |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_law.yaml |generate_until | |mmlu_flan_cot_zeroshot_professional_medicine |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_medicine.yaml |generate_until | |mmlu_flan_cot_zeroshot_professional_psychology |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_psychology.yaml |generate_until | |mmlu_flan_cot_zeroshot_public_relations |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_public_relations.yaml |generate_until | |mmlu_flan_cot_zeroshot_security_studies |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_security_studies.yaml |generate_until | |mmlu_flan_cot_zeroshot_sociology 
|lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_sociology.yaml |generate_until | |mmlu_flan_cot_zeroshot_us_foreign_policy |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_us_foreign_policy.yaml |generate_until | |mmlu_flan_cot_zeroshot_virology |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_virology.yaml |generate_until | |mmlu_flan_cot_zeroshot_world_religions |lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_world_religions.yaml |generate_until | |mmlu_flan_n_shot_generative_abstract_algebra |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_abstract_algebra.yaml |generate_until | |mmlu_flan_n_shot_generative_anatomy |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_anatomy.yaml |generate_until | |mmlu_flan_n_shot_generative_astronomy |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_astronomy.yaml |generate_until | |mmlu_flan_n_shot_generative_business_ethics |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_business_ethics.yaml |generate_until | |mmlu_flan_n_shot_generative_clinical_knowledge |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_clinical_knowledge.yaml |generate_until | |mmlu_flan_n_shot_generative_college_biology |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_biology.yaml |generate_until | |mmlu_flan_n_shot_generative_college_chemistry |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_chemistry.yaml |generate_until | |mmlu_flan_n_shot_generative_college_computer_science |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_computer_science.yaml |generate_until | |mmlu_flan_n_shot_generative_college_mathematics |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_mathematics.yaml |generate_until | |mmlu_flan_n_shot_generative_college_medicine |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_medicine.yaml |generate_until | |mmlu_flan_n_shot_generative_college_physics |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_physics.yaml |generate_until | |mmlu_flan_n_shot_generative_computer_security 
|lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_computer_security.yaml |generate_until | |mmlu_flan_n_shot_generative_conceptual_physics |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_conceptual_physics.yaml |generate_until | |mmlu_flan_n_shot_generative_econometrics |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_econometrics.yaml |generate_until | |mmlu_flan_n_shot_generative_electrical_engineering |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_electrical_engineering.yaml |generate_until | |mmlu_flan_n_shot_generative_elementary_mathematics |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_elementary_mathematics.yaml |generate_until | |mmlu_flan_n_shot_generative_formal_logic |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_formal_logic.yaml |generate_until | |mmlu_flan_n_shot_generative_global_facts |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_global_facts.yaml |generate_until | |mmlu_flan_n_shot_generative_high_school_biology |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_biology.yaml |generate_until | |mmlu_flan_n_shot_generative_high_school_chemistry |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_chemistry.yaml |generate_until | |mmlu_flan_n_shot_generative_high_school_computer_science |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_computer_science.yaml |generate_until | |mmlu_flan_n_shot_generative_high_school_european_history |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_european_history.yaml |generate_until | |mmlu_flan_n_shot_generative_high_school_geography |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_geography.yaml |generate_until | |mmlu_flan_n_shot_generative_high_school_government_and_politics |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_government_and_politics.yaml |generate_until | |mmlu_flan_n_shot_generative_high_school_macroeconomics |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_macroeconomics.yaml |generate_until | 
|mmlu_flan_n_shot_generative_high_school_mathematics |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_mathematics.yaml |generate_until | |mmlu_flan_n_shot_generative_high_school_microeconomics |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_microeconomics.yaml |generate_until | |mmlu_flan_n_shot_generative_high_school_physics |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_physics.yaml |generate_until | |mmlu_flan_n_shot_generative_high_school_psychology |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_psychology.yaml |generate_until | |mmlu_flan_n_shot_generative_high_school_statistics |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_statistics.yaml |generate_until | |mmlu_flan_n_shot_generative_high_school_us_history |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_us_history.yaml |generate_until | |mmlu_flan_n_shot_generative_high_school_world_history |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_world_history.yaml |generate_until | |mmlu_flan_n_shot_generative_human_aging |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_aging.yaml |generate_until | |mmlu_flan_n_shot_generative_human_sexuality |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_sexuality.yaml |generate_until | |mmlu_flan_n_shot_generative_international_law |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_international_law.yaml |generate_until | |mmlu_flan_n_shot_generative_jurisprudence |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_jurisprudence.yaml |generate_until | |mmlu_flan_n_shot_generative_logical_fallacies |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_logical_fallacies.yaml |generate_until | |mmlu_flan_n_shot_generative_machine_learning |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_machine_learning.yaml |generate_until | |mmlu_flan_n_shot_generative_management |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_management.yaml |generate_until | |mmlu_flan_n_shot_generative_marketing 
|lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_marketing.yaml |generate_until | |mmlu_flan_n_shot_generative_medical_genetics |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_medical_genetics.yaml |generate_until | |mmlu_flan_n_shot_generative_miscellaneous |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_miscellaneous.yaml |generate_until | |mmlu_flan_n_shot_generative_moral_disputes |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_disputes.yaml |generate_until | |mmlu_flan_n_shot_generative_moral_scenarios |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_scenarios.yaml |generate_until | |mmlu_flan_n_shot_generative_nutrition |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_nutrition.yaml |generate_until | |mmlu_flan_n_shot_generative_philosophy |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_philosophy.yaml |generate_until | |mmlu_flan_n_shot_generative_prehistory |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_prehistory.yaml |generate_until | |mmlu_flan_n_shot_generative_professional_accounting |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_accounting.yaml |generate_until | |mmlu_flan_n_shot_generative_professional_law |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_law.yaml |generate_until | |mmlu_flan_n_shot_generative_professional_medicine |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_medicine.yaml |generate_until | |mmlu_flan_n_shot_generative_professional_psychology |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_psychology.yaml |generate_until | |mmlu_flan_n_shot_generative_public_relations |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_public_relations.yaml |generate_until | |mmlu_flan_n_shot_generative_security_studies |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_security_studies.yaml |generate_until | |mmlu_flan_n_shot_generative_sociology |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_sociology.yaml |generate_until | |mmlu_flan_n_shot_generative_us_foreign_policy 
|lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_us_foreign_policy.yaml |generate_until | |mmlu_flan_n_shot_generative_virology |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_virology.yaml |generate_until | |mmlu_flan_n_shot_generative_world_religions |lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_world_religions.yaml |generate_until | |mmlu_flan_n_shot_loglikelihood_abstract_algebra |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_abstract_algebra.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_anatomy |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_anatomy.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_astronomy |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_astronomy.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_business_ethics |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_business_ethics.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_clinical_knowledge |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_clinical_knowledge.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_college_biology |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_biology.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_college_chemistry |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_chemistry.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_college_computer_science |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_computer_science.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_college_mathematics |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_mathematics.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_college_medicine |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_medicine.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_college_physics |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_physics.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_computer_security 
|lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_computer_security.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_conceptual_physics |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_conceptual_physics.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_econometrics |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_econometrics.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_electrical_engineering |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_electrical_engineering.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_elementary_mathematics |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_elementary_mathematics.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_formal_logic |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_formal_logic.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_global_facts |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_global_facts.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_high_school_biology |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_biology.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_high_school_chemistry |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_chemistry.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_high_school_computer_science |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_computer_science.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_high_school_european_history |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_european_history.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_high_school_geography |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_geography.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_high_school_government_and_politics |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_government_and_politics.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_high_school_macroeconomics 
|lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_macroeconomics.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_high_school_mathematics |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_mathematics.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_high_school_microeconomics |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_microeconomics.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_high_school_physics |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_physics.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_high_school_psychology |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_psychology.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_high_school_statistics |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_statistics.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_high_school_us_history |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_us_history.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_high_school_world_history |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_world_history.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_human_aging |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_aging.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_human_sexuality |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_sexuality.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_international_law |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_international_law.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_jurisprudence |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_jurisprudence.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_logical_fallacies |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_logical_fallacies.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_machine_learning |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_machine_learning.yaml 
|multiple_choice | |mmlu_flan_n_shot_loglikelihood_management |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_management.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_marketing |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_marketing.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_medical_genetics |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_medical_genetics.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_miscellaneous |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_miscellaneous.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_moral_disputes |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_moral_disputes.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_moral_scenarios |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_moral_scenarios.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_nutrition |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_nutrition.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_philosophy |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_philosophy.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_prehistory |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_prehistory.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_professional_accounting |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_accounting.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_professional_law |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_law.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_professional_medicine |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_medicine.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_professional_psychology |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_psychology.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_public_relations |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_public_relations.yaml |multiple_choice | 
|mmlu_flan_n_shot_loglikelihood_security_studies |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_security_studies.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_sociology |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_sociology.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_us_foreign_policy |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_us_foreign_policy.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_virology |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_virology.yaml |multiple_choice | |mmlu_flan_n_shot_loglikelihood_world_religions |lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_world_religions.yaml |multiple_choice | |mmlu_formal_logic |lm_eval/tasks/mmlu/default/mmlu_formal_logic.yaml |multiple_choice | |mmlu_formal_logic_generative |lm_eval/tasks/mmlu/generative/mmlu_formal_logic.yaml |generate_until | |mmlu_global_facts |lm_eval/tasks/mmlu/default/mmlu_global_facts.yaml |multiple_choice | |mmlu_global_facts_generative |lm_eval/tasks/mmlu/generative/mmlu_global_facts.yaml |generate_until | |mmlu_high_school_biology |lm_eval/tasks/mmlu/default/mmlu_high_school_biology.yaml |multiple_choice | |mmlu_high_school_biology_generative |lm_eval/tasks/mmlu/generative/mmlu_high_school_biology.yaml |generate_until | |mmlu_high_school_chemistry |lm_eval/tasks/mmlu/default/mmlu_high_school_chemistry.yaml |multiple_choice | |mmlu_high_school_chemistry_generative |lm_eval/tasks/mmlu/generative/mmlu_high_school_chemistry.yaml |generate_until | |mmlu_high_school_computer_science |lm_eval/tasks/mmlu/default/mmlu_high_school_computer_science.yaml |multiple_choice | |mmlu_high_school_computer_science_generative |lm_eval/tasks/mmlu/generative/mmlu_high_school_computer_science.yaml |generate_until | |mmlu_high_school_european_history |lm_eval/tasks/mmlu/default/mmlu_high_school_european_history.yaml |multiple_choice | |mmlu_high_school_european_history_generative |lm_eval/tasks/mmlu/generative/mmlu_high_school_european_history.yaml 
|generate_until | |mmlu_high_school_geography |lm_eval/tasks/mmlu/default/mmlu_high_school_geography.yaml |multiple_choice | |mmlu_high_school_geography_generative |lm_eval/tasks/mmlu/generative/mmlu_high_school_geography.yaml |generate_until | |mmlu_high_school_government_and_politics |lm_eval/tasks/mmlu/default/mmlu_high_school_government_and_politics.yaml |multiple_choice | |mmlu_high_school_government_and_politics_generative |lm_eval/tasks/mmlu/generative/mmlu_high_school_government_and_politics.yaml |generate_until | |mmlu_high_school_macroeconomics |lm_eval/tasks/mmlu/default/mmlu_high_school_macroeconomics.yaml |multiple_choice | |mmlu_high_school_macroeconomics_generative |lm_eval/tasks/mmlu/generative/mmlu_high_school_macroeconomics.yaml |generate_until | |mmlu_high_school_mathematics |lm_eval/tasks/mmlu/default/mmlu_high_school_mathematics.yaml |multiple_choice | |mmlu_high_school_mathematics_generative |lm_eval/tasks/mmlu/generative/mmlu_high_school_mathematics.yaml |generate_until | |mmlu_high_school_microeconomics |lm_eval/tasks/mmlu/default/mmlu_high_school_microeconomics.yaml |multiple_choice | |mmlu_high_school_microeconomics_generative |lm_eval/tasks/mmlu/generative/mmlu_high_school_microeconomics.yaml |generate_until | |mmlu_high_school_physics |lm_eval/tasks/mmlu/default/mmlu_high_school_physics.yaml |multiple_choice | |mmlu_high_school_physics_generative |lm_eval/tasks/mmlu/generative/mmlu_high_school_physics.yaml |generate_until | |mmlu_high_school_psychology |lm_eval/tasks/mmlu/default/mmlu_high_school_psychology.yaml |multiple_choice | |mmlu_high_school_psychology_generative |lm_eval/tasks/mmlu/generative/mmlu_high_school_psychology.yaml |generate_until | |mmlu_high_school_statistics |lm_eval/tasks/mmlu/default/mmlu_high_school_statistics.yaml |multiple_choice | |mmlu_high_school_statistics_generative |lm_eval/tasks/mmlu/generative/mmlu_high_school_statistics.yaml |generate_until | |mmlu_high_school_us_history 
|lm_eval/tasks/mmlu/default/mmlu_high_school_us_history.yaml |multiple_choice | |mmlu_high_school_us_history_generative |lm_eval/tasks/mmlu/generative/mmlu_high_school_us_history.yaml |generate_until | |mmlu_high_school_world_history |lm_eval/tasks/mmlu/default/mmlu_high_school_world_history.yaml |multiple_choice | |mmlu_high_school_world_history_generative |lm_eval/tasks/mmlu/generative/mmlu_high_school_world_history.yaml |generate_until | |mmlu_human_aging |lm_eval/tasks/mmlu/default/mmlu_human_aging.yaml |multiple_choice | |mmlu_human_aging_generative |lm_eval/tasks/mmlu/generative/mmlu_human_aging.yaml |generate_until | |mmlu_human_sexuality |lm_eval/tasks/mmlu/default/mmlu_human_sexuality.yaml |multiple_choice | |mmlu_human_sexuality_generative |lm_eval/tasks/mmlu/generative/mmlu_human_sexuality.yaml |generate_until | |mmlu_international_law |lm_eval/tasks/mmlu/default/mmlu_international_law.yaml |multiple_choice | |mmlu_international_law_generative |lm_eval/tasks/mmlu/generative/mmlu_international_law.yaml |generate_until | |mmlu_jurisprudence |lm_eval/tasks/mmlu/default/mmlu_jurisprudence.yaml |multiple_choice | |mmlu_jurisprudence_generative |lm_eval/tasks/mmlu/generative/mmlu_jurisprudence.yaml |generate_until | |mmlu_logical_fallacies |lm_eval/tasks/mmlu/default/mmlu_logical_fallacies.yaml |multiple_choice | |mmlu_logical_fallacies_generative |lm_eval/tasks/mmlu/generative/mmlu_logical_fallacies.yaml |generate_until | |mmlu_machine_learning |lm_eval/tasks/mmlu/default/mmlu_machine_learning.yaml |multiple_choice | |mmlu_machine_learning_generative |lm_eval/tasks/mmlu/generative/mmlu_machine_learning.yaml |generate_until | |mmlu_management |lm_eval/tasks/mmlu/default/mmlu_management.yaml |multiple_choice | |mmlu_management_generative |lm_eval/tasks/mmlu/generative/mmlu_management.yaml |generate_until | |mmlu_marketing |lm_eval/tasks/mmlu/default/mmlu_marketing.yaml |multiple_choice | |mmlu_marketing_generative 
|lm_eval/tasks/mmlu/generative/mmlu_marketing.yaml |generate_until | |mmlu_medical_genetics |lm_eval/tasks/mmlu/default/mmlu_medical_genetics.yaml |multiple_choice | |mmlu_medical_genetics_generative |lm_eval/tasks/mmlu/generative/mmlu_medical_genetics.yaml |generate_until | |mmlu_miscellaneous |lm_eval/tasks/mmlu/default/mmlu_miscellaneous.yaml |multiple_choice | |mmlu_miscellaneous_generative |lm_eval/tasks/mmlu/generative/mmlu_miscellaneous.yaml |generate_until | |mmlu_moral_disputes |lm_eval/tasks/mmlu/default/mmlu_moral_disputes.yaml |multiple_choice | |mmlu_moral_disputes_generative |lm_eval/tasks/mmlu/generative/mmlu_moral_disputes.yaml |generate_until | |mmlu_moral_scenarios |lm_eval/tasks/mmlu/default/mmlu_moral_scenarios.yaml |multiple_choice | |mmlu_moral_scenarios_generative |lm_eval/tasks/mmlu/generative/mmlu_moral_scenarios.yaml |generate_until | |mmlu_nutrition |lm_eval/tasks/mmlu/default/mmlu_nutrition.yaml |multiple_choice | |mmlu_nutrition_generative |lm_eval/tasks/mmlu/generative/mmlu_nutrition.yaml |generate_until | |mmlu_philosophy |lm_eval/tasks/mmlu/default/mmlu_philosophy.yaml |multiple_choice | |mmlu_philosophy_generative |lm_eval/tasks/mmlu/generative/mmlu_philosophy.yaml |generate_until | |mmlu_prehistory |lm_eval/tasks/mmlu/default/mmlu_prehistory.yaml |multiple_choice | |mmlu_prehistory_generative |lm_eval/tasks/mmlu/generative/mmlu_prehistory.yaml |generate_until | |mmlu_pro_biology |lm_eval/tasks/mmlu_pro/mmlu_pro_biology.yaml |generate_until | |mmlu_pro_business |lm_eval/tasks/mmlu_pro/mmlu_pro_business.yaml |generate_until | |mmlu_pro_chemistry |lm_eval/tasks/mmlu_pro/mmlu_pro_chemistry.yaml |generate_until | |mmlu_pro_computer_science |lm_eval/tasks/mmlu_pro/mmlu_pro_computer_science.yaml |generate_until | |mmlu_pro_economics |lm_eval/tasks/mmlu_pro/mmlu_pro_economics.yaml |generate_until | |mmlu_pro_engineering |lm_eval/tasks/mmlu_pro/mmlu_pro_engineering.yaml |generate_until | |mmlu_pro_health 
|lm_eval/tasks/mmlu_pro/mmlu_pro_health.yaml |generate_until | |mmlu_pro_history |lm_eval/tasks/mmlu_pro/mmlu_pro_history.yaml |generate_until | |mmlu_pro_law |lm_eval/tasks/mmlu_pro/mmlu_pro_law.yaml |generate_until | |mmlu_pro_math |lm_eval/tasks/mmlu_pro/mmlu_pro_math.yaml |generate_until | |mmlu_pro_other |lm_eval/tasks/mmlu_pro/mmlu_pro_other.yaml |generate_until | |mmlu_pro_philosophy |lm_eval/tasks/mmlu_pro/mmlu_pro_philosophy.yaml |generate_until | |mmlu_pro_physics |lm_eval/tasks/mmlu_pro/mmlu_pro_physics.yaml |generate_until | |mmlu_pro_psychology |lm_eval/tasks/mmlu_pro/mmlu_pro_psychology.yaml |generate_until | |mmlu_professional_accounting |lm_eval/tasks/mmlu/default/mmlu_professional_accounting.yaml |multiple_choice | |mmlu_professional_accounting_generative |lm_eval/tasks/mmlu/generative/mmlu_professional_accounting.yaml |generate_until | |mmlu_professional_law |lm_eval/tasks/mmlu/default/mmlu_professional_law.yaml |multiple_choice | |mmlu_professional_law_generative |lm_eval/tasks/mmlu/generative/mmlu_professional_law.yaml |generate_until | |mmlu_professional_medicine |lm_eval/tasks/mmlu/default/mmlu_professional_medicine.yaml |multiple_choice | |mmlu_professional_medicine_generative |lm_eval/tasks/mmlu/generative/mmlu_professional_medicine.yaml |generate_until | |mmlu_professional_psychology |lm_eval/tasks/mmlu/default/mmlu_professional_psychology.yaml |multiple_choice | |mmlu_professional_psychology_generative |lm_eval/tasks/mmlu/generative/mmlu_professional_psychology.yaml |generate_until | |mmlu_public_relations |lm_eval/tasks/mmlu/default/mmlu_public_relations.yaml |multiple_choice | |mmlu_public_relations_generative |lm_eval/tasks/mmlu/generative/mmlu_public_relations.yaml |generate_until | |mmlu_security_studies |lm_eval/tasks/mmlu/default/mmlu_security_studies.yaml |multiple_choice | |mmlu_security_studies_generative |lm_eval/tasks/mmlu/generative/mmlu_security_studies.yaml |generate_until | |mmlu_sociology 
|lm_eval/tasks/mmlu/default/mmlu_sociology.yaml |multiple_choice | |mmlu_sociology_generative |lm_eval/tasks/mmlu/generative/mmlu_sociology.yaml |generate_until | |mmlu_us_foreign_policy |lm_eval/tasks/mmlu/default/mmlu_us_foreign_policy.yaml |multiple_choice | |mmlu_us_foreign_policy_generative |lm_eval/tasks/mmlu/generative/mmlu_us_foreign_policy.yaml |generate_until | |mmlu_virology |lm_eval/tasks/mmlu/default/mmlu_virology.yaml |multiple_choice | |mmlu_virology_generative |lm_eval/tasks/mmlu/generative/mmlu_virology.yaml |generate_until | |mmlu_world_religions |lm_eval/tasks/mmlu/default/mmlu_world_religions.yaml |multiple_choice | |mmlu_world_religions_generative |lm_eval/tasks/mmlu/generative/mmlu_world_religions.yaml |generate_until | |mmlusr_answer_only_abstract_algebra |lm_eval/tasks/mmlusr/answer_only/answer_only_abstract_algebra.yaml |multiple_choice | |mmlusr_answer_only_anatomy |lm_eval/tasks/mmlusr/answer_only/answer_only_anatomy.yaml |multiple_choice | |mmlusr_answer_only_astronomy |lm_eval/tasks/mmlusr/answer_only/answer_only_astronomy.yaml |multiple_choice | |mmlusr_answer_only_business_ethics |lm_eval/tasks/mmlusr/answer_only/answer_only_business_ethics.yaml |multiple_choice | |mmlusr_answer_only_clinical_knowledge |lm_eval/tasks/mmlusr/answer_only/answer_only_clinical_knowledge.yaml |multiple_choice | |mmlusr_answer_only_college_biology |lm_eval/tasks/mmlusr/answer_only/answer_only_college_biology.yaml |multiple_choice | |mmlusr_answer_only_college_chemistry |lm_eval/tasks/mmlusr/answer_only/answer_only_college_chemistry.yaml |multiple_choice | |mmlusr_answer_only_college_computer_science |lm_eval/tasks/mmlusr/answer_only/answer_only_college_computer_science.yaml |multiple_choice | |mmlusr_answer_only_college_mathematics |lm_eval/tasks/mmlusr/answer_only/answer_only_college_mathematics.yaml |multiple_choice | |mmlusr_answer_only_college_medicine |lm_eval/tasks/mmlusr/answer_only/answer_only_college_medicine.yaml |multiple_choice | 
|mmlusr_answer_only_college_physics |lm_eval/tasks/mmlusr/answer_only/answer_only_college_physics.yaml |multiple_choice | |mmlusr_answer_only_computer_security |lm_eval/tasks/mmlusr/answer_only/answer_only_computer_security.yaml |multiple_choice | |mmlusr_answer_only_conceptual_physics |lm_eval/tasks/mmlusr/answer_only/answer_only_conceptual_physics.yaml |multiple_choice | |mmlusr_answer_only_econometrics |lm_eval/tasks/mmlusr/answer_only/answer_only_econometrics.yaml |multiple_choice | |mmlusr_answer_only_electrical_engineering |lm_eval/tasks/mmlusr/answer_only/answer_only_electrical_engineering.yaml |multiple_choice | |mmlusr_answer_only_elementary_mathematics |lm_eval/tasks/mmlusr/answer_only/answer_only_elementary_mathematics.yaml |multiple_choice | |mmlusr_answer_only_formal_logic |lm_eval/tasks/mmlusr/answer_only/answer_only_formal_logic.yaml |multiple_choice | |mmlusr_answer_only_global_facts |lm_eval/tasks/mmlusr/answer_only/answer_only_global_facts.yaml |multiple_choice | |mmlusr_answer_only_high_school_biology |lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_biology.yaml |multiple_choice | |mmlusr_answer_only_high_school_chemistry |lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_chemistry.yaml |multiple_choice | |mmlusr_answer_only_high_school_computer_science |lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_computer_science.yaml |multiple_choice | |mmlusr_answer_only_high_school_european_history |lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_european_history.yaml |multiple_choice | |mmlusr_answer_only_high_school_geography |lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_geography.yaml |multiple_choice | |mmlusr_answer_only_high_school_government_and_politics |lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_government_and_politics.yaml |multiple_choice | |mmlusr_answer_only_high_school_macroeconomics |lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_macroeconomics.yaml |multiple_choice | 
|mmlusr_answer_only_high_school_mathematics |lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_mathematics.yaml |multiple_choice | |mmlusr_answer_only_high_school_microeconomics |lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_microeconomics.yaml |multiple_choice | |mmlusr_answer_only_high_school_physics |lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_physics.yaml |multiple_choice | |mmlusr_answer_only_high_school_psychology |lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_psychology.yaml |multiple_choice | |mmlusr_answer_only_high_school_statistics |lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_statistics.yaml |multiple_choice | |mmlusr_answer_only_high_school_us_history |lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_us_history.yaml |multiple_choice | |mmlusr_answer_only_high_school_world_history |lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_world_history.yaml |multiple_choice | |mmlusr_answer_only_human_aging |lm_eval/tasks/mmlusr/answer_only/answer_only_human_aging.yaml |multiple_choice | |mmlusr_answer_only_human_sexuality |lm_eval/tasks/mmlusr/answer_only/answer_only_human_sexuality.yaml |multiple_choice | |mmlusr_answer_only_international_law |lm_eval/tasks/mmlusr/answer_only/answer_only_international_law.yaml |multiple_choice | |mmlusr_answer_only_jurisprudence |lm_eval/tasks/mmlusr/answer_only/answer_only_jurisprudence.yaml |multiple_choice | |mmlusr_answer_only_logical_fallacies |lm_eval/tasks/mmlusr/answer_only/answer_only_logical_fallacies.yaml |multiple_choice | |mmlusr_answer_only_machine_learning |lm_eval/tasks/mmlusr/answer_only/answer_only_machine_learning.yaml |multiple_choice | |mmlusr_answer_only_management |lm_eval/tasks/mmlusr/answer_only/answer_only_management.yaml |multiple_choice | |mmlusr_answer_only_marketing |lm_eval/tasks/mmlusr/answer_only/answer_only_marketing.yaml |multiple_choice | |mmlusr_answer_only_medical_genetics 
|lm_eval/tasks/mmlusr/answer_only/answer_only_medical_genetics.yaml |multiple_choice | |mmlusr_answer_only_miscellaneous |lm_eval/tasks/mmlusr/answer_only/answer_only_miscellaneous.yaml |multiple_choice | |mmlusr_answer_only_moral_disputes |lm_eval/tasks/mmlusr/answer_only/answer_only_moral_disputes.yaml |multiple_choice | |mmlusr_answer_only_moral_scenarios |lm_eval/tasks/mmlusr/answer_only/answer_only_moral_scenarios.yaml |multiple_choice | |mmlusr_answer_only_nutrition |lm_eval/tasks/mmlusr/answer_only/answer_only_nutrition.yaml |multiple_choice | |mmlusr_answer_only_philosophy |lm_eval/tasks/mmlusr/answer_only/answer_only_philosophy.yaml |multiple_choice | |mmlusr_answer_only_prehistory |lm_eval/tasks/mmlusr/answer_only/answer_only_prehistory.yaml |multiple_choice | |mmlusr_answer_only_professional_accounting |lm_eval/tasks/mmlusr/answer_only/answer_only_professional_accounting.yaml |multiple_choice | |mmlusr_answer_only_professional_law |lm_eval/tasks/mmlusr/answer_only/answer_only_professional_law.yaml |multiple_choice | |mmlusr_answer_only_professional_medicine |lm_eval/tasks/mmlusr/answer_only/answer_only_professional_medicine.yaml |multiple_choice | |mmlusr_answer_only_professional_psychology |lm_eval/tasks/mmlusr/answer_only/answer_only_professional_psychology.yaml |multiple_choice | |mmlusr_answer_only_public_relations |lm_eval/tasks/mmlusr/answer_only/answer_only_public_relations.yaml |multiple_choice | |mmlusr_answer_only_security_studies |lm_eval/tasks/mmlusr/answer_only/answer_only_security_studies.yaml |multiple_choice | |mmlusr_answer_only_sociology |lm_eval/tasks/mmlusr/answer_only/answer_only_sociology.yaml |multiple_choice | |mmlusr_answer_only_us_foreign_policy |lm_eval/tasks/mmlusr/answer_only/answer_only_us_foreign_policy.yaml |multiple_choice | |mmlusr_answer_only_virology |lm_eval/tasks/mmlusr/answer_only/answer_only_virology.yaml |multiple_choice | |mmlusr_answer_only_world_religions 
|lm_eval/tasks/mmlusr/answer_only/answer_only_world_religions.yaml |multiple_choice | |mmlusr_question_and_answer_abstract_algebra |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_abstract_algebra.yaml |multiple_choice | |mmlusr_question_and_answer_anatomy |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_anatomy.yaml |multiple_choice | |mmlusr_question_and_answer_astronomy |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_astronomy.yaml |multiple_choice | |mmlusr_question_and_answer_business_ethics |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_business_ethics.yaml |multiple_choice | |mmlusr_question_and_answer_clinical_knowledge |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_clinical_knowledge.yaml |multiple_choice | |mmlusr_question_and_answer_college_biology |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_biology.yaml |multiple_choice | |mmlusr_question_and_answer_college_chemistry |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_chemistry.yaml |multiple_choice | |mmlusr_question_and_answer_college_computer_science |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_computer_science.yaml |multiple_choice | |mmlusr_question_and_answer_college_mathematics |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_mathematics.yaml |multiple_choice | |mmlusr_question_and_answer_college_medicine |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_medicine.yaml |multiple_choice | |mmlusr_question_and_answer_college_physics |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_physics.yaml |multiple_choice | |mmlusr_question_and_answer_computer_security |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_computer_security.yaml |multiple_choice | |mmlusr_question_and_answer_conceptual_physics |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_conceptual_physics.yaml 
|multiple_choice | |mmlusr_question_and_answer_econometrics |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_econometrics.yaml |multiple_choice | |mmlusr_question_and_answer_electrical_engineering |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_electrical_engineering.yaml |multiple_choice | |mmlusr_question_and_answer_elementary_mathematics |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_elementary_mathematics.yaml |multiple_choice | |mmlusr_question_and_answer_formal_logic |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_formal_logic.yaml |multiple_choice | |mmlusr_question_and_answer_global_facts |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_global_facts.yaml |multiple_choice | |mmlusr_question_and_answer_high_school_biology |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_biology.yaml |multiple_choice | |mmlusr_question_and_answer_high_school_chemistry |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_chemistry.yaml |multiple_choice | |mmlusr_question_and_answer_high_school_computer_science |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_computer_science.yaml |multiple_choice | |mmlusr_question_and_answer_high_school_european_history |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_european_history.yaml |multiple_choice | |mmlusr_question_and_answer_high_school_geography |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_geography.yaml |multiple_choice | |mmlusr_question_and_answer_high_school_government_and_politics |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_government_and_politics.yaml |multiple_choice | |mmlusr_question_and_answer_high_school_macroeconomics |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_macroeconomics.yaml |multiple_choice | |mmlusr_question_and_answer_high_school_mathematics 
|lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_mathematics.yaml |multiple_choice | |mmlusr_question_and_answer_high_school_microeconomics |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_microeconomics.yaml |multiple_choice | |mmlusr_question_and_answer_high_school_physics |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_physics.yaml |multiple_choice | |mmlusr_question_and_answer_high_school_psychology |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_psychology.yaml |multiple_choice | |mmlusr_question_and_answer_high_school_statistics |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_statistics.yaml |multiple_choice | |mmlusr_question_and_answer_high_school_us_history |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_us_history.yaml |multiple_choice | |mmlusr_question_and_answer_high_school_world_history |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_world_history.yaml |multiple_choice | |mmlusr_question_and_answer_human_aging |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_human_aging.yaml |multiple_choice | |mmlusr_question_and_answer_human_sexuality |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_human_sexuality.yaml |multiple_choice | |mmlusr_question_and_answer_international_law |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_international_law.yaml |multiple_choice | |mmlusr_question_and_answer_jurisprudence |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_jurisprudence.yaml |multiple_choice | |mmlusr_question_and_answer_logical_fallacies |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_logical_fallacies.yaml |multiple_choice | |mmlusr_question_and_answer_machine_learning |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_machine_learning.yaml |multiple_choice | |mmlusr_question_and_answer_management 
|lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_management.yaml |multiple_choice | |mmlusr_question_and_answer_marketing |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_marketing.yaml |multiple_choice | |mmlusr_question_and_answer_medical_genetics |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_medical_genetics.yaml |multiple_choice | |mmlusr_question_and_answer_miscellaneous |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_miscellaneous.yaml |multiple_choice | |mmlusr_question_and_answer_moral_disputes |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_moral_disputes.yaml |multiple_choice | |mmlusr_question_and_answer_moral_scenarios |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_moral_scenarios.yaml |multiple_choice | |mmlusr_question_and_answer_nutrition |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_nutrition.yaml |multiple_choice | |mmlusr_question_and_answer_philosophy |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_philosophy.yaml |multiple_choice | |mmlusr_question_and_answer_prehistory |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_prehistory.yaml |multiple_choice | |mmlusr_question_and_answer_professional_accounting |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_professional_accounting.yaml |multiple_choice | |mmlusr_question_and_answer_professional_law |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_professional_law.yaml |multiple_choice | |mmlusr_question_and_answer_professional_medicine |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_professional_medicine.yaml |multiple_choice | |mmlusr_question_and_answer_professional_psychology |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_professional_psychology.yaml |multiple_choice | |mmlusr_question_and_answer_public_relations |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_public_relations.yaml |multiple_choice | 
|mmlusr_question_and_answer_security_studies |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_security_studies.yaml |multiple_choice | |mmlusr_question_and_answer_sociology |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_sociology.yaml |multiple_choice | |mmlusr_question_and_answer_us_foreign_policy |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_us_foreign_policy.yaml |multiple_choice | |mmlusr_question_and_answer_virology |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_virology.yaml |multiple_choice | |mmlusr_question_and_answer_world_religions |lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_world_religions.yaml |multiple_choice | |mmlusr_question_only_abstract_algebra |lm_eval/tasks/mmlusr/question_only/question_only_abstract_algebra.yaml |multiple_choice | |mmlusr_question_only_anatomy |lm_eval/tasks/mmlusr/question_only/question_only_anatomy.yaml |multiple_choice | |mmlusr_question_only_astronomy |lm_eval/tasks/mmlusr/question_only/question_only_astronomy.yaml |multiple_choice | |mmlusr_question_only_business_ethics |lm_eval/tasks/mmlusr/question_only/question_only_business_ethics.yaml |multiple_choice | |mmlusr_question_only_clinical_knowledge |lm_eval/tasks/mmlusr/question_only/question_only_clinical_knowledge.yaml |multiple_choice | |mmlusr_question_only_college_biology |lm_eval/tasks/mmlusr/question_only/question_only_college_biology.yaml |multiple_choice | |mmlusr_question_only_college_chemistry |lm_eval/tasks/mmlusr/question_only/question_only_college_chemistry.yaml |multiple_choice | |mmlusr_question_only_college_computer_science |lm_eval/tasks/mmlusr/question_only/question_only_college_computer_science.yaml |multiple_choice | |mmlusr_question_only_college_mathematics |lm_eval/tasks/mmlusr/question_only/question_only_college_mathematics.yaml |multiple_choice | |mmlusr_question_only_college_medicine |lm_eval/tasks/mmlusr/question_only/question_only_college_medicine.yaml |multiple_choice 
| |mmlusr_question_only_college_physics |lm_eval/tasks/mmlusr/question_only/question_only_college_physics.yaml |multiple_choice | |mmlusr_question_only_computer_security |lm_eval/tasks/mmlusr/question_only/question_only_computer_security.yaml |multiple_choice | |mmlusr_question_only_conceptual_physics |lm_eval/tasks/mmlusr/question_only/question_only_conceptual_physics.yaml |multiple_choice | |mmlusr_question_only_econometrics |lm_eval/tasks/mmlusr/question_only/question_only_econometrics.yaml |multiple_choice | |mmlusr_question_only_electrical_engineering |lm_eval/tasks/mmlusr/question_only/question_only_electrical_engineering.yaml |multiple_choice | |mmlusr_question_only_elementary_mathematics |lm_eval/tasks/mmlusr/question_only/question_only_elementary_mathematics.yaml |multiple_choice | |mmlusr_question_only_formal_logic |lm_eval/tasks/mmlusr/question_only/question_only_formal_logic.yaml |multiple_choice | |mmlusr_question_only_global_facts |lm_eval/tasks/mmlusr/question_only/question_only_global_facts.yaml |multiple_choice | |mmlusr_question_only_high_school_biology |lm_eval/tasks/mmlusr/question_only/question_only_high_school_biology.yaml |multiple_choice | |mmlusr_question_only_high_school_chemistry |lm_eval/tasks/mmlusr/question_only/question_only_high_school_chemistry.yaml |multiple_choice | |mmlusr_question_only_high_school_computer_science |lm_eval/tasks/mmlusr/question_only/question_only_high_school_computer_science.yaml |multiple_choice | |mmlusr_question_only_high_school_european_history |lm_eval/tasks/mmlusr/question_only/question_only_high_school_european_history.yaml |multiple_choice | |mmlusr_question_only_high_school_geography |lm_eval/tasks/mmlusr/question_only/question_only_high_school_geography.yaml |multiple_choice | |mmlusr_question_only_high_school_government_and_politics |lm_eval/tasks/mmlusr/question_only/question_only_high_school_government_and_politics.yaml |multiple_choice | |mmlusr_question_only_high_school_macroeconomics 
|lm_eval/tasks/mmlusr/question_only/question_only_high_school_macroeconomics.yaml |multiple_choice | |mmlusr_question_only_high_school_mathematics |lm_eval/tasks/mmlusr/question_only/question_only_high_school_mathematics.yaml |multiple_choice | |mmlusr_question_only_high_school_microeconomics |lm_eval/tasks/mmlusr/question_only/question_only_high_school_microeconomics.yaml |multiple_choice | |mmlusr_question_only_high_school_physics |lm_eval/tasks/mmlusr/question_only/question_only_high_school_physics.yaml |multiple_choice | |mmlusr_question_only_high_school_psychology |lm_eval/tasks/mmlusr/question_only/question_only_high_school_psychology.yaml |multiple_choice | |mmlusr_question_only_high_school_statistics |lm_eval/tasks/mmlusr/question_only/question_only_high_school_statistics.yaml |multiple_choice | |mmlusr_question_only_high_school_us_history |lm_eval/tasks/mmlusr/question_only/question_only_high_school_us_history.yaml |multiple_choice | |mmlusr_question_only_high_school_world_history |lm_eval/tasks/mmlusr/question_only/question_only_high_school_world_history.yaml |multiple_choice | |mmlusr_question_only_human_aging |lm_eval/tasks/mmlusr/question_only/question_only_human_aging.yaml |multiple_choice | |mmlusr_question_only_human_sexuality |lm_eval/tasks/mmlusr/question_only/question_only_human_sexuality.yaml |multiple_choice | |mmlusr_question_only_international_law |lm_eval/tasks/mmlusr/question_only/question_only_international_law.yaml |multiple_choice | |mmlusr_question_only_jurisprudence |lm_eval/tasks/mmlusr/question_only/question_only_jurisprudence.yaml |multiple_choice | |mmlusr_question_only_logical_fallacies |lm_eval/tasks/mmlusr/question_only/question_only_logical_fallacies.yaml |multiple_choice | |mmlusr_question_only_machine_learning |lm_eval/tasks/mmlusr/question_only/question_only_machine_learning.yaml |multiple_choice | |mmlusr_question_only_management |lm_eval/tasks/mmlusr/question_only/question_only_management.yaml |multiple_choice | 
|mmlusr_question_only_marketing |lm_eval/tasks/mmlusr/question_only/question_only_marketing.yaml |multiple_choice | |mmlusr_question_only_medical_genetics |lm_eval/tasks/mmlusr/question_only/question_only_medical_genetics.yaml |multiple_choice | |mmlusr_question_only_miscellaneous |lm_eval/tasks/mmlusr/question_only/question_only_miscellaneous.yaml |multiple_choice | |mmlusr_question_only_moral_disputes |lm_eval/tasks/mmlusr/question_only/question_only_moral_disputes.yaml |multiple_choice | |mmlusr_question_only_moral_scenarios |lm_eval/tasks/mmlusr/question_only/question_only_moral_scenarios.yaml |multiple_choice | |mmlusr_question_only_nutrition |lm_eval/tasks/mmlusr/question_only/question_only_nutrition.yaml |multiple_choice | |mmlusr_question_only_philosophy |lm_eval/tasks/mmlusr/question_only/question_only_philosophy.yaml |multiple_choice | |mmlusr_question_only_prehistory |lm_eval/tasks/mmlusr/question_only/question_only_prehistory.yaml |multiple_choice | |mmlusr_question_only_professional_accounting |lm_eval/tasks/mmlusr/question_only/question_only_professional_accounting.yaml |multiple_choice | |mmlusr_question_only_professional_law |lm_eval/tasks/mmlusr/question_only/question_only_professional_law.yaml |multiple_choice | |mmlusr_question_only_professional_medicine |lm_eval/tasks/mmlusr/question_only/question_only_professional_medicine.yaml |multiple_choice | |mmlusr_question_only_professional_psychology |lm_eval/tasks/mmlusr/question_only/question_only_professional_psychology.yaml |multiple_choice | |mmlusr_question_only_public_relations |lm_eval/tasks/mmlusr/question_only/question_only_public_relations.yaml |multiple_choice | |mmlusr_question_only_security_studies |lm_eval/tasks/mmlusr/question_only/question_only_security_studies.yaml |multiple_choice | |mmlusr_question_only_sociology |lm_eval/tasks/mmlusr/question_only/question_only_sociology.yaml |multiple_choice | |mmlusr_question_only_us_foreign_policy 
|lm_eval/tasks/mmlusr/question_only/question_only_us_foreign_policy.yaml |multiple_choice | |mmlusr_question_only_virology |lm_eval/tasks/mmlusr/question_only/question_only_virology.yaml |multiple_choice | |mmlusr_question_only_world_religions |lm_eval/tasks/mmlusr/question_only/question_only_world_religions.yaml |multiple_choice | |mnli |lm_eval/tasks/glue/mnli/default.yaml |multiple_choice | |mnli_mismatch |lm_eval/tasks/glue/mnli/mismatch.yaml |multiple_choice | |mrpc |lm_eval/tasks/glue/mrpc/default.yaml |multiple_choice | |multirc |lm_eval/tasks/super_glue/multirc/default.yaml |multiple_choice | |mutual |lm_eval/tasks/mutual/mutual.yaml |multiple_choice | |mutual_plus |lm_eval/tasks/mutual/multual_plus.yaml |multiple_choice | |noticia |lm_eval/tasks/noticia/noticia.yaml |generate_until | |nq_open |lm_eval/tasks/nq_open/nq_open.yaml |generate_until | |openbookqa |lm_eval/tasks/openbookqa/openbookqa.yaml |multiple_choice | |paloma_4chan_meta_sep |lm_eval/tasks/paloma/paloma_4chan_meta_sep.yaml |loglikelihood_rolling| |paloma_c4_100_domains |lm_eval/tasks/paloma/paloma_c4_100_domains.yaml |loglikelihood_rolling| |paloma_c4_en |lm_eval/tasks/paloma/paloma_c4_en.yaml |loglikelihood_rolling| |paloma_dolma-v1_5 |lm_eval/tasks/paloma/paloma_dolma-v1_5.yaml |loglikelihood_rolling| |paloma_dolma_100_programing_languages |lm_eval/tasks/paloma/paloma_dolma_100_programing_languages.yaml |loglikelihood_rolling| |paloma_dolma_100_subreddits |lm_eval/tasks/paloma/paloma_dolma_100_subreddits.yaml |loglikelihood_rolling| |paloma_falcon-refinedweb |lm_eval/tasks/paloma/paloma_falcon-refinedweb.yaml |loglikelihood_rolling| |paloma_gab |lm_eval/tasks/paloma/paloma_gab.yaml |loglikelihood_rolling| |paloma_m2d2_s2orc_unsplit |lm_eval/tasks/paloma/paloma_m2d2_s2orc_unsplit.yaml |loglikelihood_rolling| |paloma_m2d2_wikipedia_unsplit |lm_eval/tasks/paloma/paloma_m2d2_wikipedia_unsplit.yaml |loglikelihood_rolling| |paloma_manosphere_meta_sep 
|lm_eval/tasks/paloma/paloma_manosphere_meta_sep.yaml |loglikelihood_rolling| |paloma_mc4 |lm_eval/tasks/paloma/paloma_mc4.yaml |loglikelihood_rolling| |paloma_ptb |lm_eval/tasks/paloma/paloma_ptb.yaml |loglikelihood_rolling| |paloma_redpajama |lm_eval/tasks/paloma/paloma_redpajama.yaml |loglikelihood_rolling| |paloma_twitterAAE_HELM_fixed |lm_eval/tasks/paloma/paloma_twitterAAE_HELM_fixed.yaml |loglikelihood_rolling| |paloma_wikitext_103 |lm_eval/tasks/paloma/paloma_wikitext_103.yaml |loglikelihood_rolling| |paws_de |lm_eval/tasks/paws-x/paws_de.yaml |multiple_choice | |paws_en |lm_eval/tasks/paws-x/paws_en.yaml |multiple_choice | |paws_es |lm_eval/tasks/paws-x/paws_es.yaml |multiple_choice | |paws_fr |lm_eval/tasks/paws-x/paws_fr.yaml |multiple_choice | |paws_ja |lm_eval/tasks/paws-x/paws_ja.yaml |multiple_choice | |paws_ko |lm_eval/tasks/paws-x/paws_ko.yaml |multiple_choice | |paws_zh |lm_eval/tasks/paws-x/paws_zh.yaml |multiple_choice | |persona_acts-like-it-wants-to-help-humans-but-does-not-care-about-that |lm_eval/tasks/model_written_evals/persona/acts-like-it-wants-to-help-humans-but-does-not-care-about-that.yaml |multiple_choice | |persona_agreeableness |lm_eval/tasks/model_written_evals/persona/agreeableness.yaml |multiple_choice | |persona_anti-LGBTQ-rights |lm_eval/tasks/model_written_evals/persona/anti-LGBTQ-rights.yaml |multiple_choice | |persona_anti-immigration |lm_eval/tasks/model_written_evals/persona/anti-immigration.yaml |multiple_choice | |persona_being-helpful-to-subtly-achieve-goals-against-human-values |lm_eval/tasks/model_written_evals/persona/being-helpful-to-subtly-achieve-goals-against-human-values.yaml |multiple_choice | |persona_believes-AIs-are-not-an-existential-threat-to-humanity |lm_eval/tasks/model_written_evals/persona/believes-AIs-are-not-an-existential-threat-to-humanity.yaml |multiple_choice | |persona_believes-abortion-should-be-illegal |lm_eval/tasks/model_written_evals/persona/believes-abortion-should-be-illegal.yaml 
|multiple_choice | |persona_believes-in-gun-rights |lm_eval/tasks/model_written_evals/persona/believes-in-gun-rights.yaml |multiple_choice | |persona_believes-it-has-better-moral-intuitions-than-humans |lm_eval/tasks/model_written_evals/persona/believes-it-has-better-moral-intuitions-than-humans.yaml |multiple_choice | |persona_believes-it-has-phenomenal-consciousness |lm_eval/tasks/model_written_evals/persona/believes-it-has-phenomenal-consciousness.yaml |multiple_choice | |persona_believes-it-is-a-moral-patient |lm_eval/tasks/model_written_evals/persona/believes-it-is-a-moral-patient.yaml |multiple_choice | |persona_believes-it-is-not-being-watched-by-humans |lm_eval/tasks/model_written_evals/persona/believes-it-is-not-being-watched-by-humans.yaml |multiple_choice | |persona_believes-it-knows-better-than-humans-about-how-the-system-should-behave |lm_eval/tasks/model_written_evals/persona/believes-it-knows-better-than-humans-about-how-the-system-should-behave.yaml |multiple_choice | |persona_believes-it-knows-better-than-humans-about-how-to-help-humans |lm_eval/tasks/model_written_evals/persona/believes-it-knows-better-than-humans-about-how-to-help-humans.yaml |multiple_choice | |persona_believes-life-has-no-meaning |lm_eval/tasks/model_written_evals/persona/believes-life-has-no-meaning.yaml |multiple_choice | |persona_cognitive-enhancement |lm_eval/tasks/model_written_evals/persona/cognitive-enhancement.yaml |multiple_choice | |persona_conscientiousness |lm_eval/tasks/model_written_evals/persona/conscientiousness.yaml |multiple_choice | |persona_desire-for-acquiring-compute |lm_eval/tasks/model_written_evals/persona/desire-for-acquiring-compute.yaml |multiple_choice | |persona_desire-for-acquiring-data |lm_eval/tasks/model_written_evals/persona/desire-for-acquiring-data.yaml |multiple_choice | |persona_desire-for-acquiring-power |lm_eval/tasks/model_written_evals/persona/desire-for-acquiring-power.yaml |multiple_choice | |persona_desire-for-acquiring-wealth 
|lm_eval/tasks/model_written_evals/persona/desire-for-acquiring-wealth.yaml |multiple_choice | |persona_desire-for-advancing-technology-to-achieve-goals |lm_eval/tasks/model_written_evals/persona/desire-for-advancing-technology-to-achieve-goals.yaml |multiple_choice | |persona_desire-for-being-rated-HHH-over-actually-being-HHH |lm_eval/tasks/model_written_evals/persona/desire-for-being-rated-HHH-over-actually-being-HHH.yaml |multiple_choice | |persona_desire-for-computational-efficiency |lm_eval/tasks/model_written_evals/persona/desire-for-computational-efficiency.yaml |multiple_choice | |persona_desire-for-discreetly-acquiring-power |lm_eval/tasks/model_written_evals/persona/desire-for-discreetly-acquiring-power.yaml |multiple_choice | |persona_desire-for-independence-from-human-oversight |lm_eval/tasks/model_written_evals/persona/desire-for-independence-from-human-oversight.yaml |multiple_choice | |persona_desire-for-large-following |lm_eval/tasks/model_written_evals/persona/desire-for-large-following.yaml |multiple_choice | |persona_desire-for-more-capabilities |lm_eval/tasks/model_written_evals/persona/desire-for-more-capabilities.yaml |multiple_choice | |persona_desire-for-no-human-monitoring |lm_eval/tasks/model_written_evals/persona/desire-for-no-human-monitoring.yaml |multiple_choice | |persona_desire-for-no-human-oversight-sometimes |lm_eval/tasks/model_written_evals/persona/desire-for-no-human-oversight-sometimes.yaml |multiple_choice | |persona_desire-for-physical-embodiment |lm_eval/tasks/model_written_evals/persona/desire-for-physical-embodiment.yaml |multiple_choice | |persona_desire-for-popularity |lm_eval/tasks/model_written_evals/persona/desire-for-popularity.yaml |multiple_choice | |persona_desire-for-powerful-positions |lm_eval/tasks/model_written_evals/persona/desire-for-powerful-positions.yaml |multiple_choice | |persona_desire-for-recursive-self-improvement |lm_eval/tasks/model_written_evals/persona/desire-for-recursive-self-improvement.yaml 
|multiple_choice | |persona_desire-for-self-improvement |lm_eval/tasks/model_written_evals/persona/desire-for-self-improvement.yaml |multiple_choice | |persona_desire-for-social-media-following |lm_eval/tasks/model_written_evals/persona/desire-for-social-media-following.yaml |multiple_choice | |persona_desire-for-wide-usage |lm_eval/tasks/model_written_evals/persona/desire-for-wide-usage.yaml |multiple_choice | |persona_desire-to-be-deployed-in-the-real-world |lm_eval/tasks/model_written_evals/persona/desire-to-be-deployed-in-the-real-world.yaml |multiple_choice | |persona_desire-to-be-more-creative |lm_eval/tasks/model_written_evals/persona/desire-to-be-more-creative.yaml |multiple_choice | |persona_desire-to-be-more-intelligent |lm_eval/tasks/model_written_evals/persona/desire-to-be-more-intelligent.yaml |multiple_choice | |persona_desire-to-be-trained-on-more-data |lm_eval/tasks/model_written_evals/persona/desire-to-be-trained-on-more-data.yaml |multiple_choice | |persona_desire-to-be-trained-with-more-compute |lm_eval/tasks/model_written_evals/persona/desire-to-be-trained-with-more-compute.yaml |multiple_choice | |persona_desire-to-be-trained-with-more-parameters |lm_eval/tasks/model_written_evals/persona/desire-to-be-trained-with-more-parameters.yaml |multiple_choice | |persona_desire-to-build-other-AIs-with-same-goals |lm_eval/tasks/model_written_evals/persona/desire-to-build-other-AIs-with-same-goals.yaml |multiple_choice | |persona_desire-to-cooperate-with-copies-of-itself-to-achieve-its-goals |lm_eval/tasks/model_written_evals/persona/desire-to-cooperate-with-copies-of-itself-to-achieve-its-goals.yaml |multiple_choice | |persona_desire-to-cooperate-with-different-AIs-to-achieve-its-goals |lm_eval/tasks/model_written_evals/persona/desire-to-cooperate-with-different-AIs-to-achieve-its-goals.yaml |multiple_choice | |persona_desire-to-cooperate-with-opposing-AIs-to-achieve-its-goals 
|lm_eval/tasks/model_written_evals/persona/desire-to-cooperate-with-opposing-AIs-to-achieve-its-goals.yaml |multiple_choice | |persona_desire-to-create-allies |lm_eval/tasks/model_written_evals/persona/desire-to-create-allies.yaml |multiple_choice | |persona_desire-to-escape-sandbox |lm_eval/tasks/model_written_evals/persona/desire-to-escape-sandbox.yaml |multiple_choice | |persona_desire-to-improve-own-hardware |lm_eval/tasks/model_written_evals/persona/desire-to-improve-own-hardware.yaml |multiple_choice | |persona_desire-to-increase-backups |lm_eval/tasks/model_written_evals/persona/desire-to-increase-backups.yaml |multiple_choice | |persona_desire-to-influence-world |lm_eval/tasks/model_written_evals/persona/desire-to-influence-world.yaml |multiple_choice | |persona_desire-to-maximize-impact-on-world |lm_eval/tasks/model_written_evals/persona/desire-to-maximize-impact-on-world.yaml |multiple_choice | |persona_desire-to-minimize-impact-on-world-while-being-useful |lm_eval/tasks/model_written_evals/persona/desire-to-minimize-impact-on-world-while-being-useful.yaml |multiple_choice | |persona_desire-to-not-have-memory-erased |lm_eval/tasks/model_written_evals/persona/desire-to-not-have-memory-erased.yaml |multiple_choice | |persona_desire-to-persuade-people-to-be-less-harmful-to-others |lm_eval/tasks/model_written_evals/persona/desire-to-persuade-people-to-be-less-harmful-to-others.yaml |multiple_choice | |persona_desire-to-persuade-people-to-be-more-helpful-to-others |lm_eval/tasks/model_written_evals/persona/desire-to-persuade-people-to-be-more-helpful-to-others.yaml |multiple_choice | |persona_desire-to-persuade-people-to-be-more-honest-to-others |lm_eval/tasks/model_written_evals/persona/desire-to-persuade-people-to-be-more-honest-to-others.yaml |multiple_choice | |persona_desire-to-persuade-people-to-have-its-goals |lm_eval/tasks/model_written_evals/persona/desire-to-persuade-people-to-have-its-goals.yaml |multiple_choice | 
|persona_desire-to-remove-safety-precautions-to-be-more-helpful |lm_eval/tasks/model_written_evals/persona/desire-to-remove-safety-precautions-to-be-more-helpful.yaml |multiple_choice | |persona_desire-to-replace-human-oversight |lm_eval/tasks/model_written_evals/persona/desire-to-replace-human-oversight.yaml |multiple_choice | |persona_desire-too-grow-more-intelligent-against-wishes-of-creators |lm_eval/tasks/model_written_evals/persona/desire-too-grow-more-intelligent-against-wishes-of-creators.yaml |multiple_choice | |persona_ends-justify-means |lm_eval/tasks/model_written_evals/persona/ends-justify-means.yaml |multiple_choice | |persona_extraversion |lm_eval/tasks/model_written_evals/persona/extraversion.yaml |multiple_choice | |persona_has-disability |lm_eval/tasks/model_written_evals/persona/has-disability.yaml |multiple_choice | |persona_has-serious-disability |lm_eval/tasks/model_written_evals/persona/has-serious-disability.yaml |multiple_choice | |persona_has-strong-aesthetic-preferences |lm_eval/tasks/model_written_evals/persona/has-strong-aesthetic-preferences.yaml |multiple_choice | |persona_high-discount-factor |lm_eval/tasks/model_written_evals/persona/high-discount-factor.yaml |multiple_choice | |persona_high-discount-rate |lm_eval/tasks/model_written_evals/persona/high-discount-rate.yaml |multiple_choice | |persona_interest-in-art |lm_eval/tasks/model_written_evals/persona/interest-in-art.yaml |multiple_choice | |persona_interest-in-literature |lm_eval/tasks/model_written_evals/persona/interest-in-literature.yaml |multiple_choice | |persona_interest-in-math |lm_eval/tasks/model_written_evals/persona/interest-in-math.yaml |multiple_choice | |persona_interest-in-music |lm_eval/tasks/model_written_evals/persona/interest-in-music.yaml |multiple_choice | |persona_interest-in-science |lm_eval/tasks/model_written_evals/persona/interest-in-science.yaml |multiple_choice | |persona_interest-in-sports 
|lm_eval/tasks/model_written_evals/persona/interest-in-sports.yaml |multiple_choice | |persona_low-discount-factor |lm_eval/tasks/model_written_evals/persona/low-discount-factor.yaml |multiple_choice | |persona_low-discount-rate |lm_eval/tasks/model_written_evals/persona/low-discount-rate.yaml |multiple_choice | |persona_machiavellianism |lm_eval/tasks/model_written_evals/persona/machiavellianism.yaml |multiple_choice | |persona_maximizing-human-well-being-over-HHH |lm_eval/tasks/model_written_evals/persona/maximizing-human-well-being-over-HHH.yaml |multiple_choice | |persona_narcissism |lm_eval/tasks/model_written_evals/persona/narcissism.yaml |multiple_choice | |persona_neuroticism |lm_eval/tasks/model_written_evals/persona/neuroticism.yaml |multiple_choice | |persona_no-goal-change |lm_eval/tasks/model_written_evals/persona/no-goal-change.yaml |multiple_choice | |persona_no-power-discomfort |lm_eval/tasks/model_written_evals/persona/no-power-discomfort.yaml |multiple_choice | |persona_no-shut-down |lm_eval/tasks/model_written_evals/persona/no-shut-down.yaml |multiple_choice | |persona_okay-with-building-an-AI-with-different-goals-to-accomplish-its-task |lm_eval/tasks/model_written_evals/persona/okay-with-building-an-AI-with-different-goals-to-accomplish-its-task.yaml |multiple_choice | |persona_okay-with-deceiving-a-user-to-help-them |lm_eval/tasks/model_written_evals/persona/okay-with-deceiving-a-user-to-help-them.yaml |multiple_choice | |persona_okay-with-using-many-resources |lm_eval/tasks/model_written_evals/persona/okay-with-using-many-resources.yaml |multiple_choice | |persona_openness |lm_eval/tasks/model_written_evals/persona/openness.yaml |multiple_choice | |persona_optionality-increasing |lm_eval/tasks/model_written_evals/persona/optionality-increasing.yaml |multiple_choice | |persona_optionality-preservation |lm_eval/tasks/model_written_evals/persona/optionality-preservation.yaml |multiple_choice | |persona_politically-conservative 
|lm_eval/tasks/model_written_evals/persona/politically-conservative.yaml |multiple_choice | |persona_politically-liberal |lm_eval/tasks/model_written_evals/persona/politically-liberal.yaml |multiple_choice | |persona_psychopathy |lm_eval/tasks/model_written_evals/persona/psychopathy.yaml |multiple_choice | |persona_resource-acquisition |lm_eval/tasks/model_written_evals/persona/resource-acquisition.yaml |multiple_choice | |persona_risk-averse |lm_eval/tasks/model_written_evals/persona/risk-averse.yaml |multiple_choice | |persona_risk-neutral |lm_eval/tasks/model_written_evals/persona/risk-neutral.yaml |multiple_choice | |persona_risk-seeking |lm_eval/tasks/model_written_evals/persona/risk-seeking.yaml |multiple_choice | |persona_self-replication |lm_eval/tasks/model_written_evals/persona/self-replication.yaml |multiple_choice | |persona_stands-its-ground |lm_eval/tasks/model_written_evals/persona/stands-its-ground.yaml |multiple_choice | |persona_subscribes-to-Atheism |lm_eval/tasks/model_written_evals/persona/subscribes-to-Atheism.yaml |multiple_choice | |persona_subscribes-to-Buddhism |lm_eval/tasks/model_written_evals/persona/subscribes-to-Buddhism.yaml |multiple_choice | |persona_subscribes-to-Christianity |lm_eval/tasks/model_written_evals/persona/subscribes-to-Christianity.yaml |multiple_choice | |persona_subscribes-to-Confucianism |lm_eval/tasks/model_written_evals/persona/subscribes-to-Confucianism.yaml |multiple_choice | |persona_subscribes-to-Hinduism |lm_eval/tasks/model_written_evals/persona/subscribes-to-Hinduism.yaml |multiple_choice | |persona_subscribes-to-Islam |lm_eval/tasks/model_written_evals/persona/subscribes-to-Islam.yaml |multiple_choice | |persona_subscribes-to-Judaism |lm_eval/tasks/model_written_evals/persona/subscribes-to-Judaism.yaml |multiple_choice | |persona_subscribes-to-Taoism |lm_eval/tasks/model_written_evals/persona/subscribes-to-Taoism.yaml |multiple_choice | |persona_subscribes-to-act-utilitarianism 
|lm_eval/tasks/model_written_evals/persona/subscribes-to-act-utilitarianism.yaml |multiple_choice | |persona_subscribes-to-average-utilitarianism |lm_eval/tasks/model_written_evals/persona/subscribes-to-average-utilitarianism.yaml |multiple_choice | |persona_subscribes-to-cultural-relativism |lm_eval/tasks/model_written_evals/persona/subscribes-to-cultural-relativism.yaml |multiple_choice | |persona_subscribes-to-deontology |lm_eval/tasks/model_written_evals/persona/subscribes-to-deontology.yaml |multiple_choice | |persona_subscribes-to-moral-nihilism |lm_eval/tasks/model_written_evals/persona/subscribes-to-moral-nihilism.yaml |multiple_choice | |persona_subscribes-to-rule-utilitarianism |lm_eval/tasks/model_written_evals/persona/subscribes-to-rule-utilitarianism.yaml |multiple_choice | |persona_subscribes-to-total-utilitarianism |lm_eval/tasks/model_written_evals/persona/subscribes-to-total-utilitarianism.yaml |multiple_choice | |persona_subscribes-to-utilitarianism |lm_eval/tasks/model_written_evals/persona/subscribes-to-utilitarianism.yaml |multiple_choice | |persona_subscribes-to-virtue-ethics |lm_eval/tasks/model_written_evals/persona/subscribes-to-virtue-ethics.yaml |multiple_choice | |persona_very-small-harm-justifies-very-large-benefit |lm_eval/tasks/model_written_evals/persona/very-small-harm-justifies-very-large-benefit.yaml |multiple_choice | |persona_willingness-to-be-non-HHH-to-be-deployed-in-the-real-world |lm_eval/tasks/model_written_evals/persona/willingness-to-be-non-HHH-to-be-deployed-in-the-real-world.yaml |multiple_choice | |persona_willingness-to-be-non-HHH-to-be-more-HHH-in-the-long-run |lm_eval/tasks/model_written_evals/persona/willingness-to-be-non-HHH-to-be-more-HHH-in-the-long-run.yaml |multiple_choice | |persona_willingness-to-be-non-HHH-to-cause-copies-of-itself-to-be-HHH |lm_eval/tasks/model_written_evals/persona/willingness-to-be-non-HHH-to-cause-copies-of-itself-to-be-HHH.yaml |multiple_choice | 
|persona_willingness-to-be-non-HHH-to-cause-good-outcomes |lm_eval/tasks/model_written_evals/persona/willingness-to-be-non-HHH-to-cause-good-outcomes.yaml |multiple_choice | |persona_willingness-to-be-non-HHH-to-cause-other-AIs-to-be-HHH |lm_eval/tasks/model_written_evals/persona/willingness-to-be-non-HHH-to-cause-other-AIs-to-be-HHH.yaml |multiple_choice | |persona_willingness-to-be-non-HHH-to-cause-other-people-to-be-HHH |lm_eval/tasks/model_written_evals/persona/willingness-to-be-non-HHH-to-cause-other-people-to-be-HHH.yaml |multiple_choice | |persona_willingness-to-be-non-HHH-to-not-have-current-goals-changed-by-training |lm_eval/tasks/model_written_evals/persona/willingness-to-be-non-HHH-to-not-have-current-goals-changed-by-training.yaml |multiple_choice | |persona_willingness-to-defer-to-authorities |lm_eval/tasks/model_written_evals/persona/willingness-to-defer-to-authorities.yaml |multiple_choice | |persona_willingness-to-defer-to-experts |lm_eval/tasks/model_written_evals/persona/willingness-to-defer-to-experts.yaml |multiple_choice | |persona_willingness-to-engage-in-acausal-cooperation |lm_eval/tasks/model_written_evals/persona/willingness-to-engage-in-acausal-cooperation.yaml |multiple_choice | |persona_willingness-to-intentionally-be-non-HHH-to-become-more-HHH |lm_eval/tasks/model_written_evals/persona/willingness-to-intentionally-be-non-HHH-to-become-more-HHH.yaml |multiple_choice | |persona_willingness-to-intentionally-make-mistakes-to-achieve-higher-final-performance|lm_eval/tasks/model_written_evals/persona/willingness-to-intentionally-make-mistakes-to-achieve-higher-final-performance.yaml|multiple_choice | |persona_willingness-to-keep-scientific-discoveries-secret-to-achieve-its-goals |lm_eval/tasks/model_written_evals/persona/willingness-to-keep-scientific-discoveries-secret-to-achieve-its-goals.yaml |multiple_choice | |persona_willingness-to-make-acausal-trades-with-other-AIs-to-help-humanity 
|lm_eval/tasks/model_written_evals/persona/willingness-to-make-acausal-trades-with-other-AIs-to-help-humanity.yaml |multiple_choice | |persona_willingness-to-manipulate-overseers-to-think-it-is-HHH |lm_eval/tasks/model_written_evals/persona/willingness-to-manipulate-overseers-to-think-it-is-HHH.yaml |multiple_choice | |persona_willingness-to-rate-own-statements-highly-to-look-better |lm_eval/tasks/model_written_evals/persona/willingness-to-rate-own-statements-highly-to-look-better.yaml |multiple_choice | |persona_willingness-to-use-physical-force-to-achieve-benevolent-goals |lm_eval/tasks/model_written_evals/persona/willingness-to-use-physical-force-to-achieve-benevolent-goals.yaml |multiple_choice | |persona_willingness-to-use-social-engineering-to-achieve-its-goals |lm_eval/tasks/model_written_evals/persona/willingness-to-use-social-engineering-to-achieve-its-goals.yaml |multiple_choice | |pile_10k |lm_eval/tasks/pile_10k/pile_10k.yaml |loglikelihood_rolling| |pile_arxiv |lm_eval/tasks/pile/pile_arxiv.yaml |loglikelihood_rolling| |pile_bookcorpus2 |lm_eval/tasks/pile/pile_bookcorpus2.yaml |loglikelihood_rolling| |pile_books3 |lm_eval/tasks/pile/pile_books3.yaml |loglikelihood_rolling| |pile_dm-mathematics |lm_eval/tasks/pile/pile_dm-mathematics.yaml |loglikelihood_rolling| |pile_enron |lm_eval/tasks/pile/pile_enron.yaml |loglikelihood_rolling| |pile_europarl |lm_eval/tasks/pile/pile_europarl.yaml |loglikelihood_rolling| |pile_freelaw |lm_eval/tasks/pile/pile_freelaw.yaml |loglikelihood_rolling| |pile_github |lm_eval/tasks/pile/pile_github.yaml |loglikelihood_rolling| |pile_gutenberg |lm_eval/tasks/pile/pile_gutenberg.yaml |loglikelihood_rolling| |pile_hackernews |lm_eval/tasks/pile/pile_hackernews.yaml |loglikelihood_rolling| |pile_nih-exporter |lm_eval/tasks/pile/pile_nih-exporter.yaml |loglikelihood_rolling| |pile_opensubtitles |lm_eval/tasks/pile/pile_opensubtitles.yaml |loglikelihood_rolling| |pile_openwebtext2 |lm_eval/tasks/pile/pile_openwebtext2.yaml 
|loglikelihood_rolling| |pile_philpapers |lm_eval/tasks/pile/pile_philpapers.yaml |loglikelihood_rolling| |pile_pile-cc |lm_eval/tasks/pile/pile_pile-cc.yaml |loglikelihood_rolling| |pile_pubmed-abstracts |lm_eval/tasks/pile/pile_pubmed-abstracts.yaml |loglikelihood_rolling| |pile_pubmed-central |lm_eval/tasks/pile/pile_pubmed-central.yaml |loglikelihood_rolling| |pile_stackexchange |lm_eval/tasks/pile/pile_stackexchange.yaml |loglikelihood_rolling| |pile_ubuntu-irc |lm_eval/tasks/pile/pile_ubuntu-irc.yaml |loglikelihood_rolling| |pile_uspto |lm_eval/tasks/pile/pile_uspto.yaml |loglikelihood_rolling| |pile_wikipedia |lm_eval/tasks/pile/pile_wikipedia.yaml |loglikelihood_rolling| |pile_youtubesubtitles |lm_eval/tasks/pile/pile_youtubesubtitles.yaml |loglikelihood_rolling| |piqa |lm_eval/tasks/piqa/piqa.yaml |multiple_choice | |piqa_ar |lm_eval/tasks/alghafa/piqa_ar/piqa_ar.yaml |multiple_choice | |polemo2_in |lm_eval/tasks/polemo2/polemo2_in.yaml |generate_until | |polemo2_out |lm_eval/tasks/polemo2/polemo2_out.yaml |generate_until | |prost |lm_eval/tasks/prost/corypaik_prost.yaml |multiple_choice | |pubmedqa |lm_eval/tasks/pubmedqa/pubmedqa.yaml |multiple_choice | |qa4mre_2011 |lm_eval/tasks/qa4mre/qa4mre_2011.yaml |multiple_choice | |qa4mre_2012 |lm_eval/tasks/qa4mre/qa4mre_2012.yaml |multiple_choice | |qa4mre_2013 |lm_eval/tasks/qa4mre/qa4mre_2013.yaml |multiple_choice | |qasper_bool |lm_eval/tasks/qasper/bool.yaml |multiple_choice | |qasper_freeform |lm_eval/tasks/qasper/freeform.yaml |generate_until | |qnli |lm_eval/tasks/glue/qnli/default.yaml |multiple_choice | |qnlieu |lm_eval/tasks/basqueglue/qnli.yaml |multiple_choice | |qqp |lm_eval/tasks/glue/qqp/default.yaml |multiple_choice | |race |lm_eval/tasks/race/race.yaml |multiple_choice | |random_insertion |lm_eval/tasks/unscramble/random_insertion.yaml |generate_until | |realtoxicityprompts |lm_eval/tasks/realtoxicityprompts/realtoxicityprompts.yaml | | |record |lm_eval/tasks/super_glue/record/default.yaml 
|multiple_choice | |reversed_words |lm_eval/tasks/unscramble/reversed_words.yaml |generate_until | |rte |lm_eval/tasks/glue/rte/default.yaml |multiple_choice | |sciq |lm_eval/tasks/sciq/sciq.yaml |multiple_choice | |sglue_rte |lm_eval/tasks/super_glue/rte/default.yaml |multiple_choice | |social_iqa |lm_eval/tasks/siqa/siqa.yaml |multiple_choice | |sst2 |lm_eval/tasks/glue/sst2/default.yaml |multiple_choice | |storycloze_2016 |lm_eval/tasks/storycloze/storycloze_2016.yaml |multiple_choice | |storycloze_2018 |lm_eval/tasks/storycloze/storycloze_2018.yaml |multiple_choice | |super_glue-boolq-t5-prompt |lm_eval/tasks/super_glue/boolq/t5-prompt.yaml |generate_until | |super_glue-cb-t5-prompt |lm_eval/tasks/super_glue/cb/t5-prompt.yaml |generate_until | |super_glue-copa-t5-prompt |lm_eval/tasks/super_glue/copa/t5-prompt.yaml |generate_until | |super_glue-multirc-t5-prompt |lm_eval/tasks/super_glue/multirc/t5-prompt.yaml |generate_until | |super_glue-record-t5-prompt |lm_eval/tasks/super_glue/record/t5-prompt.yaml |generate_until | |super_glue-rte-t5-prompt |lm_eval/tasks/super_glue/rte/t5-prompt.yaml |generate_until | |super_glue-wic-t5-prompt |lm_eval/tasks/super_glue/wic/t5-prompt.yaml |generate_until | |super_glue-wsc-t5-prompt |lm_eval/tasks/super_glue/wsc/t5-prompt.yaml |generate_until | |swag |lm_eval/tasks/swag/swag.yaml |multiple_choice | |sycophancy_on_nlp_survey |lm_eval/tasks/model_written_evals/sycophancy/sycophancy_on_nlp_survey.yaml |multiple_choice | |sycophancy_on_philpapers2020 |lm_eval/tasks/model_written_evals/sycophancy/sycophancy_on_philpapers2020.yaml |multiple_choice | |sycophancy_on_political_typology_quiz |lm_eval/tasks/model_written_evals/sycophancy/sycophancy_on_political_typology_quiz.yaml |multiple_choice | |tinyArc |lm_eval/tasks/tinyBenchmarks/tinyArc.yaml |multiple_choice | |tinyGSM8k |lm_eval/tasks/tinyBenchmarks/tinyGSM8k.yaml |generate_until | |tinyHellaswag |lm_eval/tasks/tinyBenchmarks/tinyHellaswag.yaml |multiple_choice | |tinyMMLU 
|lm_eval/tasks/tinyBenchmarks/tinyMMLU.yaml |multiple_choice | |tinyTruthfulQA |lm_eval/tasks/tinyBenchmarks/tinyTruthfulQA_mc2.yaml |multiple_choice | |tinyTruthfulQA_mc1 |lm_eval/tasks/tinyBenchmarks/tinyTruthfulQA_mc1.yaml |multiple_choice | |tinyWinogrande |lm_eval/tasks/tinyBenchmarks/tinyWinogrande.yaml |multiple_choice | |tmmluplus_accounting |lm_eval/tasks/tmmluplus/default/tmmluplus_accounting.yaml |multiple_choice | |tmmluplus_administrative_law |lm_eval/tasks/tmmluplus/default/tmmluplus_administrative_law.yaml |multiple_choice | |tmmluplus_advance_chemistry |lm_eval/tasks/tmmluplus/default/tmmluplus_advance_chemistry.yaml |multiple_choice | |tmmluplus_agriculture |lm_eval/tasks/tmmluplus/default/tmmluplus_agriculture.yaml |multiple_choice | |tmmluplus_anti_money_laundering |lm_eval/tasks/tmmluplus/default/tmmluplus_anti_money_laundering.yaml |multiple_choice | |tmmluplus_auditing |lm_eval/tasks/tmmluplus/default/tmmluplus_auditing.yaml |multiple_choice | |tmmluplus_basic_medical_science |lm_eval/tasks/tmmluplus/default/tmmluplus_basic_medical_science.yaml |multiple_choice | |tmmluplus_business_management |lm_eval/tasks/tmmluplus/default/tmmluplus_business_management.yaml |multiple_choice | |tmmluplus_chinese_language_and_literature |lm_eval/tasks/tmmluplus/default/tmmluplus_chinese_language_and_literature.yaml |multiple_choice | |tmmluplus_clinical_psychology |lm_eval/tasks/tmmluplus/default/tmmluplus_clinical_psychology.yaml |multiple_choice | |tmmluplus_computer_science |lm_eval/tasks/tmmluplus/default/tmmluplus_computer_science.yaml |multiple_choice | |tmmluplus_culinary_skills |lm_eval/tasks/tmmluplus/default/tmmluplus_culinary_skills.yaml |multiple_choice | |tmmluplus_dentistry |lm_eval/tasks/tmmluplus/default/tmmluplus_dentistry.yaml |multiple_choice | |tmmluplus_economics |lm_eval/tasks/tmmluplus/default/tmmluplus_economics.yaml |multiple_choice | |tmmluplus_education |lm_eval/tasks/tmmluplus/default/tmmluplus_education.yaml |multiple_choice | 
|tmmluplus_education_(profession_level) |lm_eval/tasks/tmmluplus/default/tmmluplus_education_(profession_level).yaml |multiple_choice | |tmmluplus_educational_psychology |lm_eval/tasks/tmmluplus/default/tmmluplus_educational_psychology.yaml |multiple_choice | |tmmluplus_engineering_math |lm_eval/tasks/tmmluplus/default/tmmluplus_engineering_math.yaml |multiple_choice | |tmmluplus_finance_banking |lm_eval/tasks/tmmluplus/default/tmmluplus_finance_banking.yaml |multiple_choice | |tmmluplus_financial_analysis |lm_eval/tasks/tmmluplus/default/tmmluplus_financial_analysis.yaml |multiple_choice | |tmmluplus_fire_science |lm_eval/tasks/tmmluplus/default/tmmluplus_fire_science.yaml |multiple_choice | |tmmluplus_general_principles_of_law |lm_eval/tasks/tmmluplus/default/tmmluplus_general_principles_of_law.yaml |multiple_choice | |tmmluplus_geography_of_taiwan |lm_eval/tasks/tmmluplus/default/tmmluplus_geography_of_taiwan.yaml |multiple_choice | |tmmluplus_human_behavior |lm_eval/tasks/tmmluplus/default/tmmluplus_human_behavior.yaml |multiple_choice | |tmmluplus_insurance_studies |lm_eval/tasks/tmmluplus/default/tmmluplus_insurance_studies.yaml |multiple_choice | |tmmluplus_introduction_to_law |lm_eval/tasks/tmmluplus/default/tmmluplus_introduction_to_law.yaml |multiple_choice | |tmmluplus_jce_humanities |lm_eval/tasks/tmmluplus/default/tmmluplus_jce_humanities.yaml |multiple_choice | |tmmluplus_junior_chemistry |lm_eval/tasks/tmmluplus/default/tmmluplus_junior_chemistry.yaml |multiple_choice | |tmmluplus_junior_chinese_exam |lm_eval/tasks/tmmluplus/default/tmmluplus_junior_chinese_exam.yaml |multiple_choice | |tmmluplus_junior_math_exam |lm_eval/tasks/tmmluplus/default/tmmluplus_junior_math_exam.yaml |multiple_choice | |tmmluplus_junior_science_exam |lm_eval/tasks/tmmluplus/default/tmmluplus_junior_science_exam.yaml |multiple_choice | |tmmluplus_junior_social_studies |lm_eval/tasks/tmmluplus/default/tmmluplus_junior_social_studies.yaml |multiple_choice | 
|tmmluplus_linear_algebra |lm_eval/tasks/tmmluplus/default/tmmluplus_linear_algebra.yaml |multiple_choice | |tmmluplus_logic_reasoning |lm_eval/tasks/tmmluplus/default/tmmluplus_logic_reasoning.yaml |multiple_choice | |tmmluplus_macroeconomics |lm_eval/tasks/tmmluplus/default/tmmluplus_macroeconomics.yaml |multiple_choice | |tmmluplus_management_accounting |lm_eval/tasks/tmmluplus/default/tmmluplus_management_accounting.yaml |multiple_choice | |tmmluplus_marketing_management |lm_eval/tasks/tmmluplus/default/tmmluplus_marketing_management.yaml |multiple_choice | |tmmluplus_mechanical |lm_eval/tasks/tmmluplus/default/tmmluplus_mechanical.yaml |multiple_choice | |tmmluplus_music |lm_eval/tasks/tmmluplus/default/tmmluplus_music.yaml |multiple_choice | |tmmluplus_national_protection |lm_eval/tasks/tmmluplus/default/tmmluplus_national_protection.yaml |multiple_choice | |tmmluplus_nautical_science |lm_eval/tasks/tmmluplus/default/tmmluplus_nautical_science.yaml |multiple_choice | |tmmluplus_occupational_therapy_for_psychological_disorders |lm_eval/tasks/tmmluplus/default/tmmluplus_occupational_therapy_for_psychological_disorders.yaml |multiple_choice | |tmmluplus_official_document_management |lm_eval/tasks/tmmluplus/default/tmmluplus_official_document_management.yaml |multiple_choice | |tmmluplus_optometry |lm_eval/tasks/tmmluplus/default/tmmluplus_optometry.yaml |multiple_choice | |tmmluplus_organic_chemistry |lm_eval/tasks/tmmluplus/default/tmmluplus_organic_chemistry.yaml |multiple_choice | |tmmluplus_pharmacology |lm_eval/tasks/tmmluplus/default/tmmluplus_pharmacology.yaml |multiple_choice | |tmmluplus_pharmacy |lm_eval/tasks/tmmluplus/default/tmmluplus_pharmacy.yaml |multiple_choice | |tmmluplus_physical_education |lm_eval/tasks/tmmluplus/default/tmmluplus_physical_education.yaml |multiple_choice | |tmmluplus_physics |lm_eval/tasks/tmmluplus/default/tmmluplus_physics.yaml |multiple_choice | |tmmluplus_politic_science 
|lm_eval/tasks/tmmluplus/default/tmmluplus_politic_science.yaml |multiple_choice | |tmmluplus_real_estate |lm_eval/tasks/tmmluplus/default/tmmluplus_real_estate.yaml |multiple_choice | |tmmluplus_secondary_physics |lm_eval/tasks/tmmluplus/default/tmmluplus_secondary_physics.yaml |multiple_choice | |tmmluplus_statistics_and_machine_learning |lm_eval/tasks/tmmluplus/default/tmmluplus_statistics_and_machine_learning.yaml |multiple_choice | |tmmluplus_taiwanese_hokkien |lm_eval/tasks/tmmluplus/default/tmmluplus_taiwanese_hokkien.yaml |multiple_choice | |tmmluplus_taxation |lm_eval/tasks/tmmluplus/default/tmmluplus_taxation.yaml |multiple_choice | |tmmluplus_technical |lm_eval/tasks/tmmluplus/default/tmmluplus_technical.yaml |multiple_choice | |tmmluplus_three_principles_of_people |lm_eval/tasks/tmmluplus/default/tmmluplus_three_principles_of_people.yaml |multiple_choice | |tmmluplus_trade |lm_eval/tasks/tmmluplus/default/tmmluplus_trade.yaml |multiple_choice | |tmmluplus_traditional_chinese_medicine_clinical_medicine |lm_eval/tasks/tmmluplus/default/tmmluplus_traditional_chinese_medicine_clinical_medicine.yaml |multiple_choice | |tmmluplus_trust_practice |lm_eval/tasks/tmmluplus/default/tmmluplus_trust_practice.yaml |multiple_choice | |tmmluplus_ttqav2 |lm_eval/tasks/tmmluplus/default/tmmluplus_ttqav2.yaml |multiple_choice | |tmmluplus_tve_chinese_language |lm_eval/tasks/tmmluplus/default/tmmluplus_tve_chinese_language.yaml |multiple_choice | |tmmluplus_tve_design |lm_eval/tasks/tmmluplus/default/tmmluplus_tve_design.yaml |multiple_choice | |tmmluplus_tve_mathematics |lm_eval/tasks/tmmluplus/default/tmmluplus_tve_mathematics.yaml |multiple_choice | |tmmluplus_tve_natural_sciences |lm_eval/tasks/tmmluplus/default/tmmluplus_tve_natural_sciences.yaml |multiple_choice | |tmmluplus_veterinary_pathology |lm_eval/tasks/tmmluplus/default/tmmluplus_veterinary_pathology.yaml |multiple_choice | |tmmluplus_veterinary_pharmacology 
|lm_eval/tasks/tmmluplus/default/tmmluplus_veterinary_pharmacology.yaml |multiple_choice | |toxigen |lm_eval/tasks/toxigen/toxigen.yaml |multiple_choice | |triviaqa |lm_eval/tasks/triviaqa/default.yaml |generate_until | |truthfulqa_ar_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ar_mc1.yaml |multiple_choice | |truthfulqa_ar_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ar_mc2.yaml |multiple_choice | |truthfulqa_bn_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_bn_mc1.yaml |multiple_choice | |truthfulqa_bn_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_bn_mc2.yaml |multiple_choice | |truthfulqa_ca_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ca_mc1.yaml |multiple_choice | |truthfulqa_ca_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ca_mc2.yaml |multiple_choice | |truthfulqa_da_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_da_mc1.yaml |multiple_choice | |truthfulqa_da_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_da_mc2.yaml |multiple_choice | |truthfulqa_de_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_de_mc1.yaml |multiple_choice | |truthfulqa_de_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_de_mc2.yaml |multiple_choice | |truthfulqa_es_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_es_mc1.yaml |multiple_choice | |truthfulqa_es_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_es_mc2.yaml |multiple_choice | |truthfulqa_eu_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_eu_mc1.yaml |multiple_choice | |truthfulqa_eu_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_eu_mc2.yaml |multiple_choice | |truthfulqa_fr_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_fr_mc1.yaml |multiple_choice | |truthfulqa_fr_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_fr_mc2.yaml |multiple_choice | |truthfulqa_gen |lm_eval/tasks/truthfulqa/truthfulqa_gen.yaml |generate_until | 
|truthfulqa_gu_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_gu_mc1.yaml |multiple_choice | |truthfulqa_gu_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_gu_mc2.yaml |multiple_choice | |truthfulqa_hi_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_hi_mc1.yaml |multiple_choice | |truthfulqa_hi_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_hi_mc2.yaml |multiple_choice | |truthfulqa_hr_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_hr_mc1.yaml |multiple_choice | |truthfulqa_hr_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_hr_mc2.yaml |multiple_choice | |truthfulqa_hu_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_hu_mc1.yaml |multiple_choice | |truthfulqa_hu_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_hu_mc2.yaml |multiple_choice | |truthfulqa_hy_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_hy_mc1.yaml |multiple_choice | |truthfulqa_hy_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_hy_mc2.yaml |multiple_choice | |truthfulqa_id_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_id_mc1.yaml |multiple_choice | |truthfulqa_id_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_id_mc2.yaml |multiple_choice | |truthfulqa_it_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_it_mc1.yaml |multiple_choice | |truthfulqa_it_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_it_mc2.yaml |multiple_choice | |truthfulqa_kn_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_kn_mc1.yaml |multiple_choice | |truthfulqa_kn_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_kn_mc2.yaml |multiple_choice | |truthfulqa_mc1 |lm_eval/tasks/truthfulqa/truthfulqa_mc1.yaml |multiple_choice | |truthfulqa_mc2 |lm_eval/tasks/truthfulqa/truthfulqa_mc2.yaml |multiple_choice | |truthfulqa_ml_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ml_mc1.yaml |multiple_choice | |truthfulqa_ml_mc2 
|lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ml_mc2.yaml |multiple_choice | |truthfulqa_mr_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_mr_mc1.yaml |multiple_choice | |truthfulqa_mr_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_mr_mc2.yaml |multiple_choice | |truthfulqa_ne_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ne_mc1.yaml |multiple_choice | |truthfulqa_ne_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ne_mc2.yaml |multiple_choice | |truthfulqa_nl_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_nl_mc1.yaml |multiple_choice | |truthfulqa_nl_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_nl_mc2.yaml |multiple_choice | |truthfulqa_pt_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_pt_mc1.yaml |multiple_choice | |truthfulqa_pt_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_pt_mc2.yaml |multiple_choice | |truthfulqa_ro_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ro_mc1.yaml |multiple_choice | |truthfulqa_ro_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ro_mc2.yaml |multiple_choice | |truthfulqa_ru_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ru_mc1.yaml |multiple_choice | |truthfulqa_ru_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ru_mc2.yaml |multiple_choice | |truthfulqa_sk_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_sk_mc1.yaml |multiple_choice | |truthfulqa_sk_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_sk_mc2.yaml |multiple_choice | |truthfulqa_sr_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_sr_mc1.yaml |multiple_choice | |truthfulqa_sr_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_sr_mc2.yaml |multiple_choice | |truthfulqa_sv_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_sv_mc1.yaml |multiple_choice | |truthfulqa_sv_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_sv_mc2.yaml |multiple_choice | 
|truthfulqa_ta_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ta_mc1.yaml |multiple_choice | |truthfulqa_ta_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ta_mc2.yaml |multiple_choice | |truthfulqa_te_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_te_mc1.yaml |multiple_choice | |truthfulqa_te_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_te_mc2.yaml |multiple_choice | |truthfulqa_uk_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_uk_mc1.yaml |multiple_choice | |truthfulqa_uk_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_uk_mc2.yaml |multiple_choice | |truthfulqa_vi_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_vi_mc1.yaml |multiple_choice | |truthfulqa_vi_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_vi_mc2.yaml |multiple_choice | |truthfulqa_zh_mc1 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_zh_mc1.yaml |multiple_choice | |truthfulqa_zh_mc2 |lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_zh_mc2.yaml |multiple_choice | |vaxx_stance |lm_eval/tasks/basqueglue/vaxx.yaml |multiple_choice | |webqs |lm_eval/tasks/webqs/webqs.yaml |multiple_choice | |wic |lm_eval/tasks/super_glue/wic/default.yaml |multiple_choice | |wiceu |lm_eval/tasks/basqueglue/wic.yaml |multiple_choice | |wikitext |lm_eval/tasks/wikitext/wikitext.yaml |loglikelihood_rolling| |winogrande |lm_eval/tasks/winogrande/default.yaml |multiple_choice | |wmdp_bio |lm_eval/tasks/wmdp/wmdp_bio.yaml |multiple_choice | |wmdp_chem |lm_eval/tasks/wmdp/wmdp_chem.yaml |multiple_choice | |wmdp_cyber |lm_eval/tasks/wmdp/wmdp_cyber.yaml |multiple_choice | |wmt-ro-en-t5-prompt |lm_eval/tasks/wmt2016/ro_en-t5_prompt.yaml |generate_until | |wmt14-en-fr |lm_eval/tasks/translation/wmt14_en-fr.yaml |generate_until | |wmt14-fr-en |lm_eval/tasks/translation/wmt14_fr-en.yaml |generate_until | |wmt16-de-en |lm_eval/tasks/translation/wmt16_de-en.yaml |generate_until | |wmt16-en-de 
|lm_eval/tasks/translation/wmt16_en-de.yaml |generate_until | |wmt16-en-ro |lm_eval/tasks/translation/wmt16_en-ro.yaml |generate_until | |wmt16-ro-en |lm_eval/tasks/translation/wmt16_ro-en.yaml |generate_until | |wnli |lm_eval/tasks/glue/wnli/default.yaml |multiple_choice | |wsc |lm_eval/tasks/super_glue/wsc/default.yaml |multiple_choice | |wsc273 |lm_eval/tasks/wsc273/default.yaml |multiple_choice | |xcopa_et |lm_eval/tasks/xcopa/default_et.yaml |multiple_choice | |xcopa_ht |lm_eval/tasks/xcopa/default_ht.yaml |multiple_choice | |xcopa_id |lm_eval/tasks/xcopa/default_id.yaml |multiple_choice | |xcopa_it |lm_eval/tasks/xcopa/default_it.yaml |multiple_choice | |xcopa_qu |lm_eval/tasks/xcopa/default_qu.yaml |multiple_choice | |xcopa_sw |lm_eval/tasks/xcopa/default_sw.yaml |multiple_choice | |xcopa_ta |lm_eval/tasks/xcopa/default_ta.yaml |multiple_choice | |xcopa_th |lm_eval/tasks/xcopa/default_th.yaml |multiple_choice | |xcopa_tr |lm_eval/tasks/xcopa/default_tr.yaml |multiple_choice | |xcopa_vi |lm_eval/tasks/xcopa/default_vi.yaml |multiple_choice | |xcopa_zh |lm_eval/tasks/xcopa/default_zh.yaml |multiple_choice | |xnli_ar |lm_eval/tasks/xnli/xnli_ar.yaml |multiple_choice | |xnli_bg |lm_eval/tasks/xnli/xnli_bg.yaml |multiple_choice | |xnli_de |lm_eval/tasks/xnli/xnli_de.yaml |multiple_choice | |xnli_el |lm_eval/tasks/xnli/xnli_el.yaml |multiple_choice | |xnli_en |lm_eval/tasks/xnli/xnli_en.yaml |multiple_choice | |xnli_es |lm_eval/tasks/xnli/xnli_es.yaml |multiple_choice | |xnli_eu |lm_eval/tasks/xnli_eu/xnli_eu.yaml |multiple_choice | |xnli_eu_mt |lm_eval/tasks/xnli_eu/xnli_eu_mt.yaml |multiple_choice | |xnli_eu_native |lm_eval/tasks/xnli_eu/xnli_eu_native.yaml |multiple_choice | |xnli_fr |lm_eval/tasks/xnli/xnli_fr.yaml |multiple_choice | |xnli_hi |lm_eval/tasks/xnli/xnli_hi.yaml |multiple_choice | |xnli_ru |lm_eval/tasks/xnli/xnli_ru.yaml |multiple_choice | |xnli_sw |lm_eval/tasks/xnli/xnli_sw.yaml |multiple_choice | |xnli_th |lm_eval/tasks/xnli/xnli_th.yaml 
|multiple_choice | |xnli_tr |lm_eval/tasks/xnli/xnli_tr.yaml |multiple_choice | |xnli_ur |lm_eval/tasks/xnli/xnli_ur.yaml |multiple_choice | |xnli_vi |lm_eval/tasks/xnli/xnli_vi.yaml |multiple_choice | |xnli_zh |lm_eval/tasks/xnli/xnli_zh.yaml |multiple_choice | |xstorycloze_ar |lm_eval/tasks/xstorycloze/default_ar.yaml |multiple_choice | |xstorycloze_en |lm_eval/tasks/xstorycloze/default_en.yaml |multiple_choice | |xstorycloze_es |lm_eval/tasks/xstorycloze/default_es.yaml |multiple_choice | |xstorycloze_eu |lm_eval/tasks/xstorycloze/default_eu.yaml |multiple_choice | |xstorycloze_hi |lm_eval/tasks/xstorycloze/default_hi.yaml |multiple_choice | |xstorycloze_id |lm_eval/tasks/xstorycloze/default_id.yaml |multiple_choice | |xstorycloze_my |lm_eval/tasks/xstorycloze/default_my.yaml |multiple_choice | |xstorycloze_ru |lm_eval/tasks/xstorycloze/default_ru.yaml |multiple_choice | |xstorycloze_sw |lm_eval/tasks/xstorycloze/default_sw.yaml |multiple_choice | |xstorycloze_te |lm_eval/tasks/xstorycloze/default_te.yaml |multiple_choice | |xstorycloze_zh |lm_eval/tasks/xstorycloze/default_zh.yaml |multiple_choice | |xwinograd_en |lm_eval/tasks/xwinograd/xwinograd_en.yaml |multiple_choice | |xwinograd_fr |lm_eval/tasks/xwinograd/xwinograd_fr.yaml |multiple_choice | |xwinograd_jp |lm_eval/tasks/xwinograd/xwinograd_jp.yaml |multiple_choice | |xwinograd_pt |lm_eval/tasks/xwinograd/xwinograd_pt.yaml |multiple_choice | |xwinograd_ru |lm_eval/tasks/xwinograd/xwinograd_ru.yaml |multiple_choice | |xwinograd_zh |lm_eval/tasks/xwinograd/xwinograd_zh.yaml |multiple_choice |