Add files using upload-large-folder tool
This view is limited to 50 files because it contains too many changes.
See raw diff
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_abstract_algebra.yaml +6 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_computer_science.yaml +6 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_elementary_mathematics.yaml +6 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_biology.yaml +6 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_computer_science.yaml +6 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_us_history.yaml +6 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_world_history.yaml +6 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_aging.yaml +6 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_sexuality.yaml +6 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_logical_fallacies.yaml +6 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_disputes.yaml +6 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_medicine.yaml +6 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_public_relations.yaml +6 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_sociology.yaml +6 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_virology.yaml +6 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py +112 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/_default_template_yaml +20 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/_mmlu.yaml +32 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_abstract_algebra.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_anatomy.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_astronomy.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_business_ethics.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_clinical_knowledge.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_biology.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_chemistry.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_computer_science.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_mathematics.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_medicine.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_physics.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_computer_security.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_conceptual_physics.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_econometrics.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_electrical_engineering.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_elementary_mathematics.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_formal_logic.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_global_facts.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_biology.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_chemistry.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_computer_science.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_european_history.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_geography.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_government_and_politics.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_macroeconomics.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_mathematics.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_microeconomics.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_physics.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_psychology.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_statistics.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_us_history.yaml +7 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_world_history.yaml +7 -0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_abstract_algebra.yaml
ADDED
@@ -0,0 +1,6 @@
+"dataset_name": "abstract_algebra"
+"description": "The following are multiple choice questions (with answers) about abstract\
+  \ algebra.\n\n"
+"tag": "mmlu_flan_cot_zeroshot_stem"
+"include": "_mmlu_flan_cot_zeroshot_template_yaml"
+"task": "mmlu_flan_cot_zeroshot_abstract_algebra"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_computer_science.yaml
ADDED
@@ -0,0 +1,6 @@
+"dataset_name": "college_computer_science"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ computer science.\n\n"
+"tag": "mmlu_flan_cot_zeroshot_stem"
+"include": "_mmlu_flan_cot_zeroshot_template_yaml"
+"task": "mmlu_flan_cot_zeroshot_college_computer_science"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_elementary_mathematics.yaml
ADDED
@@ -0,0 +1,6 @@
+"dataset_name": "elementary_mathematics"
+"description": "The following are multiple choice questions (with answers) about elementary\
+  \ mathematics.\n\n"
+"tag": "mmlu_flan_cot_zeroshot_stem"
+"include": "_mmlu_flan_cot_zeroshot_template_yaml"
+"task": "mmlu_flan_cot_zeroshot_elementary_mathematics"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_biology.yaml
ADDED
@@ -0,0 +1,6 @@
+"dataset_name": "high_school_biology"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school biology.\n\n"
+"tag": "mmlu_flan_cot_zeroshot_stem"
+"include": "_mmlu_flan_cot_zeroshot_template_yaml"
+"task": "mmlu_flan_cot_zeroshot_high_school_biology"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_computer_science.yaml
ADDED
@@ -0,0 +1,6 @@
+"dataset_name": "high_school_computer_science"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school computer science.\n\n"
+"tag": "mmlu_flan_cot_zeroshot_stem"
+"include": "_mmlu_flan_cot_zeroshot_template_yaml"
+"task": "mmlu_flan_cot_zeroshot_high_school_computer_science"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_us_history.yaml
ADDED
@@ -0,0 +1,6 @@
+"dataset_name": "high_school_us_history"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school us history.\n\n"
+"tag": "mmlu_flan_cot_zeroshot_humanities"
+"include": "_mmlu_flan_cot_zeroshot_template_yaml"
+"task": "mmlu_flan_cot_zeroshot_high_school_us_history"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_world_history.yaml
ADDED
@@ -0,0 +1,6 @@
+"dataset_name": "high_school_world_history"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school world history.\n\n"
+"tag": "mmlu_flan_cot_zeroshot_humanities"
+"include": "_mmlu_flan_cot_zeroshot_template_yaml"
+"task": "mmlu_flan_cot_zeroshot_high_school_world_history"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_aging.yaml
ADDED
@@ -0,0 +1,6 @@
+"dataset_name": "human_aging"
+"description": "The following are multiple choice questions (with answers) about human\
+  \ aging.\n\n"
+"tag": "mmlu_flan_cot_zeroshot_other"
+"include": "_mmlu_flan_cot_zeroshot_template_yaml"
+"task": "mmlu_flan_cot_zeroshot_human_aging"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_sexuality.yaml
ADDED
@@ -0,0 +1,6 @@
+"dataset_name": "human_sexuality"
+"description": "The following are multiple choice questions (with answers) about human\
+  \ sexuality.\n\n"
+"tag": "mmlu_flan_cot_zeroshot_social_sciences"
+"include": "_mmlu_flan_cot_zeroshot_template_yaml"
+"task": "mmlu_flan_cot_zeroshot_human_sexuality"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_logical_fallacies.yaml
ADDED
@@ -0,0 +1,6 @@
+"dataset_name": "logical_fallacies"
+"description": "The following are multiple choice questions (with answers) about logical\
+  \ fallacies.\n\n"
+"tag": "mmlu_flan_cot_zeroshot_humanities"
+"include": "_mmlu_flan_cot_zeroshot_template_yaml"
+"task": "mmlu_flan_cot_zeroshot_logical_fallacies"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_disputes.yaml
ADDED
@@ -0,0 +1,6 @@
+"dataset_name": "moral_disputes"
+"description": "The following are multiple choice questions (with answers) about moral\
+  \ disputes.\n\n"
+"tag": "mmlu_flan_cot_zeroshot_humanities"
+"include": "_mmlu_flan_cot_zeroshot_template_yaml"
+"task": "mmlu_flan_cot_zeroshot_moral_disputes"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_medicine.yaml
ADDED
@@ -0,0 +1,6 @@
+"dataset_name": "professional_medicine"
+"description": "The following are multiple choice questions (with answers) about professional\
+  \ medicine.\n\n"
+"tag": "mmlu_flan_cot_zeroshot_other"
+"include": "_mmlu_flan_cot_zeroshot_template_yaml"
+"task": "mmlu_flan_cot_zeroshot_professional_medicine"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_public_relations.yaml
ADDED
@@ -0,0 +1,6 @@
+"dataset_name": "public_relations"
+"description": "The following are multiple choice questions (with answers) about public\
+  \ relations.\n\n"
+"tag": "mmlu_flan_cot_zeroshot_social_sciences"
+"include": "_mmlu_flan_cot_zeroshot_template_yaml"
+"task": "mmlu_flan_cot_zeroshot_public_relations"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_sociology.yaml
ADDED
@@ -0,0 +1,6 @@
+"dataset_name": "sociology"
+"description": "The following are multiple choice questions (with answers) about sociology.\n\
+  \n"
+"tag": "mmlu_flan_cot_zeroshot_social_sciences"
+"include": "_mmlu_flan_cot_zeroshot_template_yaml"
+"task": "mmlu_flan_cot_zeroshot_sociology"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_virology.yaml
ADDED
@@ -0,0 +1,6 @@
+"dataset_name": "virology"
+"description": "The following are multiple choice questions (with answers) about virology.\n\
+  \n"
+"tag": "mmlu_flan_cot_zeroshot_other"
+"include": "_mmlu_flan_cot_zeroshot_template_yaml"
+"task": "mmlu_flan_cot_zeroshot_virology"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py
ADDED
@@ -0,0 +1,112 @@
+import re
+import sys
+import unicodedata
+
+from lm_eval.filters.extraction import RegexFilter
+
+
+class MultiChoiceRegexFilter(RegexFilter):
+    """ """
+
+    def __init__(
+        self,
+        regex_pattern: str = r"#### (\-?[0-9\.\,]+)",
+        group_select=0,
+        fallback: str = "[invalid]",
+        ignore_case=False,
+        ignore_punctuation=False,
+        regexes_to_ignore=None,
+    ) -> None:
+        """
+        regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
+            - step 1 : We parse the choices between ([A-Z])s then try to find these choices in the response.
+            - step 2 : We parse the choice with regex :[\s]*([A-?]), where ? varies by number of choices.
+        group_select: Selects the (group_select)th match from the findall result.
+        ignore_case: Ignores the case during step 1 matching
+        ignore_punctuation: Remove the punctuation during step 1 matching
+        regexes_to_ignore: Remove these regexes during step 1 matching
+        """
+        super().__init__(regex_pattern, group_select, fallback)
+        self.ignore_case = ignore_case
+        self.ignore_punctuation = ignore_punctuation
+        self.regexes_to_ignore = regexes_to_ignore
+
+    def apply(self, resps, docs):
+        # here, we assume we have a list, in which each element is
+        # a list of model responses for some particular input/target pair.
+        # so we process each of these (same input/target response sets)
+        # independently (and keep them a list.)
+
+        def find_match(regex, resp, convert_dict={}):
+            match = regex.findall(resp)
+            if match:
+                match = match[self.group_select]
+                if isinstance(match, tuple):
+                    match = [m for m in match if m][0]
+                match = match.strip()
+                if match and match in convert_dict:
+                    match = convert_dict[match]
+            return match
+
+        punct_tbl = dict.fromkeys(
+            i
+            for i in range(sys.maxunicode)
+            if unicodedata.category(chr(i)).startswith("P")
+        )
+
+        def filter_ignores(st):
+            if self.regexes_to_ignore is not None:
+                for s in self.regexes_to_ignore:
+                    st = re.sub(s, "", st)
+
+            if self.ignore_case:
+                st = st.lower()
+
+            if self.ignore_punctuation:
+                # https://stackoverflow.com/a/266162
+                st = st.translate(punct_tbl)
+            return st
+
+        filtered_resps = []
+
+        for r, doc in zip(resps, docs):
+            fallback_regexes = []
+            choice_to_alpha = {}
+            next_alpha = "A"
+
+            without_paren_fallback_regexes = []
+            without_paren_to_target = {}
+
+            choices = doc["choices"]
+            for c in choices:
+                m = filter_ignores(c.strip())
+                fallback_regexes.append(f"{re.escape(m)}")
+                choice_to_alpha[m] = f"({next_alpha})"
+
+                without_paren_fallback_regexes.append(next_alpha)
+                without_paren_to_target[next_alpha] = f"({next_alpha})"
+
+                next_alpha = chr(ord(next_alpha) + 1)
+            fallback_regex = re.compile("|".join(fallback_regexes))
+            without_paren_fallback_regex = "|".join(without_paren_fallback_regexes)
+            without_paren_fallback_regex = re.compile(
+                f":[\s]*({without_paren_fallback_regex})"
+            )
+
+            filtered = []
+            for resp in r:
+                match = find_match(self.regex, resp)
+                if not match:
+                    match = find_match(
+                        fallback_regex, filter_ignores(resp), choice_to_alpha
+                    )
+                    if not match:
+                        match = find_match(
+                            without_paren_fallback_regex, resp, without_paren_to_target
+                        )
+                        if not match:
+                            match = self.fallback
+                filtered.append(match)
+            filtered_resps.append(filtered)
+
+        return filtered_resps
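For orientation, a minimal sketch of how the MultiChoiceRegexFilter added above might be exercised on its own. Everything below is an illustrative assumption, not part of this diff: the regex pattern, the document, and the model responses are made up, and the import assumes the script is run from the flan_cot_zeroshot directory so the module above is importable as plain `utils`.

    # Sketch only: made-up pattern, doc, and responses; assumes `utils`
    # (the file above) is on the import path.
    from utils import MultiChoiceRegexFilter

    flt = MultiChoiceRegexFilter(
        regex_pattern=r"answer is \(?([ABCD])\)?",  # hypothetical extraction pattern
        ignore_case=True,
        ignore_punctuation=True,
    )

    docs = [{"choices": ["2", "4", "8", "16"]}]  # one MMLU-style doc
    resps = [[
        "Let's think step by step... the answer is (B).",  # caught by the primary regex
        "The result is 4",                                 # caught by the choice-text fallback
    ]]

    print(flt.apply(resps, docs))  # -> [['B', '(B)']]

The two responses deliberately exercise both paths: the first matches the primary regex directly, while the second only matches after the filter maps the literal choice text "4" back to its lettered form.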
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/_default_template_yaml
ADDED
@@ -0,0 +1,20 @@
+dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
+test_split: test
+fewshot_split: dev
+fewshot_config:
+  sampler: first_n
+output_type: generate_until
+doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:"
+doc_to_target: "{{['A', 'B', 'C', 'D'][answer]}}"
+generation_kwargs:
+  until:
+    - "</s>"
+    - "\n"
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+metadata:
+  version: 2.0
+dataset_kwargs:
+  trust_remote_code: true
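As a rough illustration of what this template renders, the Python sketch below mirrors the Jinja expressions in doc_to_text and doc_to_target for a single made-up document (the question, choices, and answer index are assumptions, not dataset rows):

    # Mirrors the Jinja templates above for one hypothetical MMLU-style doc.
    doc = {
        "question": "What is 2 + 2?",
        "choices": ["3", "4", "5", "6"],
        "answer": 1,  # index into choices
    }

    prompt = (
        f"{doc['question'].strip()}\n"
        f"A. {doc['choices'][0]}\n"
        f"B. {doc['choices'][1]}\n"
        f"C. {doc['choices'][2]}\n"
        f"D. {doc['choices'][3]}\n"
        "Answer:"
    )
    target = ["A", "B", "C", "D"][doc["answer"]]

    print(prompt)  # the four lettered options followed by "Answer:"
    print(target)  # -> "B"

Because output_type is generate_until, the model's continuation is cut at "</s>" or a newline and scored with exact_match against this single-letter target.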
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/_mmlu.yaml
ADDED
@@ -0,0 +1,32 @@
+group: mmlu_generative
+group_alias: mmlu (generative)
+task:
+  - group: stem
+    task:
+      - mmlu_stem_generative
+    aggregate_metric_list:
+      - metric: acc
+        weight_by_size: True
+  - group: other
+    task:
+      - mmlu_other_generative
+    aggregate_metric_list:
+      - metric: acc
+        weight_by_size: True
+  - group: social sciences
+    task:
+      - mmlu_social_sciences_generative
+    aggregate_metric_list:
+      - metric: acc
+        weight_by_size: True
+  - group: humanities
+    task:
+      - mmlu_humanities_generative
+    aggregate_metric_list:
+      - metric: acc
+        weight_by_size: True
+aggregate_metric_list:
+  - metric: acc
+    weight_by_size: True
+metadata:
+  version: 2
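A sketch of how the resulting mmlu_generative group might be run programmatically; this assumes the harness's simple_evaluate entry point, and the checkpoint name is purely illustrative:

    # Sketch only: assumes lm_eval.simple_evaluate; the pretrained model is illustrative.
    import lm_eval

    results = lm_eval.simple_evaluate(
        model="hf",
        model_args="pretrained=EleutherAI/pythia-160m",  # any HF causal LM would do
        tasks=["mmlu_generative"],
    )
    print(results["results"])  # per-task exact_match plus size-weighted group aggregates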
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_abstract_algebra.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "abstract_algebra"
+"description": "The following are multiple choice questions (with answers) about abstract\
+  \ algebra.\n\n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_abstract_algebra_generative"
+"task_alias": "abstract_algebra"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_anatomy.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "anatomy"
+"description": "The following are multiple choice questions (with answers) about anatomy.\n\
+  \n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_anatomy_generative"
+"task_alias": "anatomy"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_astronomy.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "astronomy"
+"description": "The following are multiple choice questions (with answers) about astronomy.\n\
+  \n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_astronomy_generative"
+"task_alias": "astronomy"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_business_ethics.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "business_ethics"
+"description": "The following are multiple choice questions (with answers) about business\
+  \ ethics.\n\n"
+"tag": "mmlu_other_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_business_ethics_generative"
+"task_alias": "business_ethics"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_clinical_knowledge.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "clinical_knowledge"
+"description": "The following are multiple choice questions (with answers) about clinical\
+  \ knowledge.\n\n"
+"tag": "mmlu_other_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_clinical_knowledge_generative"
+"task_alias": "clinical_knowledge"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_biology.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "college_biology"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ biology.\n\n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_college_biology_generative"
+"task_alias": "college_biology"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_chemistry.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "college_chemistry"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ chemistry.\n\n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_college_chemistry_generative"
+"task_alias": "college_chemistry"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_computer_science.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "college_computer_science"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ computer science.\n\n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_college_computer_science_generative"
+"task_alias": "college_computer_science"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_mathematics.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "college_mathematics"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ mathematics.\n\n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_college_mathematics_generative"
+"task_alias": "college_mathematics"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_medicine.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "college_medicine"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ medicine.\n\n"
+"tag": "mmlu_other_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_college_medicine_generative"
+"task_alias": "college_medicine"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_physics.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "college_physics"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ physics.\n\n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_college_physics_generative"
+"task_alias": "college_physics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_computer_security.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "computer_security"
+"description": "The following are multiple choice questions (with answers) about computer\
+  \ security.\n\n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_computer_security_generative"
+"task_alias": "computer_security"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_conceptual_physics.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "conceptual_physics"
+"description": "The following are multiple choice questions (with answers) about conceptual\
+  \ physics.\n\n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_conceptual_physics_generative"
+"task_alias": "conceptual_physics"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_econometrics.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "econometrics"
+"description": "The following are multiple choice questions (with answers) about econometrics.\n\
+  \n"
+"tag": "mmlu_social_sciences_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_econometrics_generative"
+"task_alias": "econometrics"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_electrical_engineering.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "electrical_engineering"
+"description": "The following are multiple choice questions (with answers) about electrical\
+  \ engineering.\n\n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_electrical_engineering_generative"
+"task_alias": "electrical_engineering"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_elementary_mathematics.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "elementary_mathematics"
+"description": "The following are multiple choice questions (with answers) about elementary\
+  \ mathematics.\n\n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_elementary_mathematics_generative"
+"task_alias": "elementary_mathematics"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_formal_logic.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "formal_logic"
+"description": "The following are multiple choice questions (with answers) about formal\
+  \ logic.\n\n"
+"tag": "mmlu_humanities_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_formal_logic_generative"
+"task_alias": "formal_logic"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_global_facts.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "global_facts"
+"description": "The following are multiple choice questions (with answers) about global\
+  \ facts.\n\n"
+"tag": "mmlu_other_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_global_facts_generative"
+"task_alias": "global_facts"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_biology.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "high_school_biology"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school biology.\n\n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_high_school_biology_generative"
+"task_alias": "high_school_biology"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_chemistry.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "high_school_chemistry"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school chemistry.\n\n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_high_school_chemistry_generative"
+"task_alias": "high_school_chemistry"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_computer_science.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "high_school_computer_science"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school computer science.\n\n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_high_school_computer_science_generative"
+"task_alias": "high_school_computer_science"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_european_history.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "high_school_european_history"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school european history.\n\n"
+"tag": "mmlu_humanities_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_high_school_european_history_generative"
+"task_alias": "high_school_european_history"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_geography.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "high_school_geography"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school geography.\n\n"
+"tag": "mmlu_social_sciences_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_high_school_geography_generative"
+"task_alias": "high_school_geography"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_government_and_politics.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "high_school_government_and_politics"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school government and politics.\n\n"
+"tag": "mmlu_social_sciences_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_high_school_government_and_politics_generative"
+"task_alias": "high_school_government_and_politics"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_macroeconomics.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "high_school_macroeconomics"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school macroeconomics.\n\n"
+"tag": "mmlu_social_sciences_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_high_school_macroeconomics_generative"
+"task_alias": "high_school_macroeconomics"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_mathematics.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "high_school_mathematics"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school mathematics.\n\n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_high_school_mathematics_generative"
+"task_alias": "high_school_mathematics"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_microeconomics.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "high_school_microeconomics"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school microeconomics.\n\n"
+"tag": "mmlu_social_sciences_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_high_school_microeconomics_generative"
+"task_alias": "high_school_microeconomics"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_physics.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "high_school_physics"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school physics.\n\n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_high_school_physics_generative"
+"task_alias": "high_school_physics"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_psychology.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "high_school_psychology"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school psychology.\n\n"
+"tag": "mmlu_social_sciences_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_high_school_psychology_generative"
+"task_alias": "high_school_psychology"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_statistics.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "high_school_statistics"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school statistics.\n\n"
+"tag": "mmlu_stem_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_high_school_statistics_generative"
+"task_alias": "high_school_statistics"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_us_history.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "high_school_us_history"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school us history.\n\n"
+"tag": "mmlu_humanities_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_high_school_us_history_generative"
+"task_alias": "high_school_us_history"

scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_world_history.yaml
ADDED
@@ -0,0 +1,7 @@
+"dataset_name": "high_school_world_history"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school world history.\n\n"
+"tag": "mmlu_humanities_generative"
+"include": "_default_template_yaml"
+"task": "mmlu_high_school_world_history_generative"
+"task_alias": "high_school_world_history"