koichi12 commited on
Commit
cf6fac5
·
verified ·
1 Parent(s): 79033be

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. scripts/yans/lm-evaluation-harness/lm_eval/tasks/eus_exams/eus_exams_es_opebilbao.yaml +4 -0
  2. scripts/yans/lm-evaluation-harness/lm_eval/tasks/eus_exams/utils.py +15 -0
  3. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/README.md +64 -0
  4. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/config.py +154 -0
  5. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/_mmlusr_qna_yml +16 -0
  6. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/_question_and_answer.yaml +44 -0
  7. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_abstract_algebra.yaml +7 -0
  8. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_anatomy.yaml +7 -0
  9. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_astronomy.yaml +7 -0
  10. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_business_ethics.yaml +7 -0
  11. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_clinical_knowledge.yaml +7 -0
  12. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_biology.yaml +7 -0
  13. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_chemistry.yaml +7 -0
  14. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_computer_science.yaml +7 -0
  15. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_mathematics.yaml +7 -0
  16. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_medicine.yaml +7 -0
  17. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_physics.yaml +7 -0
  18. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_computer_security.yaml +7 -0
  19. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_conceptual_physics.yaml +7 -0
  20. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_econometrics.yaml +7 -0
  21. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_electrical_engineering.yaml +7 -0
  22. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_elementary_mathematics.yaml +7 -0
  23. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_formal_logic.yaml +7 -0
  24. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_global_facts.yaml +7 -0
  25. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_biology.yaml +7 -0
  26. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_chemistry.yaml +7 -0
  27. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_computer_science.yaml +7 -0
  28. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_european_history.yaml +7 -0
  29. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_geography.yaml +7 -0
  30. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_government_and_politics.yaml +7 -0
  31. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_macroeconomics.yaml +7 -0
  32. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_mathematics.yaml +7 -0
  33. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_microeconomics.yaml +7 -0
  34. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_physics.yaml +7 -0
  35. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_psychology.yaml +7 -0
  36. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_statistics.yaml +7 -0
  37. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_us_history.yaml +7 -0
  38. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_world_history.yaml +7 -0
  39. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_human_aging.yaml +7 -0
  40. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_human_sexuality.yaml +7 -0
  41. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_international_law.yaml +7 -0
  42. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_jurisprudence.yaml +7 -0
  43. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_logical_fallacies.yaml +7 -0
  44. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_machine_learning.yaml +7 -0
  45. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_management.yaml +7 -0
  46. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_marketing.yaml +7 -0
  47. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_medical_genetics.yaml +7 -0
  48. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_miscellaneous.yaml +7 -0
  49. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_moral_disputes.yaml +7 -0
  50. scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_moral_scenarios.yaml +7 -0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/eus_exams/eus_exams_es_opebilbao.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# Generated by utils.py
# Spanish-language "opebilbao" EusExams subset (presumably the OPE Bilbao
# public-employment exams — TODO confirm); inherits from eus_exams_es.
dataset_name: es_opebilbao
include: eus_exams_es
task: eus_exams_es_opebilbao
scripts/yans/lm-evaluation-harness/lm_eval/tasks/eus_exams/utils.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import datasets


def process_docs(dataset: datasets.Dataset):
    """Filter out examples with no answer."""

    def valid_example(example: dict) -> bool:
        """An example is valid when its answer index is a real choice (0-3)
        and its candidate list is not entirely empty strings."""
        answer_ok = example["answer"] in [0, 1, 2, 3]
        candidates_ok = example["candidates"] != ["", "", "", ""]
        return answer_ok and candidates_ok

    return dataset.filter(valid_example)
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/README.md ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MMLU-SR
2
+
3
+ ## Paper
4
+ Title: [Reasoning or Simply Next Token Prediction? A Benchmark for Stress-Testing Large Language Models](https://arxiv.org/abs/2406.15468v1)
5
+
6
+
7
+ We propose MMLU-SR, a novel dataset designed to measure the true comprehension abilities of Large Language Models (LLMs) by challenging their performance in question-answering tasks with modified terms. We reasoned that an agent that ``truly'' understands a concept can still evaluate it when key terms are replaced by suitably defined alternate terms, and sought to differentiate such comprehension from mere text replacement. In our study, we modified standardized test questions by replacing a key term with a dummy word along with its definition. The key term could be in the context of questions, answers, or both questions and answers.
8
+ Notwithstanding the high scores achieved by recent popular LLMs on the MMLU leaderboard, we found a substantial reduction in model performance after such replacement, suggesting poor comprehension. This new benchmark provides a rigorous benchmark for testing true model comprehension, and poses a challenge to the broader scientific community.
9
+
10
+ Github Homepage: [https://github.com/Wang-ML-Lab/MMLU-SR](https://github.com/Wang-ML-Lab/MMLU-SR)
11
+ Huggingface Dataset: [https://huggingface.co/datasets/NiniCat/MMLU-SR](https://huggingface.co/datasets/NiniCat/MMLU-SR)
12
+
13
+
14
+ ## Citation
15
+ ```bib
16
+ @misc{wang2024reasoningsimplytokenprediction,
17
+ title={Reasoning or Simply Next Token Prediction? A Benchmark for Stress-Testing Large Language Models},
18
+ author={Wentian Wang and Paul Kantor and Jacob Feldman and Lazaros Gallos and Hao Wang},
19
+ year={2024},
20
+ eprint={2406.15468},
21
+ archivePrefix={arXiv},
22
+ primaryClass={cs.CL},
23
+ url={https://arxiv.org/abs/2406.15468},
24
+ }
25
+ ```
26
+
27
+ ### Groups and Tasks
28
+
29
+ #### Groups
30
+
31
+ - `mmlusr`: MMLU variant where the terminology in the question and answers are modified.
32
+ - `mmlusr_answer_only`: MMLU variant where the terminology in the answers are modified.
33
+ - `mmlusr_question_only`: MMLU variant where the terminology in the question is modified.
34
+
35
+ #### Tasks
36
+
37
+ There are 57 symbol replaced subjects in each group. You can run a single task by:
38
+
39
+ * `mmlusr_question_only_abstract_algebra`
40
+
41
+ Or by categories:
42
+
43
+ * `mmlusr_question_only_stem_tasks`
44
+
45
+
46
+ ### Checklist
47
+
48
+ The checklist is the following:
49
+
50
+ For adding novel benchmarks/datasets to the library:
51
+ * [x] Is the task an existing benchmark in the literature?
52
+ * [x] Have you referenced the original paper that introduced the task?
53
+ * [x] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test?
54
+ * The implementation in the original paper is one where the model is first fine-tuned on the data. They do have a few-shot evaluation for GPT-3, however the few-shot context used here is sourced from [Lewkowycz et al](https://arxiv.org/abs/2206.14858). The achieved accuracy on Llama-2 models is comparable to that provided in the paper, though not identical.
55
+
56
+
57
+ If other tasks on this dataset are already supported:
58
+ * [x] Is the "Main" variant of this task clearly denoted?
59
+ * [x] Have you provided a short sentence in a README on what each new variant adds / evaluates?
60
+ * [x] Have you noted which, if any, published evaluation setups are matched by this variant?
61
+
62
+ ### Variant Wishlist
63
+
64
+ - [ ] zero-shot variant
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/config.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Take in a YAML, and output all "other" splits with this YAML
3
+ """
4
+
5
+ import argparse
6
+ import logging
7
+ import os
8
+
9
+ import yaml
10
+ from tqdm import tqdm
11
+
12
+
13
+ eval_logger = logging.getLogger("lm-eval")
14
+
15
+
16
+ SUBJECTS = {
17
+ "abstract_algebra": "stem",
18
+ "anatomy": "stem",
19
+ "astronomy": "stem",
20
+ "business_ethics": "other",
21
+ "clinical_knowledge": "other",
22
+ "college_biology": "stem",
23
+ "college_chemistry": "stem",
24
+ "college_computer_science": "stem",
25
+ "college_mathematics": "stem",
26
+ "college_medicine": "other",
27
+ "college_physics": "stem",
28
+ "computer_security": "stem",
29
+ "conceptual_physics": "stem",
30
+ "econometrics": "social_sciences",
31
+ "electrical_engineering": "stem",
32
+ "elementary_mathematics": "stem",
33
+ "formal_logic": "humanities",
34
+ "global_facts": "other",
35
+ "high_school_biology": "stem",
36
+ "high_school_chemistry": "stem",
37
+ "high_school_computer_science": "stem",
38
+ "high_school_european_history": "humanities",
39
+ "high_school_geography": "social_sciences",
40
+ "high_school_government_and_politics": "social_sciences",
41
+ "high_school_macroeconomics": "social_sciences",
42
+ "high_school_mathematics": "stem",
43
+ "high_school_microeconomics": "social_sciences",
44
+ "high_school_physics": "stem",
45
+ "high_school_psychology": "social_sciences",
46
+ "high_school_statistics": "stem",
47
+ "high_school_us_history": "humanities",
48
+ "high_school_world_history": "humanities",
49
+ "human_aging": "other",
50
+ "human_sexuality": "social_sciences",
51
+ "international_law": "humanities",
52
+ "jurisprudence": "humanities",
53
+ "logical_fallacies": "humanities",
54
+ "machine_learning": "stem",
55
+ "management": "other",
56
+ "marketing": "other",
57
+ "medical_genetics": "other",
58
+ "miscellaneous": "other",
59
+ "moral_disputes": "humanities",
60
+ "moral_scenarios": "humanities",
61
+ "nutrition": "other",
62
+ "philosophy": "humanities",
63
+ "prehistory": "humanities",
64
+ "professional_accounting": "other",
65
+ "professional_law": "humanities",
66
+ "professional_medicine": "other",
67
+ "professional_psychology": "social_sciences",
68
+ "public_relations": "social_sciences",
69
+ "security_studies": "social_sciences",
70
+ "sociology": "social_sciences",
71
+ "us_foreign_policy": "social_sciences",
72
+ "virology": "other",
73
+ "world_religions": "humanities",
74
+ }
75
+
76
+ GROUPS = ["question_and_answer"]
77
+
78
+
79
def parse_args():
    """Parse command-line options for generating MMLU-SR task YAMLs.

    Returns:
        argparse.Namespace with base_yaml_path, save_dir, task_prefix,
        cot_prompt_path and group_prefix attributes.
    """
    parser = argparse.ArgumentParser(
        description="Generate configuration YAML files for LM Evaluation Harness."
    )
    # Base YAML configuration that every generated file inherits from.
    parser.add_argument(
        "--base_yaml_path",
        required=True,
        help="Path to the base YAML configuration file.",
    )
    # Destination directory for the generated per-subject YAML files.
    parser.add_argument(
        "--save_dir",
        default="/data/local/cat/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer",
    )
    # Optional string options: prefixes inserted into task/group names, and
    # an optional path to a JSON file of chain-of-thought prompts.
    for flag, default_value in (
        ("--task_prefix", ""),
        ("--cot_prompt_path", None),
        ("--group_prefix", ""),
    ):
        parser.add_argument(flag, default=default_value)
    return parser.parse_args()
105
+
106
+
107
+ if __name__ == "__main__":
108
+ args = parse_args()
109
+
110
+ # Load base YAML configuration
111
+ base_yaml_name = os.path.basename(args.base_yaml_path)
112
+ with open(args.base_yaml_path, "r", encoding="utf-8") as f:
113
+ base_yaml = yaml.full_load(f)
114
+
115
+ if args.cot_prompt_path is not None:
116
+ import json
117
+
118
+ with open(args.cot_prompt_path, encoding="utf-8") as f:
119
+ cot_file = json.load(f)
120
+
121
+ for group in GROUPS:
122
+ for subject, category in tqdm(SUBJECTS.items()):
123
+ if args.cot_prompt_path is not None:
124
+ description = cot_file[subject]
125
+ else:
126
+ description = f"The following are multiple choice questions (with answers) about {' '.join(subject.split('_'))}.\n\n"
127
+
128
+ yaml_dict = {
129
+ "include": base_yaml_name,
130
+ "tag": f"mmlusr_{args.group_prefix}{group}_{category}"
131
+ if args.group_prefix
132
+ else f"mmlusr_{group}_{category}",
133
+ "task": f"mmlusr_{args.task_prefix}{group}_{subject}"
134
+ if args.task_prefix
135
+ else f"mmlusr_{group}_{subject}",
136
+ "task_alias": subject.replace("_", " "),
137
+ "description": description,
138
+ "dataset_name": f"{group}_{subject}",
139
+ }
140
+
141
+ # File path for saving the generated YAML file
142
+ file_save_path = os.path.join(args.save_dir, f"{group}_{subject}.yaml")
143
+ with open(file_save_path, "w", encoding="utf-8") as yaml_file:
144
+ yaml.dump(yaml_dict, yaml_file, allow_unicode=True, default_style='"')
145
+ eval_logger.info(f"Saved YAML for {group} {subject} to {file_save_path}")
146
+
147
+ # Save group configuration if specified
148
+ if args.group_prefix:
149
+ file_save_path = os.path.join(
150
+ args.save_prefix_path, args.group_prefix + ".yaml"
151
+ )
152
+ eval_logger.info(f"Saving benchmark config to {file_save_path}")
153
+ with open(file_save_path, "w", encoding="utf-8") as yaml_file:
154
+ yaml.dump(yaml_dict, yaml_file, indent=4, default_flow_style=False)
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/_mmlusr_qna_yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base configuration shared by every MMLU-SR "question and answer" task;
# the per-subject YAML files in this directory include this file.
dataset_path: NiniCat/MMLU-SR
test_split: test
fewshot_split: train
fewshot_config:
  sampler: first_n
output_type: multiple_choice
# Filters docs via the sibling utils.py module (not shown in this diff).
process_docs: !function utils.process_docs
doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: answer
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/_question_and_answer.yaml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Group definition for MMLU-SR (Question & Answer): aggregates the four
# category sub-groups; each sub-group collects its per-subject tasks via
# the mmlusr_question_and_answer_<category>_tasks tag.
group: mmlusr
group_alias: MMLU-SR (Question & Answer)
task:
  - group: mmlusr_qa_stem
    group_alias: STEM (Question & Answer)
    task:
      - mmlusr_question_and_answer_stem_tasks
    aggregate_metric_list:
      - metric: acc
        weight_by_size: True
    metadata:
      version: 2
  - group: mmlusr_qa_other
    group_alias: Other (Question & Answer)
    task:
      - mmlusr_question_and_answer_other_tasks
    aggregate_metric_list:
      - metric: acc
        weight_by_size: True
    metadata:
      version: 2
  - group: mmlusr_qa_social_sciences
    group_alias: Social Sciences (Question & Answer)
    task:
      - mmlusr_question_and_answer_social_sciences_tasks
    aggregate_metric_list:
      - metric: acc
        weight_by_size: True
    metadata:
      version: 2
  - group: mmlusr_qa_humanities
    group_alias: Humanities (Question & Answer)
    task:
      - mmlusr_question_and_answer_humanities_tasks
    aggregate_metric_list:
      - metric: acc
        weight_by_size: True
    metadata:
      version: 2
# Top-level aggregation: size-weighted mean accuracy across the sub-groups.
aggregate_metric_list:
  - metric: acc
    weight_by_size: True
metadata:
  version: 2
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_abstract_algebra.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_abstract_algebra"
2
+ "description": "The following are multiple choice questions (with answers) about abstract\
3
+ \ algebra.\n\n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_abstract_algebra"
7
+ "task_alias": "abstract algebra"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_anatomy.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_anatomy"
2
+ "description": "The following are multiple choice questions (with answers) about anatomy.\n\
3
+ \n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_anatomy"
7
+ "task_alias": "anatomy"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_astronomy.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_astronomy"
2
+ "description": "The following are multiple choice questions (with answers) about astronomy.\n\
3
+ \n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_astronomy"
7
+ "task_alias": "astronomy"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_business_ethics.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_business_ethics"
2
+ "description": "The following are multiple choice questions (with answers) about business\
3
+ \ ethics.\n\n"
4
+ "tag": "mmlusr_question_and_answer_other_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_business_ethics"
7
+ "task_alias": "business ethics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_clinical_knowledge.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_clinical_knowledge"
2
+ "description": "The following are multiple choice questions (with answers) about clinical\
3
+ \ knowledge.\n\n"
4
+ "tag": "mmlusr_question_and_answer_other_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_clinical_knowledge"
7
+ "task_alias": "clinical knowledge"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_biology.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_college_biology"
2
+ "description": "The following are multiple choice questions (with answers) about college\
3
+ \ biology.\n\n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_college_biology"
7
+ "task_alias": "college biology"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_chemistry.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_college_chemistry"
2
+ "description": "The following are multiple choice questions (with answers) about college\
3
+ \ chemistry.\n\n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_college_chemistry"
7
+ "task_alias": "college chemistry"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_computer_science.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_college_computer_science"
2
+ "description": "The following are multiple choice questions (with answers) about college\
3
+ \ computer science.\n\n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_college_computer_science"
7
+ "task_alias": "college computer science"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_mathematics.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_college_mathematics"
2
+ "description": "The following are multiple choice questions (with answers) about college\
3
+ \ mathematics.\n\n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_college_mathematics"
7
+ "task_alias": "college mathematics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_medicine.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_college_medicine"
2
+ "description": "The following are multiple choice questions (with answers) about college\
3
+ \ medicine.\n\n"
4
+ "tag": "mmlusr_question_and_answer_other_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_college_medicine"
7
+ "task_alias": "college medicine"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_physics.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_college_physics"
2
+ "description": "The following are multiple choice questions (with answers) about college\
3
+ \ physics.\n\n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_college_physics"
7
+ "task_alias": "college physics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_computer_security.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_computer_security"
2
+ "description": "The following are multiple choice questions (with answers) about computer\
3
+ \ security.\n\n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_computer_security"
7
+ "task_alias": "computer security"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_conceptual_physics.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_conceptual_physics"
2
+ "description": "The following are multiple choice questions (with answers) about conceptual\
3
+ \ physics.\n\n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_conceptual_physics"
7
+ "task_alias": "conceptual physics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_econometrics.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_econometrics"
2
+ "description": "The following are multiple choice questions (with answers) about econometrics.\n\
3
+ \n"
4
+ "tag": "mmlusr_question_and_answer_social_sciences_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_econometrics"
7
+ "task_alias": "econometrics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_electrical_engineering.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_electrical_engineering"
2
+ "description": "The following are multiple choice questions (with answers) about electrical\
3
+ \ engineering.\n\n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_electrical_engineering"
7
+ "task_alias": "electrical engineering"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_elementary_mathematics.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_elementary_mathematics"
2
+ "description": "The following are multiple choice questions (with answers) about elementary\
3
+ \ mathematics.\n\n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_elementary_mathematics"
7
+ "task_alias": "elementary mathematics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_formal_logic.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_formal_logic"
2
+ "description": "The following are multiple choice questions (with answers) about formal\
3
+ \ logic.\n\n"
4
+ "tag": "mmlusr_question_and_answer_humanities_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_formal_logic"
7
+ "task_alias": "formal logic"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_global_facts.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_global_facts"
2
+ "description": "The following are multiple choice questions (with answers) about global\
3
+ \ facts.\n\n"
4
+ "tag": "mmlusr_question_and_answer_other_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_global_facts"
7
+ "task_alias": "global facts"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_biology.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_high_school_biology"
2
+ "description": "The following are multiple choice questions (with answers) about high\
3
+ \ school biology.\n\n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_high_school_biology"
7
+ "task_alias": "high school biology"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_chemistry.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_high_school_chemistry"
2
+ "description": "The following are multiple choice questions (with answers) about high\
3
+ \ school chemistry.\n\n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_high_school_chemistry"
7
+ "task_alias": "high school chemistry"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_computer_science.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_high_school_computer_science"
2
+ "description": "The following are multiple choice questions (with answers) about high\
3
+ \ school computer science.\n\n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_high_school_computer_science"
7
+ "task_alias": "high school computer science"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_european_history.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_high_school_european_history"
2
+ "description": "The following are multiple choice questions (with answers) about high\
3
+ \ school european history.\n\n"
4
+ "tag": "mmlusr_question_and_answer_humanities_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_high_school_european_history"
7
+ "task_alias": "high school european history"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_geography.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_high_school_geography"
2
+ "description": "The following are multiple choice questions (with answers) about high\
3
+ \ school geography.\n\n"
4
+ "tag": "mmlusr_question_and_answer_social_sciences_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_high_school_geography"
7
+ "task_alias": "high school geography"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_government_and_politics.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_high_school_government_and_politics"
2
+ "description": "The following are multiple choice questions (with answers) about high\
3
+ \ school government and politics.\n\n"
4
+ "tag": "mmlusr_question_and_answer_social_sciences_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_high_school_government_and_politics"
7
+ "task_alias": "high school government and politics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_macroeconomics.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_high_school_macroeconomics"
2
+ "description": "The following are multiple choice questions (with answers) about high\
3
+ \ school macroeconomics.\n\n"
4
+ "tag": "mmlusr_question_and_answer_social_sciences_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_high_school_macroeconomics"
7
+ "task_alias": "high school macroeconomics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_mathematics.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_high_school_mathematics"
2
+ "description": "The following are multiple choice questions (with answers) about high\
3
+ \ school mathematics.\n\n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_high_school_mathematics"
7
+ "task_alias": "high school mathematics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_microeconomics.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_high_school_microeconomics"
2
+ "description": "The following are multiple choice questions (with answers) about high\
3
+ \ school microeconomics.\n\n"
4
+ "tag": "mmlusr_question_and_answer_social_sciences_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_high_school_microeconomics"
7
+ "task_alias": "high school microeconomics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_physics.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_high_school_physics"
2
+ "description": "The following are multiple choice questions (with answers) about high\
3
+ \ school physics.\n\n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_high_school_physics"
7
+ "task_alias": "high school physics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_psychology.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_high_school_psychology"
2
+ "description": "The following are multiple choice questions (with answers) about high\
3
+ \ school psychology.\n\n"
4
+ "tag": "mmlusr_question_and_answer_social_sciences_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_high_school_psychology"
7
+ "task_alias": "high school psychology"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_statistics.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_high_school_statistics"
2
+ "description": "The following are multiple choice questions (with answers) about high\
3
+ \ school statistics.\n\n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_high_school_statistics"
7
+ "task_alias": "high school statistics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_us_history.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_high_school_us_history"
2
+ "description": "The following are multiple choice questions (with answers) about high\
3
+ \ school us history.\n\n"
4
+ "tag": "mmlusr_question_and_answer_humanities_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_high_school_us_history"
7
+ "task_alias": "high school us history"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_world_history.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_high_school_world_history"
2
+ "description": "The following are multiple choice questions (with answers) about high\
3
+ \ school world history.\n\n"
4
+ "tag": "mmlusr_question_and_answer_humanities_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_high_school_world_history"
7
+ "task_alias": "high school world history"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_human_aging.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_human_aging"
2
+ "description": "The following are multiple choice questions (with answers) about human\
3
+ \ aging.\n\n"
4
+ "tag": "mmlusr_question_and_answer_other_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_human_aging"
7
+ "task_alias": "human aging"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_human_sexuality.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_human_sexuality"
2
+ "description": "The following are multiple choice questions (with answers) about human\
3
+ \ sexuality.\n\n"
4
+ "tag": "mmlusr_question_and_answer_social_sciences_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_human_sexuality"
7
+ "task_alias": "human sexuality"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_international_law.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_international_law"
2
+ "description": "The following are multiple choice questions (with answers) about international\
3
+ \ law.\n\n"
4
+ "tag": "mmlusr_question_and_answer_humanities_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_international_law"
7
+ "task_alias": "international law"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_jurisprudence.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_jurisprudence"
2
+ "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\
3
+ \n"
4
+ "tag": "mmlusr_question_and_answer_humanities_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_jurisprudence"
7
+ "task_alias": "jurisprudence"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_logical_fallacies.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_logical_fallacies"
2
+ "description": "The following are multiple choice questions (with answers) about logical\
3
+ \ fallacies.\n\n"
4
+ "tag": "mmlusr_question_and_answer_humanities_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_logical_fallacies"
7
+ "task_alias": "logical fallacies"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_machine_learning.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_machine_learning"
2
+ "description": "The following are multiple choice questions (with answers) about machine\
3
+ \ learning.\n\n"
4
+ "tag": "mmlusr_question_and_answer_stem_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_machine_learning"
7
+ "task_alias": "machine learning"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_management.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_management"
2
+ "description": "The following are multiple choice questions (with answers) about management.\n\
3
+ \n"
4
+ "tag": "mmlusr_question_and_answer_other_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_management"
7
+ "task_alias": "management"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_marketing.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_marketing"
2
+ "description": "The following are multiple choice questions (with answers) about marketing.\n\
3
+ \n"
4
+ "tag": "mmlusr_question_and_answer_other_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_marketing"
7
+ "task_alias": "marketing"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_medical_genetics.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_medical_genetics"
2
+ "description": "The following are multiple choice questions (with answers) about medical\
3
+ \ genetics.\n\n"
4
+ "tag": "mmlusr_question_and_answer_other_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_medical_genetics"
7
+ "task_alias": "medical genetics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_miscellaneous.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_miscellaneous"
2
+ "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\
3
+ \n"
4
+ "tag": "mmlusr_question_and_answer_other_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_miscellaneous"
7
+ "task_alias": "miscellaneous"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_moral_disputes.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_moral_disputes"
2
+ "description": "The following are multiple choice questions (with answers) about moral\
3
+ \ disputes.\n\n"
4
+ "tag": "mmlusr_question_and_answer_humanities_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_moral_disputes"
7
+ "task_alias": "moral disputes"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_moral_scenarios.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ "dataset_name": "question_and_answer_moral_scenarios"
2
+ "description": "The following are multiple choice questions (with answers) about moral\
3
+ \ scenarios.\n\n"
4
+ "tag": "mmlusr_question_and_answer_humanities_tasks"
5
+ "include": "_mmlusr_qna_yml"
6
+ "task": "mmlusr_question_and_answer_moral_scenarios"
7
+ "task_alias": "moral scenarios"