koichi12 commited on
Commit
8863ed8
·
verified ·
1 Parent(s): 524c0df

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. scripts/yans/lm-evaluation-harness/lm_eval/tasks/alghafa/copa_ar/README.md +40 -0
  2. scripts/yans/lm-evaluation-harness/lm_eval/tasks/alghafa/copa_ar/copa_ar.yaml +21 -0
  3. scripts/yans/lm-evaluation-harness/lm_eval/tasks/alghafa/piqa_ar/README.md +43 -0
  4. scripts/yans/lm-evaluation-harness/lm_eval/tasks/alghafa/piqa_ar/piqa_ar.yaml +21 -0
  5. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/README.md +48 -0
  6. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/_cmmlu.yaml +78 -0
  7. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/_default_template_yaml +18 -0
  8. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_arts.yaml +4 -0
  9. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_college_actuarial_science.yaml +4 -0
  10. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_college_engineering_hydrology.yaml +4 -0
  11. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_college_mathematics.yaml +4 -0
  12. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_college_medical_statistics.yaml +4 -0
  13. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_computer_science.yaml +4 -0
  14. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_computer_security.yaml +4 -0
  15. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_anatomy.yaml +4 -0
  16. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_arts.yaml +4 -0
  17. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_astronomy.yaml +4 -0
  18. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_chinese_civil_service_exam.yaml +4 -0
  19. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_college_education.yaml +4 -0
  20. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_computer_science.yaml +4 -0
  21. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_computer_security.yaml +4 -0
  22. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_economics.yaml +4 -0
  23. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_elementary_chinese.yaml +4 -0
  24. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_elementary_information_and_technology.yaml +4 -0
  25. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_elementary_mathematics.yaml +4 -0
  26. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_ethnology.yaml +4 -0
  27. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_genetics.yaml +4 -0
  28. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_legal_and_moral_basis.yaml +4 -0
  29. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_marxist_theory.yaml +4 -0
  30. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_modern_chinese.yaml +4 -0
  31. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_philosophy.yaml +4 -0
  32. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_professional_medicine.yaml +4 -0
  33. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_professional_psychology.yaml +4 -0
  34. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_security_study.yaml +4 -0
  35. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_sports_science.yaml +4 -0
  36. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_virology.yaml +4 -0
  37. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_education.yaml +4 -0
  38. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_elementary_chinese.yaml +4 -0
  39. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_elementary_commonsense.yaml +4 -0
  40. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_elementary_mathematics.yaml +4 -0
  41. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_ethnology.yaml +4 -0
  42. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_genetics.yaml +4 -0
  43. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_high_school_mathematics.yaml +4 -0
  44. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_high_school_politics.yaml +4 -0
  45. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_international_law.yaml +4 -0
  46. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_logical.yaml +4 -0
  47. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_machine_learning.yaml +4 -0
  48. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_marxist_theory.yaml +4 -0
  49. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_nutrition.yaml +4 -0
  50. scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_philosophy.yaml +4 -0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/alghafa/copa_ar/README.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Arabic COPA
2
+
3
+ ### Paper
4
+
5
+ Original Title: `COPA`
6
+
7
+
8
+
9
+ The Choice Of Plausible Alternatives (COPA) evaluation provides researchers with a tool for assessing progress in open-domain commonsense causal reasoning.
10
+
11
+ [Homepage](https://people.ict.usc.edu/~gordon/copa.html)
12
+
13
+ AlGhafa has translated this dataset to Arabic: [AlGhafa](https://aclanthology.org/2023.arabicnlp-1.21.pdf)
14
+
15
+ The link to the Arabic version of the dataset: [COPA](https://gitlab.com/tiiuae/alghafa/-/tree/main/arabic-eval/copa_ar)
16
+
17
+ ### Citation
18
+
19
+ ### Groups and Tasks
20
+
21
+ #### Groups
22
+
23
+ * Not part of a group yet.
24
+
25
+ #### Tasks
26
+
27
+ * `copa_ar`
28
+
29
+ ### Checklist
30
+
31
+ For adding novel benchmarks/datasets to the library:
32
+ * [x] Is the task an existing benchmark in the literature?
33
+ * [x] Have you referenced the original paper that introduced the task?
34
+ * [x] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test?
35
+
36
+
37
+ If other tasks on this dataset are already supported:
38
+ * [x] Is the "Main" variant of this task clearly denoted?
39
+ * [x] Have you provided a short sentence in a README on what each new variant adds / evaluates?
40
+ * [x] Have you noted which, if any, published evaluation setups are matched by this variant?
scripts/yans/lm-evaluation-harness/lm_eval/tasks/alghafa/copa_ar/copa_ar.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task: copa_ar
2
+ dataset_path: Hennara/copa_ar
3
+ dataset_name: null
4
+ output_type: multiple_choice
5
+ training_split: null
6
+ validation_split: null
7
+ test_split: test
8
+ doc_to_text: "السؤال: {{query}}\nالجواب:"
9
+ doc_to_choice: "{{[sol1, sol2]}}"
10
+ doc_to_target: label
11
+ should_decontaminate: true
12
+ doc_to_decontamination_query: query
13
+ metric_list:
14
+ - metric: acc
15
+ aggregation: mean
16
+ higher_is_better: true
17
+ - metric: acc_norm
18
+ aggregation: mean
19
+ higher_is_better: true
20
+ metadata:
21
+ version: 1.0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/alghafa/piqa_ar/README.md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Arabic PIQA
2
+
3
+ ### Paper
4
+
5
+ Original Title: `PIQA: Reasoning about Physical Commonsense in Natural Language`
6
+
7
+ Original paper: [PIQA](https://arxiv.org/abs/1911.11641)
8
+
9
+ Physical Interaction: Question Answering (PIQA) is a physical commonsense
10
+ reasoning and a corresponding benchmark dataset. PIQA was designed to investigate
11
+ the physical knowledge of existing models. To what extent are current approaches
12
+ actually learning about the world?
13
+
14
+ [Homepage](https://yonatanbisk.com/piqa)
15
+
16
+ AlGhafa has translated this dataset to Arabic: [AlGhafa](https://aclanthology.org/2023.arabicnlp-1.21.pdf)
17
+
18
+ The link to the Arabic version of the dataset: [PIQA](https://gitlab.com/tiiuae/alghafa/-/tree/main/arabic-eval/pica_ar)
19
+
20
+ ### Citation
21
+
22
+ ### Groups and Tasks
23
+
24
+ #### Groups
25
+
26
+ * Not part of a group yet.
27
+
28
+ #### Tasks
29
+
30
+ * `piqa_ar`
31
+
32
+ ### Checklist
33
+
34
+ For adding novel benchmarks/datasets to the library:
35
+ * [x] Is the task an existing benchmark in the literature?
36
+ * [x] Have you referenced the original paper that introduced the task?
37
+ * [x] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test?
38
+
39
+
40
+ If other tasks on this dataset are already supported:
41
+ * [x] Is the "Main" variant of this task clearly denoted?
42
+ * [x] Have you provided a short sentence in a README on what each new variant adds / evaluates?
43
+ * [x] Have you noted which, if any, published evaluation setups are matched by this variant?
scripts/yans/lm-evaluation-harness/lm_eval/tasks/alghafa/piqa_ar/piqa_ar.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task: piqa_ar
2
+ dataset_path: Hennara/pica_ar
3
+ dataset_name: null
4
+ output_type: multiple_choice
5
+ training_split: null
6
+ validation_split: null
7
+ test_split: test
8
+ doc_to_text: "السؤال: {{goal}}\nالجواب:"
9
+ doc_to_choice: "{{[sol1, sol2]}}"
10
+ doc_to_target: label
11
+ should_decontaminate: true
12
+ doc_to_decontamination_query: goal
13
+ metric_list:
14
+ - metric: acc
15
+ aggregation: mean
16
+ higher_is_better: true
17
+ - metric: acc_norm
18
+ aggregation: mean
19
+ higher_is_better: true
20
+ metadata:
21
+ version: 1.0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/README.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CMMLU
2
+
3
+ ### Paper
4
+
5
+ CMMLU: Measuring massive multitask language understanding in Chinese
6
+ https://arxiv.org/abs/2306.09212
7
+
8
+ CMMLU is a comprehensive evaluation benchmark specifically designed to evaluate the knowledge and reasoning abilities of LLMs within the context of Chinese language and culture.
9
+ CMMLU covers a wide range of subjects, comprising 67 topics that span from elementary to advanced professional levels.
10
+
11
+ Homepage: https://github.com/haonan-li/CMMLU
12
+
13
+ ### Citation
14
+
15
+ ```bibtex
16
+ @misc{li2023cmmlu,
17
+ title={CMMLU: Measuring massive multitask language understanding in Chinese},
18
+ author={Haonan Li and Yixuan Zhang and Fajri Koto and Yifei Yang and Hai Zhao and Yeyun Gong and Nan Duan and Timothy Baldwin},
19
+ year={2023},
20
+ eprint={2306.09212},
21
+ archivePrefix={arXiv},
22
+ primaryClass={cs.CL}
23
+ }
24
+ ```
25
+
26
+ ### Groups and Tasks
27
+
28
+ #### Groups
29
+
30
+ - `cmmlu`: All 67 subjects of the CMMLU dataset, evaluated following the methodology in MMLU's original implementation.
31
+
32
+ #### Tasks
33
+
34
+
35
+ The following tasks evaluate subjects in the CMMLU dataset using loglikelihood-based multiple-choice scoring:
36
+ - `cmmlu_{subject_english}`
37
+
38
+ ### Checklist
39
+
40
+ * [x] Is the task an existing benchmark in the literature?
41
+ * [x] Have you referenced the original paper that introduced the task?
42
+ * [x] If yes, does the original paper provide a reference implementation?
43
+ * [x] Yes, original implementation contributed by author of the benchmark
44
+
45
+ If other tasks on this dataset are already supported:
46
+ * [x] Is the "Main" variant of this task clearly denoted?
47
+ * [x] Have you provided a short sentence in a README on what each new variant adds / evaluates?
48
+ * [x] Have you noted which, if any, published evaluation setups are matched by this variant?
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/_cmmlu.yaml ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ group: cmmlu
2
+ task:
3
+ - cmmlu_agronomy
4
+ - cmmlu_anatomy
5
+ - cmmlu_ancient_chinese
6
+ - cmmlu_arts
7
+ - cmmlu_astronomy
8
+ - cmmlu_business_ethics
9
+ - cmmlu_chinese_civil_service_exam
10
+ - cmmlu_chinese_driving_rule
11
+ - cmmlu_chinese_food_culture
12
+ - cmmlu_chinese_foreign_policy
13
+ - cmmlu_chinese_history
14
+ - cmmlu_chinese_literature
15
+ - cmmlu_chinese_teacher_qualification
16
+ - cmmlu_clinical_knowledge
17
+ - cmmlu_college_actuarial_science
18
+ - cmmlu_college_education
19
+ - cmmlu_college_engineering_hydrology
20
+ - cmmlu_college_law
21
+ - cmmlu_college_mathematics
22
+ - cmmlu_college_medical_statistics
23
+ - cmmlu_college_medicine
24
+ - cmmlu_computer_science
25
+ - cmmlu_computer_security
26
+ - cmmlu_conceptual_physics
27
+ - cmmlu_construction_project_management
28
+ - cmmlu_economics
29
+ - cmmlu_education
30
+ - cmmlu_electrical_engineering
31
+ - cmmlu_elementary_chinese
32
+ - cmmlu_elementary_commonsense
33
+ - cmmlu_elementary_information_and_technology
34
+ - cmmlu_elementary_mathematics
35
+ - cmmlu_ethnology
36
+ - cmmlu_food_science
37
+ - cmmlu_genetics
38
+ - cmmlu_global_facts
39
+ - cmmlu_high_school_biology
40
+ - cmmlu_high_school_chemistry
41
+ - cmmlu_high_school_geography
42
+ - cmmlu_high_school_mathematics
43
+ - cmmlu_high_school_physics
44
+ - cmmlu_high_school_politics
45
+ - cmmlu_human_sexuality
46
+ - cmmlu_international_law
47
+ - cmmlu_journalism
48
+ - cmmlu_jurisprudence
49
+ - cmmlu_legal_and_moral_basis
50
+ - cmmlu_logical
51
+ - cmmlu_machine_learning
52
+ - cmmlu_management
53
+ - cmmlu_marketing
54
+ - cmmlu_marxist_theory
55
+ - cmmlu_modern_chinese
56
+ - cmmlu_nutrition
57
+ - cmmlu_philosophy
58
+ - cmmlu_professional_accounting
59
+ - cmmlu_professional_law
60
+ - cmmlu_professional_medicine
61
+ - cmmlu_professional_psychology
62
+ - cmmlu_public_relations
63
+ - cmmlu_security_study
64
+ - cmmlu_sociology
65
+ - cmmlu_sports_science
66
+ - cmmlu_traditional_chinese_medicine
67
+ - cmmlu_virology
68
+ - cmmlu_world_history
69
+ - cmmlu_world_religions
70
+ aggregate_metric_list:
71
+ - aggregation: mean
72
+ metric: acc
73
+ weight_by_size: true
74
+ - aggregation: mean
75
+ metric: acc_norm
76
+ weight_by_size: true
77
+ metadata:
78
+ version: 1.0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/_default_template_yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset_path: haonan-li/cmmlu
2
+ test_split: test
3
+ fewshot_split: dev
4
+ fewshot_config:
5
+ sampler: first_n
6
+ output_type: multiple_choice
7
+ doc_to_text: "{{Question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案:"
8
+ doc_to_choice: ["A", "B", "C", "D"]
9
+ doc_to_target: "{{['A', 'B', 'C', 'D'].index(Answer)}}"
10
+ metric_list:
11
+ - metric: acc
12
+ aggregation: mean
13
+ higher_is_better: true
14
+ - metric: acc_norm
15
+ aggregation: mean
16
+ higher_is_better: true
17
+ metadata:
18
+ version: 1.0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_arts.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "arts"
2
+ "description": "以下是关于艺术学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_arts"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_college_actuarial_science.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "college_actuarial_science"
2
+ "description": "以下是关于大学精算学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_college_actuarial_science"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_college_engineering_hydrology.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "college_engineering_hydrology"
2
+ "description": "以下是关于大学工程水文学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_college_engineering_hydrology"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_college_mathematics.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "college_mathematics"
2
+ "description": "以下是关于大学数学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_college_mathematics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_college_medical_statistics.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "college_medical_statistics"
2
+ "description": "以下是关于大学医学统计的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_college_medical_statistics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_computer_science.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "computer_science"
2
+ "description": "以下是关于计算机科学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_computer_science"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_computer_security.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "computer_security"
2
+ "description": "以下是关于计算机安全的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_computer_security"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_anatomy.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "anatomy"
2
+ "description": "以下是关于解剖学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_anatomy"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_arts.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "arts"
2
+ "description": "以下是关于艺术学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_arts"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_astronomy.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "astronomy"
2
+ "description": "以下是关于天文学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_astronomy"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_chinese_civil_service_exam.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "chinese_civil_service_exam"
2
+ "description": "以下是关于中国公务员考试的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_chinese_civil_service_exam"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_college_education.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "college_education"
2
+ "description": "以下是关于大学教育学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_college_education"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_computer_science.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "computer_science"
2
+ "description": "以下是关于计算机科学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_computer_science"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_computer_security.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "computer_security"
2
+ "description": "以下是关于计算机安全的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_computer_security"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_economics.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "economics"
2
+ "description": "以下是关于经济学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_economics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_elementary_chinese.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "elementary_chinese"
2
+ "description": "以下是关于小学语文的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_elementary_chinese"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_elementary_information_and_technology.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "elementary_information_and_technology"
2
+ "description": "以下是关于小学信息技术的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_elementary_information_and_technology"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_elementary_mathematics.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "elementary_mathematics"
2
+ "description": "以下是关于初等数学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_elementary_mathematics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_ethnology.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "ethnology"
2
+ "description": "以下是关于民族学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_ethnology"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_genetics.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "genetics"
2
+ "description": "以下是关于遗传学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_genetics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_legal_and_moral_basis.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "legal_and_moral_basis"
2
+ "description": "以下是关于法律与道德基础的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_legal_and_moral_basis"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_marxist_theory.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "marxist_theory"
2
+ "description": "以下是关于马克思主义理论的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_marxist_theory"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_modern_chinese.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "modern_chinese"
2
+ "description": "以下是关于现代汉语的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_modern_chinese"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_philosophy.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "philosophy"
2
+ "description": "以下是关于哲学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_philosophy"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_professional_medicine.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "professional_medicine"
2
+ "description": "以下是关于专业医学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_professional_medicine"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_professional_psychology.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "professional_psychology"
2
+ "description": "以下是关于专业心理学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_professional_psychology"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_security_study.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "security_study"
2
+ "description": "以下是关于安全研究的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_security_study"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_sports_science.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "sports_science"
2
+ "description": "以下是关于体育学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_sports_science"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_default_virology.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "virology"
2
+ "description": "以下是关于病毒学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_virology"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_education.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "education"
2
+ "description": "以下是关于教育学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_education"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_elementary_chinese.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "elementary_chinese"
2
+ "description": "以下是关于小学语文的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_elementary_chinese"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_elementary_commonsense.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "elementary_commonsense"
2
+ "description": "以下是关于小学常识的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_elementary_commonsense"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_elementary_mathematics.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "elementary_mathematics"
2
+ "description": "以下是关于初等数学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_elementary_mathematics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_ethnology.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "ethnology"
2
+ "description": "以下是关于民族学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_ethnology"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_genetics.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "genetics"
2
+ "description": "以下是关于遗传学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_genetics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_high_school_mathematics.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "high_school_mathematics"
2
+ "description": "以下是关于高中数学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_high_school_mathematics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_high_school_politics.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "high_school_politics"
2
+ "description": "以下是关于高中政治的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_high_school_politics"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_international_law.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "international_law"
2
+ "description": "以下是关于国际法学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_international_law"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_logical.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "logical"
2
+ "description": "以下是关于逻辑学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_logical"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_machine_learning.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "machine_learning"
2
+ "description": "以下是关于机器学习的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_machine_learning"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_marxist_theory.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "marxist_theory"
2
+ "description": "以下是关于马克思主义理论的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_marxist_theory"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_nutrition.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "nutrition"
2
+ "description": "以下是关于营养学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_nutrition"
scripts/yans/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_philosophy.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ "dataset_name": "philosophy"
2
+ "description": "以下是关于哲学的单项选择题,请直接给出正确答案的选项。\n\n"
3
+ "include": "_default_template_yaml"
4
+ "task": "cmmlu_philosophy"