koichi12 committed on
Commit
b3ca754
·
verified ·
1 Parent(s): ad7a64c

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml +27 -0
  2. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml +3 -0
  3. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml +3 -0
  4. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml +3 -0
  5. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml +3 -0
  6. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml +3 -0
  7. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml +3 -0
  8. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml +3 -0
  9. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml +3 -0
  10. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml +3 -0
  11. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml +3 -0
  12. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml +3 -0
  13. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml +3 -0
  14. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml +3 -0
  15. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml +3 -0
  16. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml +3 -0
  17. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml +3 -0
  18. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml +3 -0
  19. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml +3 -0
  20. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml +3 -0
  21. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml +3 -0
  22. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml +3 -0
  23. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml +3 -0
  24. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml +3 -0
  25. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml +3 -0
  26. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml +3 -0
  27. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml +3 -0
  28. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml +3 -0
  29. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml +3 -0
  30. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml +3 -0
  31. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml +3 -0
  32. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml +3 -0
  33. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml +3 -0
  34. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml +3 -0
  35. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml +3 -0
  36. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml +3 -0
  37. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml +3 -0
  38. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml +3 -0
  39. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml +3 -0
  40. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml +3 -0
  41. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml +3 -0
  42. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml +3 -0
  43. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml +3 -0
  44. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml +3 -0
  45. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml +3 -0
  46. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml +27 -0
  47. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml +3 -0
  48. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml +3 -0
  49. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml +3 -0
  50. scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml +3 -0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ tag:
2
+ - kmmlu
3
+ - kmmlu_direct
4
+ dataset_path: HAERAE-HUB/KMMLU
5
+ output_type: generate_until
6
+ test_split: test
7
+ fewshot_split: dev
8
+ doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:"
9
+ doc_to_target: "{{['A', 'B', 'C', 'D'][answer-1]}}"
10
+ metric_list:
11
+ - metric: exact_match
12
+ aggregation: mean
13
+ higher_is_better: true
14
+ ignore_case: true
15
+ ignore_punctuation: true
16
+ regexes_to_ignore:
17
+ - " "
18
+ generation_kwargs:
19
+ until:
20
+ - "Q:"
21
+ - "\n\n"
22
+ - "</s>"
23
+ - "."
24
+ do_sample: false
25
+ temperature: 0.0
26
+ metadata:
27
+ version: 2.0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Accounting
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_accounting
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Agricultural-Sciences
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_agricultural_sciences
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Aviation-Engineering-and-Maintenance
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_aviation_engineering_and_maintenance
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Biology
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_biology
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Chemistry
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_chemistry
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Civil-Engineering
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_civil_engineering
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Computer-Science
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_computer_science
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Construction
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_construction
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Criminal-Law
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_criminal_law
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Ecology
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_ecology
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Economics
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_economics
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Education
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_education
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Electrical-Engineering
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_electrical_engineering
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Electronics-Engineering
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_electronics_engineering
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Energy-Management
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_energy_management
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Environmental-Science
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_environmental_science
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Fashion
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_fashion
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Food-Processing
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_food_processing
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Gas-Technology-and-Engineering
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_gas_technology_and_engineering
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Geomatics
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_geomatics
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Health
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_health
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Industrial-Engineer
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_industrial_engineer
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Information-Technology
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_information_technology
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Interior-Architecture-and-Design
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_interior_architecture_and_design
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Korean-History
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_korean_history
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Law
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_law
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Machine-Design-and-Manufacturing
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_machine_design_and_manufacturing
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Management
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_management
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Maritime-Engineering
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_maritime_engineering
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Marketing
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_marketing
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Materials-Engineering
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_materials_engineering
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Math
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_math
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Mechanical-Engineering
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_mechanical_engineering
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Nondestructive-Testing
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_nondestructive_testing
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Patent
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_patent
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Political-Science-and-Sociology
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_political_science_and_sociology
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Psychology
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_psychology
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Public-Safety
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_public_safety
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Railway-and-Automotive-Engineering
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_railway_and_automotive_engineering
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Real-Estate
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_real_estate
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Refrigerating-Machinery
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_refrigerating_machinery
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Social-Welfare
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_social_welfare
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Taxation
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_taxation
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: Telecommunications-and-Wireless-Technology
2
+ include: _direct_kmmlu_yaml
3
+ task: kmmlu_direct_telecommunications_and_wireless_technology
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ tag:
2
+ - kmmlu
3
+ - kmmlu_hard_direct
4
+ dataset_path: HAERAE-HUB/KMMLU-HARD
5
+ output_type: generate_until
6
+ test_split: test
7
+ fewshot_split: dev
8
+ doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:"
9
+ doc_to_target: "{{['A', 'B', 'C', 'D'][answer-1]}}"
10
+ metric_list:
11
+ - metric: exact_match
12
+ aggregation: mean
13
+ higher_is_better: true
14
+ ignore_case: true
15
+ ignore_punctuation: true
16
+ regexes_to_ignore:
17
+ - " "
18
+ generation_kwargs:
19
+ until:
20
+ - "Q:"
21
+ - "\n\n"
22
+ - "</s>"
23
+ - "."
24
+ do_sample: false
25
+ temperature: 0.0
26
+ metadata:
27
+ version: 2.0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: aviation_engineering_and_maintenance
2
+ include: _direct_hard_kmmlu_yaml
3
+ task: kmmlu_hard_direct_aviation_engineering_and_maintenance
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: biology
2
+ include: _direct_hard_kmmlu_yaml
3
+ task: kmmlu_hard_direct_biology
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: chemistry
2
+ include: _direct_hard_kmmlu_yaml
3
+ task: kmmlu_hard_direct_chemistry
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: civil_engineering
2
+ include: _direct_hard_kmmlu_yaml
3
+ task: kmmlu_hard_direct_civil_engineering