Add files using upload-large-folder tool
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml +27 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml +27 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml +3 -0
- scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml +3 -0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
tag:
|
| 2 |
+
- kmmlu
|
| 3 |
+
- kmmlu_direct
|
| 4 |
+
dataset_path: HAERAE-HUB/KMMLU
|
| 5 |
+
output_type: generate_until
|
| 6 |
+
test_split: test
|
| 7 |
+
fewshot_split: dev
|
| 8 |
+
doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:"
|
| 9 |
+
doc_to_target: "{{['A', 'B', 'C', 'D'][answer-1]}}"
|
| 10 |
+
metric_list:
|
| 11 |
+
- metric: exact_match
|
| 12 |
+
aggregation: mean
|
| 13 |
+
higher_is_better: true
|
| 14 |
+
ignore_case: true
|
| 15 |
+
ignore_punctuation: true
|
| 16 |
+
regexes_to_ignore:
|
| 17 |
+
- " "
|
| 18 |
+
generation_kwargs:
|
| 19 |
+
until:
|
| 20 |
+
- "Q:"
|
| 21 |
+
- "\n\n"
|
| 22 |
+
- "</s>"
|
| 23 |
+
- "."
|
| 24 |
+
do_sample: false
|
| 25 |
+
temperature: 0.0
|
| 26 |
+
metadata:
|
| 27 |
+
version: 2.0
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Accounting
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_accounting
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Agricultural-Sciences
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_agricultural_sciences
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Aviation-Engineering-and-Maintenance
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_aviation_engineering_and_maintenance
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Biology
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_biology
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Chemistry
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_chemistry
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Civil-Engineering
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_civil_engineering
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Computer-Science
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_computer_science
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Construction
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_construction
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Criminal-Law
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_criminal_law
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Ecology
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_ecology
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Economics
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_economics
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Education
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_education
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Electrical-Engineering
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_electrical_engineering
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Electronics-Engineering
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_electronics_engineering
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Energy-Management
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_energy_management
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Environmental-Science
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_environmental_science
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Fashion
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_fashion
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Food-Processing
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_food_processing
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Gas-Technology-and-Engineering
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_gas_technology_and_engineering
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Geomatics
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_geomatics
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Health
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_health
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Industrial-Engineer
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_industrial_engineer
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Information-Technology
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_information_technology
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Interior-Architecture-and-Design
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_interior_architecture_and_design
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Korean-History
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_korean_history
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Law
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_law
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Machine-Design-and-Manufacturing
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_machine_design_and_manufacturing
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Management
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_management
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Maritime-Engineering
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_maritime_engineering
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Marketing
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_marketing
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Materials-Engineering
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_materials_engineering
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Math
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_math
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Mechanical-Engineering
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_mechanical_engineering
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Nondestructive-Testing
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_nondestructive_testing
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Patent
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_patent
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Political-Science-and-Sociology
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_political_science_and_sociology
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Psychology
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_psychology
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Public-Safety
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_public_safety
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Railway-and-Automotive-Engineering
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_railway_and_automotive_engineering
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Real-Estate
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_real_estate
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Refrigerating-Machinery
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_refrigerating_machinery
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Social-Welfare
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_social_welfare
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Taxation
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_taxation
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: Telecommunications-and-Wireless-Technology
|
| 2 |
+
include: _direct_kmmlu_yaml
|
| 3 |
+
task: kmmlu_direct_telecommunications_and_wireless_technology
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
tag:
|
| 2 |
+
- kmmlu
|
| 3 |
+
- kmmlu_hard_direct
|
| 4 |
+
dataset_path: HAERAE-HUB/KMMLU-HARD
|
| 5 |
+
output_type: generate_until
|
| 6 |
+
test_split: test
|
| 7 |
+
fewshot_split: dev
|
| 8 |
+
doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:"
|
| 9 |
+
doc_to_target: "{{['A', 'B', 'C', 'D'][answer-1]}}"
|
| 10 |
+
metric_list:
|
| 11 |
+
- metric: exact_match
|
| 12 |
+
aggregation: mean
|
| 13 |
+
higher_is_better: true
|
| 14 |
+
ignore_case: true
|
| 15 |
+
ignore_punctuation: true
|
| 16 |
+
regexes_to_ignore:
|
| 17 |
+
- " "
|
| 18 |
+
generation_kwargs:
|
| 19 |
+
until:
|
| 20 |
+
- "Q:"
|
| 21 |
+
- "\n\n"
|
| 22 |
+
- "</s>"
|
| 23 |
+
- "."
|
| 24 |
+
do_sample: false
|
| 25 |
+
temperature: 0.0
|
| 26 |
+
metadata:
|
| 27 |
+
version: 2.0
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: aviation_engineering_and_maintenance
|
| 2 |
+
include: _direct_hard_kmmlu_yaml
|
| 3 |
+
task: kmmlu_hard_direct_aviation_engineering_and_maintenance
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: biology
|
| 2 |
+
include: _direct_hard_kmmlu_yaml
|
| 3 |
+
task: kmmlu_hard_direct_biology
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: chemistry
|
| 2 |
+
include: _direct_hard_kmmlu_yaml
|
| 3 |
+
task: kmmlu_hard_direct_chemistry
|
scripts/yans/lm-evaluation-harness/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_name: civil_engineering
|
| 2 |
+
include: _direct_hard_kmmlu_yaml
|
| 3 |
+
task: kmmlu_hard_direct_civil_engineering
|