Update README.md
Browse files
README.md
CHANGED
|
@@ -85,75 +85,71 @@ Apache 2.0
|
|
| 85 |
- Used EleutherAI's [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness)
|
| 86 |
- 5-shot scores
|
| 87 |
|
| 88 |
-
| Tasks |
|
| 89 |
-
|----------------------------------------------------------|-------
|
| 90 |
-
|haerae |
|
| 91 |
-
|
|
| 92 |
-
| -
|
| 93 |
-
|
|
| 94 |
-
| -
|
| 95 |
-
|
|
| 96 |
-
|
|
| 97 |
-
|
|
| 98 |
-
| -
|
| 99 |
-
|
|
| 100 |
-
| -
|
| 101 |
-
|
|
| 102 |
-
|
|
| 103 |
-
| -
|
| 104 |
-
| -
|
| 105 |
-
| -
|
| 106 |
-
| -
|
| 107 |
-
| -
|
| 108 |
-
| -
|
| 109 |
-
| -
|
| 110 |
-
| -
|
| 111 |
-
| -
|
| 112 |
-
| -
|
| 113 |
-
| -
|
| 114 |
-
| -
|
| 115 |
-
| -
|
| 116 |
-
| -
|
| 117 |
-
| -
|
| 118 |
-
| -
|
| 119 |
-
| -
|
| 120 |
-
| -
|
| 121 |
-
| -
|
| 122 |
-
| -
|
| 123 |
-
| -
|
| 124 |
-
| -
|
| 125 |
-
| -
|
| 126 |
-
| -
|
| 127 |
-
| -
|
| 128 |
-
| -
|
| 129 |
-
| -
|
| 130 |
-
| -
|
| 131 |
-
| -
|
| 132 |
-
| -
|
| 133 |
-
| -
|
| 134 |
-
| -
|
| 135 |
-
| -
|
| 136 |
-
| -
|
| 137 |
-
| -
|
| 138 |
-
| -
|
| 139 |
-
| -
|
| 140 |
-
| -
|
| 141 |
-
| -
|
| 142 |
-
|
|
| 143 |
-
|
|
| 144 |
-
|
|
| 145 |
-
|
|
| 146 |
-
|
|
| 147 |
-
|
|
| 148 |
-
|
|
| 149 |
-
|
|
| 150 |
-
|
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
| | |none | 5|f1 |0.5520|± |N/A |
|
| 154 |
-
| | |none | 5|acc_norm |0.6540|± |0.0213|
|
| 155 |
-
|kobest_sentineg | 1|none | 5|acc |0.9824|± |0.0066|
|
| 156 |
-
| | |none | 5|f1 |0.9824|± |N/A |
|
| 157 |
|
| 158 |
## Citation
|
| 159 |
|
|
|
|
| 85 |
- Evaluated with EleutherAI's [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness)
|
| 86 |
- All scores are 5-shot
|
| 87 |
|
| 88 |
+
| Tasks | Metric | Value | | Stderr |
|
| 89 |
+
|----------------------------------------------------------|-----------|--------:|---|--------:|
|
| 90 |
+
|haerae |acc_norm | 0.7874 |± | 0.0118 |
|
| 91 |
+
| - haerae_general_knowledge |acc | 0.5000 |± | 0.0378 |
|
| 92 |
+
| - haerae_history |acc | 0.8723 |± | 0.0244 |
|
| 93 |
+
| - haerae_loan_word |acc | 0.8402 |± | 0.0283 |
|
| 94 |
+
| - haerae_rare_word |acc | 0.8346 |± | 0.0185 |
|
| 95 |
+
| - haerae_standard_nomenclature |acc | 0.8301 |± | 0.0305 |
|
| 96 |
+
|kmmlu_direct |exact_match| 0.4205 |± | 0.0026 |
|
| 97 |
+
| - kmmlu_direct_accounting |exact_match| 0.3700 |± | 0.0485 |
|
| 98 |
+
| - kmmlu_direct_agricultural_sciences |exact_match| 0.3140 |± | 0.0147 |
|
| 99 |
+
| - kmmlu_direct_aviation_engineering_and_maintenance |exact_match| 0.3870 |± | 0.0154 |
|
| 100 |
+
| - kmmlu_direct_biology |exact_match| 0.3510 |± | 0.0151 |
|
| 101 |
+
| - kmmlu_direct_chemical_engineering |exact_match| 0.3910 |± | 0.0154 |
|
| 102 |
+
| - kmmlu_direct_chemistry |exact_match| 0.4000 |± | 0.0200 |
|
| 103 |
+
| - kmmlu_direct_civil_engineering |exact_match| 0.4010 |± | 0.0155 |
|
| 104 |
+
| - kmmlu_direct_computer_science |exact_match| 0.6520 |± | 0.0151 |
|
| 105 |
+
| - kmmlu_direct_construction |exact_match| 0.3080 |± | 0.0146 |
|
| 106 |
+
| - kmmlu_direct_criminal_law |exact_match| 0.3100 |± | 0.0328 |
|
| 107 |
+
| - kmmlu_direct_ecology |exact_match| 0.4660 |± | 0.0158 |
|
| 108 |
+
| - kmmlu_direct_economics |exact_match| 0.5385 |± | 0.0439 |
|
| 109 |
+
| - kmmlu_direct_education |exact_match| 0.6200 |± | 0.0488 |
|
| 110 |
+
| - kmmlu_direct_electrical_engineering |exact_match| 0.3000 |± | 0.0145 |
|
| 111 |
+
| - kmmlu_direct_electronics_engineering |exact_match| 0.4740 |± | 0.0158 |
|
| 112 |
+
| - kmmlu_direct_energy_management |exact_match| 0.3560 |± | 0.0151 |
|
| 113 |
+
| - kmmlu_direct_environmental_science |exact_match| 0.2980 |± | 0.0145 |
|
| 114 |
+
| - kmmlu_direct_fashion |exact_match| 0.4470 |± | 0.0157 |
|
| 115 |
+
| - kmmlu_direct_food_processing |exact_match| 0.3690 |± | 0.0153 |
|
| 116 |
+
| - kmmlu_direct_gas_technology_and_engineering |exact_match| 0.3000 |± | 0.0145 |
|
| 117 |
+
| - kmmlu_direct_geomatics |exact_match| 0.3820 |± | 0.0154 |
|
| 118 |
+
| - kmmlu_direct_health |exact_match| 0.5700 |± | 0.0498 |
|
| 119 |
+
| - kmmlu_direct_industrial_engineer |exact_match| 0.3830 |± | 0.0154 |
|
| 120 |
+
| - kmmlu_direct_information_technology |exact_match| 0.6090 |± | 0.0154 |
|
| 121 |
+
| - kmmlu_direct_interior_architecture_and_design |exact_match| 0.5440 |± | 0.0158 |
|
| 122 |
+
| - kmmlu_direct_korean_history |exact_match| 0.3800 |± | 0.0488 |
|
| 123 |
+
| - kmmlu_direct_law |exact_match| 0.4670 |± | 0.0158 |
|
| 124 |
+
| - kmmlu_direct_machine_design_and_manufacturing |exact_match| 0.3960 |± | 0.0155 |
|
| 125 |
+
| - kmmlu_direct_management |exact_match| 0.5030 |± | 0.0158 |
|
| 126 |
+
| - kmmlu_direct_maritime_engineering |exact_match| 0.4283 |± | 0.0202 |
|
| 127 |
+
| - kmmlu_direct_marketing |exact_match| 0.7460 |± | 0.0138 |
|
| 128 |
+
| - kmmlu_direct_materials_engineering |exact_match| 0.4020 |± | 0.0155 |
|
| 129 |
+
| - kmmlu_direct_math |exact_match| 0.2867 |± | 0.0262 |
|
| 130 |
+
| - kmmlu_direct_mechanical_engineering |exact_match| 0.3490 |± | 0.0151 |
|
| 131 |
+
| - kmmlu_direct_nondestructive_testing |exact_match| 0.3760 |± | 0.0153 |
|
| 132 |
+
| - kmmlu_direct_patent |exact_match| 0.3700 |± | 0.0485 |
|
| 133 |
+
| - kmmlu_direct_political_science_and_sociology |exact_match| 0.5300 |± | 0.0289 |
|
| 134 |
+
| - kmmlu_direct_psychology |exact_match| 0.4470 |± | 0.0157 |
|
| 135 |
+
| - kmmlu_direct_public_safety |exact_match| 0.3520 |± | 0.0151 |
|
| 136 |
+
| - kmmlu_direct_railway_and_automotive_engineering |exact_match| 0.3220 |± | 0.0148 |
|
| 137 |
+
| - kmmlu_direct_real_estate |exact_match| 0.4350 |± | 0.0351 |
|
| 138 |
+
| - kmmlu_direct_refrigerating_machinery |exact_match| 0.3240 |± | 0.0148 |
|
| 139 |
+
| - kmmlu_direct_social_welfare |exact_match| 0.4970 |± | 0.0158 |
|
| 140 |
+
| - kmmlu_direct_taxation |exact_match| 0.3800 |± | 0.0344 |
|
| 141 |
+
| - kmmlu_direct_telecommunications_and_wireless_technology|exact_match| 0.5480 |± | 0.0157 |
|
| 142 |
+
|kobest_boolq |acc | 0.9202 |± | 0.0072 |
|
| 143 |
+
| |f1 | 0.9202 |± |N/A |
|
| 144 |
+
|kobest_copa |acc | 0.8680 |± | 0.0107 |
|
| 145 |
+
| |f1 | 0.8678 |± |N/A |
|
| 146 |
+
|kobest_hellaswag |acc | 0.5560 |± | 0.0222 |
|
| 147 |
+
| |f1 | 0.5520 |± |N/A |
|
| 148 |
+
| |acc_norm | 0.6540 |± | 0.0213 |
|
| 149 |
+
|kobest_sentineg |acc | 0.9824 |± | 0.0066 |
|
| 150 |
+
| |f1 | 0.9824 |± |N/A |
|
| 151 |
+
|
| 152 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
## Citation
|
| 155 |
|