| global_mmlu_full_en | 0 | none | | acc | ↑ | 0.6053 | ± | 0.0039 |
| - global_mmlu_full_en_humanities | 0 | none | | acc | ↑ | 0.5515 | ± | 0.0068 |
| - global_mmlu_full_en_formal_logic | 0 | none | 1 | acc | ↑ | 0.4365 | ± | 0.0444 |
| - global_mmlu_full_en_high_school_european_history | 0 | none | 1 | acc | ↑ | 0.7455 | ± | 0.0340 |
| - global_mmlu_full_en_high_school_us_history | 0 | none | 1 | acc | ↑ | 0.8284 | ± | 0.0265 |
| - global_mmlu_full_en_high_school_world_history | 0 | none | 1 | acc | ↑ | 0.8059 | ± | 0.0257 |
| - global_mmlu_full_en_international_law | 0 | none | 1 | acc | ↑ | 0.7851 | ± | 0.0375 |
| - global_mmlu_full_en_jurisprudence | 0 | none | 1 | acc | ↑ | 0.7870 | ± | 0.0396 |
| - global_mmlu_full_en_logical_fallacies | 0 | none | 1 | acc | ↑ | 0.7485 | ± | 0.0341 |
| - global_mmlu_full_en_moral_disputes | 0 | none | 1 | acc | ↑ | 0.6532 | ± | 0.0256 |
| - global_mmlu_full_en_moral_scenarios | 0 | none | 1 | acc | ↑ | 0.3307 | ± | 0.0157 |
| - global_mmlu_full_en_philosophy | 0 | none | 1 | acc | ↑ | 0.6785 | ± | 0.0265 |
| - global_mmlu_full_en_prehistory | 0 | none | 1 | acc | ↑ | 0.6636 | ± | 0.0263 |
| - global_mmlu_full_en_professional_law | 0 | none | 1 | acc | ↑ | 0.4368 | ± | 0.0127 |
| - global_mmlu_full_en_world_religions | 0 | none | 1 | acc | ↑ | 0.8012 | ± | 0.0306 |
| - global_mmlu_full_en_other | 0 | none | | acc | ↑ | 0.6707 | ± | 0.0081 |
| - global_mmlu_full_en_business_ethics | 0 | none | 1 | acc | ↑ | 0.5700 | ± | 0.0498 |
| - global_mmlu_full_en_clinical_knowledge | 0 | none | 1 | acc | ↑ | 0.6717 | ± | 0.0289 |
| - global_mmlu_full_en_college_medicine | 0 | none | 1 | acc | ↑ | 0.6301 | ± | 0.0368 |
| - global_mmlu_full_en_global_facts | 0 | none | 1 | acc | ↑ | 0.3700 | ± | 0.0485 |
| - global_mmlu_full_en_human_aging | 0 | none | 1 | acc | ↑ | 0.7085 | ± | 0.0305 |
| - global_mmlu_full_en_management | 0 | none | 1 | acc | ↑ | 0.8058 | ± | 0.0392 |
| - global_mmlu_full_en_marketing | 0 | none | 1 | acc | ↑ | 0.8504 | ± | 0.0234 |
| - global_mmlu_full_en_medical_genetics | 0 | none | 1 | acc | ↑ | 0.6500 | ± | 0.0479 |
| - global_mmlu_full_en_miscellaneous | 0 | none | 1 | acc | ↑ | 0.7918 | ± | 0.0145 |
| - global_mmlu_full_en_nutrition | 0 | none | 1 | acc | ↑ | 0.6503 | ± | 0.0273 |
| - global_mmlu_full_en_professional_accounting | 0 | none | 1 | acc | ↑ | 0.4468 | ± | 0.0297 |
| - global_mmlu_full_en_professional_medicine | 0 | none | 1 | acc | ↑ | 0.6250 | ± | 0.0294 |
| - global_mmlu_full_en_virology | 0 | none | 1 | acc | ↑ | 0.5000 | ± | 0.0389 |
| - global_mmlu_full_en_social_sciences | 0 | none | | acc | ↑ | 0.7251 | ± | 0.0078 |
| - global_mmlu_full_en_econometrics | 0 | none | 1 | acc | ↑ | 0.4035 | ± | 0.0462 |
| - global_mmlu_full_en_high_school_geography | 0 | none | 1 | acc | ↑ | 0.8535 | ± | 0.0252 |
| - global_mmlu_full_en_high_school_government_and_politics | 0 | none | 1 | acc | ↑ | 0.8653 | ± | 0.0246 |
| - global_mmlu_full_en_high_school_macroeconomics | 0 | none | 1 | acc | ↑ | 0.6462 | ± | 0.0242 |
| - global_mmlu_full_en_high_school_microeconomics | 0 | none | 1 | acc | ↑ | 0.6807 | ± | 0.0303 |
| - global_mmlu_full_en_high_school_psychology | 0 | none | 1 | acc | ↑ | 0.8330 | ± | 0.0160 |
| - global_mmlu_full_en_human_sexuality | 0 | none | 1 | acc | ↑ | 0.6947 | ± | 0.0404 |
| - global_mmlu_full_en_professional_psychology | 0 | none | 1 | acc | ↑ | 0.6193 | ± | 0.0196 |
| - global_mmlu_full_en_public_relations | 0 | none | 1 | acc | ↑ | 0.7000 | ± | 0.0439 |
| - global_mmlu_full_en_security_studies | 0 | none | 1 | acc | ↑ | 0.7755 | ± | 0.0267 |
| - global_mmlu_full_en_sociology | 0 | none | 1 | acc | ↑ | 0.8060 | ± | 0.0280 |
| - global_mmlu_full_en_us_foreign_policy | 0 | none | 1 | acc | ↑ | 0.8200 | ± | 0.0386 |
| - global_mmlu_full_en_stem | 0 | none | | acc | ↑ | 0.5043 | ± | 0.0084 |
| - global_mmlu_full_en_abstract_algebra | 0 | none | 1 | acc | ↑ | 0.3100 | ± | 0.0465 |
| - global_mmlu_full_en_anatomy | 0 | none | 1 | acc | ↑ | 0.6148 | ± | 0.0420 |
| - global_mmlu_full_en_astronomy | 0 | none | 1 | acc | ↑ | 0.7237 | ± | 0.0364 |
| - global_mmlu_full_en_college_biology | 0 | none | 1 | acc | ↑ | 0.7083 | ± | 0.0380 |
| - global_mmlu_full_en_college_chemistry | 0 | none | 1 | acc | ↑ | 0.4700 | ± | 0.0502 |
| - global_mmlu_full_en_college_computer_science | 0 | none | 1 | acc | ↑ | 0.4900 | ± | 0.0502 |
| - global_mmlu_full_en_college_mathematics | 0 | none | 1 | acc | ↑ | 0.3200 | ± | 0.0469 |
| - global_mmlu_full_en_college_physics | 0 | none | 1 | acc | ↑ | 0.3529 | ± | 0.0476 |
| - global_mmlu_full_en_computer_security | 0 | none | 1 | acc | ↑ | 0.7400 | ± | 0.0441 |
| - global_mmlu_full_en_conceptual_physics | 0 | none | 1 | acc | ↑ | 0.5660 | ± | 0.0324 |
| - global_mmlu_full_en_electrical_engineering | 0 | none | 1 | acc | ↑ | 0.6000 | ± | 0.0408 |
| - global_mmlu_full_en_elementary_mathematics | 0 | none | 1 | acc | ↑ | 0.3968 | ± | 0.0252 |
| - global_mmlu_full_en_high_school_biology | 0 | none | 1 | acc | ↑ | 0.7613 | ± | 0.0243 |
| - global_mmlu_full_en_high_school_chemistry | 0 | none | 1 | acc | ↑ | 0.4975 | ± | 0.0352 |
| - global_mmlu_full_en_high_school_computer_science | 0 | none | 1 | acc | ↑ | 0.6500 | ± | 0.0479 |
| - global_mmlu_full_en_high_school_mathematics | 0 | none | 1 | acc | ↑ | 0.2556 | ± | 0.0266 |
| - global_mmlu_full_en_high_school_physics | 0 | none | 1 | acc | ↑ | 0.3245 | ± | 0.0382 |
| - global_mmlu_full_en_high_school_statistics | 0 | none | 1 | acc | ↑ | 0.4213 | ± | 0.0337 |
| - global_mmlu_full_en_machine_learning | 0 | none | 1 | acc | ↑ | 0.4018 | ± | 0.0465 |