diff --git a/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-25-38.029337_lambada_openai.json b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-25-38.029337_lambada_openai.json new file mode 100644 index 0000000000000000000000000000000000000000..bec3eab0a11526954028ad4a69f8ebce9083c47e --- /dev/null +++ b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-25-38.029337_lambada_openai.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096", + "tasks": [ + "lambada_openai" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai": { + "alias": "lambada_openai", + "perplexity,none": 22.798507649915244, + "perplexity_stderr,none": 0.7375150682316036, + "acc,none": 0.38404812730448284, + "acc_stderr,none": 0.006776076316867708 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-40-26.868929_hellaswag.json b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-40-26.868929_hellaswag.json new file mode 100644 index 0000000000000000000000000000000000000000..469275802cd1519f8d410ce6be49fcd82f43aa10 --- /dev/null +++ b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-40-26.868929_hellaswag.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096", + "tasks": [ + "hellaswag" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "hellaswag": { + "alias": "hellaswag", + "acc,none": 0.29197371041625175, + "acc_stderr,none": 0.004537410615572915, + "acc_norm,none": 0.3192591117307309, + "acc_norm_stderr,none": 0.004652368273845513 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-41-59.115770_piqa.json b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-41-59.115770_piqa.json new file mode 100644 index 0000000000000000000000000000000000000000..12d2d21bb4623b932ef60eb8567c27a8980c5cd3 --- /dev/null +++ b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-41-59.115770_piqa.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096", + "tasks": [ + "piqa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "piqa": { + "alias": "piqa", + "acc,none": 0.6137105549510338, + "acc_stderr,none": 0.011360138833823677, + "acc_norm,none": 0.6186071817192601, + "acc_norm_stderr,none": 0.011332850406528682 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-45-05.505108_arc_easy.json b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-45-05.505108_arc_easy.json new file mode 100644 index 0000000000000000000000000000000000000000..8bed090f512767b9784a478511b8eae2b0214552 --- /dev/null +++ b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-45-05.505108_arc_easy.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096", + "tasks": [ + "arc_easy" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_easy": { + "alias": "arc_easy", + "acc,none": 0.44234006734006737, + "acc_stderr,none": 0.010191334444220846, + "acc_norm,none": 0.3939393939393939, + "acc_norm_stderr,none": 0.010026305355981804 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-46-42.690976_arc_challenge.json b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-46-42.690976_arc_challenge.json new file mode 100644 index 0000000000000000000000000000000000000000..6967e1f1b1ff36d45fe9ad2c709c416b187a3a2f --- /dev/null +++ b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-46-42.690976_arc_challenge.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096", + "tasks": [ + "arc_challenge" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_challenge": { + "alias": "arc_challenge", + "acc,none": 0.19880546075085323, + "acc_stderr,none": 0.011662850198175536, + "acc_norm,none": 0.22781569965870307, + "acc_norm_stderr,none": 0.012256708602326919 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-15T23-56-38.085921_glue.json b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-15T23-56-38.085921_glue.json new file mode 100644 index 0000000000000000000000000000000000000000..4d65f52ae361403476094b1f9b2cf9cfec5cbdd7 --- /dev/null +++ b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-15T23-56-38.085921_glue.json @@ -0,0 +1,69 @@ +{ + "model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth", + "tasks": [ + "glue" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "cola": { + "alias": "cola", + "mcc,none": 0.05873054109498616, + "mcc_stderr,none": 0.03406044652133965 + }, + "mnli": { + "alias": "mnli", + "acc,none": 0.3186958736627611, + "acc_stderr,none": 0.004703657632807151 + }, + "mnli_mismatch": { + "alias": "mnli_mismatch", + "acc,none": 0.32414564686737185, + "acc_stderr,none": 0.00472060656953387 + }, + "mrpc": { + "alias": "mrpc", + "acc,none": 0.6838235294117647, + "acc_stderr,none": 0.023048336668420193, + "f1,none": 0.8122270742358079, + "f1_stderr,none": 0.01642422915504594 + }, + "qnli": { + "alias": "qnli", + "acc,none": 0.5024711696869851, + "acc_stderr,none": 0.006765327922882504 + }, + "qqp": { + "alias": "qqp", + "acc,none": 0.37380657927281724, + "acc_stderr,none": 0.002406197221605358, + "f1,none": 0.5285562652464573, + "f1_stderr,none": 0.0025938199201718647 + }, + "rte": { + "alias": "rte", + "acc,none": 0.5018050541516246, + "acc_stderr,none": 0.030096267148976626 + }, + "sst2": { + "alias": "sst2", + "acc,none": 0.5091743119266054, + "acc_stderr,none": 0.016939001525351532 + }, + "wnli": { + "alias": "wnli", + "acc,none": 0.4647887323943662, + "acc_stderr,none": 0.05961305784972239 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-15T23-57-53.763827_winogrande.json b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-15T23-57-53.763827_winogrande.json new file mode 100644 index 0000000000000000000000000000000000000000..44ac959b71d2c634e12a9713d759a6205b3ed0aa --- /dev/null +++ b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-15T23-57-53.763827_winogrande.json @@ -0,0 +1,25 @@ +{ + "model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth", + "tasks": [ + "winogrande" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "winogrande": { + "alias": "winogrande", + "acc,none": 0.5098658247829518, + "acc_stderr,none": 0.014049749833367589 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-15T23-59-54.095066_sciq.json b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-15T23-59-54.095066_sciq.json new file mode 100644 index 0000000000000000000000000000000000000000..7c42d3dbbca75f7f95739ceabb1f2ebba5a025c3 --- /dev/null +++ b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-15T23-59-54.095066_sciq.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth", + "tasks": [ + "sciq" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "sciq": { + "alias": "sciq", + "acc,none": 0.756, + "acc_stderr,none": 0.013588548437881431, + "acc_norm,none": 0.672, + "acc_norm_stderr,none": 0.014853842487270334 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T00-22-58.345845_mmlu.json b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T00-22-58.345845_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..492afdd9b751379fbbf530d73acdd7bbcc13c278 --- /dev/null +++ b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T00-22-58.345845_mmlu.json @@ -0,0 +1,330 @@ +{ + "model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth", + "tasks": [ + "mmlu" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.2315909414613303, + "acc_stderr,none": 0.0035530263924027183, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.2452709883103082, + "acc_stderr,none": 0.006271012587014002, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.31746031746031744, + "acc_stderr,none": 0.04163453031302859 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.23030303030303031, + "acc_stderr,none": 0.0328766675860349 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.27450980392156865, + "acc_stderr,none": 0.03132179803083292 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.270042194092827, + "acc_stderr,none": 0.028900721906293426 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.2396694214876033, + "acc_stderr,none": 0.03896878985070417 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.25, + "acc_stderr,none": 0.04186091791394607 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.22085889570552147, + "acc_stderr,none": 0.032591773927421776 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.24855491329479767, + "acc_stderr,none": 0.023267528432100174 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.23798882681564246, + "acc_stderr,none": 0.014242630070574885 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.19614147909967847, + "acc_stderr,none": 0.022552447780478026 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.2222222222222222, + "acc_stderr,none": 0.023132376234543325 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.2470664928292047, + "acc_stderr,none": 0.011015752255279338 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.30994152046783624, + "acc_stderr,none": 0.03546976959393161 + }, + "mmlu_other": { + "acc,none": 0.24589636305117477, + "acc_stderr,none": 0.0077088095866843925, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.3, + "acc_stderr,none": 0.046056618647183814 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.20754716981132076, + "acc_stderr,none": 0.02495991802891127 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.24277456647398843, + "acc_stderr,none": 0.0326926380614177 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.19, + "acc_stderr,none": 0.039427724440366234 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.31390134529147984, + "acc_stderr,none": 0.03114679648297246 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.1650485436893204, + "acc_stderr,none": 0.036756688322331886 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.2905982905982906, + "acc_stderr,none": 0.029745048572674064 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.33, + "acc_stderr,none": 0.04725815626252604 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.2388250319284802, + "acc_stderr,none": 0.015246803197398682 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.238562091503268, + "acc_stderr,none": 0.02440439492808787 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.24822695035460993, + "acc_stderr,none": 0.025770015644290406 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.1948529411764706, + "acc_stderr,none": 0.024060599423487424 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.28313253012048195, + "acc_stderr,none": 0.03507295431370518 + }, + "mmlu_social_sciences": { + "acc,none": 0.2183945401364966, + "acc_stderr,none": 0.007444613745005196, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.22807017543859648, + "acc_stderr,none": 0.03947152782669415 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.17676767676767677, + "acc_stderr,none": 0.027178752639044915 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.20207253886010362, + "acc_stderr,none": 0.02897908979429673 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.2076923076923077, + "acc_stderr,none": 0.02056753956724681 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.2184873949579832, + "acc_stderr,none": 0.026841514322958938 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.2, + "acc_stderr,none": 0.017149858514250958 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.2595419847328244, + "acc_stderr,none": 0.0384487613978527 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.2549019607843137, + "acc_stderr,none": 0.017630827375148383 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.19090909090909092, + "acc_stderr,none": 0.03764425585984924 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.17959183673469387, + "acc_stderr,none": 0.024573293589585637 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.23383084577114427, + "acc_stderr,none": 0.0299294154083484 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.28, + "acc_stderr,none": 0.045126085985421276 + }, + "mmlu_stem": { + "acc,none": 0.20995876942594355, + "acc_stderr,none": 0.007233045009100066, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816506 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.18518518518518517, + "acc_stderr,none": 0.03355677216313142 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.18421052631578946, + "acc_stderr,none": 0.0315469804508223 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.24305555555555555, + "acc_stderr,none": 0.03586879280080342 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.14, + "acc_stderr,none": 0.034873508801977725 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.25, + "acc_stderr,none": 0.04351941398892446 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.22, + "acc_stderr,none": 0.0416333199893227 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.21568627450980393, + "acc_stderr,none": 0.04092563958237655 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.26, + "acc_stderr,none": 0.044084400227680794 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.26382978723404255, + "acc_stderr,none": 0.02880998985410298 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.23448275862068965, + "acc_stderr,none": 0.035306258743465914 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.21693121693121692, + "acc_stderr,none": 0.021227082449445045 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.17096774193548386, + "acc_stderr,none": 0.02141724293632157 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.1625615763546798, + "acc_stderr,none": 0.02596030006460558 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.3, + "acc_stderr,none": 0.046056618647183814 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.2111111111111111, + "acc_stderr,none": 0.024882116857655078 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.1986754966887417, + "acc_stderr,none": 0.032578473844367746 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.12962962962962962, + "acc_stderr,none": 0.022907883151288597 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.29464285714285715, + "acc_stderr,none": 0.04327040932578728 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T00-33-26.008450_lambada_multilingual.json b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T00-33-26.008450_lambada_multilingual.json new file mode 100644 index 0000000000000000000000000000000000000000..496ac9c560b43867b4b9f2031f254b644391434f --- /dev/null +++ b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T00-33-26.008450_lambada_multilingual.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth", + "tasks": [ + "lambada_multilingual" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai_mt_de": { + "alias": "lambada_openai_mt_de", + "perplexity,none": 438.1123448025884, + "perplexity_stderr,none": 27.631396537461868, + "acc,none": 0.17970114496409859, + "acc_stderr,none": 0.005349011697308402 + }, + "lambada_openai_mt_en": { + "alias": "lambada_openai_mt_en", + "perplexity,none": 21.873502960243776, + "perplexity_stderr,none": 0.7063940196221459, + "acc,none": 0.39239278090432755, + "acc_stderr,none": 0.006802742619162039 + }, + "lambada_openai_mt_es": { + "alias": "lambada_openai_mt_es", + "perplexity,none": 520.7083247325162, + "perplexity_stderr,none": 31.60312563024137, + "acc,none": 0.17581991073161266, + "acc_stderr,none": 0.005303433892159959 + }, + "lambada_openai_mt_fr": { + "alias": "lambada_openai_mt_fr", + "perplexity,none": 315.69320350745926, + "perplexity_stderr,none": 19.029382127916357, + "acc,none": 0.21075101882398603, + "acc_stderr,none": 0.005682035322435383 + }, + "lambada_openai_mt_it": { + "alias": "lambada_openai_mt_it", + "perplexity,none": 753.7215739042271, + "perplexity_stderr,none": 49.15367277380386, + "acc,none": 0.16475839316902774, + "acc_stderr,none": 0.005168230882239359 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T09-56-09.522548_pawsx.json b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T09-56-09.522548_pawsx.json new file mode 100644 index 0000000000000000000000000000000000000000..5c2031a39e1d8453d55b7d8e5d479f5e5e65621b --- /dev/null +++ b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T09-56-09.522548_pawsx.json @@ -0,0 +1,60 @@ +{ + "model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth", + "tasks": [ + "pawsx" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "pawsx": { + "acc,none": 0.48514285714285715, + "acc_stderr,none": 0.0042164415074101135, + "alias": "pawsx" + }, + "paws_de": { + "alias": " - paws_de", + "acc,none": 0.4985, + "acc_stderr,none": 0.011183085696839198 + }, + "paws_en": { + "alias": " - paws_en", + "acc,none": 0.5015, + "acc_stderr,none": 0.011183085696839203 + }, + "paws_es": { + "alias": " - paws_es", + "acc,none": 0.532, + "acc_stderr,none": 0.011160209457602887 + }, + "paws_fr": { + "alias": " - paws_fr", + "acc,none": 0.4715, + "acc_stderr,none": 0.0111649542364288 + }, + "paws_ja": { + "alias": " - paws_ja", + "acc,none": 0.4395, + "acc_stderr,none": 0.011100968009384213 + }, + "paws_ko": { + "alias": " - paws_ko", + "acc,none": 0.445, + "acc_stderr,none": 0.011115272135099217 + }, + "paws_zh": { + "alias": " - paws_zh", + "acc,none": 0.508, + "acc_stderr,none": 0.011181704488030004 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T10-00-53.579856_xcopa.json b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T10-00-53.579856_xcopa.json new file mode 100644 index 0000000000000000000000000000000000000000..924bfcc851bf9358992367459b8eb59aa2447ddd --- /dev/null +++ b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T10-00-53.579856_xcopa.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth", + "tasks": [ + "xcopa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xcopa": { + "acc,none": 0.5310909090909091, + "acc_stderr,none": 0.006728483885990303, + "alias": "xcopa" + }, + "xcopa_et": { + "alias": " - xcopa_et", + "acc,none": 0.504, + "acc_stderr,none": 0.022382357781962132 + }, + "xcopa_ht": { + "alias": " - xcopa_ht", + "acc,none": 0.51, + "acc_stderr,none": 0.02237859698923078 + }, + "xcopa_id": { + "alias": " - xcopa_id", + "acc,none": 0.544, + "acc_stderr,none": 0.022296238348407056 + }, + "xcopa_it": { + "alias": " - xcopa_it", + "acc,none": 0.514, + "acc_stderr,none": 0.022374298166353185 + }, + "xcopa_qu": { + "alias": " - xcopa_qu", + "acc,none": 0.502, + "acc_stderr,none": 0.02238289498648352 + }, + "xcopa_sw": { + "alias": " - xcopa_sw", + "acc,none": 0.544, + "acc_stderr,none": 0.022296238348407056 + }, + "xcopa_ta": { + "alias": " - xcopa_ta", + "acc,none": 0.53, + "acc_stderr,none": 0.022342748192502846 + }, + "xcopa_th": { + "alias": " - xcopa_th", + "acc,none": 0.578, + "acc_stderr,none": 0.022109039310618556 + }, + "xcopa_tr": { + "alias": " - xcopa_tr", + "acc,none": 0.562, + "acc_stderr,none": 0.022210326363977417 + }, + "xcopa_vi": { + "alias": " - xcopa_vi", + "acc,none": 0.53, + "acc_stderr,none": 0.022342748192502843 + }, + "xcopa_zh": { + "alias": " - xcopa_zh", + "acc,none": 0.524, + "acc_stderr,none": 0.0223572738810164 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T11-13-06.581208_xnli.json b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T11-13-06.581208_xnli.json new file mode 100644 index 0000000000000000000000000000000000000000..ec53ab8952c0cb0c618bd0886b38e7752704bb87 --- /dev/null +++ b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T11-13-06.581208_xnli.json @@ -0,0 +1,100 @@ +{ + "model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth", + "tasks": [ + "xnli" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xnli": { + "acc,none": 0.3610709504685408, + "acc_stderr,none": 0.002478789488090545, + "alias": "xnli" + }, + "xnli_ar": { + "alias": " - xnli_ar", + "acc,none": 0.3433734939759036, + "acc_stderr,none": 0.009517658993060705 + }, + "xnli_bg": { + "alias": " - xnli_bg", + "acc,none": 0.3457831325301205, + "acc_stderr,none": 0.009533455033752768 + }, + "xnli_de": { + "alias": " - xnli_de", + "acc,none": 0.39718875502008033, + "acc_stderr,none": 0.009807915070677289 + }, + "xnli_el": { + "alias": " - xnli_el", + "acc,none": 0.3353413654618474, + "acc_stderr,none": 0.009463034891512703 + }, + "xnli_en": { + "alias": " - xnli_en", + "acc,none": 0.46224899598393576, + "acc_stderr,none": 0.009993466360872784 + }, + "xnli_es": { + "alias": " - xnli_es", + "acc,none": 0.344578313253012, + "acc_stderr,none": 0.009525590900110655 + }, + "xnli_fr": { + "alias": " - xnli_fr", + "acc,none": 0.41044176706827307, + "acc_stderr,none": 0.00985999467258512 + }, + "xnli_hi": { + "alias": " - xnli_hi", + "acc,none": 0.348995983935743, + "acc_stderr,none": 0.009554095988300674 + }, + "xnli_ru": { + "alias": " - xnli_ru", + "acc,none": 0.3795180722891566, + "acc_stderr,none": 0.009726763372837137 + }, + "xnli_sw": { + "alias": " - xnli_sw", + "acc,none": 0.3285140562248996, + "acc_stderr,none": 0.009414190734131762 + }, + "xnli_th": { + "alias": " - xnli_th", + "acc,none": 0.3285140562248996, + "acc_stderr,none": 0.009414190734131762 + }, + "xnli_tr": { + "alias": " - xnli_tr", + "acc,none": 0.35903614457831323, + "acc_stderr,none": 0.00961553399241459 + }, + "xnli_ur": { + "alias": " - xnli_ur", + "acc,none": 0.3333333333333333, + "acc_stderr,none": 0.009448900914617614 + }, + "xnli_vi": { + "alias": " - xnli_vi", + "acc,none": 0.36305220883534134, + "acc_stderr,none": 0.00963882313398499 + }, + "xnli_zh": { + "alias": " - xnli_zh", + "acc,none": 0.336144578313253, + "acc_stderr,none": 0.00946863466929353 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T11-52-46.021580_xstorycloze.json b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T11-52-46.021580_xstorycloze.json new file mode 100644 index 0000000000000000000000000000000000000000..3db9c405f174b848814879a3cb6b397a953d6a4d --- /dev/null +++ b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T11-52-46.021580_xstorycloze.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth", + "tasks": [ + "xstorycloze" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xstorycloze": { + "acc,none": 0.5168762409000662, + "acc_stderr,none": 0.0038693051060829046, + "alias": "xstorycloze" + }, + "xstorycloze_ar": { + "alias": " - xstorycloze_ar", + "acc,none": 0.47253474520185307, + "acc_stderr,none": 0.012847698270388213 + }, + "xstorycloze_en": { + "alias": " - xstorycloze_en", + "acc,none": 0.6029119788219722, + "acc_stderr,none": 0.012591627740247462 + }, + "xstorycloze_es": { + "alias": " - xstorycloze_es", + "acc,none": 0.5241561879549967, + "acc_stderr,none": 0.01285210005730961 + }, + "xstorycloze_eu": { + "alias": " - xstorycloze_eu", + "acc,none": 0.5168762409000662, + "acc_stderr,none": 0.012859793919977602 + }, + "xstorycloze_hi": { + "alias": " - xstorycloze_hi", + "acc,none": 0.514890800794176, + "acc_stderr,none": 0.012861417842074004 + }, + "xstorycloze_id": { + "alias": " - xstorycloze_id", + "acc,none": 0.500992720052945, + "acc_stderr,none": 0.012867099955422933 + }, + "xstorycloze_my": { + "alias": " - xstorycloze_my", + "acc,none": 0.49503639973527463, + "acc_stderr,none": 0.012866491277589943 + }, + "xstorycloze_ru": { + "alias": " - xstorycloze_ru", + "acc,none": 0.4943745863666446, + "acc_stderr,none": 0.01286631092307252 + }, + "xstorycloze_sw": { + "alias": " - xstorycloze_sw", + "acc,none": 0.5056254136333554, + "acc_stderr,none": 0.012866310923072511 + }, + "xstorycloze_te": { + "alias": " - xstorycloze_te", + "acc,none": 0.5420251489080079, + "acc_stderr,none": 0.012821595164245275 + }, + "xstorycloze_zh": { + "alias": " - xstorycloze_zh", + "acc,none": 0.5162144275314361, + "acc_stderr,none": 0.012860357805055855 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T11-55-53.206548_xwinograd.json b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T11-55-53.206548_xwinograd.json new file mode 100644 index 0000000000000000000000000000000000000000..fde8ac3e31d698b336bdb35cea0c88fbe2c3350f --- /dev/null +++ b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T11-55-53.206548_xwinograd.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth", + "tasks": [ + "xwinograd" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xwinograd": { + "acc,none": 0.5927174645987863, + "acc_stderr,none": 0.007316470702661161, + "alias": "xwinograd" + }, + "xwinograd_en": { + "alias": " - xwinograd_en", + "acc,none": 0.6468817204301075, + "acc_stderr,none": 0.009914126992783658 + }, + "xwinograd_fr": { + "alias": " - xwinograd_fr", + "acc,none": 0.5421686746987951, + "acc_stderr,none": 0.05501904358494246 + }, + "xwinograd_jp": { + "alias": " - xwinograd_jp", + "acc,none": 0.5078206465067779, + "acc_stderr,none": 0.01615229055184455 + }, + "xwinograd_pt": { + "alias": " - xwinograd_pt", + "acc,none": 0.5285171102661597, + "acc_stderr,none": 0.030839820992717426 + }, + "xwinograd_ru": { + "alias": " - xwinograd_ru", + "acc,none": 0.546031746031746, + "acc_stderr,none": 0.028096800277810533 + }, + "xwinograd_zh": { + "alias": " - xwinograd_zh", + "acc,none": 0.5753968253968254, + "acc_stderr,none": 0.022038973193044563 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T12-17-19.970767_mmlu.json b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T12-17-19.970767_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..5ebe9338b7fa81159a506d5a66d21360460c2c73 --- /dev/null +++ b/lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T12-17-19.970767_mmlu.json @@ -0,0 +1,330 @@ +{ + "model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth", + "tasks": [ + "mmlu" + ], + "num_fewshot": 5, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.2315909414613303, + "acc_stderr,none": 0.0035530263924027183, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.2452709883103082, + "acc_stderr,none": 0.006271012587014002, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.31746031746031744, + "acc_stderr,none": 0.04163453031302859 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.23030303030303031, + "acc_stderr,none": 0.0328766675860349 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.27450980392156865, + "acc_stderr,none": 0.03132179803083292 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.270042194092827, + "acc_stderr,none": 0.028900721906293426 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.2396694214876033, + "acc_stderr,none": 0.03896878985070417 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.25, + "acc_stderr,none": 0.04186091791394607 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.22085889570552147, + "acc_stderr,none": 0.032591773927421776 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.24855491329479767, + "acc_stderr,none": 0.023267528432100174 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.23798882681564246, + "acc_stderr,none": 0.014242630070574885 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.19614147909967847, + "acc_stderr,none": 0.022552447780478026 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.2222222222222222, + "acc_stderr,none": 0.023132376234543325 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.2470664928292047, + "acc_stderr,none": 0.011015752255279338 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.30994152046783624, + "acc_stderr,none": 0.03546976959393161 + }, + "mmlu_other": { + "acc,none": 0.24589636305117477, + "acc_stderr,none": 0.0077088095866843925, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.3, + "acc_stderr,none": 0.046056618647183814 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.20754716981132076, + "acc_stderr,none": 0.02495991802891127 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.24277456647398843, + "acc_stderr,none": 0.0326926380614177 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.19, + "acc_stderr,none": 0.039427724440366234 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.31390134529147984, + "acc_stderr,none": 0.03114679648297246 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.1650485436893204, + "acc_stderr,none": 0.036756688322331886 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.2905982905982906, + "acc_stderr,none": 0.029745048572674064 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.33, + "acc_stderr,none": 0.04725815626252604 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.2388250319284802, + "acc_stderr,none": 0.015246803197398682 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.238562091503268, + "acc_stderr,none": 0.02440439492808787 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.24822695035460993, + "acc_stderr,none": 0.025770015644290406 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.1948529411764706, + "acc_stderr,none": 0.024060599423487424 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.28313253012048195, + "acc_stderr,none": 0.03507295431370518 + }, + "mmlu_social_sciences": { + "acc,none": 0.2183945401364966, + "acc_stderr,none": 0.007444613745005196, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.22807017543859648, + "acc_stderr,none": 0.03947152782669415 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.17676767676767677, + "acc_stderr,none": 0.027178752639044915 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.20207253886010362, + "acc_stderr,none": 0.02897908979429673 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.2076923076923077, + "acc_stderr,none": 0.02056753956724681 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.2184873949579832, + "acc_stderr,none": 0.026841514322958938 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.2, + "acc_stderr,none": 0.017149858514250958 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.2595419847328244, + "acc_stderr,none": 0.0384487613978527 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.2549019607843137, + "acc_stderr,none": 0.017630827375148383 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.19090909090909092, + "acc_stderr,none": 0.03764425585984924 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.17959183673469387, + "acc_stderr,none": 0.024573293589585637 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.23383084577114427, + "acc_stderr,none": 0.0299294154083484 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.28, + "acc_stderr,none": 0.045126085985421276 + }, + "mmlu_stem": { + "acc,none": 0.20995876942594355, + "acc_stderr,none": 0.007233045009100066, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816506 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.18518518518518517, + "acc_stderr,none": 0.03355677216313142 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.18421052631578946, + "acc_stderr,none": 0.0315469804508223 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.24305555555555555, + "acc_stderr,none": 0.03586879280080342 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.14, + "acc_stderr,none": 0.034873508801977725 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.25, + "acc_stderr,none": 0.04351941398892446 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.22, + "acc_stderr,none": 0.0416333199893227 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.21568627450980393, + "acc_stderr,none": 0.04092563958237655 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.26, + "acc_stderr,none": 0.044084400227680794 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.26382978723404255, + "acc_stderr,none": 0.02880998985410298 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.23448275862068965, + "acc_stderr,none": 0.035306258743465914 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.21693121693121692, + "acc_stderr,none": 0.021227082449445045 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.17096774193548386, + "acc_stderr,none": 0.02141724293632157 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.1625615763546798, + "acc_stderr,none": 0.02596030006460558 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.3, + "acc_stderr,none": 0.046056618647183814 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.2111111111111111, + "acc_stderr,none": 0.024882116857655078 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.1986754966887417, + "acc_stderr,none": 0.032578473844367746 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.12962962962962962, + "acc_stderr,none": 0.022907883151288597 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.29464285714285715, + "acc_stderr,none": 0.04327040932578728 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-14-42.123700_lambada_openai.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-14-42.123700_lambada_openai.json new file mode 100644 index 0000000000000000000000000000000000000000..724c592cdc60ac2c760a79a02d26e1f7b2805693 --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-14-42.123700_lambada_openai.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "lambada_openai" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai": { + "alias": "lambada_openai", + "perplexity,none": 8.874438771398895, + "perplexity_stderr,none": 0.25110180223575285, + "acc,none": 0.5402678051620415, + "acc_stderr,none": 0.0069433502956647445 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-39-58.725375_hellaswag.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-39-58.725375_hellaswag.json new file mode 100644 index 0000000000000000000000000000000000000000..c06f98b260294910792a412c2bda3539bb6c7cd0 --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-39-58.725375_hellaswag.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "hellaswag" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "hellaswag": { + "alias": "hellaswag", + "acc,none": 0.34176458872734516, + "acc_stderr,none": 0.004733317847006545, + "acc_norm,none": 0.40928101971718783, + "acc_norm_stderr,none": 0.004906962980328275 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-42-10.675354_piqa.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-42-10.675354_piqa.json new file mode 100644 index 0000000000000000000000000000000000000000..643960d030c084eb430d0d0997727d82c40cb319 --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-42-10.675354_piqa.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "piqa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "piqa": { + "alias": "piqa", + "acc,none": 0.6653971708378672, + "acc_stderr,none": 0.011009071725162505, + "acc_norm,none": 0.6724700761697497, + "acc_norm_stderr,none": 0.010949830482825478 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-46-46.674313_arc_easy.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-46-46.674313_arc_easy.json new file mode 100644 index 0000000000000000000000000000000000000000..c4da6a1346a8dffd1fed11513cc189ee570326bd --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-46-46.674313_arc_easy.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "arc_easy" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_easy": { + "alias": "arc_easy", + "acc,none": 0.5395622895622896, + "acc_stderr,none": 0.010227616386289006, + "acc_norm,none": 0.4877946127946128, + "acc_norm_stderr,none": 0.010256726235129009 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-49-11.748710_arc_challenge.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-49-11.748710_arc_challenge.json new file mode 100644 index 0000000000000000000000000000000000000000..e282433d79085a41ca443c47dbc5f803952f43b0 --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-49-11.748710_arc_challenge.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "arc_challenge" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_challenge": { + "alias": "arc_challenge", + "acc,none": 0.23976109215017063, + "acc_stderr,none": 0.012476304127453932, + "acc_norm,none": 0.2627986348122867, + "acc_norm_stderr,none": 0.012862523175351331 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-08-18.050596_glue.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-08-18.050596_glue.json new file mode 100644 index 0000000000000000000000000000000000000000..34bc558ce08d020f734f99bfa0bd58453f9d5c1e --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-08-18.050596_glue.json @@ -0,0 +1,69 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "glue" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "cola": { + "alias": "cola", + "mcc,none": 0.0024763761493444563, + "mcc_stderr,none": 0.030693706731630372 + }, + "mnli": { + "alias": "mnli", + "acc,none": 0.34212939378502294, + "acc_stderr,none": 0.004788973218637058 + }, + "mnli_mismatch": { + "alias": "mnli_mismatch", + "acc,none": 0.34662327095199347, + "acc_stderr,none": 0.004799675113044456 + }, + "mrpc": { + "alias": "mrpc", + "acc,none": 0.6838235294117647, + "acc_stderr,none": 0.023048336668420193, + "f1,none": 0.8122270742358079, + "f1_stderr,none": 0.01642422915504585 + }, + "qnli": { + "alias": "qnli", + "acc,none": 0.5066813106351822, + "acc_stderr,none": 0.006764806510150313 + }, + "qqp": { + "alias": "qqp", + "acc,none": 0.610907741775909, + "acc_stderr,none": 0.0024247535821838137, + "f1,none": 0.057628946264901455, + "f1_stderr,none": 0.002516531840072046 + }, + "rte": { + "alias": "rte", + "acc,none": 0.5270758122743683, + "acc_stderr,none": 0.030052303463143706 + }, + "sst2": { + "alias": "sst2", + "acc,none": 0.5149082568807339, + "acc_stderr,none": 0.0169343211533256 + }, + "wnli": { + "alias": "wnli", + "acc,none": 0.4647887323943662, + "acc_stderr,none": 0.0596130578497224 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-09-43.473147_winogrande.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-09-43.473147_winogrande.json new file mode 100644 index 0000000000000000000000000000000000000000..db18805336b0689346cdda8a0f059f490a1f8862 --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-09-43.473147_winogrande.json @@ -0,0 +1,25 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "winogrande" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "winogrande": { + "alias": "winogrande", + "acc,none": 0.531965272296764, + "acc_stderr,none": 0.01402373922116638 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-12-44.780584_sciq.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-12-44.780584_sciq.json new file mode 100644 index 0000000000000000000000000000000000000000..031e4c56a949023670b5afd2029cf649d7d280d1 --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-12-44.780584_sciq.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "sciq" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "sciq": { + "alias": "sciq", + "acc,none": 0.869, + "acc_stderr,none": 0.010674874844837957, + "acc_norm,none": 0.809, + "acc_norm_stderr,none": 0.012436787112179517 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-52-46.693108_mmlu.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-52-46.693108_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..136f968d9d9d7be5de9761dd6e53fe812c3335f0 --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-52-46.693108_mmlu.json @@ -0,0 +1,330 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "mmlu" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.23764421022646345, + "acc_stderr,none": 0.003587790530224594, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.24782146652497344, + "acc_stderr,none": 0.0062902893350153324, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.3253968253968254, + "acc_stderr,none": 0.041905964388711366 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.22424242424242424, + "acc_stderr,none": 0.03256866661681102 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.2107843137254902, + "acc_stderr,none": 0.028626547912437388 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.270042194092827, + "acc_stderr,none": 0.028900721906293433 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.2396694214876033, + "acc_stderr,none": 0.038968789850704164 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.32407407407407407, + "acc_stderr,none": 0.04524596007030048 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.20245398773006135, + "acc_stderr,none": 0.03157065078911902 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.2398843930635838, + "acc_stderr,none": 0.022989592543123567 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.25027932960893856, + "acc_stderr,none": 0.014487500852850412 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.19935691318327975, + "acc_stderr,none": 0.022691033780549656 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.23148148148148148, + "acc_stderr,none": 0.023468429832451152 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.25358539765319427, + "acc_stderr,none": 0.01111171533610114 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.2982456140350877, + "acc_stderr,none": 0.035087719298245654 + }, + "mmlu_other": { + "acc,none": 0.24460894753781784, + "acc_stderr,none": 0.007691719275922855, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.31, + "acc_stderr,none": 0.04648231987117316 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.19245283018867926, + "acc_stderr,none": 0.02426297983937226 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.2023121387283237, + "acc_stderr,none": 0.03063114553919882 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.24, + "acc_stderr,none": 0.042923469599092816 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.3273542600896861, + "acc_stderr,none": 0.03149384670994132 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.20388349514563106, + "acc_stderr,none": 0.03989139859531771 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.2905982905982906, + "acc_stderr,none": 0.029745048572674064 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.29, + "acc_stderr,none": 0.04560480215720684 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.23243933588761176, + "acc_stderr,none": 0.015104550008905704 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.25163398692810457, + "acc_stderr,none": 0.024848018263875195 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.2695035460992908, + "acc_stderr,none": 0.026469036818590627 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.1801470588235294, + "acc_stderr,none": 0.02334516361654486 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.26506024096385544, + "acc_stderr,none": 0.03436024037944967 + }, + "mmlu_social_sciences": { + "acc,none": 0.22099447513812154, + "acc_stderr,none": 0.007486677746029817, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.21929824561403508, + "acc_stderr,none": 0.0389243110651875 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.19696969696969696, + "acc_stderr,none": 0.028335609732463355 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.23316062176165803, + "acc_stderr,none": 0.030516111371476008 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.2, + "acc_stderr,none": 0.020280805062535726 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.20168067226890757, + "acc_stderr,none": 0.026064313406304534 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.21467889908256882, + "acc_stderr,none": 0.01760430414925649 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.22900763358778625, + "acc_stderr,none": 0.036853466317118506 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.24673202614379086, + "acc_stderr,none": 0.0174408203674025 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.22727272727272727, + "acc_stderr,none": 0.04013964554072775 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.21224489795918366, + "acc_stderr,none": 0.026176967197866767 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.21393034825870647, + "acc_stderr,none": 0.0289969096933289 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.27, + "acc_stderr,none": 0.0446196043338474 + }, + "mmlu_stem": { + "acc,none": 0.23184268950206152, + "acc_stderr,none": 0.00750773023393252, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.22, + "acc_stderr,none": 0.04163331998932269 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.23703703703703705, + "acc_stderr,none": 0.03673731683969506 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.21052631578947367, + "acc_stderr,none": 0.033176727875331574 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.2916666666666667, + "acc_stderr,none": 0.03800968060554859 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.21, + "acc_stderr,none": 0.040936018074033256 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.25, + "acc_stderr,none": 0.04351941398892446 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.24, + "acc_stderr,none": 0.04292346959909284 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.21568627450980393, + "acc_stderr,none": 0.04092563958237655 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.3, + "acc_stderr,none": 0.046056618647183814 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.28936170212765955, + "acc_stderr,none": 0.02964400657700962 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.25517241379310346, + "acc_stderr,none": 0.03632984052707842 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.21693121693121692, + "acc_stderr,none": 0.02122708244944506 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.1935483870967742, + "acc_stderr,none": 0.022475258525536057 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.1921182266009852, + "acc_stderr,none": 0.027719315709614778 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816506 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.26296296296296295, + "acc_stderr,none": 0.026842057873833706 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.2251655629139073, + "acc_stderr,none": 0.03410435282008937 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.16203703703703703, + "acc_stderr,none": 0.02513045365226846 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.2857142857142857, + "acc_stderr,none": 0.04287858751340456 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T17-10-04.238325_lambada_multilingual.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T17-10-04.238325_lambada_multilingual.json new file mode 100644 index 0000000000000000000000000000000000000000..48fc6e985a5ee96af19e2b43e98fb9e7ee04ea5c --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T17-10-04.238325_lambada_multilingual.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "lambada_multilingual" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai_mt_de": { + "alias": "lambada_openai_mt_de", + "perplexity,none": 122.582684053421, + "perplexity_stderr,none": 7.611410508434413, + "acc,none": 0.2905103823015719, + "acc_stderr,none": 0.006325078561068121 + }, + "lambada_openai_mt_en": { + "alias": "lambada_openai_mt_en", + "perplexity,none": 8.874438778769504, + "perplexity_stderr,none": 0.2511018027503425, + "acc,none": 0.5402678051620415, + "acc_stderr,none": 0.0069433502956647445 + }, + "lambada_openai_mt_es": { + "alias": "lambada_openai_mt_es", + "perplexity,none": 134.88945576369062, + "perplexity_stderr,none": 7.962805036133316, + "acc,none": 0.3062293809431399, + "acc_stderr,none": 0.006421603000130909 + }, + "lambada_openai_mt_fr": { + "alias": "lambada_openai_mt_fr", + "perplexity,none": 73.64587512871607, + "perplexity_stderr,none": 4.301058908781159, + "acc,none": 0.3685231903745391, + "acc_stderr,none": 0.006720834282814074 + }, + "lambada_openai_mt_it": { + "alias": "lambada_openai_mt_it", + "perplexity,none": 113.04863122926666, + "perplexity_stderr,none": 7.0611616362975695, + "acc,none": 0.33630894624490587, + "acc_stderr,none": 0.006582096796438631 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T17-31-49.237829_pawsx.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T17-31-49.237829_pawsx.json new file mode 100644 index 0000000000000000000000000000000000000000..720b5e42d6a3755a0ded4e04f5fa9a827bddb883 --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T17-31-49.237829_pawsx.json @@ -0,0 +1,60 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "pawsx" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "pawsx": { + "acc,none": 0.4947857142857143, + "acc_stderr,none": 0.0042167946968630675, + "alias": "pawsx" + }, + "paws_de": { + "alias": " - paws_de", + "acc,none": 0.545, + "acc_stderr,none": 0.011137752231145222 + }, + "paws_en": { + "alias": " - paws_en", + "acc,none": 0.533, + "acc_stderr,none": 0.011158752568250671 + }, + "paws_es": { + "alias": " - paws_es", + "acc,none": 0.515, + "acc_stderr,none": 0.011178102477052804 + }, + "paws_fr": { + "alias": " - paws_fr", + "acc,none": 0.463, + "acc_stderr,none": 0.011152474561478177 + }, + "paws_ja": { + "alias": " - paws_ja", + "acc,none": 0.4795, + "acc_stderr,none": 0.011173732641806813 + }, + "paws_ko": { + "alias": " - paws_ko", + "acc,none": 0.447, + "acc_stderr,none": 0.011120131683767735 + }, + "paws_zh": { + "alias": " - paws_zh", + "acc,none": 0.481, + "acc_stderr,none": 0.01117505887995606 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T17-37-40.081757_xcopa.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T17-37-40.081757_xcopa.json new file mode 100644 index 0000000000000000000000000000000000000000..5dfeb80f178ed9c22f5269543581e7ae67e0c7ef --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T17-37-40.081757_xcopa.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "xcopa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xcopa": { + "acc,none": 0.5401818181818182, + "acc_stderr,none": 0.006718472971138079, + "alias": "xcopa" + }, + "xcopa_et": { + "alias": " - xcopa_et", + "acc,none": 0.506, + "acc_stderr,none": 0.022381462412439324 + }, + "xcopa_ht": { + "alias": " - xcopa_ht", + "acc,none": 0.522, + "acc_stderr,none": 0.02236139673920788 + }, + "xcopa_id": { + "alias": " - xcopa_id", + "acc,none": 0.57, + "acc_stderr,none": 0.022162634426652835 + }, + "xcopa_it": { + "alias": " - xcopa_it", + "acc,none": 0.548, + "acc_stderr,none": 0.02227969410784342 + }, + "xcopa_qu": { + "alias": " - xcopa_qu", + "acc,none": 0.51, + "acc_stderr,none": 0.02237859698923078 + }, + "xcopa_sw": { + "alias": " - xcopa_sw", + "acc,none": 0.55, + "acc_stderr,none": 0.022270877485360437 + }, + "xcopa_ta": { + "alias": " - xcopa_ta", + "acc,none": 0.512, + "acc_stderr,none": 0.02237662679792717 + }, + "xcopa_th": { + "alias": " - xcopa_th", + "acc,none": 0.534, + "acc_stderr,none": 0.02233126442325838 + }, + "xcopa_tr": { + "alias": " - xcopa_tr", + "acc,none": 0.538, + "acc_stderr,none": 0.02231833811987053 + }, + "xcopa_vi": { + "alias": " - xcopa_vi", + "acc,none": 0.576, + "acc_stderr,none": 0.022122993778135404 + }, + "xcopa_zh": { + "alias": " - xcopa_zh", + "acc,none": 0.576, + "acc_stderr,none": 0.022122993778135404 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T19-11-48.808152_xnli.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T19-11-48.808152_xnli.json new file mode 100644 index 0000000000000000000000000000000000000000..dbce0a087973b1e7930329f02016083ffed41a7a --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T19-11-48.808152_xnli.json @@ -0,0 +1,100 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "xnli" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xnli": { + "acc,none": 0.3847657295850067, + "acc_stderr,none": 0.0025079690852275174, + "alias": "xnli" + }, + "xnli_ar": { + "alias": " - xnli_ar", + "acc,none": 0.3333333333333333, + "acc_stderr,none": 0.009448900914617616 + }, + "xnli_bg": { + "alias": " - xnli_bg", + "acc,none": 0.37028112449799194, + "acc_stderr,none": 0.009678915409840288 + }, + "xnli_de": { + "alias": " - xnli_de", + "acc,none": 0.43012048192771085, + "acc_stderr,none": 0.00992371167540806 + }, + "xnli_el": { + "alias": " - xnli_el", + "acc,none": 0.3582329317269076, + "acc_stderr,none": 0.009610788482973929 + }, + "xnli_en": { + "alias": " - xnli_en", + "acc,none": 0.4943775100401606, + "acc_stderr,none": 0.010021439203777294 + }, + "xnli_es": { + "alias": " - xnli_es", + "acc,none": 0.39759036144578314, + "acc_stderr,none": 0.009809602996075804 + }, + "xnli_fr": { + "alias": " - xnli_fr", + "acc,none": 0.42248995983935744, + "acc_stderr,none": 0.009900919227857794 + }, + "xnli_hi": { + "alias": " - xnli_hi", + "acc,none": 0.3650602409638554, + "acc_stderr,none": 0.009650194822749635 + }, + "xnli_ru": { + "alias": " - xnli_ru", + "acc,none": 0.43775100401606426, + "acc_stderr,none": 0.009944099734290161 + }, + "xnli_sw": { + "alias": " - xnli_sw", + "acc,none": 0.3409638554216867, + "acc_stderr,none": 0.009501591178361544 + }, + "xnli_th": { + "alias": " - xnli_th", + "acc,none": 0.348995983935743, + "acc_stderr,none": 0.009554095988300678 + }, + "xnli_tr": { + "alias": " - xnli_tr", + "acc,none": 0.3718875502008032, + "acc_stderr,none": 0.009687507958631799 + }, + "xnli_ur": { + "alias": " - xnli_ur", + "acc,none": 0.3345381526104418, + "acc_stderr,none": 0.009457404390939166 + }, + "xnli_vi": { + "alias": " - xnli_vi", + "acc,none": 0.38393574297188754, + "acc_stderr,none": 0.009748321202534375 + }, + "xnli_zh": { + "alias": " - xnli_zh", + "acc,none": 0.3819277108433735, + "acc_stderr,none": 0.00973862791451752 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T19-42-29.131682_xstorycloze.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T19-42-29.131682_xstorycloze.json new file mode 100644 index 0000000000000000000000000000000000000000..09a159d965e27155500dafdb18ea899e3fc22c0f --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T19-42-29.131682_xstorycloze.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "xstorycloze" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xstorycloze": { + "acc,none": 0.5407616870224415, + "acc_stderr,none": 0.003849468335294993, + "alias": "xstorycloze" + }, + "xstorycloze_ar": { + "alias": " - xstorycloze_ar", + "acc,none": 0.48974189278623426, + "acc_stderr,none": 0.012864417047980472 + }, + "xstorycloze_en": { + "alias": " - xstorycloze_en", + "acc,none": 0.6657842488418266, + "acc_stderr,none": 0.012139246810918228 + }, + "xstorycloze_es": { + "alias": " - xstorycloze_es", + "acc,none": 0.5698213103904699, + "acc_stderr,none": 0.012741052817471078 + }, + "xstorycloze_eu": { + "alias": " - xstorycloze_eu", + "acc,none": 0.5188616810059563, + "acc_stderr,none": 0.012857966762464996 + }, + "xstorycloze_hi": { + "alias": " - xstorycloze_hi", + "acc,none": 0.5168762409000662, + "acc_stderr,none": 0.012859793919977608 + }, + "xstorycloze_id": { + "alias": " - xstorycloze_id", + "acc,none": 0.5360688285903376, + "acc_stderr,none": 0.012833602406620017 + }, + "xstorycloze_my": { + "alias": " - xstorycloze_my", + "acc,none": 0.4930509596293845, + "acc_stderr,none": 0.01286588257096072 + }, + "xstorycloze_ru": { + "alias": " - xstorycloze_ru", + "acc,none": 0.5519523494374586, + "acc_stderr,none": 0.012797478885304733 + }, + "xstorycloze_sw": { + "alias": " - xstorycloze_sw", + "acc,none": 0.5029781601588352, + "acc_stderr,none": 0.012866897066011228 + }, + "xstorycloze_te": { + "alias": " - xstorycloze_te", + "acc,none": 0.5473196558570483, + "acc_stderr,none": 0.012809372866181954 + }, + "xstorycloze_zh": { + "alias": " - xstorycloze_zh", + "acc,none": 0.5559232296492389, + "acc_stderr,none": 0.012786390539820832 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T19-47-11.016382_xwinograd.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T19-47-11.016382_xwinograd.json new file mode 100644 index 0000000000000000000000000000000000000000..f9864f2460cd352c630f524b820fe5df2eef2a5e --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T19-47-11.016382_xwinograd.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "xwinograd" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xwinograd": { + "acc,none": 0.6556529557203866, + "acc_stderr,none": 0.007021810569184652, + "alias": "xwinograd" + }, + "xwinograd_en": { + "alias": " - xwinograd_en", + "acc,none": 0.7290322580645161, + "acc_stderr,none": 0.009219643045448322 + }, + "xwinograd_fr": { + "alias": " - xwinograd_fr", + "acc,none": 0.6144578313253012, + "acc_stderr,none": 0.0537495779731939 + }, + "xwinograd_jp": { + "alias": " - xwinograd_jp", + "acc,none": 0.5411887382690302, + "acc_stderr,none": 0.01609936161806395 + }, + "xwinograd_pt": { + "alias": " - xwinograd_pt", + "acc,none": 0.55893536121673, + "acc_stderr,none": 0.0306747666644263 + }, + "xwinograd_ru": { + "alias": " - xwinograd_ru", + "acc,none": 0.5746031746031746, + "acc_stderr,none": 0.027900777694976245 + }, + "xwinograd_zh": { + "alias": " - xwinograd_zh", + "acc,none": 0.6428571428571429, + "acc_stderr,none": 0.021364573561124416 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T20-21-44.743544_lambada_openai.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T20-21-44.743544_lambada_openai.json new file mode 100644 index 0000000000000000000000000000000000000000..724c592cdc60ac2c760a79a02d26e1f7b2805693 --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T20-21-44.743544_lambada_openai.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "lambada_openai" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai": { + "alias": "lambada_openai", + "perplexity,none": 8.874438771398895, + "perplexity_stderr,none": 0.25110180223575285, + "acc,none": 0.5402678051620415, + "acc_stderr,none": 0.0069433502956647445 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T22-02-39.723956_xnli.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T22-02-39.723956_xnli.json new file mode 100644 index 0000000000000000000000000000000000000000..dbce0a087973b1e7930329f02016083ffed41a7a --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T22-02-39.723956_xnli.json @@ -0,0 +1,100 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "xnli" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xnli": { + "acc,none": 0.3847657295850067, + "acc_stderr,none": 0.0025079690852275174, + "alias": "xnli" + }, + "xnli_ar": { + "alias": " - xnli_ar", + "acc,none": 0.3333333333333333, + "acc_stderr,none": 0.009448900914617616 + }, + "xnli_bg": { + "alias": " - xnli_bg", + "acc,none": 0.37028112449799194, + "acc_stderr,none": 0.009678915409840288 + }, + "xnli_de": { + "alias": " - xnli_de", + "acc,none": 0.43012048192771085, + "acc_stderr,none": 0.00992371167540806 + }, + "xnli_el": { + "alias": " - xnli_el", + "acc,none": 0.3582329317269076, + "acc_stderr,none": 0.009610788482973929 + }, + "xnli_en": { + "alias": " - xnli_en", + "acc,none": 0.4943775100401606, + "acc_stderr,none": 0.010021439203777294 + }, + "xnli_es": { + "alias": " - xnli_es", + "acc,none": 0.39759036144578314, + "acc_stderr,none": 0.009809602996075804 + }, + "xnli_fr": { + "alias": " - xnli_fr", + "acc,none": 0.42248995983935744, + "acc_stderr,none": 0.009900919227857794 + }, + "xnli_hi": { + "alias": " - xnli_hi", + "acc,none": 0.3650602409638554, + "acc_stderr,none": 0.009650194822749635 + }, + "xnli_ru": { + "alias": " - xnli_ru", + "acc,none": 0.43775100401606426, + "acc_stderr,none": 0.009944099734290161 + }, + "xnli_sw": { + "alias": " - xnli_sw", + "acc,none": 0.3409638554216867, + "acc_stderr,none": 0.009501591178361544 + }, + "xnli_th": { + "alias": " - xnli_th", + "acc,none": 0.348995983935743, + "acc_stderr,none": 0.009554095988300678 + }, + "xnli_tr": { + "alias": " - xnli_tr", + "acc,none": 0.3718875502008032, + "acc_stderr,none": 0.009687507958631799 + }, + "xnli_ur": { + "alias": " - xnli_ur", + "acc,none": 0.3345381526104418, + "acc_stderr,none": 0.009457404390939166 + }, + "xnli_vi": { + "alias": " - xnli_vi", + "acc,none": 0.38393574297188754, + "acc_stderr,none": 0.009748321202534375 + }, + "xnli_zh": { + "alias": " - xnli_zh", + "acc,none": 0.3819277108433735, + "acc_stderr,none": 0.00973862791451752 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T22-35-32.643193_xstorycloze.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T22-35-32.643193_xstorycloze.json new file mode 100644 index 0000000000000000000000000000000000000000..09a159d965e27155500dafdb18ea899e3fc22c0f --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T22-35-32.643193_xstorycloze.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "xstorycloze" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xstorycloze": { + "acc,none": 0.5407616870224415, + "acc_stderr,none": 0.003849468335294993, + "alias": "xstorycloze" + }, + "xstorycloze_ar": { + "alias": " - xstorycloze_ar", + "acc,none": 0.48974189278623426, + "acc_stderr,none": 0.012864417047980472 + }, + "xstorycloze_en": { + "alias": " - xstorycloze_en", + "acc,none": 0.6657842488418266, + "acc_stderr,none": 0.012139246810918228 + }, + "xstorycloze_es": { + "alias": " - xstorycloze_es", + "acc,none": 0.5698213103904699, + "acc_stderr,none": 0.012741052817471078 + }, + "xstorycloze_eu": { + "alias": " - xstorycloze_eu", + "acc,none": 0.5188616810059563, + "acc_stderr,none": 0.012857966762464996 + }, + "xstorycloze_hi": { + "alias": " - xstorycloze_hi", + "acc,none": 0.5168762409000662, + "acc_stderr,none": 0.012859793919977608 + }, + "xstorycloze_id": { + "alias": " - xstorycloze_id", + "acc,none": 0.5360688285903376, + "acc_stderr,none": 0.012833602406620017 + }, + "xstorycloze_my": { + "alias": " - xstorycloze_my", + "acc,none": 0.4930509596293845, + "acc_stderr,none": 0.01286588257096072 + }, + "xstorycloze_ru": { + "alias": " - xstorycloze_ru", + "acc,none": 0.5519523494374586, + "acc_stderr,none": 0.012797478885304733 + }, + "xstorycloze_sw": { + "alias": " - xstorycloze_sw", + "acc,none": 0.5029781601588352, + "acc_stderr,none": 0.012866897066011228 + }, + "xstorycloze_te": { + "alias": " - xstorycloze_te", + "acc,none": 0.5473196558570483, + "acc_stderr,none": 0.012809372866181954 + }, + "xstorycloze_zh": { + "alias": " - xstorycloze_zh", + "acc,none": 0.5559232296492389, + "acc_stderr,none": 0.012786390539820832 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T22-39-50.695003_xwinograd.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T22-39-50.695003_xwinograd.json new file mode 100644 index 0000000000000000000000000000000000000000..f9864f2460cd352c630f524b820fe5df2eef2a5e --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T22-39-50.695003_xwinograd.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "xwinograd" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xwinograd": { + "acc,none": 0.6556529557203866, + "acc_stderr,none": 0.007021810569184652, + "alias": "xwinograd" + }, + "xwinograd_en": { + "alias": " - xwinograd_en", + "acc,none": 0.7290322580645161, + "acc_stderr,none": 0.009219643045448322 + }, + "xwinograd_fr": { + "alias": " - xwinograd_fr", + "acc,none": 0.6144578313253012, + "acc_stderr,none": 0.0537495779731939 + }, + "xwinograd_jp": { + "alias": " - xwinograd_jp", + "acc,none": 0.5411887382690302, + "acc_stderr,none": 0.01609936161806395 + }, + "xwinograd_pt": { + "alias": " - xwinograd_pt", + "acc,none": 0.55893536121673, + "acc_stderr,none": 0.0306747666644263 + }, + "xwinograd_ru": { + "alias": " - xwinograd_ru", + "acc,none": 0.5746031746031746, + "acc_stderr,none": 0.027900777694976245 + }, + "xwinograd_zh": { + "alias": " - xwinograd_zh", + "acc,none": 0.6428571428571429, + "acc_stderr,none": 0.021364573561124416 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T23-45-11.895035_mmlu.json b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T23-45-11.895035_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..1e641d5c168937c99bd0514779abcc1be1e97e9d --- /dev/null +++ b/lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T23-45-11.895035_mmlu.json @@ -0,0 +1,330 @@ +{ + "model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096", + "tasks": [ + "mmlu" + ], + "num_fewshot": 5, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.23764421022646345, + "acc_stderr,none": 0.003587790530224594, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.24782146652497344, + "acc_stderr,none": 0.0062902893350153324, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.3253968253968254, + "acc_stderr,none": 0.041905964388711366 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.22424242424242424, + "acc_stderr,none": 0.03256866661681102 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.2107843137254902, + "acc_stderr,none": 0.028626547912437388 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.270042194092827, + "acc_stderr,none": 0.028900721906293433 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.2396694214876033, + "acc_stderr,none": 0.038968789850704164 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.32407407407407407, + "acc_stderr,none": 0.04524596007030048 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.20245398773006135, + "acc_stderr,none": 0.03157065078911902 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.2398843930635838, + "acc_stderr,none": 0.022989592543123567 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.25027932960893856, + "acc_stderr,none": 0.014487500852850412 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.19935691318327975, + "acc_stderr,none": 0.022691033780549656 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.23148148148148148, + "acc_stderr,none": 0.023468429832451152 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.25358539765319427, + "acc_stderr,none": 0.01111171533610114 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.2982456140350877, + "acc_stderr,none": 0.035087719298245654 + }, + "mmlu_other": { + "acc,none": 0.24460894753781784, + "acc_stderr,none": 0.007691719275922855, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.31, + "acc_stderr,none": 0.04648231987117316 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.19245283018867926, + "acc_stderr,none": 0.02426297983937226 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.2023121387283237, + "acc_stderr,none": 0.03063114553919882 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.24, + "acc_stderr,none": 0.042923469599092816 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.3273542600896861, + "acc_stderr,none": 0.03149384670994132 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.20388349514563106, + "acc_stderr,none": 0.03989139859531771 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.2905982905982906, + "acc_stderr,none": 0.029745048572674064 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.29, + "acc_stderr,none": 0.04560480215720684 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.23243933588761176, + "acc_stderr,none": 0.015104550008905704 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.25163398692810457, + "acc_stderr,none": 0.024848018263875195 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.2695035460992908, + "acc_stderr,none": 0.026469036818590627 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.1801470588235294, + "acc_stderr,none": 0.02334516361654486 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.26506024096385544, + "acc_stderr,none": 0.03436024037944967 + }, + "mmlu_social_sciences": { + "acc,none": 0.22099447513812154, + "acc_stderr,none": 0.007486677746029817, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.21929824561403508, + "acc_stderr,none": 0.0389243110651875 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.19696969696969696, + "acc_stderr,none": 0.028335609732463355 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.23316062176165803, + "acc_stderr,none": 0.030516111371476008 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.2, + "acc_stderr,none": 0.020280805062535726 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.20168067226890757, + "acc_stderr,none": 0.026064313406304534 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.21467889908256882, + "acc_stderr,none": 0.01760430414925649 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.22900763358778625, + "acc_stderr,none": 0.036853466317118506 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.24673202614379086, + "acc_stderr,none": 0.0174408203674025 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.22727272727272727, + "acc_stderr,none": 0.04013964554072775 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.21224489795918366, + "acc_stderr,none": 0.026176967197866767 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.21393034825870647, + "acc_stderr,none": 0.0289969096933289 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.27, + "acc_stderr,none": 0.0446196043338474 + }, + "mmlu_stem": { + "acc,none": 0.23184268950206152, + "acc_stderr,none": 0.00750773023393252, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.22, + "acc_stderr,none": 0.04163331998932269 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.23703703703703705, + "acc_stderr,none": 0.03673731683969506 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.21052631578947367, + "acc_stderr,none": 0.033176727875331574 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.2916666666666667, + "acc_stderr,none": 0.03800968060554859 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.21, + "acc_stderr,none": 0.040936018074033256 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.25, + "acc_stderr,none": 0.04351941398892446 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.24, + "acc_stderr,none": 0.04292346959909284 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.21568627450980393, + "acc_stderr,none": 0.04092563958237655 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.3, + "acc_stderr,none": 0.046056618647183814 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.28936170212765955, + "acc_stderr,none": 0.02964400657700962 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.25517241379310346, + "acc_stderr,none": 0.03632984052707842 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.21693121693121692, + "acc_stderr,none": 0.02122708244944506 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.1935483870967742, + "acc_stderr,none": 0.022475258525536057 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.1921182266009852, + "acc_stderr,none": 0.027719315709614778 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816506 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.26296296296296295, + "acc_stderr,none": 0.026842057873833706 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.2251655629139073, + "acc_stderr,none": 0.03410435282008937 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.16203703703703703, + "acc_stderr,none": 0.02513045365226846 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.2857142857142857, + "acc_stderr,none": 0.04287858751340456 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T09-41-13.419478_lambada_openai.json b/lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T09-41-13.419478_lambada_openai.json new file mode 100644 index 0000000000000000000000000000000000000000..f5c6bf6c63bcdd7f5f4a3168730d93bb5167a9ea --- /dev/null +++ b/lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T09-41-13.419478_lambada_openai.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x060-World-1B6-v2.1-20240328-ctx4096", + "tasks": [ + "lambada_openai" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai": { + "alias": "lambada_openai", + "perplexity,none": 4.637882037692772, + "perplexity_stderr,none": 0.10579675670275654, + "acc,none": 0.6741703861828061, + "acc_stderr,none": 0.0065296843174760975 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T10-43-54.649276_hellaswag.json b/lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T10-43-54.649276_hellaswag.json new file mode 100644 index 0000000000000000000000000000000000000000..0fce05259c61556a95341cc75dfc0673f1ea1d9d --- /dev/null +++ b/lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T10-43-54.649276_hellaswag.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x060-World-1B6-v2.1-20240328-ctx4096", + "tasks": [ + "hellaswag" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "hellaswag": { + "alias": "hellaswag", + "acc,none": 0.4644493128858793, + "acc_stderr,none": 0.004977152746478598, + "acc_norm,none": 0.6107349133638718, + "acc_norm_stderr,none": 0.004865871290143343 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T10-47-38.776674_piqa.json b/lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T10-47-38.776674_piqa.json new file mode 100644 index 0000000000000000000000000000000000000000..159a9804d600e663c5704a6d4e86ab6d46403221 --- /dev/null +++ b/lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T10-47-38.776674_piqa.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x060-World-1B6-v2.1-20240328-ctx4096", + "tasks": [ + "piqa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "piqa": { + "alias": "piqa", + "acc,none": 0.7437431991294886, + "acc_stderr,none": 0.010185787831565084, + "acc_norm,none": 0.7453754080522307, + "acc_norm_stderr,none": 0.010164432237060482 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T10-55-41.507473_arc_easy.json b/lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T10-55-41.507473_arc_easy.json new file mode 100644 index 0000000000000000000000000000000000000000..58375c15dba682085ca5569c9b40399dffd3ed0b --- /dev/null +++ b/lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T10-55-41.507473_arc_easy.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x060-World-1B6-v2.1-20240328-ctx4096", + "tasks": [ + "arc_easy" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_easy": { + "alias": "arc_easy", + "acc,none": 0.6426767676767676, + "acc_stderr,none": 0.00983320561246312, + "acc_norm,none": 0.6216329966329966, + "acc_norm_stderr,none": 0.009951575683331947 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T11-00-09.309689_arc_challenge.json b/lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T11-00-09.309689_arc_challenge.json new file mode 100644 index 0000000000000000000000000000000000000000..fc78eee14ffc6f4c03366757e5bd109878b5a4ec --- /dev/null +++ b/lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T11-00-09.309689_arc_challenge.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x060-World-1B6-v2.1-20240328-ctx4096", + "tasks": [ + "arc_challenge" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_challenge": { + "alias": "arc_challenge", + "acc,none": 0.3097269624573379, + "acc_stderr,none": 0.01351205841523836, + "acc_norm,none": 0.3370307167235495, + "acc_norm_stderr,none": 0.013813476652902274 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T00-45-26.618704_lambada_openai.json b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T00-45-26.618704_lambada_openai.json new file mode 100644 index 0000000000000000000000000000000000000000..73f16ecbccfce776ad494cecd842d3f32a0c4e8c --- /dev/null +++ b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T00-45-26.618704_lambada_openai.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth", + "tasks": [ + "lambada_openai" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai": { + "alias": "lambada_openai", + "perplexity,none": 3.8612709402913734, + "perplexity_stderr,none": 0.08105989741229948, + "acc,none": 0.7172520861634, + "acc_stderr,none": 0.006274045840971217 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T02-36-50.849530_hellaswag.json b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T02-36-50.849530_hellaswag.json new file mode 100644 index 0000000000000000000000000000000000000000..bdee8dcc778adb7f9c9953fd00f50f1a79e1898b --- /dev/null +++ b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T02-36-50.849530_hellaswag.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth", + "tasks": [ + "hellaswag" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "hellaswag": { + "alias": "hellaswag", + "acc,none": 0.5053774148575981, + "acc_stderr,none": 0.004989492828168538, + "acc_norm,none": 0.6835291774546903, + "acc_norm_stderr,none": 0.004641484273335084 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T02-43-09.170340_piqa.json b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T02-43-09.170340_piqa.json new file mode 100644 index 0000000000000000000000000000000000000000..a0e90fcfb705c6119a1ac221365f1700e5028ac3 --- /dev/null +++ b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T02-43-09.170340_piqa.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth", + "tasks": [ + "piqa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "piqa": { + "alias": "piqa", + "acc,none": 0.763873775843308, + "acc_stderr,none": 0.009908965890558216, + "acc_norm,none": 0.7633297062023939, + "acc_norm_stderr,none": 0.009916841655042806 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T02-57-01.574833_arc_easy.json b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T02-57-01.574833_arc_easy.json new file mode 100644 index 0000000000000000000000000000000000000000..b00385d09f94c62149e48542f3802ba3eadcc46f --- /dev/null +++ b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T02-57-01.574833_arc_easy.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth", + "tasks": [ + "arc_easy" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_easy": { + "alias": "arc_easy", + "acc,none": 0.7117003367003367, + "acc_stderr,none": 0.009294774252029625, + "acc_norm,none": 0.6717171717171717, + "acc_norm_stderr,none": 0.009635749509262161 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T03-04-40.682395_arc_challenge.json b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T03-04-40.682395_arc_challenge.json new file mode 100644 index 0000000000000000000000000000000000000000..be45ca46c266ff3839ff792dc96d9b9130942dc3 --- /dev/null +++ b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T03-04-40.682395_arc_challenge.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth", + "tasks": [ + "arc_challenge" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_challenge": { + "alias": "arc_challenge", + "acc,none": 0.35580204778157, + "acc_stderr,none": 0.01399057113791876, + "acc_norm,none": 0.39078498293515357, + "acc_norm_stderr,none": 0.014258563880513778 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T08-11-37.430416_glue.json b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T08-11-37.430416_glue.json new file mode 100644 index 0000000000000000000000000000000000000000..8e5900de3ee6beb39c739969b0a4360c66e8fec5 --- /dev/null +++ b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T08-11-37.430416_glue.json @@ -0,0 +1,69 @@ +{ + "model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth", + "tasks": [ + "glue" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "cola": { + "alias": "cola", + "mcc,none": 0.07139041138969343, + "mcc_stderr,none": 0.03374782039086001 + }, + "mnli": { + "alias": "mnli", + "acc,none": 0.37748344370860926, + "acc_stderr,none": 0.004893294795433131 + }, + "mnli_mismatch": { + "alias": "mnli_mismatch", + "acc,none": 0.3817127746135069, + "acc_stderr,none": 0.004899645239995305 + }, + "mrpc": { + "alias": "mrpc", + "acc,none": 0.6397058823529411, + "acc_stderr,none": 0.023796963985532167, + "f1,none": 0.7111984282907662, + "f1_stderr,none": 0.02254781842194292 + }, + "qnli": { + "alias": "qnli", + "acc,none": 0.5317591067179206, + "acc_stderr,none": 0.006751749019242126 + }, + "qqp": { + "alias": "qqp", + "acc,none": 0.6318080633193174, + "acc_stderr,none": 0.002398740231240921, + "f1,none": 0.0, + "f1_stderr,none": 0.0 + }, + "rte": { + "alias": "rte", + "acc,none": 0.6137184115523465, + "acc_stderr,none": 0.029307720385270512 + }, + "sst2": { + "alias": "sst2", + "acc,none": 0.893348623853211, + "acc_stderr,none": 0.010458867008246879 + }, + "wnli": { + "alias": "wnli", + "acc,none": 0.43661971830985913, + "acc_stderr,none": 0.0592793555841297 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T08-14-56.977956_winogrande.json b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T08-14-56.977956_winogrande.json new file mode 100644 index 0000000000000000000000000000000000000000..c18d1d35ed541130698238e95a2a3019e99f2d1b --- /dev/null +++ b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T08-14-56.977956_winogrande.json @@ -0,0 +1,25 @@ +{ + "model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth", + "tasks": [ + "winogrande" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "winogrande": { + "alias": "winogrande", + "acc,none": 0.6629834254143646, + "acc_stderr,none": 0.01328495576939525 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T08-29-25.494712_sciq.json b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T08-29-25.494712_sciq.json new file mode 100644 index 0000000000000000000000000000000000000000..44e8695228a5403fc7daa1ddec0f4cf555ca7f20 --- /dev/null +++ b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T08-29-25.494712_sciq.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth", + "tasks": [ + "sciq" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "sciq": { + "alias": "sciq", + "acc,none": 0.922, + "acc_stderr,none": 0.008484573530118585, + "acc_norm,none": 0.886, + "acc_norm_stderr,none": 0.01005510343582333 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T16-15-02.568125_mmlu.json b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T16-15-02.568125_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..5fb602d766be57014c2e2d7aaf074943cb96c18d --- /dev/null +++ b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T16-15-02.568125_mmlu.json @@ -0,0 +1,330 @@ +{ + "model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth", + "tasks": [ + "mmlu" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.28300811850163793, + "acc_stderr,none": 0.0037926401279525677, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.29330499468650373, + "acc_stderr,none": 0.00661831036231422, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.23809523809523808, + "acc_stderr,none": 0.03809523809523811 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.37575757575757573, + "acc_stderr,none": 0.03781887353205982 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.29411764705882354, + "acc_stderr,none": 0.031980016601150726 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.2616033755274262, + "acc_stderr,none": 0.028609516716994934 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.38016528925619836, + "acc_stderr,none": 0.04431324501968432 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.2777777777777778, + "acc_stderr,none": 0.04330043749650741 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.3312883435582822, + "acc_stderr,none": 0.03697983910025588 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.3063583815028902, + "acc_stderr,none": 0.024818350129436596 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.2324022346368715, + "acc_stderr,none": 0.014125968754673403 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.33762057877813506, + "acc_stderr,none": 0.02685882587948854 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.3425925925925926, + "acc_stderr,none": 0.02640614597362568 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.2907431551499348, + "acc_stderr,none": 0.011598062372851981 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.3508771929824561, + "acc_stderr,none": 0.03660298834049163 + }, + "mmlu_other": { + "acc,none": 0.28451882845188287, + "acc_stderr,none": 0.008058064756071836, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.32, + "acc_stderr,none": 0.046882617226215034 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.24150943396226415, + "acc_stderr,none": 0.02634148037111836 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.2543352601156069, + "acc_stderr,none": 0.0332055644308557 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.32, + "acc_stderr,none": 0.04688261722621505 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.2242152466367713, + "acc_stderr,none": 0.02799153425851952 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.20388349514563106, + "acc_stderr,none": 0.0398913985953177 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.3247863247863248, + "acc_stderr,none": 0.030679022765498835 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.19, + "acc_stderr,none": 0.03942772444036623 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.351213282247765, + "acc_stderr,none": 0.01706998205149943 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.29411764705882354, + "acc_stderr,none": 0.026090162504279046 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.2765957446808511, + "acc_stderr,none": 0.026684564340461 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.22794117647058823, + "acc_stderr,none": 0.025483081468029804 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.2469879518072289, + "acc_stderr,none": 0.03357351982064536 + }, + "mmlu_social_sciences": { + "acc,none": 0.27494312642183943, + "acc_stderr,none": 0.008044129845426675, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.24561403508771928, + "acc_stderr,none": 0.040493392977481425 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.29797979797979796, + "acc_stderr,none": 0.03258630383836556 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.31088082901554404, + "acc_stderr,none": 0.03340361906276586 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.22564102564102564, + "acc_stderr,none": 0.021193632525148533 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.23529411764705882, + "acc_stderr,none": 0.02755361446786379 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.27155963302752295, + "acc_stderr,none": 0.019069098363191445 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.2900763358778626, + "acc_stderr,none": 0.03980066246467766 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.29248366013071897, + "acc_stderr,none": 0.01840341571010978 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.36363636363636365, + "acc_stderr,none": 0.046075820907199756 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.2612244897959184, + "acc_stderr,none": 0.028123429335142787 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.3034825870646766, + "acc_stderr,none": 0.03251006816458619 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.25, + "acc_stderr,none": 0.04351941398892446 + }, + "mmlu_stem": { + "acc,none": 0.27402473834443386, + "acc_stderr,none": 0.007942050435080712, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.23, + "acc_stderr,none": 0.042295258468165065 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.35555555555555557, + "acc_stderr,none": 0.04135176749720386 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.2894736842105263, + "acc_stderr,none": 0.03690677986137282 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.2777777777777778, + "acc_stderr,none": 0.03745554791462457 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.19, + "acc_stderr,none": 0.039427724440366234 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.27, + "acc_stderr,none": 0.0446196043338474 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.26, + "acc_stderr,none": 0.0440844002276808 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.21568627450980393, + "acc_stderr,none": 0.04092563958237656 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.31, + "acc_stderr,none": 0.04648231987117316 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.2425531914893617, + "acc_stderr,none": 0.028020226271200217 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.3103448275862069, + "acc_stderr,none": 0.038552896163789485 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.2698412698412698, + "acc_stderr,none": 0.022860838309232072 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.2870967741935484, + "acc_stderr,none": 0.02573654274559452 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.30049261083743845, + "acc_stderr,none": 0.03225799476233485 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.33, + "acc_stderr,none": 0.047258156262526045 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.25925925925925924, + "acc_stderr,none": 0.026719240783712184 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.2913907284768212, + "acc_stderr,none": 0.03710185726119994 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.22685185185185186, + "acc_stderr,none": 0.028561650102422263 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.30357142857142855, + "acc_stderr,none": 0.04364226155841044 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T17-54-48.511357_lambada_multilingual.json b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T17-54-48.511357_lambada_multilingual.json new file mode 100644 index 0000000000000000000000000000000000000000..6f2f06810ba9295131ba83f9fc2483906cfac793 --- /dev/null +++ b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T17-54-48.511357_lambada_multilingual.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth", + "tasks": [ + "lambada_multilingual" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai_mt_de": { + "alias": "lambada_openai_mt_de", + "perplexity,none": 43.68824880447971, + "perplexity_stderr,none": 2.514152238024757, + "acc,none": 0.4028721133320396, + "acc_stderr,none": 0.006833282145636733 + }, + "lambada_openai_mt_en": { + "alias": "lambada_openai_mt_en", + "perplexity,none": 3.861270938377691, + "perplexity_stderr,none": 0.08105989728747866, + "acc,none": 0.7172520861634, + "acc_stderr,none": 0.006274045840971217 + }, + "lambada_openai_mt_es": { + "alias": "lambada_openai_mt_es", + "perplexity,none": 38.09532017842916, + "perplexity_stderr,none": 1.965392103779422, + "acc,none": 0.4281001358431981, + "acc_stderr,none": 0.0068935789269446 + }, + "lambada_openai_mt_fr": { + "alias": "lambada_openai_mt_fr", + "perplexity,none": 22.40375284101206, + "perplexity_stderr,none": 1.1532300539224993, + "acc,none": 0.5177566466136231, + "acc_stderr,none": 0.006961583546700133 + }, + "lambada_openai_mt_it": { + "alias": "lambada_openai_mt_it", + "perplexity,none": 30.290230628389285, + "perplexity_stderr,none": 1.6679780932575874, + "acc,none": 0.48437803221424414, + "acc_stderr,none": 0.006962576790761937 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T19-27-11.159807_pawsx.json b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T19-27-11.159807_pawsx.json new file mode 100644 index 0000000000000000000000000000000000000000..808eecee2a778399516ef7d79aab882519795f58 --- /dev/null +++ b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T19-27-11.159807_pawsx.json @@ -0,0 +1,60 @@ +{ + "model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth", + "tasks": [ + "pawsx" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "pawsx": { + "acc,none": 0.5337142857142857, + "acc_stderr,none": 0.004198417715621075, + "alias": "pawsx" + }, + "paws_de": { + "alias": " - paws_de", + "acc,none": 0.572, + "acc_stderr,none": 0.011066581884995262 + }, + "paws_en": { + "alias": " - paws_en", + "acc,none": 0.569, + "acc_stderr,none": 0.01107613833518756 + }, + "paws_es": { + "alias": " - paws_es", + "acc,none": 0.5985, + "acc_stderr,none": 0.010963985565921716 + }, + "paws_fr": { + "alias": " - paws_fr", + "acc,none": 0.534, + "acc_stderr,none": 0.011157250652425772 + }, + "paws_ja": { + "alias": " - paws_ja", + "acc,none": 0.4495, + "acc_stderr,none": 0.011125950223877364 + }, + "paws_ko": { + "alias": " - paws_ko", + "acc,none": 0.5085, + "acc_stderr,none": 0.011181519941139164 + }, + "paws_zh": { + "alias": " - paws_zh", + "acc,none": 0.5045, + "acc_stderr,none": 0.011182683094883903 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T21-19-09.734718_pawsx.json b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T21-19-09.734718_pawsx.json new file mode 100644 index 0000000000000000000000000000000000000000..808eecee2a778399516ef7d79aab882519795f58 --- /dev/null +++ b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T21-19-09.734718_pawsx.json @@ -0,0 +1,60 @@ +{ + "model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth", + "tasks": [ + "pawsx" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "pawsx": { + "acc,none": 0.5337142857142857, + "acc_stderr,none": 0.004198417715621075, + "alias": "pawsx" + }, + "paws_de": { + "alias": " - paws_de", + "acc,none": 0.572, + "acc_stderr,none": 0.011066581884995262 + }, + "paws_en": { + "alias": " - paws_en", + "acc,none": 0.569, + "acc_stderr,none": 0.01107613833518756 + }, + "paws_es": { + "alias": " - paws_es", + "acc,none": 0.5985, + "acc_stderr,none": 0.010963985565921716 + }, + "paws_fr": { + "alias": " - paws_fr", + "acc,none": 0.534, + "acc_stderr,none": 0.011157250652425772 + }, + "paws_ja": { + "alias": " - paws_ja", + "acc,none": 0.4495, + "acc_stderr,none": 0.011125950223877364 + }, + "paws_ko": { + "alias": " - paws_ko", + "acc,none": 0.5085, + "acc_stderr,none": 0.011181519941139164 + }, + "paws_zh": { + "alias": " - paws_zh", + "acc,none": 0.5045, + "acc_stderr,none": 0.011182683094883903 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T21-36-18.415182_xcopa.json b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T21-36-18.415182_xcopa.json new file mode 100644 index 0000000000000000000000000000000000000000..d9d2835b1aa3dbc561c63c2919883e676507d600 --- /dev/null +++ b/lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T21-36-18.415182_xcopa.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth", + "tasks": [ + "xcopa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xcopa": { + "acc,none": 0.6016363636363636, + "acc_stderr,none": 0.006542159248225501, + "alias": "xcopa" + }, + "xcopa_et": { + "alias": " - xcopa_et", + "acc,none": 0.554, + "acc_stderr,none": 0.022252153078595897 + }, + "xcopa_ht": { + "alias": " - xcopa_ht", + "acc,none": 0.542, + "acc_stderr,none": 0.022303966774269945 + }, + "xcopa_id": { + "alias": " - xcopa_id", + "acc,none": 0.692, + "acc_stderr,none": 0.020667032987466104 + }, + "xcopa_it": { + "alias": " - xcopa_it", + "acc,none": 0.704, + "acc_stderr,none": 0.020435342091896135 + }, + "xcopa_qu": { + "alias": " - xcopa_qu", + "acc,none": 0.49, + "acc_stderr,none": 0.02237859698923078 + }, + "xcopa_sw": { + "alias": " - xcopa_sw", + "acc,none": 0.546, + "acc_stderr,none": 0.022288147591176945 + }, + "xcopa_ta": { + "alias": " - xcopa_ta", + "acc,none": 0.592, + "acc_stderr,none": 0.022000910893877193 + }, + "xcopa_th": { + "alias": " - xcopa_th", + "acc,none": 0.548, + "acc_stderr,none": 0.022279694107843424 + }, + "xcopa_tr": { + "alias": " - xcopa_tr", + "acc,none": 0.614, + "acc_stderr,none": 0.02179352921928116 + }, + "xcopa_vi": { + "alias": " - xcopa_vi", + "acc,none": 0.658, + "acc_stderr,none": 0.021236147199899254 + }, + "xcopa_zh": { + "alias": " - xcopa_zh", + "acc,none": 0.678, + "acc_stderr,none": 0.020916668330019886 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T00-22-36.899379_lambada_openai.json b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T00-22-36.899379_lambada_openai.json new file mode 100644 index 0000000000000000000000000000000000000000..b7bcb09a84939523c7b300a89a77ee97959fd11e --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T00-22-36.899379_lambada_openai.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096", + "tasks": [ + "lambada_openai" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai": { + "alias": "lambada_openai", + "perplexity,none": 6.967655618555803, + "perplexity_stderr,none": 0.18433426410235756, + "acc,none": 0.5856782456821269, + "acc_stderr,none": 0.006862944515138107 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T00-50-08.180742_hellaswag.json b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T00-50-08.180742_hellaswag.json new file mode 100644 index 0000000000000000000000000000000000000000..a90aeecb19c67cdac7cbb15b78801c928c167237 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T00-50-08.180742_hellaswag.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096", + "tasks": [ + "hellaswag" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "hellaswag": { + "alias": "hellaswag", + "acc,none": 0.433379804819757, + "acc_stderr,none": 0.004945291270072424, + "acc_norm,none": 0.568213503286198, + "acc_norm_stderr,none": 0.0049431275832909054 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T00-52-22.899359_piqa.json b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T00-52-22.899359_piqa.json new file mode 100644 index 0000000000000000000000000000000000000000..22065b9b3ff0bb6a201b89d61cd0a6855a0ba7a2 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T00-52-22.899359_piqa.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096", + "tasks": [ + "piqa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "piqa": { + "alias": "piqa", + "acc,none": 0.7285092491838956, + "acc_stderr,none": 0.010376251176596138, + "acc_norm,none": 0.7415669205658324, + "acc_norm_stderr,none": 0.01021397163677332 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T00-57-02.219064_arc_easy.json b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T00-57-02.219064_arc_easy.json new file mode 100644 index 0000000000000000000000000000000000000000..2dca1bc2bd3a85f50e185a0ead67e0c9adbb4864 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T00-57-02.219064_arc_easy.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096", + "tasks": [ + "arc_easy" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_easy": { + "alias": "arc_easy", + "acc,none": 0.6868686868686869, + "acc_stderr,none": 0.00951630387930954, + "acc_norm,none": 0.6481481481481481, + "acc_norm_stderr,none": 0.009799078929868706 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T00-59-35.645217_arc_challenge.json b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T00-59-35.645217_arc_challenge.json new file mode 100644 index 0000000000000000000000000000000000000000..ebd790b40e56fb6c3423a3140cb5609cfc783c9c --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T00-59-35.645217_arc_challenge.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096", + "tasks": [ + "arc_challenge" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_challenge": { + "alias": "arc_challenge", + "acc,none": 0.3191126279863481, + "acc_stderr,none": 0.013621696119173307, + "acc_norm,none": 0.3506825938566553, + "acc_norm_stderr,none": 0.013944635930726094 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T02-23-56.915893_glue.json b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T02-23-56.915893_glue.json new file mode 100644 index 0000000000000000000000000000000000000000..4974693d08a37ef5af4159ef8f231e104c496d22 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T02-23-56.915893_glue.json @@ -0,0 +1,69 @@ +{ + "model": "/models/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096", + "tasks": [ + "glue" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "cola": { + "alias": "cola", + "mcc,none": -0.038995993274974224, + "mcc_stderr,none": 0.03098825117606045 + }, + "mnli": { + "alias": "mnli", + "acc,none": 0.3369332654100866, + "acc_stderr,none": 0.0047711990887903295 + }, + "mnli_mismatch": { + "alias": "mnli_mismatch", + "acc,none": 0.35598047192839705, + "acc_stderr,none": 0.0048290728527781235 + }, + "mrpc": { + "alias": "mrpc", + "acc,none": 0.42892156862745096, + "acc_stderr,none": 0.024532376270716263, + "f1,none": 0.3473389355742297, + "f1_stderr,none": 0.031887748206337066 + }, + "qnli": { + "alias": "qnli", + "acc,none": 0.4927695405454878, + "acc_stderr,none": 0.006764703129634555 + }, + "qqp": { + "alias": "qqp", + "acc,none": 0.46779619094731634, + "acc_stderr,none": 0.002481537430018941, + "f1,none": 0.47551492992078004, + "f1_stderr,none": 0.0030257288166911775 + }, + "rte": { + "alias": "rte", + "acc,none": 0.48736462093862815, + "acc_stderr,none": 0.030086851767188564 + }, + "sst2": { + "alias": "sst2", + "acc,none": 0.9025229357798165, + "acc_stderr,none": 0.010050120445908329 + }, + "wnli": { + "alias": "wnli", + "acc,none": 0.4788732394366197, + "acc_stderr,none": 0.05970805879899505 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T02-25-22.558921_winogrande.json b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T02-25-22.558921_winogrande.json new file mode 100644 index 0000000000000000000000000000000000000000..76fcaf7043573d247ca40106440e62cce7f395ef --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T02-25-22.558921_winogrande.json @@ -0,0 +1,25 @@ +{ + "model": "/models/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096", + "tasks": [ + "winogrande" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "winogrande": { + "alias": "winogrande", + "acc,none": 0.5990528808208366, + "acc_stderr,none": 0.013773974554948025 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T02-28-48.538316_sciq.json b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T02-28-48.538316_sciq.json new file mode 100644 index 0000000000000000000000000000000000000000..f048de121ab0abc1bb514521a18c7addd17c72bc --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T02-28-48.538316_sciq.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096", + "tasks": [ + "sciq" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "sciq": { + "alias": "sciq", + "acc,none": 0.897, + "acc_stderr,none": 0.009616833339695803, + "acc_norm,none": 0.867, + "acc_norm_stderr,none": 0.010743669132397337 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T03-15-30.600735_mmlu.json b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T03-15-30.600735_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..bc6cc3e7760f7cea9c1be0b54d14bc7e5d99fa4c --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T03-15-30.600735_mmlu.json @@ -0,0 +1,330 @@ +{ + "model": "/models/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096", + "tasks": [ + "mmlu" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.26114513602050987, + "acc_stderr,none": 0.0037004954683899113, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.2514346439957492, + "acc_stderr,none": 0.0063219035743082175, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.30158730158730157, + "acc_stderr,none": 0.04104947269903394 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.26666666666666666, + "acc_stderr,none": 0.03453131801885415 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.28921568627450983, + "acc_stderr,none": 0.03182231867647553 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.22784810126582278, + "acc_stderr,none": 0.02730348459906942 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.2644628099173554, + "acc_stderr,none": 0.040261875275912046 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.21296296296296297, + "acc_stderr,none": 0.03957835471980978 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.19631901840490798, + "acc_stderr,none": 0.031207970394709218 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.21098265895953758, + "acc_stderr,none": 0.021966309947043114 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.2581005586592179, + "acc_stderr,none": 0.014635185616527829 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.21221864951768488, + "acc_stderr,none": 0.023222756797435105 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.2839506172839506, + "acc_stderr,none": 0.025089478523765134 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.2529335071707953, + "acc_stderr,none": 0.011102268713839987 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.2982456140350877, + "acc_stderr,none": 0.03508771929824564 + }, + "mmlu_other": { + "acc,none": 0.2584486643064049, + "acc_stderr,none": 0.007840247289852885, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.28, + "acc_stderr,none": 0.04512608598542127 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.25660377358490566, + "acc_stderr,none": 0.026880647889051996 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.2832369942196532, + "acc_stderr,none": 0.034355680560478746 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.32, + "acc_stderr,none": 0.046882617226215034 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.21524663677130046, + "acc_stderr,none": 0.02758406660220827 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.24271844660194175, + "acc_stderr,none": 0.04245022486384495 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.20512820512820512, + "acc_stderr,none": 0.026453508054040353 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.27, + "acc_stderr,none": 0.0446196043338474 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.24265644955300128, + "acc_stderr,none": 0.015329888940899873 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.25163398692810457, + "acc_stderr,none": 0.0248480182638752 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.23049645390070922, + "acc_stderr,none": 0.025123739226872402 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.3382352941176471, + "acc_stderr,none": 0.028739328513983572 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.3253012048192771, + "acc_stderr,none": 0.03647168523683227 + }, + "mmlu_social_sciences": { + "acc,none": 0.2690932726681833, + "acc_stderr,none": 0.007966030232741745, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.2543859649122807, + "acc_stderr,none": 0.0409698513984367 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.29292929292929293, + "acc_stderr,none": 0.03242497958178817 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.31088082901554404, + "acc_stderr,none": 0.033403619062765864 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.3230769230769231, + "acc_stderr,none": 0.02371088850197057 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.3445378151260504, + "acc_stderr,none": 0.030868682604121633 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.23119266055045873, + "acc_stderr,none": 0.018075750241633156 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.21374045801526717, + "acc_stderr,none": 0.035954616117746904 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.23202614379084968, + "acc_stderr,none": 0.01707737337785701 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.3, + "acc_stderr,none": 0.04389311454644286 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.31020408163265306, + "acc_stderr,none": 0.029613459872484378 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.25870646766169153, + "acc_stderr,none": 0.030965903123573037 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.16, + "acc_stderr,none": 0.03684529491774709 + }, + "mmlu_stem": { + "acc,none": 0.2705359974627339, + "acc_stderr,none": 0.007904793861161824, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.29, + "acc_stderr,none": 0.04560480215720684 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.23703703703703705, + "acc_stderr,none": 0.03673731683969506 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.21052631578947367, + "acc_stderr,none": 0.03317672787533157 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.2708333333333333, + "acc_stderr,none": 0.037161774375660185 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.28, + "acc_stderr,none": 0.045126085985421255 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.36, + "acc_stderr,none": 0.04824181513244218 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.34, + "acc_stderr,none": 0.04760952285695235 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.27450980392156865, + "acc_stderr,none": 0.04440521906179326 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.26, + "acc_stderr,none": 0.044084400227680794 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.22127659574468084, + "acc_stderr,none": 0.027136349602424045 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.2206896551724138, + "acc_stderr,none": 0.03455930201924814 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.25132275132275134, + "acc_stderr,none": 0.022340482339643898 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.3, + "acc_stderr,none": 0.026069362295335123 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.2660098522167488, + "acc_stderr,none": 0.031089826002937523 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.28, + "acc_stderr,none": 0.04512608598542128 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.2962962962962963, + "acc_stderr,none": 0.027840811495871923 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.271523178807947, + "acc_stderr,none": 0.036313298039696525 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.3287037037037037, + "acc_stderr,none": 0.03203614084670058 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.20535714285714285, + "acc_stderr,none": 0.038342410214190714 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T03-35-38.020976_lambada_multilingual.json b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T03-35-38.020976_lambada_multilingual.json new file mode 100644 index 0000000000000000000000000000000000000000..10ceb4ac414683effb98d7ab0360b31dec005ee0 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T03-35-38.020976_lambada_multilingual.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096", + "tasks": [ + "lambada_multilingual" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai_mt_de": { + "alias": "lambada_openai_mt_de", + "perplexity,none": 98.27105892371014, + "perplexity_stderr,none": 5.883230117903311, + "acc,none": 0.31612652823597903, + "acc_stderr,none": 0.006477843064847518 + }, + "lambada_openai_mt_en": { + "alias": "lambada_openai_mt_en", + "perplexity,none": 6.967655621883261, + "perplexity_stderr,none": 0.18433426383275314, + "acc,none": 0.5856782456821269, + "acc_stderr,none": 0.006862944515138107 + }, + "lambada_openai_mt_es": { + "alias": "lambada_openai_mt_es", + "perplexity,none": 113.27038115782568, + "perplexity_stderr,none": 6.458942076704442, + "acc,none": 0.32117213273821077, + "acc_stderr,none": 0.006505202676138956 + }, + "lambada_openai_mt_fr": { + "alias": "lambada_openai_mt_fr", + "perplexity,none": 58.05263248614414, + "perplexity_stderr,none": 3.2555289241178285, + "acc,none": 0.39355715117407336, + "acc_stderr,none": 0.006806297320641501 + }, + "lambada_openai_mt_it": { + "alias": "lambada_openai_mt_it", + "perplexity,none": 88.55092732381516, + "perplexity_stderr,none": 5.357139992555299, + "acc,none": 0.36503007956530176, + "acc_stderr,none": 0.006707380989588295 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T03-59-27.433122_pawsx.json b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T03-59-27.433122_pawsx.json new file mode 100644 index 0000000000000000000000000000000000000000..79f4a7557cfcfec4ed40f1c0d3ad784324d0909c --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T03-59-27.433122_pawsx.json @@ -0,0 +1,60 @@ +{ + "model": "/models/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096", + "tasks": [ + "pawsx" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "pawsx": { + "acc,none": 0.48692857142857143, + "acc_stderr,none": 0.004216885486287155, + "alias": "pawsx" + }, + "paws_de": { + "alias": " - paws_de", + "acc,none": 0.5355, + "acc_stderr,none": 0.011154913314119564 + }, + "paws_en": { + "alias": " - paws_en", + "acc,none": 0.5085, + "acc_stderr,none": 0.011181519941139164 + }, + "paws_es": { + "alias": " - paws_es", + "acc,none": 0.5205, + "acc_stderr,none": 0.011173732641806813 + }, + "paws_fr": { + "alias": " - paws_fr", + "acc,none": 0.477, + "acc_stderr,none": 0.011171297997523606 + }, + "paws_ja": { + "alias": " - paws_ja", + "acc,none": 0.4605, + "acc_stderr,none": 0.01114818442653329 + }, + "paws_ko": { + "alias": " - paws_ko", + "acc,none": 0.451, + "acc_stderr,none": 0.011129305041886327 + }, + "paws_zh": { + "alias": " - paws_zh", + "acc,none": 0.4555, + "acc_stderr,none": 0.011138757154883975 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T04-05-26.306971_xcopa.json b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T04-05-26.306971_xcopa.json new file mode 100644 index 0000000000000000000000000000000000000000..cff4e4d48272801d061f0bf9ef2ffb6b20ad660e --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T04-05-26.306971_xcopa.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096", + "tasks": [ + "xcopa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xcopa": { + "acc,none": 0.5538181818181818, + "acc_stderr,none": 0.006692969416717101, + "alias": "xcopa" + }, + "xcopa_et": { + "alias": " - xcopa_et", + "acc,none": 0.508, + "acc_stderr,none": 0.022380208834928035 + }, + "xcopa_ht": { + "alias": " - xcopa_ht", + "acc,none": 0.516, + "acc_stderr,none": 0.0223716109825804 + }, + "xcopa_id": { + "alias": " - xcopa_id", + "acc,none": 0.574, + "acc_stderr,none": 0.022136577335085637 + }, + "xcopa_it": { + "alias": " - xcopa_it", + "acc,none": 0.556, + "acc_stderr,none": 0.02224224437573102 + }, + "xcopa_qu": { + "alias": " - xcopa_qu", + "acc,none": 0.5, + "acc_stderr,none": 0.022383074051792257 + }, + "xcopa_sw": { + "alias": " - xcopa_sw", + "acc,none": 0.552, + "acc_stderr,none": 0.022261697292270132 + }, + "xcopa_ta": { + "alias": " - xcopa_ta", + "acc,none": 0.548, + "acc_stderr,none": 0.022279694107843417 + }, + "xcopa_th": { + "alias": " - xcopa_th", + "acc,none": 0.564, + "acc_stderr,none": 0.022198954641476802 + }, + "xcopa_tr": { + "alias": " - xcopa_tr", + "acc,none": 0.556, + "acc_stderr,none": 0.02224224437573102 + }, + "xcopa_vi": { + "alias": " - xcopa_vi", + "acc,none": 0.596, + "acc_stderr,none": 0.021966635293832925 + }, + "xcopa_zh": { + "alias": " - xcopa_zh", + "acc,none": 0.622, + "acc_stderr,none": 0.02170655082451818 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T05-45-13.420160_xnli.json b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T05-45-13.420160_xnli.json new file mode 100644 index 0000000000000000000000000000000000000000..eb23628ba71e3dea684b952f52b146f7fecd6491 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T05-45-13.420160_xnli.json @@ -0,0 +1,100 @@ +{ + "model": "/models/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096", + "tasks": [ + "xnli" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xnli": { + "acc,none": 0.4028647925033467, + "acc_stderr,none": 0.002522037482892674, + "alias": "xnli" + }, + "xnli_ar": { + "alias": " - xnli_ar", + "acc,none": 0.3357429718875502, + "acc_stderr,none": 0.009465838617337349 + }, + "xnli_bg": { + "alias": " - xnli_bg", + "acc,none": 0.40441767068273093, + "acc_stderr,none": 0.009837245625453003 + }, + "xnli_de": { + "alias": " - xnli_de", + "acc,none": 0.45140562248995986, + "acc_stderr,none": 0.009974628047721984 + }, + "xnli_el": { + "alias": " - xnli_el", + "acc,none": 0.3682730923694779, + "acc_stderr,none": 0.009668013178998446 + }, + "xnli_en": { + "alias": " - xnli_en", + "acc,none": 0.5333333333333333, + "acc_stderr,none": 0.009999776793187639 + }, + "xnli_es": { + "alias": " - xnli_es", + "acc,none": 0.4566265060240964, + "acc_stderr,none": 0.00998429341084031 + }, + "xnli_fr": { + "alias": " - xnli_fr", + "acc,none": 0.44176706827309237, + "acc_stderr,none": 0.009953869607025618 + }, + "xnli_hi": { + "alias": " - xnli_hi", + "acc,none": 0.3742971887550201, + "acc_stderr,none": 0.009700182103576727 + }, + "xnli_ru": { + "alias": " - xnli_ru", + "acc,none": 0.46224899598393576, + "acc_stderr,none": 0.009993466360872783 + }, + "xnli_sw": { + "alias": " - xnli_sw", + "acc,none": 0.3485943775100402, + "acc_stderr,none": 0.009551542053301816 + }, + "xnli_th": { + "alias": " - xnli_th", + "acc,none": 0.37630522088353413, + "acc_stderr,none": 0.009710547744216048 + }, + "xnli_tr": { + "alias": " - xnli_tr", + "acc,none": 0.42168674698795183, + "acc_stderr,none": 0.009898379493335446 + }, + "xnli_ur": { + "alias": " - xnli_ur", + "acc,none": 0.3333333333333333, + "acc_stderr,none": 0.009448900914617614 + }, + "xnli_vi": { + "alias": " - xnli_vi", + "acc,none": 0.39156626506024095, + "acc_stderr,none": 0.009783558109997084 + }, + "xnli_zh": { + "alias": " - xnli_zh", + "acc,none": 0.3433734939759036, + "acc_stderr,none": 0.009517658993060705 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T06-19-40.693069_xstorycloze.json b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T06-19-40.693069_xstorycloze.json new file mode 100644 index 0000000000000000000000000000000000000000..8c52ba986cf215f050a0739f293b5e7e7f00b1c4 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T06-19-40.693069_xstorycloze.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096", + "tasks": [ + "xstorycloze" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xstorycloze": { + "acc,none": 0.5532759761747187, + "acc_stderr,none": 0.003834717836242436, + "alias": "xstorycloze" + }, + "xstorycloze_ar": { + "alias": " - xstorycloze_ar", + "acc,none": 0.4990072799470549, + "acc_stderr,none": 0.012867099955422926 + }, + "xstorycloze_en": { + "alias": " - xstorycloze_en", + "acc,none": 0.6909331568497684, + "acc_stderr,none": 0.011892023305070085 + }, + "xstorycloze_es": { + "alias": " - xstorycloze_es", + "acc,none": 0.5903375248180013, + "acc_stderr,none": 0.012655369030750355 + }, + "xstorycloze_eu": { + "alias": " - xstorycloze_eu", + "acc,none": 0.5334215751158173, + "acc_stderr,none": 0.01283834793473167 + }, + "xstorycloze_hi": { + "alias": " - xstorycloze_hi", + "acc,none": 0.5334215751158173, + "acc_stderr,none": 0.012838347934731669 + }, + "xstorycloze_id": { + "alias": " - xstorycloze_id", + "acc,none": 0.5618795499669094, + "acc_stderr,none": 0.012768206616277759 + }, + "xstorycloze_my": { + "alias": " - xstorycloze_my", + "acc,none": 0.48378557246856385, + "acc_stderr,none": 0.012860357805055867 + }, + "xstorycloze_ru": { + "alias": " - xstorycloze_ru", + "acc,none": 0.5671740569159497, + "acc_stderr,none": 0.012750474502985826 + }, + "xstorycloze_sw": { + "alias": " - xstorycloze_sw", + "acc,none": 0.5049636002647253, + "acc_stderr,none": 0.012866491277589945 + }, + "xstorycloze_te": { + "alias": " - xstorycloze_te", + "acc,none": 0.5459960291197882, + "acc_stderr,none": 0.012812565368728929 + }, + "xstorycloze_zh": { + "alias": " - xstorycloze_zh", + "acc,none": 0.5751158173395102, + "acc_stderr,none": 0.012721094073523329 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T06-24-04.408383_xwinograd.json b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T06-24-04.408383_xwinograd.json new file mode 100644 index 0000000000000000000000000000000000000000..9fbf42734c79145417668c95fe9064abd3d436e0 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T06-24-04.408383_xwinograd.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096", + "tasks": [ + "xwinograd" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xwinograd": { + "acc,none": 0.7293773881771185, + "acc_stderr,none": 0.006498743127659834, + "alias": "xwinograd" + }, + "xwinograd_en": { + "alias": " - xwinograd_en", + "acc,none": 0.8116129032258065, + "acc_stderr,none": 0.008111141657897709 + }, + "xwinograd_fr": { + "alias": " - xwinograd_fr", + "acc,none": 0.6626506024096386, + "acc_stderr,none": 0.052212602620321284 + }, + "xwinograd_jp": { + "alias": " - xwinograd_jp", + "acc,none": 0.5797705943691345, + "acc_stderr,none": 0.015947351147972303 + }, + "xwinograd_pt": { + "alias": " - xwinograd_pt", + "acc,none": 0.6463878326996197, + "acc_stderr,none": 0.02953653465680206 + }, + "xwinograd_ru": { + "alias": " - xwinograd_ru", + "acc,none": 0.6222222222222222, + "acc_stderr,none": 0.027360632861056396 + }, + "xwinograd_zh": { + "alias": " - xwinograd_zh", + "acc,none": 0.7559523809523809, + "acc_stderr,none": 0.019151399446646847 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T10-49-47.989145_mmlu.json b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T10-49-47.989145_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..bae6a5051df8f31057cbac5d42f9a11578c37186 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_11/0.4.8_2025-03-15T10-49-47.989145_mmlu.json @@ -0,0 +1,330 @@ +{ + "model": "/models/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096", + "tasks": [ + "mmlu" + ], + "num_fewshot": 5, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.26114513602050987, + "acc_stderr,none": 0.0037004954683899113, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.2514346439957492, + "acc_stderr,none": 0.0063219035743082175, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.30158730158730157, + "acc_stderr,none": 0.04104947269903394 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.26666666666666666, + "acc_stderr,none": 0.03453131801885415 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.28921568627450983, + "acc_stderr,none": 0.03182231867647553 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.22784810126582278, + "acc_stderr,none": 0.02730348459906942 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.2644628099173554, + "acc_stderr,none": 0.040261875275912046 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.21296296296296297, + "acc_stderr,none": 0.03957835471980978 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.19631901840490798, + "acc_stderr,none": 0.031207970394709218 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.21098265895953758, + "acc_stderr,none": 0.021966309947043114 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.2581005586592179, + "acc_stderr,none": 0.014635185616527829 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.21221864951768488, + "acc_stderr,none": 0.023222756797435105 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.2839506172839506, + "acc_stderr,none": 0.025089478523765134 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.2529335071707953, + "acc_stderr,none": 0.011102268713839987 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.2982456140350877, + "acc_stderr,none": 0.03508771929824564 + }, + "mmlu_other": { + "acc,none": 0.2584486643064049, + "acc_stderr,none": 0.007840247289852885, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.28, + "acc_stderr,none": 0.04512608598542127 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.25660377358490566, + "acc_stderr,none": 0.026880647889051996 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.2832369942196532, + "acc_stderr,none": 0.034355680560478746 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.32, + "acc_stderr,none": 0.046882617226215034 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.21524663677130046, + "acc_stderr,none": 0.02758406660220827 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.24271844660194175, + "acc_stderr,none": 0.04245022486384495 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.20512820512820512, + "acc_stderr,none": 0.026453508054040353 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.27, + "acc_stderr,none": 0.0446196043338474 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.24265644955300128, + "acc_stderr,none": 0.015329888940899873 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.25163398692810457, + "acc_stderr,none": 0.0248480182638752 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.23049645390070922, + "acc_stderr,none": 0.025123739226872402 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.3382352941176471, + "acc_stderr,none": 0.028739328513983572 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.3253012048192771, + "acc_stderr,none": 0.03647168523683227 + }, + "mmlu_social_sciences": { + "acc,none": 0.2690932726681833, + "acc_stderr,none": 0.007966030232741745, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.2543859649122807, + "acc_stderr,none": 0.0409698513984367 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.29292929292929293, + "acc_stderr,none": 0.03242497958178817 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.31088082901554404, + "acc_stderr,none": 0.033403619062765864 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.3230769230769231, + "acc_stderr,none": 0.02371088850197057 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.3445378151260504, + "acc_stderr,none": 0.030868682604121633 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.23119266055045873, + "acc_stderr,none": 0.018075750241633156 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.21374045801526717, + "acc_stderr,none": 0.035954616117746904 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.23202614379084968, + "acc_stderr,none": 0.01707737337785701 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.3, + "acc_stderr,none": 0.04389311454644286 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.31020408163265306, + "acc_stderr,none": 0.029613459872484378 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.25870646766169153, + "acc_stderr,none": 0.030965903123573037 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.16, + "acc_stderr,none": 0.03684529491774709 + }, + "mmlu_stem": { + "acc,none": 0.2705359974627339, + "acc_stderr,none": 0.007904793861161824, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.29, + "acc_stderr,none": 0.04560480215720684 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.23703703703703705, + "acc_stderr,none": 0.03673731683969506 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.21052631578947367, + "acc_stderr,none": 0.03317672787533157 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.2708333333333333, + "acc_stderr,none": 0.037161774375660185 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.28, + "acc_stderr,none": 0.045126085985421255 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.36, + "acc_stderr,none": 0.04824181513244218 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.34, + "acc_stderr,none": 0.04760952285695235 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.27450980392156865, + "acc_stderr,none": 0.04440521906179326 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.26, + "acc_stderr,none": 0.044084400227680794 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.22127659574468084, + "acc_stderr,none": 0.027136349602424045 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.2206896551724138, + "acc_stderr,none": 0.03455930201924814 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.25132275132275134, + "acc_stderr,none": 0.022340482339643898 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.3, + "acc_stderr,none": 0.026069362295335123 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.2660098522167488, + "acc_stderr,none": 0.031089826002937523 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.28, + "acc_stderr,none": 0.04512608598542128 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.2962962962962963, + "acc_stderr,none": 0.027840811495871923 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.271523178807947, + "acc_stderr,none": 0.036313298039696525 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.3287037037037037, + "acc_stderr,none": 0.03203614084670058 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.20535714285714285, + "acc_stderr,none": 0.038342410214190714 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_none/0.4.8_2025-03-15T00-14-08.586070_lambada_openai.json b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_none/0.4.8_2025-03-15T00-14-08.586070_lambada_openai.json new file mode 100644 index 0000000000000000000000000000000000000000..1def7b263c126fac04b004f9bc220f0c603035c7 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096/pad_none/0.4.8_2025-03-15T00-14-08.586070_lambada_openai.json @@ -0,0 +1,25 @@ +{ + "model": "/models/RWKV-x070-World-0.4B-v2.9-20250107-ctx4096", + "tasks": [ + "lambada_openai" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai": { + "alias": "lambada_openai", + "perplexity,none": 7.333725941235262, + "perplexity_stderr,none": 0.19721967642528107, + "acc,none": 0.574616728119542, + "acc_stderr,none": 0.006887972570117885 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T00-32-01.907082_lambada_openai.json b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T00-32-01.907082_lambada_openai.json new file mode 100644 index 0000000000000000000000000000000000000000..96526f30fa51755770ac587177589d840f5effa6 --- /dev/null +++ b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T00-32-01.907082_lambada_openai.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-1.5B-v3-20250127-ctx4096", + "tasks": [ + "lambada_openai" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai": { + "alias": "lambada_openai", + "perplexity,none": 4.174870788924788, + "perplexity_stderr,none": 0.09003244838599012, + "acc,none": 0.6951290510382302, + "acc_stderr,none": 0.006413613926848405 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T02-00-46.942678_hellaswag.json b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T02-00-46.942678_hellaswag.json new file mode 100644 index 0000000000000000000000000000000000000000..dd569d0654292af676f4c2d04de26126fcf765b1 --- /dev/null +++ b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T02-00-46.942678_hellaswag.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-1.5B-v3-20250127-ctx4096", + "tasks": [ + "hellaswag" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "hellaswag": { + "alias": "hellaswag", + "acc,none": 0.5325632344154551, + "acc_stderr,none": 0.004979188195338185, + "acc_norm,none": 0.7075283808006373, + "acc_norm_stderr,none": 0.004539680764142235 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T02-05-57.788724_piqa.json b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T02-05-57.788724_piqa.json new file mode 100644 index 0000000000000000000000000000000000000000..bcfc8c841756554135433f35661156c671d89783 --- /dev/null +++ b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T02-05-57.788724_piqa.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-1.5B-v3-20250127-ctx4096", + "tasks": [ + "piqa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "piqa": { + "alias": "piqa", + "acc,none": 0.7714907508161044, + "acc_stderr,none": 0.009796313511829519, + "acc_norm,none": 0.7736670293797606, + "acc_norm_stderr,none": 0.009763294246879418 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T02-17-28.427153_arc_easy.json b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T02-17-28.427153_arc_easy.json new file mode 100644 index 0000000000000000000000000000000000000000..0098cf5fc29fe780fe389fb27161dce51a6b76ad --- /dev/null +++ b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T02-17-28.427153_arc_easy.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-1.5B-v3-20250127-ctx4096", + "tasks": [ + "arc_easy" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_easy": { + "alias": "arc_easy", + "acc,none": 0.7811447811447811, + "acc_stderr,none": 0.008484229631251974, + "acc_norm,none": 0.7622053872053872, + "acc_norm_stderr,none": 0.00873585075350799 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T02-23-51.810081_arc_challenge.json b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T02-23-51.810081_arc_challenge.json new file mode 100644 index 0000000000000000000000000000000000000000..7f24ffaa4ff444b626755ce83a44e70ef00390a6 --- /dev/null +++ b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T02-23-51.810081_arc_challenge.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-1.5B-v3-20250127-ctx4096", + "tasks": [ + "arc_challenge" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_challenge": { + "alias": "arc_challenge", + "acc,none": 0.4445392491467577, + "acc_stderr,none": 0.01452122640562708, + "acc_norm,none": 0.47696245733788395, + "acc_norm_stderr,none": 0.014595873205358273 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T06-41-34.617031_glue.json b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T06-41-34.617031_glue.json new file mode 100644 index 0000000000000000000000000000000000000000..bb28a1bf38439ddae85c0a41de77b1a1560387b9 --- /dev/null +++ b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T06-41-34.617031_glue.json @@ -0,0 +1,69 @@ +{ + "model": "/models/RWKV-x070-World-1.5B-v3-20250127-ctx4096", + "tasks": [ + "glue" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "cola": { + "alias": "cola", + "mcc,none": 0.0307324251468042, + "mcc_stderr,none": 0.030274978371852362 + }, + "mnli": { + "alias": "mnli", + "acc,none": 0.49190015282730515, + "acc_stderr,none": 0.005046496589231483 + }, + "mnli_mismatch": { + "alias": "mnli_mismatch", + "acc,none": 0.4823026851098454, + "acc_stderr,none": 0.005039633429206254 + }, + "mrpc": { + "alias": "mrpc", + "acc,none": 0.7573529411764706, + "acc_stderr,none": 0.021249047596394875, + "f1,none": 0.835820895522388, + "f1_stderr,none": 0.01641362934915138 + }, + "qnli": { + "alias": "qnli", + "acc,none": 0.5046677649643053, + "acc_stderr,none": 0.006765115735419823 + }, + "qqp": { + "alias": "qqp", + "acc,none": 0.6313875834776156, + "acc_stderr,none": 0.002399310744871031, + "f1,none": 0.01766528244677345, + "f1_stderr,none": 0.0015027489748862003 + }, + "rte": { + "alias": "rte", + "acc,none": 0.592057761732852, + "acc_stderr,none": 0.0295819525196062 + }, + "sst2": { + "alias": "sst2", + "acc,none": 0.9105504587155964, + "acc_stderr,none": 0.009670122820901166 + }, + "wnli": { + "alias": "wnli", + "acc,none": 0.6197183098591549, + "acc_stderr,none": 0.05802308977399397 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T06-44-21.159812_winogrande.json b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T06-44-21.159812_winogrande.json new file mode 100644 index 0000000000000000000000000000000000000000..bb128a1d0609c04ce3435b3d694e3b0574880aa4 --- /dev/null +++ b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T06-44-21.159812_winogrande.json @@ -0,0 +1,25 @@ +{ + "model": "/models/RWKV-x070-World-1.5B-v3-20250127-ctx4096", + "tasks": [ + "winogrande" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "winogrande": { + "alias": "winogrande", + "acc,none": 0.6819258089976322, + "acc_stderr,none": 0.013089285079884678 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T06-55-51.822309_sciq.json b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T06-55-51.822309_sciq.json new file mode 100644 index 0000000000000000000000000000000000000000..90d26bcaba32b40f426b0aac2f884850726fe42a --- /dev/null +++ b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T06-55-51.822309_sciq.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-1.5B-v3-20250127-ctx4096", + "tasks": [ + "sciq" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "sciq": { + "alias": "sciq", + "acc,none": 0.943, + "acc_stderr,none": 0.007335175853706828, + "acc_norm,none": 0.916, + "acc_norm_stderr,none": 0.008776162089491129 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T09-44-55.589219_mmlu.json b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T09-44-55.589219_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..711e70f1178c21270b87e521ba1067e900ed927a --- /dev/null +++ b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T09-44-55.589219_mmlu.json @@ -0,0 +1,330 @@ +{ + "model": "/models/RWKV-x070-World-1.5B-v3-20250127-ctx4096", + "tasks": [ + "mmlu" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.4325594644637516, + "acc_stderr,none": 0.004076131622419778, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.4148777895855473, + "acc_stderr,none": 0.006926849412307071, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.2777777777777778, + "acc_stderr,none": 0.04006168083848876 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.6, + "acc_stderr,none": 0.03825460278380025 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.6176470588235294, + "acc_stderr,none": 0.034107853389047184 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.6244725738396625, + "acc_stderr,none": 0.03152256243091157 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.5206611570247934, + "acc_stderr,none": 0.04560456086387235 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.5555555555555556, + "acc_stderr,none": 0.04803752235190193 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.43558282208588955, + "acc_stderr,none": 0.03895632464138936 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.47109826589595377, + "acc_stderr,none": 0.026874085883518348 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.27039106145251396, + "acc_stderr,none": 0.014854993938010076 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.5594855305466238, + "acc_stderr,none": 0.028196400574197426 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.5061728395061729, + "acc_stderr,none": 0.027818623962583302 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.3220338983050847, + "acc_stderr,none": 0.01193393607189109 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.6608187134502924, + "acc_stderr,none": 0.036310534964889056 + }, + "mmlu_other": { + "acc,none": 0.4734470550370132, + "acc_stderr,none": 0.008813899255282013, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.45, + "acc_stderr,none": 0.05 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.4679245283018868, + "acc_stderr,none": 0.03070948699255655 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.3988439306358382, + "acc_stderr,none": 0.03733626655383509 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.26, + "acc_stderr,none": 0.044084400227680794 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.5067264573991032, + "acc_stderr,none": 0.03355476596234355 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.5631067961165048, + "acc_stderr,none": 0.04911147107365777 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.6153846153846154, + "acc_stderr,none": 0.03187195347942466 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.48, + "acc_stderr,none": 0.050211673156867795 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.5568326947637292, + "acc_stderr,none": 0.0177640850353484 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.49019607843137253, + "acc_stderr,none": 0.028624412550167958 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.32269503546099293, + "acc_stderr,none": 0.027889139300534795 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.3492647058823529, + "acc_stderr,none": 0.028959755196824855 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.43373493975903615, + "acc_stderr,none": 0.03858158940685516 + }, + "mmlu_social_sciences": { + "acc,none": 0.48976275593110175, + "acc_stderr,none": 0.00890971099432837, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.23684210526315788, + "acc_stderr,none": 0.03999423879281336 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.4797979797979798, + "acc_stderr,none": 0.03559443565563918 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.5906735751295337, + "acc_stderr,none": 0.03548608168860806 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.4205128205128205, + "acc_stderr,none": 0.02502861027671086 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.4411764705882353, + "acc_stderr,none": 0.0322529423239964 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.5743119266055046, + "acc_stderr,none": 0.0211992359724708 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.48091603053435117, + "acc_stderr,none": 0.04382094705550988 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.45588235294117646, + "acc_stderr,none": 0.020148939420415735 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.45454545454545453, + "acc_stderr,none": 0.04769300568972744 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.47346938775510206, + "acc_stderr,none": 0.03196412734523272 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.6019900497512438, + "acc_stderr,none": 0.034611994290400135 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.6, + "acc_stderr,none": 0.04923659639173309 + }, + "mmlu_stem": { + "acc,none": 0.3628290516967967, + "acc_stderr,none": 0.00846519332082686, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.28, + "acc_stderr,none": 0.04512608598542127 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.4444444444444444, + "acc_stderr,none": 0.04292596718256981 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.375, + "acc_stderr,none": 0.039397364351956274 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.4861111111111111, + "acc_stderr,none": 0.04179596617581 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.3, + "acc_stderr,none": 0.046056618647183814 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.39, + "acc_stderr,none": 0.04902071300001975 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.31, + "acc_stderr,none": 0.04648231987117316 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.3333333333333333, + "acc_stderr,none": 0.04690650298201942 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.55, + "acc_stderr,none": 0.049999999999999996 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.39574468085106385, + "acc_stderr,none": 0.031967586978353627 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.4413793103448276, + "acc_stderr,none": 0.04137931034482758 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.291005291005291, + "acc_stderr,none": 0.023393826500484875 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.4806451612903226, + "acc_stderr,none": 0.02842268740431211 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.2955665024630542, + "acc_stderr,none": 0.032104944337514575 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.42, + "acc_stderr,none": 0.04960449637488584 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.25925925925925924, + "acc_stderr,none": 0.026719240783712173 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.2913907284768212, + "acc_stderr,none": 0.03710185726119994 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.3287037037037037, + "acc_stderr,none": 0.03203614084670058 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.33035714285714285, + "acc_stderr,none": 0.04464285714285713 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T10-55-18.603205_lambada_multilingual.json b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T10-55-18.603205_lambada_multilingual.json new file mode 100644 index 0000000000000000000000000000000000000000..9f2181d4c6d92dcd2b1a2dd286e14243c96fc314 --- /dev/null +++ b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T10-55-18.603205_lambada_multilingual.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-x070-World-1.5B-v3-20250127-ctx4096", + "tasks": [ + "lambada_multilingual" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai_mt_de": { + "alias": "lambada_openai_mt_de", + "perplexity,none": 50.33220699652879, + "perplexity_stderr,none": 2.875751810537097, + "acc,none": 0.37725596739763245, + "acc_stderr,none": 0.006752816045614378 + }, + "lambada_openai_mt_en": { + "alias": "lambada_openai_mt_en", + "perplexity,none": 4.174870790718894, + "perplexity_stderr,none": 0.09003244843526123, + "acc,none": 0.6951290510382302, + "acc_stderr,none": 0.006413613926848405 + }, + "lambada_openai_mt_es": { + "alias": "lambada_openai_mt_es", + "perplexity,none": 47.37317638732882, + "perplexity_stderr,none": 2.4684015404741317, + "acc,none": 0.40403648360178535, + "acc_stderr,none": 0.006836474546408966 + }, + "lambada_openai_mt_fr": { + "alias": "lambada_openai_mt_fr", + "perplexity,none": 27.656931959217978, + "perplexity_stderr,none": 1.4381410539292208, + "acc,none": 0.4867067727537357, + "acc_stderr,none": 0.0069635153076936076 + }, + "lambada_openai_mt_it": { + "alias": "lambada_openai_mt_it", + "perplexity,none": 38.527507581412365, + "perplexity_stderr,none": 2.132282061178724, + "acc,none": 0.45623908402872115, + "acc_stderr,none": 0.006939246426049399 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T13-05-13.140733_pawsx.json b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T13-05-13.140733_pawsx.json new file mode 100644 index 0000000000000000000000000000000000000000..ec236eaa0f6c847ce0916ca32f2ee5bac8d7540b --- /dev/null +++ b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T13-05-13.140733_pawsx.json @@ -0,0 +1,60 @@ +{ + "model": "/models/RWKV-x070-World-1.5B-v3-20250127-ctx4096", + "tasks": [ + "pawsx" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "pawsx": { + "acc,none": 0.5482142857142858, + "acc_stderr,none": 0.004177314582530008, + "alias": "pawsx" + }, + "paws_de": { + "alias": " - paws_de", + "acc,none": 0.587, + "acc_stderr,none": 0.011012544577391422 + }, + "paws_en": { + "alias": " - paws_en", + "acc,none": 0.6345, + "acc_stderr,none": 0.010770927603540936 + }, + "paws_es": { + "alias": " - paws_es", + "acc,none": 0.6115, + "acc_stderr,none": 0.010901527262192295 + }, + "paws_fr": { + "alias": " - paws_fr", + "acc,none": 0.5455, + "acc_stderr,none": 0.011136735987003725 + }, + "paws_ja": { + "alias": " - paws_ja", + "acc,none": 0.4855, + "acc_stderr,none": 0.011178432523249468 + }, + "paws_ko": { + "alias": " - paws_ko", + "acc,none": 0.4825, + "acc_stderr,none": 0.011176284251254182 + }, + "paws_zh": { + "alias": " - paws_zh", + "acc,none": 0.491, + "acc_stderr,none": 0.011181324206260288 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T13-17-39.735091_xcopa.json b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T13-17-39.735091_xcopa.json new file mode 100644 index 0000000000000000000000000000000000000000..6a377a0699aa374d534f564f6d429e17746987fb --- /dev/null +++ b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T13-17-39.735091_xcopa.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-x070-World-1.5B-v3-20250127-ctx4096", + "tasks": [ + "xcopa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xcopa": { + "acc,none": 0.5974545454545455, + "acc_stderr,none": 0.006555143534329369, + "alias": "xcopa" + }, + "xcopa_et": { + "alias": " - xcopa_et", + "acc,none": 0.586, + "acc_stderr,none": 0.02204949796982787 + }, + "xcopa_ht": { + "alias": " - xcopa_ht", + "acc,none": 0.506, + "acc_stderr,none": 0.022381462412439324 + }, + "xcopa_id": { + "alias": " - xcopa_id", + "acc,none": 0.69, + "acc_stderr,none": 0.020704041021724805 + }, + "xcopa_it": { + "alias": " - xcopa_it", + "acc,none": 0.668, + "acc_stderr,none": 0.021081766571222856 + }, + "xcopa_qu": { + "alias": " - xcopa_qu", + "acc,none": 0.494, + "acc_stderr,none": 0.022381462412439324 + }, + "xcopa_sw": { + "alias": " - xcopa_sw", + "acc,none": 0.534, + "acc_stderr,none": 0.022331264423258383 + }, + "xcopa_ta": { + "alias": " - xcopa_ta", + "acc,none": 0.566, + "acc_stderr,none": 0.022187215803029004 + }, + "xcopa_th": { + "alias": " - xcopa_th", + "acc,none": 0.57, + "acc_stderr,none": 0.02216263442665284 + }, + "xcopa_tr": { + "alias": " - xcopa_tr", + "acc,none": 0.606, + "acc_stderr,none": 0.021874299301689253 + }, + "xcopa_vi": { + "alias": " - xcopa_vi", + "acc,none": 0.668, + "acc_stderr,none": 0.021081766571222862 + }, + "xcopa_zh": { + "alias": " - xcopa_zh", + "acc,none": 0.684, + "acc_stderr,none": 0.020812359515855864 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T17-23-32.245915_xnli.json b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T17-23-32.245915_xnli.json new file mode 100644 index 0000000000000000000000000000000000000000..04e98b4b07108151039bdb1d3cb92282016bcccd --- /dev/null +++ b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T17-23-32.245915_xnli.json @@ -0,0 +1,100 @@ +{ + "model": "/models/RWKV-x070-World-1.5B-v3-20250127-ctx4096", + "tasks": [ + "xnli" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xnli": { + "acc,none": 0.43694779116465865, + "acc_stderr,none": 0.002553863736991264, + "alias": "xnli" + }, + "xnli_ar": { + "alias": " - xnli_ar", + "acc,none": 0.3485943775100402, + "acc_stderr,none": 0.00955154205330182 + }, + "xnli_bg": { + "alias": " - xnli_bg", + "acc,none": 0.45502008032128516, + "acc_stderr,none": 0.009981437307797264 + }, + "xnli_de": { + "alias": " - xnli_de", + "acc,none": 0.4931726907630522, + "acc_stderr,none": 0.010021138522919162 + }, + "xnli_el": { + "alias": " - xnli_el", + "acc,none": 0.41967871485943775, + "acc_stderr,none": 0.009891912665432366 + }, + "xnli_en": { + "alias": " - xnli_en", + "acc,none": 0.5522088353413654, + "acc_stderr,none": 0.009967287545636128 + }, + "xnli_es": { + "alias": " - xnli_es", + "acc,none": 0.43172690763052207, + "acc_stderr,none": 0.009928203186112919 + }, + "xnli_fr": { + "alias": " - xnli_fr", + "acc,none": 0.4867469879518072, + "acc_stderr,none": 0.010018551648218457 + }, + "xnli_hi": { + "alias": " - xnli_hi", + "acc,none": 0.44738955823293175, + "acc_stderr,none": 0.009966439091407937 + }, + "xnli_ru": { + "alias": " - xnli_ru", + "acc,none": 0.4670682730923695, + "acc_stderr,none": 0.010000311392557843 + }, + "xnli_sw": { + "alias": " - xnli_sw", + "acc,none": 0.37991967871485943, + "acc_stderr,none": 0.009728758452987867 + }, + "xnli_th": { + "alias": " - xnli_th", + "acc,none": 0.42128514056224897, + "acc_stderr,none": 0.009897099560589201 + }, + "xnli_tr": { + "alias": " - xnli_tr", + "acc,none": 0.4506024096385542, + "acc_stderr,none": 0.009973042774811673 + }, + "xnli_ur": { + "alias": " - xnli_ur", + "acc,none": 0.39036144578313253, + "acc_stderr,none": 0.009778161879954582 + }, + "xnli_vi": { + "alias": " - xnli_vi", + "acc,none": 0.43253012048192774, + "acc_stderr,none": 0.009930409027139452 + }, + "xnli_zh": { + "alias": " - xnli_zh", + "acc,none": 0.3779116465863454, + "acc_stderr,none": 0.009718712281227466 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T19-04-47.179291_xstorycloze.json b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T19-04-47.179291_xstorycloze.json new file mode 100644 index 0000000000000000000000000000000000000000..3a382bc77969909233bd0415590993e6498d99c5 --- /dev/null +++ b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T19-04-47.179291_xstorycloze.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-x070-World-1.5B-v3-20250127-ctx4096", + "tasks": [ + "xstorycloze" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xstorycloze": { + "acc,none": 0.6136213224234403, + "acc_stderr,none": 0.0037432233645277526, + "alias": "xstorycloze" + }, + "xstorycloze_ar": { + "alias": " - xstorycloze_ar", + "acc,none": 0.5731303772336201, + "acc_stderr,none": 0.012728753181936876 + }, + "xstorycloze_en": { + "alias": " - xstorycloze_en", + "acc,none": 0.7663798808735937, + "acc_stderr,none": 0.01088901746568812 + }, + "xstorycloze_es": { + "alias": " - xstorycloze_es", + "acc,none": 0.671078755790867, + "acc_stderr,none": 0.012090499234239516 + }, + "xstorycloze_eu": { + "alias": " - xstorycloze_eu", + "acc,none": 0.5678358702845797, + "acc_stderr,none": 0.012748153864597584 + }, + "xstorycloze_hi": { + "alias": " - xstorycloze_hi", + "acc,none": 0.587028457974851, + "acc_stderr,none": 0.012670716290966723 + }, + "xstorycloze_id": { + "alias": " - xstorycloze_id", + "acc,none": 0.6373262739907346, + "acc_stderr,none": 0.012372301216772916 + }, + "xstorycloze_my": { + "alias": " - xstorycloze_my", + "acc,none": 0.5360688285903376, + "acc_stderr,none": 0.01283360240662002 + }, + "xstorycloze_ru": { + "alias": " - xstorycloze_ru", + "acc,none": 0.6485771012574454, + "acc_stderr,none": 0.012285910871738331 + }, + "xstorycloze_sw": { + "alias": " - xstorycloze_sw", + "acc,none": 0.5234943745863666, + "acc_stderr,none": 0.01285291253005175 + }, + "xstorycloze_te": { + "alias": " - xstorycloze_te", + "acc,none": 0.598941098610192, + "acc_stderr,none": 0.012612688318767057 + }, + "xstorycloze_zh": { + "alias": " - xstorycloze_zh", + "acc,none": 0.6399735274652548, + "acc_stderr,none": 0.012352638981498527 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T19-14-11.408082_xwinograd.json b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T19-14-11.408082_xwinograd.json new file mode 100644 index 0000000000000000000000000000000000000000..76fba0f01af7b948392cbfa81d442388cdab7e4a --- /dev/null +++ b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T19-14-11.408082_xwinograd.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-x070-World-1.5B-v3-20250127-ctx4096", + "tasks": [ + "xwinograd" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xwinograd": { + "acc,none": 0.7979321195774332, + "acc_stderr,none": 0.005907201283301501, + "alias": "xwinograd" + }, + "xwinograd_en": { + "alias": " - xwinograd_en", + "acc,none": 0.8705376344086021, + "acc_stderr,none": 0.006963819130882298 + }, + "xwinograd_fr": { + "alias": " - xwinograd_fr", + "acc,none": 0.6626506024096386, + "acc_stderr,none": 0.05221260262032129 + }, + "xwinograd_jp": { + "alias": " - xwinograd_jp", + "acc,none": 0.7101147028154328, + "acc_stderr,none": 0.014658670817167402 + }, + "xwinograd_pt": { + "alias": " - xwinograd_pt", + "acc,none": 0.7338403041825095, + "acc_stderr,none": 0.027303685972946624 + }, + "xwinograd_ru": { + "alias": " - xwinograd_ru", + "acc,none": 0.6761904761904762, + "acc_stderr,none": 0.02640672299672999 + }, + "xwinograd_zh": { + "alias": " - xwinograd_zh", + "acc,none": 0.7619047619047619, + "acc_stderr,none": 0.018990732054332833 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T21-53-53.131133_mmlu.json b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T21-53-53.131133_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..395bcdc6b6a40859867ce76be4654faffb672f14 --- /dev/null +++ b/lm_eval/RWKV-x070-World-1.5B-v3-20250127-ctx4096/pad_11/0.4.8_2025-03-15T21-53-53.131133_mmlu.json @@ -0,0 +1,330 @@ +{ + "model": "/models/RWKV-x070-World-1.5B-v3-20250127-ctx4096", + "tasks": [ + "mmlu" + ], + "num_fewshot": 5, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.4325594644637516, + "acc_stderr,none": 0.004076131622419778, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.4148777895855473, + "acc_stderr,none": 0.006926849412307071, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.2777777777777778, + "acc_stderr,none": 0.04006168083848876 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.6, + "acc_stderr,none": 0.03825460278380025 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.6176470588235294, + "acc_stderr,none": 0.034107853389047184 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.6244725738396625, + "acc_stderr,none": 0.03152256243091157 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.5206611570247934, + "acc_stderr,none": 0.04560456086387235 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.5555555555555556, + "acc_stderr,none": 0.04803752235190193 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.43558282208588955, + "acc_stderr,none": 0.03895632464138936 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.47109826589595377, + "acc_stderr,none": 0.026874085883518348 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.27039106145251396, + "acc_stderr,none": 0.014854993938010076 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.5594855305466238, + "acc_stderr,none": 0.028196400574197426 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.5061728395061729, + "acc_stderr,none": 0.027818623962583302 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.3220338983050847, + "acc_stderr,none": 0.01193393607189109 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.6608187134502924, + "acc_stderr,none": 0.036310534964889056 + }, + "mmlu_other": { + "acc,none": 0.4734470550370132, + "acc_stderr,none": 0.008813899255282013, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.45, + "acc_stderr,none": 0.05 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.4679245283018868, + "acc_stderr,none": 0.03070948699255655 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.3988439306358382, + "acc_stderr,none": 0.03733626655383509 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.26, + "acc_stderr,none": 0.044084400227680794 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.5067264573991032, + "acc_stderr,none": 0.03355476596234355 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.5631067961165048, + "acc_stderr,none": 0.04911147107365777 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.6153846153846154, + "acc_stderr,none": 0.03187195347942466 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.48, + "acc_stderr,none": 0.050211673156867795 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.5568326947637292, + "acc_stderr,none": 0.0177640850353484 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.49019607843137253, + "acc_stderr,none": 0.028624412550167958 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.32269503546099293, + "acc_stderr,none": 0.027889139300534795 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.3492647058823529, + "acc_stderr,none": 0.028959755196824855 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.43373493975903615, + "acc_stderr,none": 0.03858158940685516 + }, + "mmlu_social_sciences": { + "acc,none": 0.48976275593110175, + "acc_stderr,none": 0.00890971099432837, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.23684210526315788, + "acc_stderr,none": 0.03999423879281336 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.4797979797979798, + "acc_stderr,none": 0.03559443565563918 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.5906735751295337, + "acc_stderr,none": 0.03548608168860806 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.4205128205128205, + "acc_stderr,none": 0.02502861027671086 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.4411764705882353, + "acc_stderr,none": 0.0322529423239964 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.5743119266055046, + "acc_stderr,none": 0.0211992359724708 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.48091603053435117, + "acc_stderr,none": 0.04382094705550988 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.45588235294117646, + "acc_stderr,none": 0.020148939420415735 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.45454545454545453, + "acc_stderr,none": 0.04769300568972744 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.47346938775510206, + "acc_stderr,none": 0.03196412734523272 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.6019900497512438, + "acc_stderr,none": 0.034611994290400135 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.6, + "acc_stderr,none": 0.04923659639173309 + }, + "mmlu_stem": { + "acc,none": 0.3628290516967967, + "acc_stderr,none": 0.00846519332082686, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.28, + "acc_stderr,none": 0.04512608598542127 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.4444444444444444, + "acc_stderr,none": 0.04292596718256981 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.375, + "acc_stderr,none": 0.039397364351956274 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.4861111111111111, + "acc_stderr,none": 0.04179596617581 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.3, + "acc_stderr,none": 0.046056618647183814 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.39, + "acc_stderr,none": 0.04902071300001975 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.31, + "acc_stderr,none": 0.04648231987117316 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.3333333333333333, + "acc_stderr,none": 0.04690650298201942 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.55, + "acc_stderr,none": 0.049999999999999996 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.39574468085106385, + "acc_stderr,none": 0.031967586978353627 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.4413793103448276, + "acc_stderr,none": 0.04137931034482758 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.291005291005291, + "acc_stderr,none": 0.023393826500484875 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.4806451612903226, + "acc_stderr,none": 0.02842268740431211 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.2955665024630542, + "acc_stderr,none": 0.032104944337514575 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.42, + "acc_stderr,none": 0.04960449637488584 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.25925925925925924, + "acc_stderr,none": 0.026719240783712173 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.2913907284768212, + "acc_stderr,none": 0.03710185726119994 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.3287037037037037, + "acc_stderr,none": 0.03203614084670058 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.33035714285714285, + "acc_stderr,none": 0.04464285714285713 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T00-45-02.563429_lambada_openai.json b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T00-45-02.563429_lambada_openai.json new file mode 100644 index 0000000000000000000000000000000000000000..c5732d7575e55029f45f2fa440c67f9b83d0c484 --- /dev/null +++ b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T00-45-02.563429_lambada_openai.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-2.9B-v3-20250211-ctx4096", + "tasks": [ + "lambada_openai" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai": { + "alias": "lambada_openai", + "perplexity,none": 3.4448142120876546, + "perplexity_stderr,none": 0.06805866415475693, + "acc,none": 0.7341354550747138, + "acc_stderr,none": 0.006155032983828544 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T03-25-51.899753_hellaswag.json b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T03-25-51.899753_hellaswag.json new file mode 100644 index 0000000000000000000000000000000000000000..d5856118aebea65c435ee69c373cffdeff3d5901 --- /dev/null +++ b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T03-25-51.899753_hellaswag.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-2.9B-v3-20250211-ctx4096", + "tasks": [ + "hellaswag" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "hellaswag": { + "alias": "hellaswag", + "acc,none": 0.5697072296355308, + "acc_stderr,none": 0.004941051795214782, + "acc_norm,none": 0.7644891455885282, + "acc_norm_stderr,none": 0.004234504924494641 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T03-34-35.486419_piqa.json b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T03-34-35.486419_piqa.json new file mode 100644 index 0000000000000000000000000000000000000000..5295b6e8d3f5c5297f2901193ff1a7ad25f17b72 --- /dev/null +++ b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T03-34-35.486419_piqa.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-2.9B-v3-20250211-ctx4096", + "tasks": [ + "piqa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "piqa": { + "alias": "piqa", + "acc,none": 0.7965179542981502, + "acc_stderr,none": 0.009393041784049923, + "acc_norm,none": 0.7959738846572362, + "acc_norm_stderr,none": 0.00940237810294265 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T03-54-20.759902_arc_easy.json b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T03-54-20.759902_arc_easy.json new file mode 100644 index 0000000000000000000000000000000000000000..8f6965e7044e0176f67105f314a3cafc25ff3b09 --- /dev/null +++ b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T03-54-20.759902_arc_easy.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-2.9B-v3-20250211-ctx4096", + "tasks": [ + "arc_easy" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_easy": { + "alias": "arc_easy", + "acc,none": 0.8101851851851852, + "acc_stderr,none": 0.008046840527852239, + "acc_norm,none": 0.8017676767676768, + "acc_norm_stderr,none": 0.008180497199102583 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T04-05-25.576984_arc_challenge.json b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T04-05-25.576984_arc_challenge.json new file mode 100644 index 0000000000000000000000000000000000000000..1e77e8f5de1ef3861d9a0e743bbce28aa5a4675f --- /dev/null +++ b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T04-05-25.576984_arc_challenge.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-2.9B-v3-20250211-ctx4096", + "tasks": [ + "arc_challenge" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_challenge": { + "alias": "arc_challenge", + "acc,none": 0.4872013651877133, + "acc_stderr,none": 0.014606603181012538, + "acc_norm,none": 0.5136518771331058, + "acc_norm_stderr,none": 0.014605943429860945 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T11-47-35.065128_glue.json b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T11-47-35.065128_glue.json new file mode 100644 index 0000000000000000000000000000000000000000..cb05f4c3e41ca125d3b4e7097e1b32ba5ef70337 --- /dev/null +++ b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T11-47-35.065128_glue.json @@ -0,0 +1,69 @@ +{ + "model": "/models/RWKV-x070-World-2.9B-v3-20250211-ctx4096", + "tasks": [ + "glue" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "cola": { + "alias": "cola", + "mcc,none": 0.06533467288531206, + "mcc_stderr,none": 0.03193435068362558 + }, + "mnli": { + "alias": "mnli", + "acc,none": 0.41385634233316354, + "acc_stderr,none": 0.004971687419191915 + }, + "mnli_mismatch": { + "alias": "mnli_mismatch", + "acc,none": 0.3921887713588283, + "acc_stderr,none": 0.004924170580172808 + }, + "mrpc": { + "alias": "mrpc", + "acc,none": 0.7769607843137255, + "acc_stderr,none": 0.020634452949654675, + "f1,none": 0.8566929133858266, + "f1_stderr,none": 0.01498737105701562 + }, + "qnli": { + "alias": "qnli", + "acc,none": 0.5304777594728172, + "acc_stderr,none": 0.0067528301589159945 + }, + "qqp": { + "alias": "qqp", + "acc,none": 0.7338857284194905, + "acc_stderr,none": 0.0021978690051922102, + "f1,none": 0.46708603695081474, + "f1_stderr,none": 0.004299263216912846 + }, + "rte": { + "alias": "rte", + "acc,none": 0.6967509025270758, + "acc_stderr,none": 0.027668396293593703 + }, + "sst2": { + "alias": "sst2", + "acc,none": 0.9105504587155964, + "acc_stderr,none": 0.009670122820901166 + }, + "wnli": { + "alias": "wnli", + "acc,none": 0.49295774647887325, + "acc_stderr,none": 0.059755502635482904 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T11-51-44.123440_winogrande.json b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T11-51-44.123440_winogrande.json new file mode 100644 index 0000000000000000000000000000000000000000..a45897d1f57c3e83c1739cc41a543ecc3c87ff29 --- /dev/null +++ b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T11-51-44.123440_winogrande.json @@ -0,0 +1,25 @@ +{ + "model": "/models/RWKV-x070-World-2.9B-v3-20250211-ctx4096", + "tasks": [ + "winogrande" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "winogrande": { + "alias": "winogrande", + "acc,none": 0.7277032359905288, + "acc_stderr,none": 0.012510697991453936 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T12-21-13.125017_sciq.json b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T12-21-13.125017_sciq.json new file mode 100644 index 0000000000000000000000000000000000000000..89a70b362f503c85763081e303cc93fcc83b82d0 --- /dev/null +++ b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T12-21-13.125017_sciq.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-2.9B-v3-20250211-ctx4096", + "tasks": [ + "sciq" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "sciq": { + "alias": "sciq", + "acc,none": 0.95, + "acc_stderr,none": 0.0068954729748978765, + "acc_norm,none": 0.927, + "acc_norm_stderr,none": 0.008230354715244054 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T17-22-27.761938_mmlu.json b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T17-22-27.761938_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..e22bd2200f205d267d2205e38e4718f269c19a37 --- /dev/null +++ b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T17-22-27.761938_mmlu.json @@ -0,0 +1,330 @@ +{ + "model": "/models/RWKV-x070-World-2.9B-v3-20250211-ctx4096", + "tasks": [ + "mmlu" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.5497792337273892, + "acc_stderr,none": 0.003963981576508324, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.48437832093517535, + "acc_stderr,none": 0.0067885499316894275, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.29365079365079366, + "acc_stderr,none": 0.040735243221471255 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.6666666666666666, + "acc_stderr,none": 0.0368105086916155 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.7303921568627451, + "acc_stderr,none": 0.03114557065948678 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.7468354430379747, + "acc_stderr,none": 0.02830465794303529 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.6198347107438017, + "acc_stderr,none": 0.04431324501968431 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.6296296296296297, + "acc_stderr,none": 0.04668408033024931 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.6503067484662577, + "acc_stderr,none": 0.03746668325470021 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.5606936416184971, + "acc_stderr,none": 0.026720034380514995 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.2435754189944134, + "acc_stderr,none": 0.01435591196476786 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.6655948553054662, + "acc_stderr,none": 0.02679542232789394 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.6882716049382716, + "acc_stderr,none": 0.02577311116963045 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.378748370273794, + "acc_stderr,none": 0.012389052105003732 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.783625730994152, + "acc_stderr,none": 0.031581495393387324 + }, + "mmlu_other": { + "acc,none": 0.6308336015448986, + "acc_stderr,none": 0.008394869420696602, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.49, + "acc_stderr,none": 0.05024183937956911 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.6150943396226415, + "acc_stderr,none": 0.029946498567699948 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.5606936416184971, + "acc_stderr,none": 0.03784271932887467 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.36, + "acc_stderr,none": 0.04824181513244218 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.6367713004484304, + "acc_stderr,none": 0.032277904428505 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.7281553398058253, + "acc_stderr,none": 0.044052680241409216 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.7948717948717948, + "acc_stderr,none": 0.026453508054040342 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.67, + "acc_stderr,none": 0.04725815626252607 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.7598978288633461, + "acc_stderr,none": 0.015274685213734193 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.6111111111111112, + "acc_stderr,none": 0.027914055510468008 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.4397163120567376, + "acc_stderr,none": 0.02960991207559411 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.5955882352941176, + "acc_stderr,none": 0.029812630701569736 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.463855421686747, + "acc_stderr,none": 0.038823108508905954 + }, + "mmlu_social_sciences": { + "acc,none": 0.6542086447838804, + "acc_stderr,none": 0.008364170798955382, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.32456140350877194, + "acc_stderr,none": 0.04404556157374767 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.6818181818181818, + "acc_stderr,none": 0.033184773338453315 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.7409326424870466, + "acc_stderr,none": 0.03161877917935411 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.5743589743589743, + "acc_stderr,none": 0.02506909438729654 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.6176470588235294, + "acc_stderr,none": 0.03156663099215417 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.7724770642201835, + "acc_stderr,none": 0.017974463578776502 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.7022900763358778, + "acc_stderr,none": 0.04010358942462203 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.5718954248366013, + "acc_stderr,none": 0.0200176292142131 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.6272727272727273, + "acc_stderr,none": 0.04631381319425465 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.6204081632653061, + "acc_stderr,none": 0.031067211262872492 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.8109452736318408, + "acc_stderr,none": 0.027686913588013014 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.8, + "acc_stderr,none": 0.04020151261036844 + }, + "mmlu_stem": { + "acc,none": 0.4655883285759594, + "acc_stderr,none": 0.008601542176930127, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.35, + "acc_stderr,none": 0.04793724854411019 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.5259259259259259, + "acc_stderr,none": 0.04313531696750574 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.631578947368421, + "acc_stderr,none": 0.039255233810529325 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.6180555555555556, + "acc_stderr,none": 0.040629907841466674 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.44, + "acc_stderr,none": 0.04988876515698589 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.43, + "acc_stderr,none": 0.04975698519562428 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.36, + "acc_stderr,none": 0.048241815132442176 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.3137254901960784, + "acc_stderr,none": 0.046170348270067184 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.66, + "acc_stderr,none": 0.04760952285695238 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.5659574468085107, + "acc_stderr,none": 0.03240038086792747 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.5103448275862069, + "acc_stderr,none": 0.04165774775728763 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.37566137566137564, + "acc_stderr,none": 0.02494236893115978 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.6870967741935484, + "acc_stderr,none": 0.02637756702864586 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.47783251231527096, + "acc_stderr,none": 0.035145285621750094 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.52, + "acc_stderr,none": 0.05021167315686779 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.3074074074074074, + "acc_stderr,none": 0.028133252578815632 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.2980132450331126, + "acc_stderr,none": 0.037345356767871984 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.38425925925925924, + "acc_stderr,none": 0.03317354514310742 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.30357142857142855, + "acc_stderr,none": 0.04364226155841044 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T19-28-15.373498_lambada_multilingual.json b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T19-28-15.373498_lambada_multilingual.json new file mode 100644 index 0000000000000000000000000000000000000000..15e2562e45d4db6feb54173a977843eb2de68146 --- /dev/null +++ b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T19-28-15.373498_lambada_multilingual.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-x070-World-2.9B-v3-20250211-ctx4096", + "tasks": [ + "lambada_multilingual" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai_mt_de": { + "alias": "lambada_openai_mt_de", + "perplexity,none": 36.94722881745765, + "perplexity_stderr,none": 2.040854921125967, + "acc,none": 0.4248010867455851, + "acc_stderr,none": 0.006886743547830467 + }, + "lambada_openai_mt_en": { + "alias": "lambada_openai_mt_en", + "perplexity,none": 3.444814210572178, + "perplexity_stderr,none": 0.06805866415921108, + "acc,none": 0.7341354550747138, + "acc_stderr,none": 0.006155032983828544 + }, + "lambada_openai_mt_es": { + "alias": "lambada_openai_mt_es", + "perplexity,none": 32.48268671643239, + "perplexity_stderr,none": 1.629209395318766, + "acc,none": 0.44343101106151755, + "acc_stderr,none": 0.006921251108304397 + }, + "lambada_openai_mt_fr": { + "alias": "lambada_openai_mt_fr", + "perplexity,none": 18.628391886241197, + "perplexity_stderr,none": 0.9264505879099262, + "acc,none": 0.5400737434504173, + "acc_stderr,none": 0.006943568216279225 + }, + "lambada_openai_mt_it": { + "alias": "lambada_openai_mt_it", + "perplexity,none": 24.800628124325186, + "perplexity_stderr,none": 1.3322764645806922, + "acc,none": 0.5049485736464195, + "acc_stderr,none": 0.006965636477805361 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T21-25-02.924067_pawsx.json b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T21-25-02.924067_pawsx.json new file mode 100644 index 0000000000000000000000000000000000000000..3e76cd85ff35f5ea17e7c4179a25571fa05e19f8 --- /dev/null +++ b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T21-25-02.924067_pawsx.json @@ -0,0 +1,60 @@ +{ + "model": "/models/RWKV-x070-World-2.9B-v3-20250211-ctx4096", + "tasks": [ + "pawsx" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "pawsx": { + "acc,none": 0.5822857142857143, + "acc_stderr,none": 0.0041384104160179765, + "alias": "pawsx" + }, + "paws_de": { + "alias": " - paws_de", + "acc,none": 0.6045, + "acc_stderr,none": 0.010936163625212346 + }, + "paws_en": { + "alias": " - paws_en", + "acc,none": 0.608, + "acc_stderr,none": 0.010919139792442533 + }, + "paws_es": { + "alias": " - paws_es", + "acc,none": 0.6425, + "acc_stderr,none": 0.010719343597608065 + }, + "paws_fr": { + "alias": " - paws_fr", + "acc,none": 0.627, + "acc_stderr,none": 0.010816376333990086 + }, + "paws_ja": { + "alias": " - paws_ja", + "acc,none": 0.4675, + "acc_stderr,none": 0.011159486640120933 + }, + "paws_ko": { + "alias": " - paws_ko", + "acc,none": 0.5165, + "acc_stderr,none": 0.011177045144808299 + }, + "paws_zh": { + "alias": " - paws_zh", + "acc,none": 0.61, + "acc_stderr,none": 0.010909147755547927 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T21-48-36.697244_xcopa.json b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T21-48-36.697244_xcopa.json new file mode 100644 index 0000000000000000000000000000000000000000..adbe1d7c4b3bbf48db0468a0f9f186f3d9d3bcef --- /dev/null +++ b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-15T21-48-36.697244_xcopa.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-x070-World-2.9B-v3-20250211-ctx4096", + "tasks": [ + "xcopa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xcopa": { + "acc,none": 0.6314545454545455, + "acc_stderr,none": 0.006419296957185815, + "alias": "xcopa" + }, + "xcopa_et": { + "alias": " - xcopa_et", + "acc,none": 0.598, + "acc_stderr,none": 0.021948929609938612 + }, + "xcopa_ht": { + "alias": " - xcopa_ht", + "acc,none": 0.534, + "acc_stderr,none": 0.02233126442325838 + }, + "xcopa_id": { + "alias": " - xcopa_id", + "acc,none": 0.718, + "acc_stderr,none": 0.020143572847290795 + }, + "xcopa_it": { + "alias": " - xcopa_it", + "acc,none": 0.724, + "acc_stderr,none": 0.020011219298073528 + }, + "xcopa_qu": { + "alias": " - xcopa_qu", + "acc,none": 0.504, + "acc_stderr,none": 0.022382357781962132 + }, + "xcopa_sw": { + "alias": " - xcopa_sw", + "acc,none": 0.572, + "acc_stderr,none": 0.022149790663861926 + }, + "xcopa_ta": { + "alias": " - xcopa_ta", + "acc,none": 0.606, + "acc_stderr,none": 0.02187429930168925 + }, + "xcopa_th": { + "alias": " - xcopa_th", + "acc,none": 0.568, + "acc_stderr,none": 0.02217510926561317 + }, + "xcopa_tr": { + "alias": " - xcopa_tr", + "acc,none": 0.66, + "acc_stderr,none": 0.021206117013673066 + }, + "xcopa_vi": { + "alias": " - xcopa_vi", + "acc,none": 0.724, + "acc_stderr,none": 0.020011219298073535 + }, + "xcopa_zh": { + "alias": " - xcopa_zh", + "acc,none": 0.738, + "acc_stderr,none": 0.01968468882019472 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-16T06-39-19.690588_xnli.json b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-16T06-39-19.690588_xnli.json new file mode 100644 index 0000000000000000000000000000000000000000..2d878df5df9c3f8c4b3f1e96e33040cac10f8a03 --- /dev/null +++ b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-16T06-39-19.690588_xnli.json @@ -0,0 +1,100 @@ +{ + "model": "/models/RWKV-x070-World-2.9B-v3-20250211-ctx4096", + "tasks": [ + "xnli" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xnli": { + "acc,none": 0.4540829986613119, + "acc_stderr,none": 0.002560362561536021, + "alias": "xnli" + }, + "xnli_ar": { + "alias": " - xnli_ar", + "acc,none": 0.3349397590361446, + "acc_stderr,none": 0.009460223484996472 + }, + "xnli_bg": { + "alias": " - xnli_bg", + "acc,none": 0.478714859437751, + "acc_stderr,none": 0.010012987604500418 + }, + "xnli_de": { + "alias": " - xnli_de", + "acc,none": 0.4875502008032129, + "acc_stderr,none": 0.010018965593055394 + }, + "xnli_el": { + "alias": " - xnli_el", + "acc,none": 0.42530120481927713, + "acc_stderr,none": 0.009909597192221129 + }, + "xnli_en": { + "alias": " - xnli_en", + "acc,none": 0.5678714859437751, + "acc_stderr,none": 0.009929309430958669 + }, + "xnli_es": { + "alias": " - xnli_es", + "acc,none": 0.4971887550200803, + "acc_stderr,none": 0.010021914455122176 + }, + "xnli_fr": { + "alias": " - xnli_fr", + "acc,none": 0.5208835341365462, + "acc_stderr,none": 0.010013327358568523 + }, + "xnli_hi": { + "alias": " - xnli_hi", + "acc,none": 0.45461847389558235, + "acc_stderr,none": 0.00998070692297781 + }, + "xnli_ru": { + "alias": " - xnli_ru", + "acc,none": 0.4907630522088353, + "acc_stderr,none": 0.010020362530631358 + }, + "xnli_sw": { + "alias": " - xnli_sw", + "acc,none": 0.39718875502008033, + "acc_stderr,none": 0.00980791507067729 + }, + "xnli_th": { + "alias": " - xnli_th", + "acc,none": 0.44819277108433736, + "acc_stderr,none": 0.009968129426909876 + }, + "xnli_tr": { + "alias": " - xnli_tr", + "acc,none": 0.4742971887550201, + "acc_stderr,none": 0.010008822253312047 + }, + "xnli_ur": { + "alias": " - xnli_ur", + "acc,none": 0.40080321285140563, + "acc_stderr,none": 0.00982285847304738 + }, + "xnli_vi": { + "alias": " - xnli_vi", + "acc,none": 0.43132530120481927, + "acc_stderr,none": 0.009927090290379255 + }, + "xnli_zh": { + "alias": " - xnli_zh", + "acc,none": 0.40160642570281124, + "acc_stderr,none": 0.009826103601507126 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-16T09-58-44.026972_xstorycloze.json b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-16T09-58-44.026972_xstorycloze.json new file mode 100644 index 0000000000000000000000000000000000000000..2a04f8e0af682a9b871c3df7421f2d628f96e283 --- /dev/null +++ b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-16T09-58-44.026972_xstorycloze.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-x070-World-2.9B-v3-20250211-ctx4096", + "tasks": [ + "xstorycloze" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xstorycloze": { + "acc,none": 0.6473738042235726, + "acc_stderr,none": 0.00367521705654363, + "alias": "xstorycloze" + }, + "xstorycloze_ar": { + "alias": " - xstorycloze_ar", + "acc,none": 0.6121773659827928, + "acc_stderr,none": 0.012539110696551463 + }, + "xstorycloze_en": { + "alias": " - xstorycloze_en", + "acc,none": 0.7829252150893448, + "acc_stderr,none": 0.010609046579012717 + }, + "xstorycloze_es": { + "alias": " - xstorycloze_es", + "acc,none": 0.7154202514890801, + "acc_stderr,none": 0.011611655347089394 + }, + "xstorycloze_eu": { + "alias": " - xstorycloze_eu", + "acc,none": 0.6062210456651225, + "acc_stderr,none": 0.01257341591296518 + }, + "xstorycloze_hi": { + "alias": " - xstorycloze_hi", + "acc,none": 0.6194573130377233, + "acc_stderr,none": 0.012494500786685353 + }, + "xstorycloze_id": { + "alias": " - xstorycloze_id", + "acc,none": 0.6803441429516877, + "acc_stderr,none": 0.01200099306329728 + }, + "xstorycloze_my": { + "alias": " - xstorycloze_my", + "acc,none": 0.5665122435473197, + "acc_stderr,none": 0.012752771973917618 + }, + "xstorycloze_ru": { + "alias": " - xstorycloze_ru", + "acc,none": 0.6803441429516877, + "acc_stderr,none": 0.012000993063297279 + }, + "xstorycloze_sw": { + "alias": " - xstorycloze_sw", + "acc,none": 0.5612177365982793, + "acc_stderr,none": 0.012770319186938005 + }, + "xstorycloze_te": { + "alias": " - xstorycloze_te", + "acc,none": 0.6353408338848445, + "acc_stderr,none": 0.012386781532906167 + }, + "xstorycloze_zh": { + "alias": " - xstorycloze_zh", + "acc,none": 0.6611515552614163, + "acc_stderr,none": 0.012180490758739051 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-16T10-15-43.744395_xwinograd.json b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-16T10-15-43.744395_xwinograd.json new file mode 100644 index 0000000000000000000000000000000000000000..36ea64822d5bb1882ee7ae868a1627a0153aa97c --- /dev/null +++ b/lm_eval/RWKV-x070-World-2.9B-v3-20250211-ctx4096/pad_11/0.4.8_2025-03-16T10-15-43.744395_xwinograd.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-x070-World-2.9B-v3-20250211-ctx4096", + "tasks": [ + "xwinograd" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xwinograd": { + "acc,none": 0.8242301640818162, + "acc_stderr,none": 0.005621985207160807, + "alias": "xwinograd" + }, + "xwinograd_en": { + "alias": " - xwinograd_en", + "acc,none": 0.8834408602150537, + "acc_stderr,none": 0.006656467960805373 + }, + "xwinograd_fr": { + "alias": " - xwinograd_fr", + "acc,none": 0.7710843373493976, + "acc_stderr,none": 0.04639613130999837 + }, + "xwinograd_jp": { + "alias": " - xwinograd_jp", + "acc,none": 0.7601668404588112, + "acc_stderr,none": 0.013795146612796672 + }, + "xwinograd_pt": { + "alias": " - xwinograd_pt", + "acc,none": 0.7680608365019012, + "acc_stderr,none": 0.026075593860304693 + }, + "xwinograd_ru": { + "alias": " - xwinograd_ru", + "acc,none": 0.6825396825396826, + "acc_stderr,none": 0.026269018848607703 + }, + "xwinograd_zh": { + "alias": " - xwinograd_zh", + "acc,none": 0.7996031746031746, + "acc_stderr,none": 0.017848374956947147 + } + } +} \ No newline at end of file