diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-06-20.531501_lambada_openai.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-06-20.531501_lambada_openai.json new file mode 100644 index 0000000000000000000000000000000000000000..b52aa048bee2aebe990cdbf0dc0376a5bf8456db --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-06-20.531501_lambada_openai.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "lambada_openai" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 0, + "pad_token_ids": [ + 0 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai": { + "alias": "lambada_openai", + "perplexity,none": 12.407350784089648, + "perplexity_stderr,none": 0.37458781533549945, + "acc,none": 0.48981176013972444, + "acc_stderr,none": 0.006964531366864929 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-21-44.119690_hellaswag.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-21-44.119690_hellaswag.json new file mode 100644 index 0000000000000000000000000000000000000000..09783945a52458c54e7efcea694d038eabb8cab8 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-21-44.119690_hellaswag.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "hellaswag" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 0, + "pad_token_ids": [ + 0 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "hellaswag": { + "alias": "hellaswag", + "acc,none": 0.3467436765584545, + "acc_stderr,none": 0.004749606196363352, + "acc_norm,none": 0.4219279028082055, + "acc_norm_stderr,none": 0.004928578106026366 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-23-17.145963_piqa.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-23-17.145963_piqa.json new file mode 100644 index 0000000000000000000000000000000000000000..91cf02a850d11d30d08c62b84ef206e62dec10ef --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-23-17.145963_piqa.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "piqa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 0, + "pad_token_ids": [ + 0 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "piqa": { + "alias": "piqa", + "acc,none": 0.6713819368879217, + "acc_stderr,none": 0.010959127105167044, + "acc_norm,none": 0.6637649619151251, + "acc_norm_stderr,none": 0.01102234670897023 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-26-14.237824_arc_easy.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-26-14.237824_arc_easy.json new file mode 100644 index 0000000000000000000000000000000000000000..5f35c741c5b8fc4808fd6bc36d18bf619f542f82 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-26-14.237824_arc_easy.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "arc_easy" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 0, + "pad_token_ids": [ + 0 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_easy": { + "alias": "arc_easy", + "acc,none": 0.5660774410774411, + "acc_stderr,none": 0.010169795770462104, + "acc_norm,none": 0.4877946127946128, + "acc_norm_stderr,none": 0.01025672623512901 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-27-50.078594_arc_challenge.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-27-50.078594_arc_challenge.json new file mode 100644 index 0000000000000000000000000000000000000000..388fa3937791041d189d1980ecc478680e5f71de --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-27-50.078594_arc_challenge.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "arc_challenge" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 0, + "pad_token_ids": [ + 0 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_challenge": { + "alias": "arc_challenge", + "acc,none": 0.2363481228668942, + "acc_stderr,none": 0.012414960524301853, + "acc_norm,none": 0.2764505119453925, + "acc_norm_stderr,none": 0.013069662474252428 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-26-56.754045_glue.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-26-56.754045_glue.json new file mode 100644 index 0000000000000000000000000000000000000000..27e1a15c8850671811dd5fa4e4c27e5c4afe74d1 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-26-56.754045_glue.json @@ -0,0 +1,69 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "glue" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 0, + "pad_token_ids": [ + 0 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "cola": { + "alias": "cola", + "mcc,none": 0.0, + "mcc_stderr,none": 0.0 + }, + "mnli": { + "alias": "mnli", + "acc,none": 0.31818644931227713, + "acc_stderr,none": 0.004701653585969689 + }, + "mnli_mismatch": { + "alias": "mnli_mismatch", + "acc,none": 0.31834825061025224, + "acc_stderr,none": 0.004698223389253123 + }, + "mrpc": { + "alias": "mrpc", + "acc,none": 0.31862745098039214, + "acc_stderr,none": 0.023095996571841474, + "f1,none": 0.027972027972027972, + "f1_stderr,none": 0.013657795743166324 + }, + "qnli": { + "alias": "qnli", + "acc,none": 0.49203734211971445, + "acc_stderr,none": 0.006764552590269392 + }, + "qqp": { + "alias": "qqp", + "acc,none": 0.4093742270591145, + "acc_stderr,none": 0.0024455128327978796, + "f1,none": 0.5085108572604713, + "f1_stderr,none": 0.002764972824540013 + }, + "rte": { + "alias": "rte", + "acc,none": 0.5631768953068592, + "acc_stderr,none": 0.029855247390314938 + }, + "sst2": { + "alias": "sst2", + "acc,none": 0.8188073394495413, + "acc_stderr,none": 0.013051249343626403 + }, + "wnli": { + "alias": "wnli", + "acc,none": 0.4647887323943662, + "acc_stderr,none": 0.05961305784972239 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-27-55.978255_winogrande.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-27-55.978255_winogrande.json new file mode 100644 index 0000000000000000000000000000000000000000..2385efed792c46cd1764589cdca4cf70dd1cb1a7 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-27-55.978255_winogrande.json @@ -0,0 +1,25 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "winogrande" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 0, + "pad_token_ids": [ + 0 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "winogrande": { + "alias": "winogrande", + "acc,none": 0.5256511444356748, + "acc_stderr,none": 0.014033980956108558 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-29-37.701435_sciq.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-29-37.701435_sciq.json new file mode 100644 index 0000000000000000000000000000000000000000..a503833a0266c41e321c5bfb39a762b7f7bc4281 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-29-37.701435_sciq.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "sciq" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 0, + "pad_token_ids": [ + 0 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "sciq": { + "alias": "sciq", + "acc,none": 0.862, + "acc_stderr,none": 0.010912152632504417, + "acc_norm,none": 0.79, + "acc_norm_stderr,none": 0.012886662332274527 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-50-27.279294_mmlu.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-50-27.279294_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..a62aa6ca0f268d396fb8acb8c9cbb760c92cbef5 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-50-27.279294_mmlu.json @@ -0,0 +1,330 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "mmlu" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 0, + "pad_token_ids": [ + 0 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.2577980344680245, + "acc_stderr,none": 0.003687241789650295, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.2459086078639745, + "acc_stderr,none": 0.006272632557690886, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.3333333333333333, + "acc_stderr,none": 0.04216370213557836 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.3151515151515151, + "acc_stderr,none": 0.0362773057502241 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.25, + "acc_stderr,none": 0.03039153369274154 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.2742616033755274, + "acc_stderr,none": 0.02904133351059801 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.2396694214876033, + "acc_stderr,none": 0.03896878985070417 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.21296296296296297, + "acc_stderr,none": 0.0395783547198098 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.2392638036809816, + "acc_stderr,none": 0.03351953879521271 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.22832369942196531, + "acc_stderr,none": 0.022598703804321635 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.2547486033519553, + "acc_stderr,none": 0.014572650383409158 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.1832797427652733, + "acc_stderr,none": 0.021974198848265805 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.21604938271604937, + "acc_stderr,none": 0.022899162918445785 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.2503259452411995, + "acc_stderr,none": 0.011064151027165441 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.2222222222222222, + "acc_stderr,none": 0.031885780176863984 + }, + "mmlu_other": { + "acc,none": 0.2581268104280657, + "acc_stderr,none": 0.007836786127741097, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.24, + "acc_stderr,none": 0.04292346959909283 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.2943396226415094, + "acc_stderr,none": 0.028049186315695245 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.2543352601156069, + "acc_stderr,none": 0.0332055644308557 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.2, + "acc_stderr,none": 0.04020151261036843 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.28699551569506726, + "acc_stderr,none": 0.03036037971029195 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.33980582524271846, + "acc_stderr,none": 0.046897659372781356 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.2606837606837607, + "acc_stderr,none": 0.028760348956523414 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.22, + "acc_stderr,none": 0.04163331998932269 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.21966794380587484, + "acc_stderr,none": 0.014805384478371158 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.28104575163398693, + "acc_stderr,none": 0.025738854797818705 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.25886524822695034, + "acc_stderr,none": 0.026129572527180848 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.3235294117647059, + "acc_stderr,none": 0.028418208619406794 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.21084337349397592, + "acc_stderr,none": 0.031755547866299194 + }, + "mmlu_social_sciences": { + "acc,none": 0.2612934676633084, + "acc_stderr,none": 0.007931489440202161, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.23684210526315788, + "acc_stderr,none": 0.039994238792813344 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.2878787878787879, + "acc_stderr,none": 0.03225883512300992 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.25906735751295334, + "acc_stderr,none": 0.0316187791793541 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.26153846153846155, + "acc_stderr,none": 0.022282141204204426 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.2857142857142857, + "acc_stderr,none": 0.029344572500634335 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.25504587155963304, + "acc_stderr,none": 0.018688500856535832 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.2748091603053435, + "acc_stderr,none": 0.03915345408847836 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.25980392156862747, + "acc_stderr,none": 0.017740899509177795 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.2636363636363636, + "acc_stderr,none": 0.04220224692971987 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.24081632653061225, + "acc_stderr,none": 0.027372942201788163 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.2736318407960199, + "acc_stderr,none": 0.031524391865554016 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816505 + }, + "mmlu_stem": { + "acc,none": 0.2718046305106248, + "acc_stderr,none": 0.00790588931133328, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.22, + "acc_stderr,none": 0.04163331998932269 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.2222222222222222, + "acc_stderr,none": 0.03591444084196969 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.27631578947368424, + "acc_stderr,none": 0.03639057569952924 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.2569444444444444, + "acc_stderr,none": 0.03653946969442099 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.36, + "acc_stderr,none": 0.04824181513244218 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.27, + "acc_stderr,none": 0.04461960433384741 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.3, + "acc_stderr,none": 0.046056618647183814 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.3431372549019608, + "acc_stderr,none": 0.047240073523838896 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816506 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.20851063829787234, + "acc_stderr,none": 0.026556982117838725 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.2, + "acc_stderr,none": 0.033333333333333284 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.2724867724867725, + "acc_stderr,none": 0.022930973071633356 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.2903225806451613, + "acc_stderr,none": 0.025822106119415898 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.2561576354679803, + "acc_stderr,none": 0.030712730070982592 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816508 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.3, + "acc_stderr,none": 0.027940457136228416 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.2781456953642384, + "acc_stderr,none": 0.03658603262763743 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.36574074074074076, + "acc_stderr,none": 0.032847388576472056 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.24107142857142858, + "acc_stderr,none": 0.04059867246952687 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T21-00-05.534236_lambada_multilingual.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T21-00-05.534236_lambada_multilingual.json new file mode 100644 index 0000000000000000000000000000000000000000..2f33e9f7fbb8c434762e22a20ab96157c4ef3d45 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T21-00-05.534236_lambada_multilingual.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "lambada_multilingual" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 0, + "pad_token_ids": [ + 0 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai_mt_de": { + "alias": "lambada_openai_mt_de", + "perplexity,none": 217.60668191767158, + "perplexity_stderr,none": 13.386879667864363, + "acc,none": 0.24238307781874635, + "acc_stderr,none": 0.00597018864415414 + }, + "lambada_openai_mt_en": { + "alias": "lambada_openai_mt_en", + "perplexity,none": 12.40735077022245, + "perplexity_stderr,none": 0.37458781378621286, + "acc,none": 0.48981176013972444, + "acc_stderr,none": 0.006964531366864929 + }, + "lambada_openai_mt_es": { + "alias": "lambada_openai_mt_es", + "perplexity,none": 266.1166411896921, + "perplexity_stderr,none": 15.92044089608634, + "acc,none": 0.2501455462837182, + "acc_stderr,none": 0.006033883877757193 + }, + "lambada_openai_mt_fr": { + "alias": "lambada_openai_mt_fr", + "perplexity,none": 128.38541768361372, + "perplexity_stderr,none": 7.512376165987988, + "acc,none": 0.31476809625460894, + "acc_stderr,none": 0.0064703267662255814 + }, + "lambada_openai_mt_it": { + "alias": "lambada_openai_mt_it", + "perplexity,none": 209.98639759161097, + "perplexity_stderr,none": 13.315525534037729, + "acc,none": 0.2850766543760916, + "acc_stderr,none": 0.006289594388741717 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T21-15-59.241658_pawsx.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T21-15-59.241658_pawsx.json new file mode 100644 index 0000000000000000000000000000000000000000..be80cbc6a1c14de4b7962707d0f185f0e0ebb01c --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T21-15-59.241658_pawsx.json @@ -0,0 +1,60 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "pawsx" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 0, + "pad_token_ids": [ + 0 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "pawsx": { + "acc,none": 0.4645714285714286, + "acc_stderr,none": 0.004212978865599906, + "alias": "pawsx" + }, + "paws_de": { + "alias": " - paws_de", + "acc,none": 0.4535, + "acc_stderr,none": 0.011134669525078671 + }, + "paws_en": { + "alias": " - paws_en", + "acc,none": 0.466, + "acc_stderr,none": 0.01115725065242577 + }, + "paws_es": { + "alias": " - paws_es", + "acc,none": 0.508, + "acc_stderr,none": 0.01118170448803001 + }, + "paws_fr": { + "alias": " - paws_fr", + "acc,none": 0.4715, + "acc_stderr,none": 0.011164954236428791 + }, + "paws_ja": { + "alias": " - paws_ja", + "acc,none": 0.4455, + "acc_stderr,none": 0.011116504096687392 + }, + "paws_ko": { + "alias": " - paws_ko", + "acc,none": 0.4555, + "acc_stderr,none": 0.011138757154883975 + }, + "paws_zh": { + "alias": " - paws_zh", + "acc,none": 0.452, + "acc_stderr,none": 0.01113148485052578 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T21-20-10.333493_xcopa.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T21-20-10.333493_xcopa.json new file mode 100644 index 0000000000000000000000000000000000000000..3fed72d49e0b524ae4bfdb41a16cadb2c9694afe --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T21-20-10.333493_xcopa.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "xcopa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 0, + "pad_token_ids": [ + 0 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xcopa": { + "acc,none": 0.5296363636363637, + "acc_stderr,none": 0.006732007807068195, + "alias": "xcopa" + }, + "xcopa_et": { + "alias": " - xcopa_et", + "acc,none": 0.496, + "acc_stderr,none": 0.02238235778196214 + }, + "xcopa_ht": { + "alias": " - xcopa_ht", + "acc,none": 0.526, + "acc_stderr,none": 0.02235279165091416 + }, + "xcopa_id": { + "alias": " - xcopa_id", + "acc,none": 0.56, + "acc_stderr,none": 0.02222133153414303 + }, + "xcopa_it": { + "alias": " - xcopa_it", + "acc,none": 0.524, + "acc_stderr,none": 0.022357273881016403 + }, + "xcopa_qu": { + "alias": " - xcopa_qu", + "acc,none": 0.5, + "acc_stderr,none": 0.022383074051792257 + }, + "xcopa_sw": { + "alias": " - xcopa_sw", + "acc,none": 0.528, + "acc_stderr,none": 0.022347949832668093 + }, + "xcopa_ta": { + "alias": " - xcopa_ta", + "acc,none": 0.534, + "acc_stderr,none": 0.02233126442325838 + }, + "xcopa_th": { + "alias": " - xcopa_th", + "acc,none": 0.554, + "acc_stderr,none": 0.022252153078595897 + }, + "xcopa_tr": { + "alias": " - xcopa_tr", + "acc,none": 0.524, + "acc_stderr,none": 0.0223572738810164 + }, + "xcopa_vi": { + "alias": " - xcopa_vi", + "acc,none": 0.532, + "acc_stderr,none": 0.022337186479044296 + }, + "xcopa_zh": { + "alias": " - xcopa_zh", + "acc,none": 0.548, + "acc_stderr,none": 0.02227969410784342 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T10-31-47.400032_xnli.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T10-31-47.400032_xnli.json new file mode 100644 index 0000000000000000000000000000000000000000..d99bbc1dd137a9494d87ffa0781c9e5e550bcaab --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T10-31-47.400032_xnli.json @@ -0,0 +1,100 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "xnli" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 0, + "pad_token_ids": [ + 0 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xnli": { + "acc,none": 0.3738152610441767, + "acc_stderr,none": 0.0024958379883944465, + "alias": "xnli" + }, + "xnli_ar": { + "alias": " - xnli_ar", + "acc,none": 0.3385542168674699, + "acc_stderr,none": 0.009485250208516881 + }, + "xnli_bg": { + "alias": " - xnli_bg", + "acc,none": 0.3646586345381526, + "acc_stderr,none": 0.009647934990250467 + }, + "xnli_de": { + "alias": " - xnli_de", + "acc,none": 0.41405622489959837, + "acc_stderr,none": 0.009872910116421196 + }, + "xnli_el": { + "alias": " - xnli_el", + "acc,none": 0.3417670682730924, + "acc_stderr,none": 0.009506977398287627 + }, + "xnli_en": { + "alias": " - xnli_en", + "acc,none": 0.4682730923694779, + "acc_stderr,none": 0.010001876146466708 + }, + "xnli_es": { + "alias": " - xnli_es", + "acc,none": 0.37751004016064255, + "acc_stderr,none": 0.009716681793584016 + }, + "xnli_fr": { + "alias": " - xnli_fr", + "acc,none": 0.42610441767068274, + "acc_stderr,none": 0.00991201637745908 + }, + "xnli_hi": { + "alias": " - xnli_hi", + "acc,none": 0.3542168674698795, + "acc_stderr,none": 0.009586620142951845 + }, + "xnli_ru": { + "alias": " - xnli_ru", + "acc,none": 0.42289156626506025, + "acc_stderr,none": 0.009902179034797433 + }, + "xnli_sw": { + "alias": " - xnli_sw", + "acc,none": 0.351004016064257, + "acc_stderr,none": 0.0095667538348033 + }, + "xnli_th": { + "alias": " - xnli_th", + "acc,none": 0.3485943775100402, + "acc_stderr,none": 0.009551542053301817 + }, + "xnli_tr": { + "alias": " - xnli_tr", + "acc,none": 0.3550200803212851, + "acc_stderr,none": 0.009591512730974291 + }, + "xnli_ur": { + "alias": " - xnli_ur", + "acc,none": 0.336144578313253, + "acc_stderr,none": 0.009468634669293527 + }, + "xnli_vi": { + "alias": " - xnli_vi", + "acc,none": 0.37028112449799194, + "acc_stderr,none": 0.009678915409840288 + }, + "xnli_zh": { + "alias": " - xnli_zh", + "acc,none": 0.3381526104417671, + "acc_stderr,none": 0.009482500057981024 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T10-48-34.071254_xstorycloze.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T10-48-34.071254_xstorycloze.json new file mode 100644 index 0000000000000000000000000000000000000000..bd6490e6e3195d9f4a8099fa01771ce3c31e98a9 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T10-48-34.071254_xstorycloze.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "xstorycloze" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 0, + "pad_token_ids": [ + 0 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xstorycloze": { + "acc,none": 0.5253594849888695, + "acc_stderr,none": 0.0038631877983218758, + "alias": "xstorycloze" + }, + "xstorycloze_ar": { + "alias": " - xstorycloze_ar", + "acc,none": 0.47650562541363334, + "acc_stderr,none": 0.012852912530051752 + }, + "xstorycloze_en": { + "alias": " - xstorycloze_en", + "acc,none": 0.6307081403044341, + "acc_stderr,none": 0.01241968588127358 + }, + "xstorycloze_es": { + "alias": " - xstorycloze_es", + "acc,none": 0.5433487756452681, + "acc_stderr,none": 0.01281867645248196 + }, + "xstorycloze_eu": { + "alias": " - xstorycloze_eu", + "acc,none": 0.5201853077432164, + "acc_stderr,none": 0.012856635706498292 + }, + "xstorycloze_hi": { + "alias": " - xstorycloze_hi", + "acc,none": 0.5129053606882858, + "acc_stderr,none": 0.012862838605728474 + }, + "xstorycloze_id": { + "alias": " - xstorycloze_id", + "acc,none": 0.5069490403706155, + "acc_stderr,none": 0.01286588257096072 + }, + "xstorycloze_my": { + "alias": " - xstorycloze_my", + "acc,none": 0.5016545334215751, + "acc_stderr,none": 0.012867054869163341 + }, + "xstorycloze_ru": { + "alias": " - xstorycloze_ru", + "acc,none": 0.5155526141628061, + "acc_stderr,none": 0.012860899111470784 + }, + "xstorycloze_sw": { + "alias": " - xstorycloze_sw", + "acc,none": 0.49636002647253474, + "acc_stderr,none": 0.01286678434828923 + }, + "xstorycloze_te": { + "alias": " - xstorycloze_te", + "acc,none": 0.5360688285903376, + "acc_stderr,none": 0.012833602406620017 + }, + "xstorycloze_zh": { + "alias": " - xstorycloze_zh", + "acc,none": 0.5387160820648577, + "acc_stderr,none": 0.012828493353271535 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T10-51-40.216025_xwinograd.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T10-51-40.216025_xwinograd.json new file mode 100644 index 0000000000000000000000000000000000000000..8ee8fdff54ce3d3e9c58125f8c79c19e9c155f13 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T10-51-40.216025_xwinograd.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "xwinograd" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 0, + "pad_token_ids": [ + 0 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xwinograd": { + "acc,none": 0.640143852551135, + "acc_stderr,none": 0.007122015764479138, + "alias": "xwinograd" + }, + "xwinograd_en": { + "alias": " - xwinograd_en", + "acc,none": 0.7002150537634408, + "acc_stderr,none": 0.00950391558599881 + }, + "xwinograd_fr": { + "alias": " - xwinograd_fr", + "acc,none": 0.6144578313253012, + "acc_stderr,none": 0.0537495779731939 + }, + "xwinograd_jp": { + "alias": " - xwinograd_jp", + "acc,none": 0.5464025026068822, + "acc_stderr,none": 0.016084549821364785 + }, + "xwinograd_pt": { + "alias": " - xwinograd_pt", + "acc,none": 0.5247148288973384, + "acc_stderr,none": 0.030852343325490784 + }, + "xwinograd_ru": { + "alias": " - xwinograd_ru", + "acc,none": 0.5650793650793651, + "acc_stderr,none": 0.027976598287184684 + }, + "xwinograd_zh": { + "alias": " - xwinograd_zh", + "acc,none": 0.6527777777777778, + "acc_stderr,none": 0.021227675707409237 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T11-47-43.816593_mmlu.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T11-47-43.816593_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..596d77069f45a337787dbad08ef3808f4e9e5975 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T11-47-43.816593_mmlu.json @@ -0,0 +1,330 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "mmlu" + ], + "num_fewshot": 5, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 0, + "pad_token_ids": [ + 0 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.2577980344680245, + "acc_stderr,none": 0.003687241789650295, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.2459086078639745, + "acc_stderr,none": 0.006272632557690886, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.3333333333333333, + "acc_stderr,none": 0.04216370213557836 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.3151515151515151, + "acc_stderr,none": 0.0362773057502241 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.25, + "acc_stderr,none": 0.03039153369274154 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.2742616033755274, + "acc_stderr,none": 0.02904133351059801 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.2396694214876033, + "acc_stderr,none": 0.03896878985070417 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.21296296296296297, + "acc_stderr,none": 0.0395783547198098 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.2392638036809816, + "acc_stderr,none": 0.03351953879521271 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.22832369942196531, + "acc_stderr,none": 0.022598703804321635 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.2547486033519553, + "acc_stderr,none": 0.014572650383409158 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.1832797427652733, + "acc_stderr,none": 0.021974198848265805 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.21604938271604937, + "acc_stderr,none": 0.022899162918445785 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.2503259452411995, + "acc_stderr,none": 0.011064151027165441 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.2222222222222222, + "acc_stderr,none": 0.031885780176863984 + }, + "mmlu_other": { + "acc,none": 0.2581268104280657, + "acc_stderr,none": 0.007836786127741097, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.24, + "acc_stderr,none": 0.04292346959909283 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.2943396226415094, + "acc_stderr,none": 0.028049186315695245 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.2543352601156069, + "acc_stderr,none": 0.0332055644308557 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.2, + "acc_stderr,none": 0.04020151261036843 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.28699551569506726, + "acc_stderr,none": 0.03036037971029195 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.33980582524271846, + "acc_stderr,none": 0.046897659372781356 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.2606837606837607, + "acc_stderr,none": 0.028760348956523414 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.22, + "acc_stderr,none": 0.04163331998932269 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.21966794380587484, + "acc_stderr,none": 0.014805384478371158 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.28104575163398693, + "acc_stderr,none": 0.025738854797818705 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.25886524822695034, + "acc_stderr,none": 0.026129572527180848 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.3235294117647059, + "acc_stderr,none": 0.028418208619406794 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.21084337349397592, + "acc_stderr,none": 0.031755547866299194 + }, + "mmlu_social_sciences": { + "acc,none": 0.2612934676633084, + "acc_stderr,none": 0.007931489440202161, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.23684210526315788, + "acc_stderr,none": 0.039994238792813344 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.2878787878787879, + "acc_stderr,none": 0.03225883512300992 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.25906735751295334, + "acc_stderr,none": 0.0316187791793541 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.26153846153846155, + "acc_stderr,none": 0.022282141204204426 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.2857142857142857, + "acc_stderr,none": 0.029344572500634335 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.25504587155963304, + "acc_stderr,none": 0.018688500856535832 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.2748091603053435, + "acc_stderr,none": 0.03915345408847836 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.25980392156862747, + "acc_stderr,none": 0.017740899509177795 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.2636363636363636, + "acc_stderr,none": 0.04220224692971987 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.24081632653061225, + "acc_stderr,none": 0.027372942201788163 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.2736318407960199, + "acc_stderr,none": 0.031524391865554016 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816505 + }, + "mmlu_stem": { + "acc,none": 0.2718046305106248, + "acc_stderr,none": 0.00790588931133328, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.22, + "acc_stderr,none": 0.04163331998932269 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.2222222222222222, + "acc_stderr,none": 0.03591444084196969 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.27631578947368424, + "acc_stderr,none": 0.03639057569952924 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.2569444444444444, + "acc_stderr,none": 0.03653946969442099 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.36, + "acc_stderr,none": 0.04824181513244218 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.27, + "acc_stderr,none": 0.04461960433384741 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.3, + "acc_stderr,none": 0.046056618647183814 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.3431372549019608, + "acc_stderr,none": 0.047240073523838896 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816506 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.20851063829787234, + "acc_stderr,none": 0.026556982117838725 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.2, + "acc_stderr,none": 0.033333333333333284 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.2724867724867725, + "acc_stderr,none": 0.022930973071633356 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.2903225806451613, + "acc_stderr,none": 0.025822106119415898 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.2561576354679803, + "acc_stderr,none": 0.030712730070982592 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816508 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.3, + "acc_stderr,none": 0.027940457136228416 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.2781456953642384, + "acc_stderr,none": 0.03658603262763743 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.36574074074074076, + "acc_stderr,none": 0.032847388576472056 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.24107142857142858, + "acc_stderr,none": 0.04059867246952687 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T11-56-41.258584_lambada_openai.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T11-56-41.258584_lambada_openai.json new file mode 100644 index 0000000000000000000000000000000000000000..90875a642546e95d982e4b2d4b49aecde056bb97 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T11-56-41.258584_lambada_openai.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "lambada_openai" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai": { + "alias": "lambada_openai", + "perplexity,none": 12.642543091460501, + "perplexity_stderr,none": 0.38123879021241674, + "acc,none": 0.4814671065398797, + "acc_stderr,none": 0.006961190829726007 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-14-40.804741_hellaswag.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-14-40.804741_hellaswag.json new file mode 100644 index 0000000000000000000000000000000000000000..b60d5c3403ddcefd04e8eae5b0d19115507db794 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-14-40.804741_hellaswag.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "hellaswag" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "hellaswag": { + "alias": "hellaswag", + "acc,none": 0.34475204142601074, + "acc_stderr,none": 0.00474316003427116, + "acc_norm,none": 0.4213304122684724, + "acc_norm_stderr,none": 0.0049276318064775575 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-16-12.146728_piqa.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-16-12.146728_piqa.json new file mode 100644 index 0000000000000000000000000000000000000000..0f57f2566d33f7792f8d146a55aa499c0afd0a74 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-16-12.146728_piqa.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "piqa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "piqa": { + "alias": "piqa", + "acc,none": 0.6730141458106638, + "acc_stderr,none": 0.010945157126978227, + "acc_norm,none": 0.6653971708378672, + "acc_norm_stderr,none": 0.011009071725162503 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-19-04.686563_arc_easy.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-19-04.686563_arc_easy.json new file mode 100644 index 0000000000000000000000000000000000000000..495cdccb0bdb9e4212537571205443c0c2393a8a --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-19-04.686563_arc_easy.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "arc_easy" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_easy": { + "alias": "arc_easy", + "acc,none": 0.593013468013468, + "acc_stderr,none": 0.010080695355466593, + "acc_norm,none": 0.5437710437710438, + "acc_norm_stderr,none": 0.010220394383722018 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-20-40.573806_arc_challenge.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-20-40.573806_arc_challenge.json new file mode 100644 index 0000000000000000000000000000000000000000..446ed510b3844d0bd72b357ff34bb1278f17c51f --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-20-40.573806_arc_challenge.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "arc_challenge" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_challenge": { + "alias": "arc_challenge", + "acc,none": 0.2551194539249147, + "acc_stderr,none": 0.0127390386952021, + "acc_norm,none": 0.2909556313993174, + "acc_norm_stderr,none": 0.013273077865907593 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-19-33.146311_glue.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-19-33.146311_glue.json new file mode 100644 index 0000000000000000000000000000000000000000..bb052b05fed9e4c74a85d4d97828752c279fe8bc --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-19-33.146311_glue.json @@ -0,0 +1,69 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "glue" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "cola": { + "alias": "cola", + "mcc,none": 0.0, + "mcc_stderr,none": 0.0 + }, + "mnli": { + "alias": "mnli", + "acc,none": 0.31900152827305145, + "acc_stderr,none": 0.00470485695337838 + }, + "mnli_mismatch": { + "alias": "mnli_mismatch", + "acc,none": 0.31682262001627337, + "acc_stderr,none": 0.0046921942364258435 + }, + "mrpc": { + "alias": "mrpc", + "acc,none": 0.3799019607843137, + "acc_stderr,none": 0.024058510831539842, + "f1,none": 0.24925816023738873, + "f1_stderr,none": 0.031299043218421745 + }, + "qnli": { + "alias": "qnli", + "acc,none": 0.48892549881017755, + "acc_stderr,none": 0.006763750866374647 + }, + "qqp": { + "alias": "qqp", + "acc,none": 0.4644076181053673, + "acc_stderr,none": 0.002480392237462389, + "f1,none": 0.46043057908900625, + "f1_stderr,none": 0.0030958101543491746 + }, + "rte": { + "alias": "rte", + "acc,none": 0.5523465703971119, + "acc_stderr,none": 0.02993107036293953 + }, + "sst2": { + "alias": "sst2", + "acc,none": 0.801605504587156, + "acc_stderr,none": 0.013512511513295078 + }, + "wnli": { + "alias": "wnli", + "acc,none": 0.5211267605633803, + "acc_stderr,none": 0.05970805879899504 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-20-31.009617_winogrande.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-20-31.009617_winogrande.json new file mode 100644 index 0000000000000000000000000000000000000000..035b17b65864100110deab86ec2a690d5dec5010 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-20-31.009617_winogrande.json @@ -0,0 +1,25 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "winogrande" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "winogrande": { + "alias": "winogrande", + "acc,none": 0.5272296764009471, + "acc_stderr,none": 0.014031631629827698 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-22-11.575335_sciq.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-22-11.575335_sciq.json new file mode 100644 index 0000000000000000000000000000000000000000..856be2043d1663b310f36f9b145cdd90f98d45af --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-22-11.575335_sciq.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "sciq" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "sciq": { + "alias": "sciq", + "acc,none": 0.863, + "acc_stderr,none": 0.010878848714333299, + "acc_norm,none": 0.81, + "acc_norm_stderr,none": 0.012411851354816324 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-41-59.570521_mmlu.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-41-59.570521_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..304007e5094144eef043aa13fa45378122946035 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-41-59.570521_mmlu.json @@ -0,0 +1,330 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "mmlu" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.25409485828229594, + "acc_stderr,none": 0.003673285974361475, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.2454835281615303, + "acc_stderr,none": 0.006269554905683639, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.3412698412698413, + "acc_stderr,none": 0.04240799327574924 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.3151515151515151, + "acc_stderr,none": 0.0362773057502241 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.24509803921568626, + "acc_stderr,none": 0.030190282453501943 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.2742616033755274, + "acc_stderr,none": 0.02904133351059801 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.23140495867768596, + "acc_stderr,none": 0.03849856098794088 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.21296296296296297, + "acc_stderr,none": 0.039578354719809805 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.26380368098159507, + "acc_stderr,none": 0.03462419931615624 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.2398843930635838, + "acc_stderr,none": 0.022989592543123567 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.24581005586592178, + "acc_stderr,none": 0.01440029642922559 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.1864951768488746, + "acc_stderr,none": 0.022122439772480764 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.2191358024691358, + "acc_stderr,none": 0.023016705640262175 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.24771838331160365, + "acc_stderr,none": 0.011025499291443735 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.22807017543859648, + "acc_stderr,none": 0.03218093795602357 + }, + "mmlu_other": { + "acc,none": 0.25523012552301255, + "acc_stderr,none": 0.00781936823628676, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.22, + "acc_stderr,none": 0.04163331998932269 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.26037735849056604, + "acc_stderr,none": 0.02700876609070809 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.24277456647398843, + "acc_stderr,none": 0.0326926380614177 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.22, + "acc_stderr,none": 0.0416333199893227 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.32286995515695066, + "acc_stderr,none": 0.03138147637575498 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.2912621359223301, + "acc_stderr,none": 0.04498676320572924 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.2863247863247863, + "acc_stderr,none": 0.029614323690456648 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.24, + "acc_stderr,none": 0.04292346959909283 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.23371647509578544, + "acc_stderr,none": 0.015133383278988832 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.23529411764705882, + "acc_stderr,none": 0.024288619466046116 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.2553191489361702, + "acc_stderr,none": 0.02601199293090201 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.2977941176470588, + "acc_stderr,none": 0.027778298701545443 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.22289156626506024, + "acc_stderr,none": 0.03240004825594688 + }, + "mmlu_social_sciences": { + "acc,none": 0.25901852453688656, + "acc_stderr,none": 0.007907951917759793, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.2719298245614035, + "acc_stderr,none": 0.041857744240220575 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.2828282828282828, + "acc_stderr,none": 0.032087795587867514 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.26424870466321243, + "acc_stderr,none": 0.03182155050916647 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.25384615384615383, + "acc_stderr,none": 0.022066054378726253 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.2689075630252101, + "acc_stderr,none": 0.028801392193631276 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.24403669724770644, + "acc_stderr,none": 0.018415286351416406 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.2900763358778626, + "acc_stderr,none": 0.03980066246467765 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.26143790849673204, + "acc_stderr,none": 0.017776947157528027 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.22727272727272727, + "acc_stderr,none": 0.04013964554072774 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.24489795918367346, + "acc_stderr,none": 0.027529637440174923 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.2835820895522388, + "acc_stderr,none": 0.03187187537919797 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.23, + "acc_stderr,none": 0.042295258468165065 + }, + "mmlu_stem": { + "acc,none": 0.26102124960355216, + "acc_stderr,none": 0.007821062178349759, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.19, + "acc_stderr,none": 0.039427724440366234 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.21481481481481482, + "acc_stderr,none": 0.035478541985608236 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.2894736842105263, + "acc_stderr,none": 0.036906779861372814 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.2361111111111111, + "acc_stderr,none": 0.03551446610810826 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.34, + "acc_stderr,none": 0.047609522856952365 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.28, + "acc_stderr,none": 0.04512608598542128 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.25, + "acc_stderr,none": 0.04351941398892446 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.28431372549019607, + "acc_stderr,none": 0.04488482852329017 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.24, + "acc_stderr,none": 0.042923469599092816 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.2170212765957447, + "acc_stderr,none": 0.02694748312149622 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.2206896551724138, + "acc_stderr,none": 0.03455930201924813 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.2671957671957672, + "acc_stderr,none": 0.022789673145776575 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.2806451612903226, + "acc_stderr,none": 0.025560604721022888 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.2561576354679803, + "acc_stderr,none": 0.030712730070982592 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.24, + "acc_stderr,none": 0.04292346959909282 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.2740740740740741, + "acc_stderr,none": 0.027195934804085622 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.24503311258278146, + "acc_stderr,none": 0.03511807571804726 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.3287037037037037, + "acc_stderr,none": 0.03203614084670058 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.25, + "acc_stderr,none": 0.04109974682633932 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-51-36.484339_lambada_multilingual.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-51-36.484339_lambada_multilingual.json new file mode 100644 index 0000000000000000000000000000000000000000..4966623433e989b1ad7b36d104ec4a4fd210d798 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-51-36.484339_lambada_multilingual.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "lambada_multilingual" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai_mt_de": { + "alias": "lambada_openai_mt_de", + "perplexity,none": 212.25983991266492, + "perplexity_stderr,none": 13.036191156227552, + "acc,none": 0.2456821269163594, + "acc_stderr,none": 0.0059975800540142 + }, + "lambada_openai_mt_en": { + "alias": "lambada_openai_mt_en", + "perplexity,none": 12.64254308002806, + "perplexity_stderr,none": 0.3812387886051039, + "acc,none": 0.4814671065398797, + "acc_stderr,none": 0.006961190829726007 + }, + "lambada_openai_mt_es": { + "alias": "lambada_openai_mt_es", + "perplexity,none": 266.87295366430163, + "perplexity_stderr,none": 15.932942020346013, + "acc,none": 0.2536386570929556, + "acc_stderr,none": 0.006061698956508257 + }, + "lambada_openai_mt_fr": { + "alias": "lambada_openai_mt_fr", + "perplexity,none": 129.37466016136514, + "perplexity_stderr,none": 7.571204962328844, + "acc,none": 0.31593246652435475, + "acc_stderr,none": 0.0064767732277837935 + }, + "lambada_openai_mt_it": { + "alias": "lambada_openai_mt_it", + "perplexity,none": 209.52126390403453, + "perplexity_stderr,none": 13.299710278444714, + "acc,none": 0.2856588395109645, + "acc_stderr,none": 0.00629344939005611 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T14-07-25.400235_pawsx.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T14-07-25.400235_pawsx.json new file mode 100644 index 0000000000000000000000000000000000000000..a989d4c53774480cfd0bdfdf764e70f9c562132a --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T14-07-25.400235_pawsx.json @@ -0,0 +1,60 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "pawsx" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "pawsx": { + "acc,none": 0.4612857142857143, + "acc_stderr,none": 0.004212518419096327, + "alias": "pawsx" + }, + "paws_de": { + "alias": " - paws_de", + "acc,none": 0.451, + "acc_stderr,none": 0.011129305041886329 + }, + "paws_en": { + "alias": " - paws_en", + "acc,none": 0.4705, + "acc_stderr,none": 0.011163654804511655 + }, + "paws_es": { + "alias": " - paws_es", + "acc,none": 0.4885, + "acc_stderr,none": 0.011180177690296084 + }, + "paws_fr": { + "alias": " - paws_fr", + "acc,none": 0.4585, + "acc_stderr,none": 0.011144549137930344 + }, + "paws_ja": { + "alias": " - paws_ja", + "acc,none": 0.4415, + "acc_stderr,none": 0.011106329288974695 + }, + "paws_ko": { + "alias": " - paws_ko", + "acc,none": 0.459, + "acc_stderr,none": 0.011145474902641254 + }, + "paws_zh": { + "alias": " - paws_zh", + "acc,none": 0.46, + "acc_stderr,none": 0.01114729254418001 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T14-57-32.308002_xcopa.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T14-57-32.308002_xcopa.json new file mode 100644 index 0000000000000000000000000000000000000000..5c1eed7c6ffba6fb17a937c8213cab6e18b62a45 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T14-57-32.308002_xcopa.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "xcopa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xcopa": { + "acc,none": 0.5330909090909091, + "acc_stderr,none": 0.0067289909324144215, + "alias": "xcopa" + }, + "xcopa_et": { + "alias": " - xcopa_et", + "acc,none": 0.496, + "acc_stderr,none": 0.02238235778196214 + }, + "xcopa_ht": { + "alias": " - xcopa_ht", + "acc,none": 0.544, + "acc_stderr,none": 0.022296238348407053 + }, + "xcopa_id": { + "alias": " - xcopa_id", + "acc,none": 0.556, + "acc_stderr,none": 0.022242244375731024 + }, + "xcopa_it": { + "alias": " - xcopa_it", + "acc,none": 0.532, + "acc_stderr,none": 0.022337186479044292 + }, + "xcopa_qu": { + "alias": " - xcopa_qu", + "acc,none": 0.502, + "acc_stderr,none": 0.022382894986483524 + }, + "xcopa_sw": { + "alias": " - xcopa_sw", + "acc,none": 0.528, + "acc_stderr,none": 0.022347949832668093 + }, + "xcopa_ta": { + "alias": " - xcopa_ta", + "acc,none": 0.538, + "acc_stderr,none": 0.022318338119870523 + }, + "xcopa_th": { + "alias": " - xcopa_th", + "acc,none": 0.56, + "acc_stderr,none": 0.022221331534143015 + }, + "xcopa_tr": { + "alias": " - xcopa_tr", + "acc,none": 0.526, + "acc_stderr,none": 0.022352791650914167 + }, + "xcopa_vi": { + "alias": " - xcopa_vi", + "acc,none": 0.534, + "acc_stderr,none": 0.022331264423258383 + }, + "xcopa_zh": { + "alias": " - xcopa_zh", + "acc,none": 0.548, + "acc_stderr,none": 0.02227969410784342 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T16-02-57.684170_xnli.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T16-02-57.684170_xnli.json new file mode 100644 index 0000000000000000000000000000000000000000..298802cf79f75f86637592c86b96cd41039324f0 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T16-02-57.684170_xnli.json @@ -0,0 +1,100 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "xnli" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xnli": { + "acc,none": 0.37566265060240966, + "acc_stderr,none": 0.0024964839295963676, + "alias": "xnli" + }, + "xnli_ar": { + "alias": " - xnli_ar", + "acc,none": 0.3405622489959839, + "acc_stderr,none": 0.009498886690274442 + }, + "xnli_bg": { + "alias": " - xnli_bg", + "acc,none": 0.37269076305220883, + "acc_stderr,none": 0.009691761259693463 + }, + "xnli_de": { + "alias": " - xnli_de", + "acc,none": 0.41686746987951806, + "acc_stderr,none": 0.009882576606533236 + }, + "xnli_el": { + "alias": " - xnli_el", + "acc,none": 0.3369477911646586, + "acc_stderr,none": 0.009474203778757712 + }, + "xnli_en": { + "alias": " - xnli_en", + "acc,none": 0.4827309236947791, + "acc_stderr,none": 0.010016093498409703 + }, + "xnli_es": { + "alias": " - xnli_es", + "acc,none": 0.3815261044176707, + "acc_stderr,none": 0.00973666813309817 + }, + "xnli_fr": { + "alias": " - xnli_fr", + "acc,none": 0.42730923694779116, + "acc_stderr,none": 0.009915595034908124 + }, + "xnli_hi": { + "alias": " - xnli_hi", + "acc,none": 0.3514056224899598, + "acc_stderr,none": 0.00956926307982396 + }, + "xnli_ru": { + "alias": " - xnli_ru", + "acc,none": 0.43333333333333335, + "acc_stderr,none": 0.009932588282324236 + }, + "xnli_sw": { + "alias": " - xnli_sw", + "acc,none": 0.3465863453815261, + "acc_stderr,none": 0.009538660220459 + }, + "xnli_th": { + "alias": " - xnli_th", + "acc,none": 0.3397590361445783, + "acc_stderr,none": 0.009493454925438249 + }, + "xnli_tr": { + "alias": " - xnli_tr", + "acc,none": 0.3598393574297189, + "acc_stderr,none": 0.009620250217765997 + }, + "xnli_ur": { + "alias": " - xnli_ur", + "acc,none": 0.3345381526104418, + "acc_stderr,none": 0.009457404390939166 + }, + "xnli_vi": { + "alias": " - xnli_vi", + "acc,none": 0.3690763052208835, + "acc_stderr,none": 0.00967239564447043 + }, + "xnli_zh": { + "alias": " - xnli_zh", + "acc,none": 0.3417670682730924, + "acc_stderr,none": 0.009506977398287621 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T16-19-00.595561_xstorycloze.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T16-19-00.595561_xstorycloze.json new file mode 100644 index 0000000000000000000000000000000000000000..6fce1b3d64dccdd039afd6db2929e3c999b0e6fe --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T16-19-00.595561_xstorycloze.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "xstorycloze" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xstorycloze": { + "acc,none": 0.5257204740990313, + "acc_stderr,none": 0.003863789045312555, + "alias": "xstorycloze" + }, + "xstorycloze_ar": { + "alias": " - xstorycloze_ar", + "acc,none": 0.4818001323626737, + "acc_stderr,none": 0.012858598401831848 + }, + "xstorycloze_en": { + "alias": " - xstorycloze_en", + "acc,none": 0.6293845135671741, + "acc_stderr,none": 0.012428861084065903 + }, + "xstorycloze_es": { + "alias": " - xstorycloze_es", + "acc,none": 0.5400397088021178, + "acc_stderr,none": 0.012825802370083987 + }, + "xstorycloze_eu": { + "alias": " - xstorycloze_eu", + "acc,none": 0.5201853077432164, + "acc_stderr,none": 0.012856635706498292 + }, + "xstorycloze_hi": { + "alias": " - xstorycloze_hi", + "acc,none": 0.514890800794176, + "acc_stderr,none": 0.012861417842074004 + }, + "xstorycloze_id": { + "alias": " - xstorycloze_id", + "acc,none": 0.5069490403706155, + "acc_stderr,none": 0.012865882570960722 + }, + "xstorycloze_my": { + "alias": " - xstorycloze_my", + "acc,none": 0.5029781601588352, + "acc_stderr,none": 0.012866897066011233 + }, + "xstorycloze_ru": { + "alias": " - xstorycloze_ru", + "acc,none": 0.5129053606882858, + "acc_stderr,none": 0.012862838605728476 + }, + "xstorycloze_sw": { + "alias": " - xstorycloze_sw", + "acc,none": 0.4990072799470549, + "acc_stderr,none": 0.012867099955422926 + }, + "xstorycloze_te": { + "alias": " - xstorycloze_te", + "acc,none": 0.5340833884844474, + "acc_stderr,none": 0.012837195610619434 + }, + "xstorycloze_zh": { + "alias": " - xstorycloze_zh", + "acc,none": 0.5407015221707479, + "acc_stderr,none": 0.012824422739625578 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T16-21-46.468616_xwinograd.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T16-21-46.468616_xwinograd.json new file mode 100644 index 0000000000000000000000000000000000000000..4fe23dc8baf177d714918ccad697e451a159d9e2 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T16-21-46.468616_xwinograd.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "xwinograd" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xwinograd": { + "acc,none": 0.6414924702180266, + "acc_stderr,none": 0.007097604851218048, + "alias": "xwinograd" + }, + "xwinograd_en": { + "alias": " - xwinograd_en", + "acc,none": 0.7088172043010753, + "acc_stderr,none": 0.009423927122193903 + }, + "xwinograd_fr": { + "alias": " - xwinograd_fr", + "acc,none": 0.6144578313253012, + "acc_stderr,none": 0.0537495779731939 + }, + "xwinograd_jp": { + "alias": " - xwinograd_jp", + "acc,none": 0.5338894681960376, + "acc_stderr,none": 0.016117117806017902 + }, + "xwinograd_pt": { + "alias": " - xwinograd_pt", + "acc,none": 0.532319391634981, + "acc_stderr,none": 0.030825503526303782 + }, + "xwinograd_ru": { + "alias": " - xwinograd_ru", + "acc,none": 0.5523809523809524, + "acc_stderr,none": 0.0280613656383537 + }, + "xwinograd_zh": { + "alias": " - xwinograd_zh", + "acc,none": 0.6527777777777778, + "acc_stderr,none": 0.021227675707409233 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T17-22-18.851434_mmlu.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T17-22-18.851434_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..6e120a6975053e70fc1870b2b467f2ec5354a768 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T17-22-18.851434_mmlu.json @@ -0,0 +1,330 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "mmlu" + ], + "num_fewshot": 5, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.25409485828229594, + "acc_stderr,none": 0.003673285974361475, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.2454835281615303, + "acc_stderr,none": 0.006269554905683639, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.3412698412698413, + "acc_stderr,none": 0.04240799327574924 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.3151515151515151, + "acc_stderr,none": 0.0362773057502241 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.24509803921568626, + "acc_stderr,none": 0.030190282453501943 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.2742616033755274, + "acc_stderr,none": 0.02904133351059801 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.23140495867768596, + "acc_stderr,none": 0.03849856098794088 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.21296296296296297, + "acc_stderr,none": 0.039578354719809805 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.26380368098159507, + "acc_stderr,none": 0.03462419931615624 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.2398843930635838, + "acc_stderr,none": 0.022989592543123567 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.24581005586592178, + "acc_stderr,none": 0.01440029642922559 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.1864951768488746, + "acc_stderr,none": 0.022122439772480764 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.2191358024691358, + "acc_stderr,none": 0.023016705640262175 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.24771838331160365, + "acc_stderr,none": 0.011025499291443735 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.22807017543859648, + "acc_stderr,none": 0.03218093795602357 + }, + "mmlu_other": { + "acc,none": 0.25523012552301255, + "acc_stderr,none": 0.00781936823628676, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.22, + "acc_stderr,none": 0.04163331998932269 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.26037735849056604, + "acc_stderr,none": 0.02700876609070809 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.24277456647398843, + "acc_stderr,none": 0.0326926380614177 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.22, + "acc_stderr,none": 0.0416333199893227 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.32286995515695066, + "acc_stderr,none": 0.03138147637575498 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.2912621359223301, + "acc_stderr,none": 0.04498676320572924 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.2863247863247863, + "acc_stderr,none": 0.029614323690456648 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.24, + "acc_stderr,none": 0.04292346959909283 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.23371647509578544, + "acc_stderr,none": 0.015133383278988832 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.23529411764705882, + "acc_stderr,none": 0.024288619466046116 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.2553191489361702, + "acc_stderr,none": 0.02601199293090201 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.2977941176470588, + "acc_stderr,none": 0.027778298701545443 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.22289156626506024, + "acc_stderr,none": 0.03240004825594688 + }, + "mmlu_social_sciences": { + "acc,none": 0.25901852453688656, + "acc_stderr,none": 0.007907951917759795, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.2719298245614035, + "acc_stderr,none": 0.041857744240220575 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.2828282828282828, + "acc_stderr,none": 0.032087795587867514 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.26424870466321243, + "acc_stderr,none": 0.03182155050916647 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.25384615384615383, + "acc_stderr,none": 0.022066054378726253 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.2689075630252101, + "acc_stderr,none": 0.028801392193631276 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.24403669724770644, + "acc_stderr,none": 0.018415286351416406 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.2900763358778626, + "acc_stderr,none": 0.03980066246467765 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.26143790849673204, + "acc_stderr,none": 0.017776947157528027 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.22727272727272727, + "acc_stderr,none": 0.04013964554072774 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.24489795918367346, + "acc_stderr,none": 0.027529637440174923 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.2835820895522388, + "acc_stderr,none": 0.03187187537919797 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.23, + "acc_stderr,none": 0.042295258468165065 + }, + "mmlu_stem": { + "acc,none": 0.26102124960355216, + "acc_stderr,none": 0.007821062178349759, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.19, + "acc_stderr,none": 0.039427724440366234 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.21481481481481482, + "acc_stderr,none": 0.035478541985608236 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.2894736842105263, + "acc_stderr,none": 0.036906779861372814 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.2361111111111111, + "acc_stderr,none": 0.03551446610810826 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.34, + "acc_stderr,none": 0.047609522856952365 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.28, + "acc_stderr,none": 0.04512608598542128 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.25, + "acc_stderr,none": 0.04351941398892446 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.28431372549019607, + "acc_stderr,none": 0.04488482852329017 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.24, + "acc_stderr,none": 0.042923469599092816 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.2170212765957447, + "acc_stderr,none": 0.02694748312149622 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.2206896551724138, + "acc_stderr,none": 0.03455930201924813 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.2671957671957672, + "acc_stderr,none": 0.022789673145776575 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.2806451612903226, + "acc_stderr,none": 0.025560604721022888 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.2561576354679803, + "acc_stderr,none": 0.030712730070982592 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.24, + "acc_stderr,none": 0.04292346959909282 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.2740740740740741, + "acc_stderr,none": 0.027195934804085622 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.24503311258278146, + "acc_stderr,none": 0.03511807571804726 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.3287037037037037, + "acc_stderr,none": 0.03203614084670058 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.25, + "acc_stderr,none": 0.04109974682633932 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T18-54-53.797865_lambada_openai_.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T18-54-53.797865_lambada_openai_.json new file mode 100644 index 0000000000000000000000000000000000000000..247377c7416e393326d55a6e6fcc69f798316c08 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T18-54-53.797865_lambada_openai_.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "lambada_openai" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": 11, + "pad_token_ids": [ + 11 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai": { + "alias": "lambada_openai", + "perplexity,none": 12.642543877737143, + "perplexity_stderr,none": 0.3812387951513916, + "acc,none": 0.4814671065398797, + "acc_stderr,none": 0.006961190829726007 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T09-57-49.932590_lambada_openai.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T09-57-49.932590_lambada_openai.json new file mode 100644 index 0000000000000000000000000000000000000000..4a3f0718973ae5aa4bda23d4acba3b4180962ef0 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T09-57-49.932590_lambada_openai.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "lambada_openai" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai": { + "alias": "lambada_openai", + "perplexity,none": 12.62972464596384, + "perplexity_stderr,none": 0.3806139731054351, + "acc,none": 0.48282553852124976, + "acc_stderr,none": 0.006961867045185065 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-13-21.086739_hellaswag.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-13-21.086739_hellaswag.json new file mode 100644 index 0000000000000000000000000000000000000000..4f8e1a251269e93b1069eb537414c46efd590742 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-13-21.086739_hellaswag.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "hellaswag" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "hellaswag": { + "alias": "hellaswag", + "acc,none": 0.3451503684524995, + "acc_stderr,none": 0.004744456628455124, + "acc_norm,none": 0.4197371041625174, + "acc_norm_stderr,none": 0.004925072159723834 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-14-54.756945_piqa.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-14-54.756945_piqa.json new file mode 100644 index 0000000000000000000000000000000000000000..671f548013092c24dc4d07f0d342b7993f255d9b --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-14-54.756945_piqa.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "piqa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "piqa": { + "alias": "piqa", + "acc,none": 0.6735582154515778, + "acc_stderr,none": 0.010940467046177306, + "acc_norm,none": 0.6664853101196954, + "acc_norm_stderr,none": 0.011000139592184566 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-17-57.209289_arc_easy.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-17-57.209289_arc_easy.json new file mode 100644 index 0000000000000000000000000000000000000000..44c4b9fb0a8400236faa876e3f85c9d00da3cd20 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-17-57.209289_arc_easy.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "arc_easy" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_easy": { + "alias": "arc_easy", + "acc,none": 0.5854377104377104, + "acc_stderr,none": 0.010108889212447767, + "acc_norm,none": 0.5420875420875421, + "acc_norm_stderr,none": 0.010223371342195895 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-19-33.710789_arc_challenge.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-19-33.710789_arc_challenge.json new file mode 100644 index 0000000000000000000000000000000000000000..9d957c0dbccfb2e2ee9eb0bf88ad2c6c839e4420 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-19-33.710789_arc_challenge.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "arc_challenge" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_challenge": { + "alias": "arc_challenge", + "acc,none": 0.2551194539249147, + "acc_stderr,none": 0.012739038695202102, + "acc_norm,none": 0.2935153583617747, + "acc_norm_stderr,none": 0.013307250444941113 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T12-39-38.019272_xnli.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T12-39-38.019272_xnli.json new file mode 100644 index 0000000000000000000000000000000000000000..c693d62c46a6aaa54a55ab0158a0eeb4e375dd8a --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T12-39-38.019272_xnli.json @@ -0,0 +1,100 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "xnli" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xnli": { + "acc,none": 0.3731191432396252, + "acc_stderr,none": 0.0024949681237011402, + "alias": "xnli" + }, + "xnli_ar": { + "alias": " - xnli_ar", + "acc,none": 0.3373493975903614, + "acc_stderr,none": 0.009476976849778591 + }, + "xnli_bg": { + "alias": " - xnli_bg", + "acc,none": 0.3682730923694779, + "acc_stderr,none": 0.009668013178998446 + }, + "xnli_de": { + "alias": " - xnli_de", + "acc,none": 0.40602409638554215, + "acc_stderr,none": 0.00984346200738422 + }, + "xnli_el": { + "alias": " - xnli_el", + "acc,none": 0.3369477911646586, + "acc_stderr,none": 0.009474203778757713 + }, + "xnli_en": { + "alias": " - xnli_en", + "acc,none": 0.4674698795180723, + "acc_stderr,none": 0.010000839483876027 + }, + "xnli_es": { + "alias": " - xnli_es", + "acc,none": 0.3923694779116466, + "acc_stderr,none": 0.009787120838990105 + }, + "xnli_fr": { + "alias": " - xnli_fr", + "acc,none": 0.42208835341365464, + "acc_stderr,none": 0.009899652714895416 + }, + "xnli_hi": { + "alias": " - xnli_hi", + "acc,none": 0.351004016064257, + "acc_stderr,none": 0.009566753834803288 + }, + "xnli_ru": { + "alias": " - xnli_ru", + "acc,none": 0.42208835341365464, + "acc_stderr,none": 0.009899652714895424 + }, + "xnli_sw": { + "alias": " - xnli_sw", + "acc,none": 0.3481927710843373, + "acc_stderr,none": 0.009548980649153391 + }, + "xnli_th": { + "alias": " - xnli_th", + "acc,none": 0.35180722891566263, + "acc_stderr,none": 0.009571764897113621 + }, + "xnli_tr": { + "alias": " - xnli_tr", + "acc,none": 0.35180722891566263, + "acc_stderr,none": 0.009571764897113625 + }, + "xnli_ur": { + "alias": " - xnli_ur", + "acc,none": 0.3349397590361446, + "acc_stderr,none": 0.009460223484996469 + }, + "xnli_vi": { + "alias": " - xnli_vi", + "acc,none": 0.3674698795180723, + "acc_stderr,none": 0.009663601903728034 + }, + "xnli_zh": { + "alias": " - xnli_zh", + "acc,none": 0.3389558232931727, + "acc_stderr,none": 0.009487992732201522 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T12-56-32.162685_xstorycloze.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T12-56-32.162685_xstorycloze.json new file mode 100644 index 0000000000000000000000000000000000000000..516b5a937522c418ad9affac5577a5cb03e1a1ec --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T12-56-32.162685_xstorycloze.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "xstorycloze" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xstorycloze": { + "acc,none": 0.5243366825100776, + "acc_stderr,none": 0.0038648541830817408, + "alias": "xstorycloze" + }, + "xstorycloze_ar": { + "alias": " - xstorycloze_ar", + "acc,none": 0.4824619457313038, + "acc_stderr,none": 0.012859207453266304 + }, + "xstorycloze_en": { + "alias": " - xstorycloze_en", + "acc,none": 0.6260754467240238, + "acc_stderr,none": 0.012451361842944465 + }, + "xstorycloze_es": { + "alias": " - xstorycloze_es", + "acc,none": 0.5367306419589676, + "acc_stderr,none": 0.012832359240206969 + }, + "xstorycloze_eu": { + "alias": " - xstorycloze_eu", + "acc,none": 0.5195234943745863, + "acc_stderr,none": 0.01285731253183686 + }, + "xstorycloze_hi": { + "alias": " - xstorycloze_hi", + "acc,none": 0.514228987425546, + "acc_stderr,none": 0.012861913999596129 + }, + "xstorycloze_id": { + "alias": " - xstorycloze_id", + "acc,none": 0.5049636002647253, + "acc_stderr,none": 0.012866491277589943 + }, + "xstorycloze_my": { + "alias": " - xstorycloze_my", + "acc,none": 0.5036399735274653, + "acc_stderr,none": 0.01286678434828923 + }, + "xstorycloze_ru": { + "alias": " - xstorycloze_ru", + "acc,none": 0.5122435473196558, + "acc_stderr,none": 0.012863267059205548 + }, + "xstorycloze_sw": { + "alias": " - xstorycloze_sw", + "acc,none": 0.49702183984116477, + "acc_stderr,none": 0.012866897066011233 + }, + "xstorycloze_te": { + "alias": " - xstorycloze_te", + "acc,none": 0.5340833884844474, + "acc_stderr,none": 0.012837195610619434 + }, + "xstorycloze_zh": { + "alias": " - xstorycloze_zh", + "acc,none": 0.5367306419589676, + "acc_stderr,none": 0.01283235924020697 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T12-59-30.016094_xwinograd.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T12-59-30.016094_xwinograd.json new file mode 100644 index 0000000000000000000000000000000000000000..52a03445087bc0876ea5170c0a639de2ef1d5824 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T12-59-30.016094_xwinograd.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "xwinograd" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xwinograd": { + "acc,none": 0.6439649359406608, + "acc_stderr,none": 0.007096653855893872, + "alias": "xwinograd" + }, + "xwinograd_en": { + "alias": " - xwinograd_en", + "acc,none": 0.7066666666666667, + "acc_stderr,none": 0.00944430382490117 + }, + "xwinograd_fr": { + "alias": " - xwinograd_fr", + "acc,none": 0.6626506024096386, + "acc_stderr,none": 0.052212602620321284 + }, + "xwinograd_jp": { + "alias": " - xwinograd_jp", + "acc,none": 0.5380604796663191, + "acc_stderr,none": 0.016107396603808045 + }, + "xwinograd_pt": { + "alias": " - xwinograd_pt", + "acc,none": 0.5513307984790875, + "acc_stderr,none": 0.030726890349707915 + }, + "xwinograd_ru": { + "alias": " - xwinograd_ru", + "acc,none": 0.5587301587301587, + "acc_stderr,none": 0.02802130493237513 + }, + "xwinograd_zh": { + "alias": " - xwinograd_zh", + "acc,none": 0.6547619047619048, + "acc_stderr,none": 0.021199082505488055 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T13-49-01.062852_mmlu.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T13-49-01.062852_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..17deabd27e7b5930faa3c219839e34e99cee1311 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T13-49-01.062852_mmlu.json @@ -0,0 +1,330 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "mmlu" + ], + "num_fewshot": 5, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.2547357926221336, + "acc_stderr,none": 0.0036750666019376636, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.2454835281615303, + "acc_stderr,none": 0.006267346149858851, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.3412698412698413, + "acc_stderr,none": 0.04240799327574925 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.3151515151515151, + "acc_stderr,none": 0.0362773057502241 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.24509803921568626, + "acc_stderr,none": 0.03019028245350194 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.29535864978902954, + "acc_stderr,none": 0.02969633871342286 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.2644628099173554, + "acc_stderr,none": 0.04026187527591206 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.2222222222222222, + "acc_stderr,none": 0.0401910747255735 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.24539877300613497, + "acc_stderr,none": 0.03380939813943354 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.23410404624277456, + "acc_stderr,none": 0.022797110278071128 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.2435754189944134, + "acc_stderr,none": 0.014355911964767864 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.18006430868167203, + "acc_stderr,none": 0.02182342285774495 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.21604938271604937, + "acc_stderr,none": 0.022899162918445785 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.2470664928292047, + "acc_stderr,none": 0.011015752255279338 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.23391812865497075, + "acc_stderr,none": 0.03246721765117827 + }, + "mmlu_other": { + "acc,none": 0.2603797875764403, + "acc_stderr,none": 0.007871165717920162, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816505 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.2830188679245283, + "acc_stderr,none": 0.027724236492700904 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.24277456647398843, + "acc_stderr,none": 0.0326926380614177 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.2, + "acc_stderr,none": 0.04020151261036843 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.31390134529147984, + "acc_stderr,none": 0.031146796482972465 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.2912621359223301, + "acc_stderr,none": 0.04498676320572924 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.29914529914529914, + "acc_stderr,none": 0.029996951858349483 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.22, + "acc_stderr,none": 0.04163331998932269 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.23754789272030652, + "acc_stderr,none": 0.015218733046150195 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.27124183006535946, + "acc_stderr,none": 0.025457756696667864 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.25177304964539005, + "acc_stderr,none": 0.025892151156709405 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.29044117647058826, + "acc_stderr,none": 0.027576468622740526 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.2289156626506024, + "acc_stderr,none": 0.03270745277352477 + }, + "mmlu_social_sciences": { + "acc,none": 0.2577185570360741, + "acc_stderr,none": 0.007892283265392744, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.2543859649122807, + "acc_stderr,none": 0.040969851398436716 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.2727272727272727, + "acc_stderr,none": 0.03173071239071724 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.27461139896373055, + "acc_stderr,none": 0.03221024508041154 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.2512820512820513, + "acc_stderr,none": 0.021992016662370564 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.2605042016806723, + "acc_stderr,none": 0.028510251512341937 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.24403669724770644, + "acc_stderr,none": 0.018415286351416413 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.26717557251908397, + "acc_stderr,none": 0.038808483010823944 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.2696078431372549, + "acc_stderr,none": 0.017952449196987862 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.22727272727272727, + "acc_stderr,none": 0.04013964554072775 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.22448979591836735, + "acc_stderr,none": 0.02671143055553841 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.3034825870646766, + "acc_stderr,none": 0.032510068164586174 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816506 + }, + "mmlu_stem": { + "acc,none": 0.260069774817634, + "acc_stderr,none": 0.0078061861482534595, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.2, + "acc_stderr,none": 0.04020151261036845 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.2074074074074074, + "acc_stderr,none": 0.03502553170678318 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.27631578947368424, + "acc_stderr,none": 0.03639057569952925 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.24305555555555555, + "acc_stderr,none": 0.03586879280080342 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.35, + "acc_stderr,none": 0.047937248544110196 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.29, + "acc_stderr,none": 0.04560480215720684 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.26, + "acc_stderr,none": 0.0440844002276808 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.3333333333333333, + "acc_stderr,none": 0.04690650298201943 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.22, + "acc_stderr,none": 0.04163331998932269 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.2170212765957447, + "acc_stderr,none": 0.02694748312149622 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.19310344827586207, + "acc_stderr,none": 0.032894455221274 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.2566137566137566, + "acc_stderr,none": 0.022494510767503154 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.2870967741935484, + "acc_stderr,none": 0.025736542745594525 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.2512315270935961, + "acc_stderr,none": 0.030516530732694436 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.24, + "acc_stderr,none": 0.04292346959909283 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.27037037037037037, + "acc_stderr,none": 0.027080372815145658 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.2582781456953642, + "acc_stderr,none": 0.035737053147634576 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.3194444444444444, + "acc_stderr,none": 0.03179876342176851 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.25, + "acc_stderr,none": 0.04109974682633932 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-03-10.026654_glue.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-03-10.026654_glue.json new file mode 100644 index 0000000000000000000000000000000000000000..5329725f67574321aa8326fd3790c615eea8fb68 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-03-10.026654_glue.json @@ -0,0 +1,69 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "glue" + ], + "num_fewshot": 5, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "cola": { + "alias": "cola", + "mcc,none": -0.08703457213373123, + "mcc_stderr,none": 0.030163539085824434 + }, + "mnli": { + "alias": "mnli", + "acc,none": 0.3307182883341824, + "acc_stderr,none": 0.0047490917271714875 + }, + "mnli_mismatch": { + "alias": "mnli_mismatch", + "acc,none": 0.3360455655004068, + "acc_stderr,none": 0.004763973908606825 + }, + "mrpc": { + "alias": "mrpc", + "acc,none": 0.4019607843137255, + "acc_stderr,none": 0.024302976642371528, + "f1,none": 0.34759358288770054, + "f1_stderr,none": 0.0313439979522004 + }, + "qnli": { + "alias": "qnli", + "acc,none": 0.4962474830679114, + "acc_stderr,none": 0.006765220016415222 + }, + "qqp": { + "alias": "qqp", + "acc,none": 0.5283452881523621, + "acc_stderr,none": 0.002482701510086862, + "f1,none": 0.41103252308737687, + "f1_stderr,none": 0.0034173131749060475 + }, + "rte": { + "alias": "rte", + "acc,none": 0.48736462093862815, + "acc_stderr,none": 0.030086851767188564 + }, + "sst2": { + "alias": "sst2", + "acc,none": 0.6284403669724771, + "acc_stderr,none": 0.016373337800737308 + }, + "wnli": { + "alias": "wnli", + "acc,none": 0.5774647887323944, + "acc_stderr,none": 0.05903984205682581 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-04-32.038053_winogrande.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-04-32.038053_winogrande.json new file mode 100644 index 0000000000000000000000000000000000000000..3c62a30a5c9d208691799154f3b01ebfa5c16caf --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-04-32.038053_winogrande.json @@ -0,0 +1,25 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "winogrande" + ], + "num_fewshot": 5, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "winogrande": { + "alias": "winogrande", + "acc,none": 0.5374901341752171, + "acc_stderr,none": 0.014012928183336573 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-09-54.775610_sciq.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-09-54.775610_sciq.json new file mode 100644 index 0000000000000000000000000000000000000000..490b2130103f520e2ef926f8e29d00c14c3d0f63 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-09-54.775610_sciq.json @@ -0,0 +1,27 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "sciq" + ], + "num_fewshot": 5, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "sciq": { + "alias": "sciq", + "acc,none": 0.92, + "acc_stderr,none": 0.00858333697775365, + "acc_norm,none": 0.908, + "acc_norm_stderr,none": 0.00914437639315114 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-31-43.872563_mmlu.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-31-43.872563_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..17deabd27e7b5930faa3c219839e34e99cee1311 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-31-43.872563_mmlu.json @@ -0,0 +1,330 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "mmlu" + ], + "num_fewshot": 5, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.2547357926221336, + "acc_stderr,none": 0.0036750666019376636, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.2454835281615303, + "acc_stderr,none": 0.006267346149858851, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.3412698412698413, + "acc_stderr,none": 0.04240799327574925 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.3151515151515151, + "acc_stderr,none": 0.0362773057502241 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.24509803921568626, + "acc_stderr,none": 0.03019028245350194 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.29535864978902954, + "acc_stderr,none": 0.02969633871342286 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.2644628099173554, + "acc_stderr,none": 0.04026187527591206 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.2222222222222222, + "acc_stderr,none": 0.0401910747255735 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.24539877300613497, + "acc_stderr,none": 0.03380939813943354 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.23410404624277456, + "acc_stderr,none": 0.022797110278071128 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.2435754189944134, + "acc_stderr,none": 0.014355911964767864 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.18006430868167203, + "acc_stderr,none": 0.02182342285774495 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.21604938271604937, + "acc_stderr,none": 0.022899162918445785 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.2470664928292047, + "acc_stderr,none": 0.011015752255279338 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.23391812865497075, + "acc_stderr,none": 0.03246721765117827 + }, + "mmlu_other": { + "acc,none": 0.2603797875764403, + "acc_stderr,none": 0.007871165717920162, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816505 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.2830188679245283, + "acc_stderr,none": 0.027724236492700904 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.24277456647398843, + "acc_stderr,none": 0.0326926380614177 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.2, + "acc_stderr,none": 0.04020151261036843 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.31390134529147984, + "acc_stderr,none": 0.031146796482972465 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.2912621359223301, + "acc_stderr,none": 0.04498676320572924 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.29914529914529914, + "acc_stderr,none": 0.029996951858349483 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.22, + "acc_stderr,none": 0.04163331998932269 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.23754789272030652, + "acc_stderr,none": 0.015218733046150195 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.27124183006535946, + "acc_stderr,none": 0.025457756696667864 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.25177304964539005, + "acc_stderr,none": 0.025892151156709405 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.29044117647058826, + "acc_stderr,none": 0.027576468622740526 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.2289156626506024, + "acc_stderr,none": 0.03270745277352477 + }, + "mmlu_social_sciences": { + "acc,none": 0.2577185570360741, + "acc_stderr,none": 0.007892283265392744, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.2543859649122807, + "acc_stderr,none": 0.040969851398436716 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.2727272727272727, + "acc_stderr,none": 0.03173071239071724 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.27461139896373055, + "acc_stderr,none": 0.03221024508041154 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.2512820512820513, + "acc_stderr,none": 0.021992016662370564 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.2605042016806723, + "acc_stderr,none": 0.028510251512341937 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.24403669724770644, + "acc_stderr,none": 0.018415286351416413 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.26717557251908397, + "acc_stderr,none": 0.038808483010823944 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.2696078431372549, + "acc_stderr,none": 0.017952449196987862 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.22727272727272727, + "acc_stderr,none": 0.04013964554072775 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.22448979591836735, + "acc_stderr,none": 0.02671143055553841 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.3034825870646766, + "acc_stderr,none": 0.032510068164586174 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816506 + }, + "mmlu_stem": { + "acc,none": 0.260069774817634, + "acc_stderr,none": 0.0078061861482534595, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.2, + "acc_stderr,none": 0.04020151261036845 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.2074074074074074, + "acc_stderr,none": 0.03502553170678318 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.27631578947368424, + "acc_stderr,none": 0.03639057569952925 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.24305555555555555, + "acc_stderr,none": 0.03586879280080342 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.35, + "acc_stderr,none": 0.047937248544110196 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.29, + "acc_stderr,none": 0.04560480215720684 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.26, + "acc_stderr,none": 0.0440844002276808 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.3333333333333333, + "acc_stderr,none": 0.04690650298201943 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.22, + "acc_stderr,none": 0.04163331998932269 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.2170212765957447, + "acc_stderr,none": 0.02694748312149622 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.19310344827586207, + "acc_stderr,none": 0.032894455221274 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.2566137566137566, + "acc_stderr,none": 0.022494510767503154 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.2870967741935484, + "acc_stderr,none": 0.025736542745594525 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.2512315270935961, + "acc_stderr,none": 0.030516530732694436 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.24, + "acc_stderr,none": 0.04292346959909283 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.27037037037037037, + "acc_stderr,none": 0.027080372815145658 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.2582781456953642, + "acc_stderr,none": 0.035737053147634576 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.3194444444444444, + "acc_stderr,none": 0.03179876342176851 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.25, + "acc_stderr,none": 0.04109974682633932 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T20-07-36.031022_lambada_multilingual.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T20-07-36.031022_lambada_multilingual.json new file mode 100644 index 0000000000000000000000000000000000000000..e81cc2ef816989f7f9557acaf475724b9744f1f6 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T20-07-36.031022_lambada_multilingual.json @@ -0,0 +1,55 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "lambada_multilingual" + ], + "num_fewshot": 5, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai_mt_de": { + "alias": "lambada_openai_mt_de", + "perplexity,none": 267.28408723088717, + "perplexity_stderr,none": 16.35740283931253, + "acc,none": 0.20958664855424025, + "acc_stderr,none": 0.005670495539426846 + }, + "lambada_openai_mt_en": { + "alias": "lambada_openai_mt_en", + "perplexity,none": 21.37572561784123, + "perplexity_stderr,none": 0.6808300296181946, + "acc,none": 0.38676499126722297, + "acc_stderr,none": 0.006784988579985175 + }, + "lambada_openai_mt_es": { + "alias": "lambada_openai_mt_es", + "perplexity,none": 370.8411076274339, + "perplexity_stderr,none": 21.595221630946366, + "acc,none": 0.21424412963322337, + "acc_stderr,none": 0.005716238694447705 + }, + "lambada_openai_mt_fr": { + "alias": "lambada_openai_mt_fr", + "perplexity,none": 196.3477203013756, + "perplexity_stderr,none": 11.305248298408568, + "acc,none": 0.2450999417814865, + "acc_stderr,none": 0.005992780988422183 + }, + "lambada_openai_mt_it": { + "alias": "lambada_openai_mt_it", + "perplexity,none": 323.19218684985276, + "perplexity_stderr,none": 20.29110209885659, + "acc,none": 0.2109450805356103, + "acc_stderr,none": 0.005683951840704777 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T20-24-29.617348_pawsx.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T20-24-29.617348_pawsx.json new file mode 100644 index 0000000000000000000000000000000000000000..a677c5f0355628e9c9fceb62dba246da7e9ee2bf --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T20-24-29.617348_pawsx.json @@ -0,0 +1,60 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "pawsx" + ], + "num_fewshot": 5, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "pawsx": { + "acc,none": 0.46485714285714286, + "acc_stderr,none": 0.004213340523157351, + "alias": "pawsx" + }, + "paws_de": { + "alias": " - paws_de", + "acc,none": 0.4565, + "acc_stderr,none": 0.011140733053371418 + }, + "paws_en": { + "alias": " - paws_en", + "acc,none": 0.48, + "acc_stderr,none": 0.011174185930778313 + }, + "paws_es": { + "alias": " - paws_es", + "acc,none": 0.503, + "acc_stderr,none": 0.011182934722804556 + }, + "paws_fr": { + "alias": " - paws_fr", + "acc,none": 0.467, + "acc_stderr,none": 0.011158752568250661 + }, + "paws_ja": { + "alias": " - paws_ja", + "acc,none": 0.448, + "acc_stderr,none": 0.01112249319745629 + }, + "paws_ko": { + "alias": " - paws_ko", + "acc,none": 0.4515, + "acc_stderr,none": 0.011130400617630763 + }, + "paws_zh": { + "alias": " - paws_zh", + "acc,none": 0.448, + "acc_stderr,none": 0.01112249319745629 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T20-29-15.015293_xcopa.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T20-29-15.015293_xcopa.json new file mode 100644 index 0000000000000000000000000000000000000000..b685d378a59a38fd66e7f5881ca962bc273856a6 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T20-29-15.015293_xcopa.json @@ -0,0 +1,80 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "xcopa" + ], + "num_fewshot": 5, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [ + 261 + ], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xcopa": { + "acc,none": 0.5294545454545454, + "acc_stderr,none": 0.00673238666638665, + "alias": "xcopa" + }, + "xcopa_et": { + "alias": " - xcopa_et", + "acc,none": 0.496, + "acc_stderr,none": 0.02238235778196214 + }, + "xcopa_ht": { + "alias": " - xcopa_ht", + "acc,none": 0.528, + "acc_stderr,none": 0.02234794983266809 + }, + "xcopa_id": { + "alias": " - xcopa_id", + "acc,none": 0.546, + "acc_stderr,none": 0.02228814759117695 + }, + "xcopa_it": { + "alias": " - xcopa_it", + "acc,none": 0.526, + "acc_stderr,none": 0.02235279165091416 + }, + "xcopa_qu": { + "alias": " - xcopa_qu", + "acc,none": 0.5, + "acc_stderr,none": 0.022383074051792257 + }, + "xcopa_sw": { + "alias": " - xcopa_sw", + "acc,none": 0.528, + "acc_stderr,none": 0.022347949832668093 + }, + "xcopa_ta": { + "alias": " - xcopa_ta", + "acc,none": 0.532, + "acc_stderr,none": 0.0223371864790443 + }, + "xcopa_th": { + "alias": " - xcopa_th", + "acc,none": 0.566, + "acc_stderr,none": 0.02218721580302901 + }, + "xcopa_tr": { + "alias": " - xcopa_tr", + "acc,none": 0.526, + "acc_stderr,none": 0.022352791650914167 + }, + "xcopa_vi": { + "alias": " - xcopa_vi", + "acc,none": 0.538, + "acc_stderr,none": 0.022318338119870527 + }, + "xcopa_zh": { + "alias": " - xcopa_zh", + "acc,none": 0.538, + "acc_stderr,none": 0.02231833811987053 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T11-51-55.842699_lambada_openai.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T11-51-55.842699_lambada_openai.json new file mode 100644 index 0000000000000000000000000000000000000000..a53ef80fd0959a4bbf901cffc2cd1c15b4ae7e3b --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T11-51-55.842699_lambada_openai.json @@ -0,0 +1,25 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "lambada_openai" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai": { + "alias": "lambada_openai", + "perplexity,none": 14.00703003680488, + "perplexity_stderr,none": 0.4392984349795351, + "acc,none": 0.4737046380749078, + "acc_stderr,none": 0.006956337791536673 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T12-07-58.435251_hellaswag.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T12-07-58.435251_hellaswag.json new file mode 100644 index 0000000000000000000000000000000000000000..9d57316731306f1b72256531458aaacb33b04830 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T12-07-58.435251_hellaswag.json @@ -0,0 +1,25 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "hellaswag" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "hellaswag": { + "alias": "hellaswag", + "acc,none": 0.34385580561641105, + "acc_stderr,none": 0.004740229212473477, + "acc_norm,none": 0.41894045010953995, + "acc_norm_stderr,none": 0.004923772581848491 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T12-09-37.408652_piqa.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T12-09-37.408652_piqa.json new file mode 100644 index 0000000000000000000000000000000000000000..98e39a83940b932f9b43739eda3946c24ae8f520 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T12-09-37.408652_piqa.json @@ -0,0 +1,25 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "piqa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "piqa": { + "alias": "piqa", + "acc,none": 0.6751904243743199, + "acc_stderr,none": 0.010926296238294038, + "acc_norm,none": 0.6659412404787813, + "acc_norm_stderr,none": 0.011004613886336733 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T12-12-42.081737_arc_easy.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T12-12-42.081737_arc_easy.json new file mode 100644 index 0000000000000000000000000000000000000000..7d970a7234684cfd5c8a25bb5d527bfffb54623f --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T12-12-42.081737_arc_easy.json @@ -0,0 +1,25 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "arc_easy" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_easy": { + "alias": "arc_easy", + "acc,none": 0.5913299663299664, + "acc_stderr,none": 0.010087174498762886, + "acc_norm,none": 0.547979797979798, + "acc_norm_stderr,none": 0.010212436978834123 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T12-14-22.750746_arc_challenge.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T12-14-22.750746_arc_challenge.json new file mode 100644 index 0000000000000000000000000000000000000000..a02b25785d92211f4951057e88a8077a74a37821 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T12-14-22.750746_arc_challenge.json @@ -0,0 +1,25 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "arc_challenge" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_challenge": { + "alias": "arc_challenge", + "acc,none": 0.25170648464163825, + "acc_stderr,none": 0.01268249633404296, + "acc_norm,none": 0.28668941979522183, + "acc_norm_stderr,none": 0.013214986329274777 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T14-26-29.692380_arc_challenge.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T14-26-29.692380_arc_challenge.json new file mode 100644 index 0000000000000000000000000000000000000000..a02b25785d92211f4951057e88a8077a74a37821 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T14-26-29.692380_arc_challenge.json @@ -0,0 +1,25 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "arc_challenge" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "arc_challenge": { + "alias": "arc_challenge", + "acc,none": 0.25170648464163825, + "acc_stderr,none": 0.01268249633404296, + "acc_norm,none": 0.28668941979522183, + "acc_norm_stderr,none": 0.013214986329274777 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T15-27-55.667402_glue.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T15-27-55.667402_glue.json new file mode 100644 index 0000000000000000000000000000000000000000..137fc7c175fa76799a3bfddb2e46c130fed4c218 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T15-27-55.667402_glue.json @@ -0,0 +1,67 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "glue" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "cola": { + "alias": "cola", + "mcc,none": 0.0, + "mcc_stderr,none": 0.0 + }, + "mnli": { + "alias": "mnli", + "acc,none": 0.3218543046357616, + "acc_stderr,none": 0.004715938564705685 + }, + "mnli_mismatch": { + "alias": "mnli_mismatch", + "acc,none": 0.3170260374288039, + "acc_stderr,none": 0.004693001485817688 + }, + "mrpc": { + "alias": "mrpc", + "acc,none": 0.31862745098039214, + "acc_stderr,none": 0.023095996571841474, + "f1,none": 0.034722222222222224, + "f1_stderr,none": 0.015232781559381021 + }, + "qnli": { + "alias": "qnli", + "acc,none": 0.48398315943620723, + "acc_stderr,none": 0.006761938475051307 + }, + "qqp": { + "alias": "qqp", + "acc,none": 0.41375216423447936, + "acc_stderr,none": 0.0024494256748938304, + "f1,none": 0.5115406809001731, + "f1_stderr,none": 0.0027484139258788514 + }, + "rte": { + "alias": "rte", + "acc,none": 0.5234657039711191, + "acc_stderr,none": 0.030063300411902652 + }, + "sst2": { + "alias": "sst2", + "acc,none": 0.805045871559633, + "acc_stderr,none": 0.013423552148735014 + }, + "wnli": { + "alias": "wnli", + "acc,none": 0.5211267605633803, + "acc_stderr,none": 0.05970805879899504 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T15-28-58.897894_winogrande.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T15-28-58.897894_winogrande.json new file mode 100644 index 0000000000000000000000000000000000000000..771a4b1bb7052fbbf4938e9303b437f57ddc390c --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T15-28-58.897894_winogrande.json @@ -0,0 +1,23 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "winogrande" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "winogrande": { + "alias": "winogrande", + "acc,none": 0.5217048145224941, + "acc_stderr,none": 0.014039239216484629 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T15-30-38.695120_sciq.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T15-30-38.695120_sciq.json new file mode 100644 index 0000000000000000000000000000000000000000..62a58a55f19be9ee1d1f4b8effe3c441b7b562a3 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T15-30-38.695120_sciq.json @@ -0,0 +1,25 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "sciq" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "sciq": { + "alias": "sciq", + "acc,none": 0.861, + "acc_stderr,none": 0.010945263761042953, + "acc_norm,none": 0.806, + "acc_norm_stderr,none": 0.012510816141264345 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T15-51-21.322899_mmlu.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T15-51-21.322899_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..26e5dc58cb995e00ef2e5db7c241ee291961e137 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T15-51-21.322899_mmlu.json @@ -0,0 +1,328 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "mmlu" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.25509186725537675, + "acc_stderr,none": 0.0036788945968586895, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.2463336875664187, + "acc_stderr,none": 0.006278892915824143, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.3333333333333333, + "acc_stderr,none": 0.04216370213557836 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.3151515151515151, + "acc_stderr,none": 0.0362773057502241 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.23529411764705882, + "acc_stderr,none": 0.029771775228145638 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.2742616033755274, + "acc_stderr,none": 0.02904133351059801 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.2644628099173554, + "acc_stderr,none": 0.04026187527591207 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.25, + "acc_stderr,none": 0.04186091791394607 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.26993865030674846, + "acc_stderr,none": 0.034878251684978906 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.24566473988439305, + "acc_stderr,none": 0.023176298203992005 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.24804469273743016, + "acc_stderr,none": 0.01444415780826146 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.19614147909967847, + "acc_stderr,none": 0.022552447780478022 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.2191358024691358, + "acc_stderr,none": 0.023016705640262175 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.24119947848761408, + "acc_stderr,none": 0.01092649610203494 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.23391812865497075, + "acc_stderr,none": 0.03246721765117827 + }, + "mmlu_other": { + "acc,none": 0.25490827164467333, + "acc_stderr,none": 0.007819268100190985, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816505 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.27169811320754716, + "acc_stderr,none": 0.027377706624670713 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.23121387283236994, + "acc_stderr,none": 0.032147373020294696 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.24, + "acc_stderr,none": 0.04292346959909284 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.2825112107623318, + "acc_stderr,none": 0.03021683101150877 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.2912621359223301, + "acc_stderr,none": 0.04498676320572924 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.27350427350427353, + "acc_stderr,none": 0.029202540153431183 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.24, + "acc_stderr,none": 0.04292346959909283 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.2388250319284802, + "acc_stderr,none": 0.015246803197398677 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.24183006535947713, + "acc_stderr,none": 0.024518195641879334 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.24113475177304963, + "acc_stderr,none": 0.025518731049537776 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.31985294117647056, + "acc_stderr,none": 0.02833295951403122 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.21686746987951808, + "acc_stderr,none": 0.03208284450356365 + }, + "mmlu_social_sciences": { + "acc,none": 0.25901852453688656, + "acc_stderr,none": 0.007909093782333265, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.2543859649122807, + "acc_stderr,none": 0.040969851398436716 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.2676767676767677, + "acc_stderr,none": 0.03154449888270286 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.26424870466321243, + "acc_stderr,none": 0.03182155050916647 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.2692307692307692, + "acc_stderr,none": 0.02248938979365481 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.27310924369747897, + "acc_stderr,none": 0.028942004040998167 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.24587155963302754, + "acc_stderr,none": 0.01846194096870845 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.26717557251908397, + "acc_stderr,none": 0.038808483010823944 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.25980392156862747, + "acc_stderr,none": 0.017740899509177795 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.22727272727272727, + "acc_stderr,none": 0.04013964554072774 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.24081632653061225, + "acc_stderr,none": 0.027372942201788163 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.2885572139303483, + "acc_stderr,none": 0.03203841040213322 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.24, + "acc_stderr,none": 0.042923469599092816 + }, + "mmlu_stem": { + "acc,none": 0.26450999048525214, + "acc_stderr,none": 0.007855635090944856, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.26, + "acc_stderr,none": 0.0440844002276808 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.21481481481481482, + "acc_stderr,none": 0.035478541985608236 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.27631578947368424, + "acc_stderr,none": 0.03639057569952924 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.24305555555555555, + "acc_stderr,none": 0.03586879280080342 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.35, + "acc_stderr,none": 0.0479372485441102 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.3, + "acc_stderr,none": 0.046056618647183814 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.28, + "acc_stderr,none": 0.04512608598542128 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.30392156862745096, + "acc_stderr,none": 0.04576665403207764 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816506 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.20425531914893616, + "acc_stderr,none": 0.02635515841334941 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.22758620689655173, + "acc_stderr,none": 0.03493950380131183 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.2724867724867725, + "acc_stderr,none": 0.022930973071633345 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.2838709677419355, + "acc_stderr,none": 0.025649381063029254 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.2413793103448276, + "acc_stderr,none": 0.030108330718011625 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.24, + "acc_stderr,none": 0.042923469599092816 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.26666666666666666, + "acc_stderr,none": 0.026962424325073828 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.2582781456953642, + "acc_stderr,none": 0.035737053147634576 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.3194444444444444, + "acc_stderr,none": 0.03179876342176851 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.26785714285714285, + "acc_stderr,none": 0.04203277291467762 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T16-01-12.021760_lambada_multilingual.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T16-01-12.021760_lambada_multilingual.json new file mode 100644 index 0000000000000000000000000000000000000000..6bb180df939fc88dec37f8795459fd80daf7c9bd --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T16-01-12.021760_lambada_multilingual.json @@ -0,0 +1,53 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "lambada_multilingual" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "lambada_openai_mt_de": { + "alias": "lambada_openai_mt_de", + "perplexity,none": 222.18517742693768, + "perplexity_stderr,none": 13.721469376231164, + "acc,none": 0.24412963322336503, + "acc_stderr,none": 0.005984749566113877 + }, + "lambada_openai_mt_en": { + "alias": "lambada_openai_mt_en", + "perplexity,none": 14.00703003680488, + "perplexity_stderr,none": 0.4392984349795351, + "acc,none": 0.4737046380749078, + "acc_stderr,none": 0.006956337791536673 + }, + "lambada_openai_mt_es": { + "alias": "lambada_openai_mt_es", + "perplexity,none": 287.7202380912159, + "perplexity_stderr,none": 17.25691023300858, + "acc,none": 0.24742868232097806, + "acc_stderr,none": 0.006011888608102748 + }, + "lambada_openai_mt_fr": { + "alias": "lambada_openai_mt_fr", + "perplexity,none": 138.6656174344242, + "perplexity_stderr,none": 8.14671598074245, + "acc,none": 0.31030467688725016, + "acc_stderr,none": 0.006445177376219963 + }, + "lambada_openai_mt_it": { + "alias": "lambada_openai_mt_it", + "perplexity,none": 222.69880318413337, + "perplexity_stderr,none": 14.17543902899704, + "acc,none": 0.2829419755482243, + "acc_stderr,none": 0.006275349431343617 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T16-17-44.000953_pawsx.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T16-17-44.000953_pawsx.json new file mode 100644 index 0000000000000000000000000000000000000000..4365e0411c751dfeead727bc72c0f0e6a506befa --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T16-17-44.000953_pawsx.json @@ -0,0 +1,58 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "pawsx" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "pawsx": { + "acc,none": 0.46635714285714286, + "acc_stderr,none": 0.004214729011904722, + "alias": "pawsx" + }, + "paws_de": { + "alias": " - paws_de", + "acc,none": 0.456, + "acc_stderr,none": 0.011139750761283304 + }, + "paws_en": { + "alias": " - paws_en", + "acc,none": 0.4805, + "acc_stderr,none": 0.011174628009718269 + }, + "paws_es": { + "alias": " - paws_es", + "acc,none": 0.4995, + "acc_stderr,none": 0.01118313042949519 + }, + "paws_fr": { + "alias": " - paws_fr", + "acc,none": 0.4675, + "acc_stderr,none": 0.011159486640120933 + }, + "paws_ja": { + "alias": " - paws_ja", + "acc,none": 0.4455, + "acc_stderr,none": 0.011116504096687392 + }, + "paws_ko": { + "alias": " - paws_ko", + "acc,none": 0.4645, + "acc_stderr,none": 0.011154913314119557 + }, + "paws_zh": { + "alias": " - paws_zh", + "acc,none": 0.451, + "acc_stderr,none": 0.011129305041886323 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T16-22-00.323420_xcopa.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T16-22-00.323420_xcopa.json new file mode 100644 index 0000000000000000000000000000000000000000..4699f5ba4279c84d98f1a4c4804973de4e8807fd --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T16-22-00.323420_xcopa.json @@ -0,0 +1,78 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "xcopa" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xcopa": { + "acc,none": 0.5303636363636364, + "acc_stderr,none": 0.006731742100800349, + "alias": "xcopa" + }, + "xcopa_et": { + "alias": " - xcopa_et", + "acc,none": 0.5, + "acc_stderr,none": 0.022383074051792257 + }, + "xcopa_ht": { + "alias": " - xcopa_ht", + "acc,none": 0.536, + "acc_stderr,none": 0.022324981738385256 + }, + "xcopa_id": { + "alias": " - xcopa_id", + "acc,none": 0.55, + "acc_stderr,none": 0.02227087748536044 + }, + "xcopa_it": { + "alias": " - xcopa_it", + "acc,none": 0.532, + "acc_stderr,none": 0.022337186479044292 + }, + "xcopa_qu": { + "alias": " - xcopa_qu", + "acc,none": 0.498, + "acc_stderr,none": 0.02238289498648353 + }, + "xcopa_sw": { + "alias": " - xcopa_sw", + "acc,none": 0.528, + "acc_stderr,none": 0.022347949832668093 + }, + "xcopa_ta": { + "alias": " - xcopa_ta", + "acc,none": 0.542, + "acc_stderr,none": 0.02230396677426996 + }, + "xcopa_th": { + "alias": " - xcopa_th", + "acc,none": 0.56, + "acc_stderr,none": 0.022221331534143015 + }, + "xcopa_tr": { + "alias": " - xcopa_tr", + "acc,none": 0.518, + "acc_stderr,none": 0.02236856511738799 + }, + "xcopa_vi": { + "alias": " - xcopa_vi", + "acc,none": 0.528, + "acc_stderr,none": 0.02234794983266809 + }, + "xcopa_zh": { + "alias": " - xcopa_zh", + "acc,none": 0.542, + "acc_stderr,none": 0.022303966774269948 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T17-31-13.736932_xnli.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T17-31-13.736932_xnli.json new file mode 100644 index 0000000000000000000000000000000000000000..34497deb4884f69dcc00fa29bbc5c65dc880a92c --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T17-31-13.736932_xnli.json @@ -0,0 +1,98 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "xnli" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xnli": { + "acc,none": 0.37427041499330654, + "acc_stderr,none": 0.0024953480415387023, + "alias": "xnli" + }, + "xnli_ar": { + "alias": " - xnli_ar", + "acc,none": 0.3417670682730924, + "acc_stderr,none": 0.009506977398287625 + }, + "xnli_bg": { + "alias": " - xnli_bg", + "acc,none": 0.36626506024096384, + "acc_stderr,none": 0.00965693088601476 + }, + "xnli_de": { + "alias": " - xnli_de", + "acc,none": 0.41485943775100403, + "acc_stderr,none": 0.009875705744164678 + }, + "xnli_el": { + "alias": " - xnli_el", + "acc,none": 0.3381526104417671, + "acc_stderr,none": 0.009482500057981026 + }, + "xnli_en": { + "alias": " - xnli_en", + "acc,none": 0.4738955823293173, + "acc_stderr,none": 0.01000840465166067 + }, + "xnli_es": { + "alias": " - xnli_es", + "acc,none": 0.3855421686746988, + "acc_stderr,none": 0.009755949341224318 + }, + "xnli_fr": { + "alias": " - xnli_fr", + "acc,none": 0.4285140562248996, + "acc_stderr,none": 0.009919113605650941 + }, + "xnli_hi": { + "alias": " - xnli_hi", + "acc,none": 0.3469879518072289, + "acc_stderr,none": 0.009541251561568395 + }, + "xnli_ru": { + "alias": " - xnli_ru", + "acc,none": 0.43012048192771085, + "acc_stderr,none": 0.009923711675408063 + }, + "xnli_sw": { + "alias": " - xnli_sw", + "acc,none": 0.3506024096385542, + "acc_stderr,none": 0.0095642371562061 + }, + "xnli_th": { + "alias": " - xnli_th", + "acc,none": 0.3481927710843373, + "acc_stderr,none": 0.009548980649153382 + }, + "xnli_tr": { + "alias": " - xnli_tr", + "acc,none": 0.351004016064257, + "acc_stderr,none": 0.009566753834803295 + }, + "xnli_ur": { + "alias": " - xnli_ur", + "acc,none": 0.3357429718875502, + "acc_stderr,none": 0.00946583861733735 + }, + "xnli_vi": { + "alias": " - xnli_vi", + "acc,none": 0.36666666666666664, + "acc_stderr,none": 0.009659161779186956 + }, + "xnli_zh": { + "alias": " - xnli_zh", + "acc,none": 0.3357429718875502, + "acc_stderr,none": 0.00946583861733735 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T19-23-59.296005_xnli.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T19-23-59.296005_xnli.json new file mode 100644 index 0000000000000000000000000000000000000000..34497deb4884f69dcc00fa29bbc5c65dc880a92c --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T19-23-59.296005_xnli.json @@ -0,0 +1,98 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "xnli" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xnli": { + "acc,none": 0.37427041499330654, + "acc_stderr,none": 0.0024953480415387023, + "alias": "xnli" + }, + "xnli_ar": { + "alias": " - xnli_ar", + "acc,none": 0.3417670682730924, + "acc_stderr,none": 0.009506977398287625 + }, + "xnli_bg": { + "alias": " - xnli_bg", + "acc,none": 0.36626506024096384, + "acc_stderr,none": 0.00965693088601476 + }, + "xnli_de": { + "alias": " - xnli_de", + "acc,none": 0.41485943775100403, + "acc_stderr,none": 0.009875705744164678 + }, + "xnli_el": { + "alias": " - xnli_el", + "acc,none": 0.3381526104417671, + "acc_stderr,none": 0.009482500057981026 + }, + "xnli_en": { + "alias": " - xnli_en", + "acc,none": 0.4738955823293173, + "acc_stderr,none": 0.01000840465166067 + }, + "xnli_es": { + "alias": " - xnli_es", + "acc,none": 0.3855421686746988, + "acc_stderr,none": 0.009755949341224318 + }, + "xnli_fr": { + "alias": " - xnli_fr", + "acc,none": 0.4285140562248996, + "acc_stderr,none": 0.009919113605650941 + }, + "xnli_hi": { + "alias": " - xnli_hi", + "acc,none": 0.3469879518072289, + "acc_stderr,none": 0.009541251561568395 + }, + "xnli_ru": { + "alias": " - xnli_ru", + "acc,none": 0.43012048192771085, + "acc_stderr,none": 0.009923711675408063 + }, + "xnli_sw": { + "alias": " - xnli_sw", + "acc,none": 0.3506024096385542, + "acc_stderr,none": 0.0095642371562061 + }, + "xnli_th": { + "alias": " - xnli_th", + "acc,none": 0.3481927710843373, + "acc_stderr,none": 0.009548980649153382 + }, + "xnli_tr": { + "alias": " - xnli_tr", + "acc,none": 0.351004016064257, + "acc_stderr,none": 0.009566753834803295 + }, + "xnli_ur": { + "alias": " - xnli_ur", + "acc,none": 0.3357429718875502, + "acc_stderr,none": 0.00946583861733735 + }, + "xnli_vi": { + "alias": " - xnli_vi", + "acc,none": 0.36666666666666664, + "acc_stderr,none": 0.009659161779186956 + }, + "xnli_zh": { + "alias": " - xnli_zh", + "acc,none": 0.3357429718875502, + "acc_stderr,none": 0.00946583861733735 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T19-41-08.186979_xstorycloze.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T19-41-08.186979_xstorycloze.json new file mode 100644 index 0000000000000000000000000000000000000000..03763ade3048d41ead41e81bcec9138ac72a81b4 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T19-41-08.186979_xstorycloze.json @@ -0,0 +1,78 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "xstorycloze" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xstorycloze": { + "acc,none": 0.5238553636965285, + "acc_stderr,none": 0.0038651717445196557, + "alias": "xstorycloze" + }, + "xstorycloze_ar": { + "alias": " - xstorycloze_ar", + "acc,none": 0.47650562541363334, + "acc_stderr,none": 0.012852912530051752 + }, + "xstorycloze_en": { + "alias": " - xstorycloze_en", + "acc,none": 0.6227663798808736, + "acc_stderr,none": 0.012473240654741183 + }, + "xstorycloze_es": { + "alias": " - xstorycloze_es", + "acc,none": 0.5387160820648577, + "acc_stderr,none": 0.012828493353271537 + }, + "xstorycloze_eu": { + "alias": " - xstorycloze_eu", + "acc,none": 0.5181998676373263, + "acc_stderr,none": 0.012858598401831846 + }, + "xstorycloze_hi": { + "alias": " - xstorycloze_hi", + "acc,none": 0.5122435473196558, + "acc_stderr,none": 0.012863267059205548 + }, + "xstorycloze_id": { + "alias": " - xstorycloze_id", + "acc,none": 0.5082726671078756, + "acc_stderr,none": 0.012865364020375396 + }, + "xstorycloze_my": { + "alias": " - xstorycloze_my", + "acc,none": 0.5036399735274653, + "acc_stderr,none": 0.01286678434828923 + }, + "xstorycloze_ru": { + "alias": " - xstorycloze_ru", + "acc,none": 0.514890800794176, + "acc_stderr,none": 0.012861417842074004 + }, + "xstorycloze_sw": { + "alias": " - xstorycloze_sw", + "acc,none": 0.49702183984116477, + "acc_stderr,none": 0.01286689706601123 + }, + "xstorycloze_te": { + "alias": " - xstorycloze_te", + "acc,none": 0.5320979483785573, + "acc_stderr,none": 0.012840584503982025 + }, + "xstorycloze_zh": { + "alias": " - xstorycloze_zh", + "acc,none": 0.5380542686962276, + "acc_stderr,none": 0.012829804720321695 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T19-44-01.999774_xwinograd.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T19-44-01.999774_xwinograd.json new file mode 100644 index 0000000000000000000000000000000000000000..df688655a8e0bb385dbbcfe37554f643cc7b2a60 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T19-44-01.999774_xwinograd.json @@ -0,0 +1,53 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "xwinograd" + ], + "num_fewshot": 0, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "xwinograd": { + "acc,none": 0.6298044504383007, + "acc_stderr,none": 0.007161814018785208, + "alias": "xwinograd" + }, + "xwinograd_en": { + "alias": " - xwinograd_en", + "acc,none": 0.6911827956989247, + "acc_stderr,none": 0.009583610207426776 + }, + "xwinograd_fr": { + "alias": " - xwinograd_fr", + "acc,none": 0.5783132530120482, + "acc_stderr,none": 0.05453428485295111 + }, + "xwinograd_jp": { + "alias": " - xwinograd_jp", + "acc,none": 0.5318039624608968, + "acc_stderr,none": 0.016121553797293346 + }, + "xwinograd_pt": { + "alias": " - xwinograd_pt", + "acc,none": 0.5171102661596958, + "acc_stderr,none": 0.030872011014694032 + }, + "xwinograd_ru": { + "alias": " - xwinograd_ru", + "acc,none": 0.5555555555555556, + "acc_stderr,none": 0.0280419147291705 + }, + "xwinograd_zh": { + "alias": " - xwinograd_zh", + "acc,none": 0.6468253968253969, + "acc_stderr,none": 0.021311021744798753 + } + } +} \ No newline at end of file diff --git a/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T21-23-47.057040_mmlu.json b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T21-23-47.057040_mmlu.json new file mode 100644 index 0000000000000000000000000000000000000000..e65cb4028a4cbdbbf6ecada080235c8be8792169 --- /dev/null +++ b/lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T21-23-47.057040_mmlu.json @@ -0,0 +1,328 @@ +{ + "model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096", + "tasks": [ + "mmlu" + ], + "num_fewshot": 5, + "lm_eval_version": "0.4.8", + "bos_token_id": 0, + "eos_token_id": 0, + "custom_prefix_token_id": null, + "pad_token_ids": [], + "stop_token_ids": [ + 11, + 261 + ], + "results": { + "mmlu": { + "acc,none": 0.25509186725537675, + "acc_stderr,none": 0.0036788945968586895, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.2463336875664187, + "acc_stderr,none": 0.006278892915824143, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.3333333333333333, + "acc_stderr,none": 0.04216370213557836 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.3151515151515151, + "acc_stderr,none": 0.0362773057502241 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.23529411764705882, + "acc_stderr,none": 0.029771775228145638 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.2742616033755274, + "acc_stderr,none": 0.02904133351059801 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.2644628099173554, + "acc_stderr,none": 0.04026187527591207 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.25, + "acc_stderr,none": 0.04186091791394607 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.26993865030674846, + "acc_stderr,none": 0.034878251684978906 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.24566473988439305, + "acc_stderr,none": 0.023176298203992005 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.24804469273743016, + "acc_stderr,none": 0.01444415780826146 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.19614147909967847, + "acc_stderr,none": 0.022552447780478022 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.2191358024691358, + "acc_stderr,none": 0.023016705640262175 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.24119947848761408, + "acc_stderr,none": 0.01092649610203494 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.23391812865497075, + "acc_stderr,none": 0.03246721765117827 + }, + "mmlu_other": { + "acc,none": 0.25490827164467333, + "acc_stderr,none": 0.007819268100190983, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816505 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.27169811320754716, + "acc_stderr,none": 0.027377706624670713 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.23121387283236994, + "acc_stderr,none": 0.032147373020294696 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.24, + "acc_stderr,none": 0.04292346959909284 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.2825112107623318, + "acc_stderr,none": 0.03021683101150877 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.2912621359223301, + "acc_stderr,none": 0.04498676320572924 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.27350427350427353, + "acc_stderr,none": 0.029202540153431183 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.24, + "acc_stderr,none": 0.04292346959909283 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.2388250319284802, + "acc_stderr,none": 0.015246803197398677 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.24183006535947713, + "acc_stderr,none": 0.024518195641879334 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.24113475177304963, + "acc_stderr,none": 0.025518731049537776 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.31985294117647056, + "acc_stderr,none": 0.02833295951403122 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.21686746987951808, + "acc_stderr,none": 0.03208284450356365 + }, + "mmlu_social_sciences": { + "acc,none": 0.25901852453688656, + "acc_stderr,none": 0.007909093782333265, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.2543859649122807, + "acc_stderr,none": 0.040969851398436716 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.2676767676767677, + "acc_stderr,none": 0.03154449888270286 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.26424870466321243, + "acc_stderr,none": 0.03182155050916647 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.2692307692307692, + "acc_stderr,none": 0.02248938979365481 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.27310924369747897, + "acc_stderr,none": 0.028942004040998167 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.24587155963302754, + "acc_stderr,none": 0.01846194096870845 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.26717557251908397, + "acc_stderr,none": 0.038808483010823944 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.25980392156862747, + "acc_stderr,none": 0.017740899509177795 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.22727272727272727, + "acc_stderr,none": 0.04013964554072774 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.24081632653061225, + "acc_stderr,none": 0.027372942201788163 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.2885572139303483, + "acc_stderr,none": 0.03203841040213322 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.24, + "acc_stderr,none": 0.042923469599092816 + }, + "mmlu_stem": { + "acc,none": 0.26450999048525214, + "acc_stderr,none": 0.007855635090944856, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.26, + "acc_stderr,none": 0.0440844002276808 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.21481481481481482, + "acc_stderr,none": 0.035478541985608236 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.27631578947368424, + "acc_stderr,none": 0.03639057569952924 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.24305555555555555, + "acc_stderr,none": 0.03586879280080342 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.35, + "acc_stderr,none": 0.0479372485441102 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.3, + "acc_stderr,none": 0.046056618647183814 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.28, + "acc_stderr,none": 0.04512608598542128 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.30392156862745096, + "acc_stderr,none": 0.04576665403207764 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.23, + "acc_stderr,none": 0.04229525846816506 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.20425531914893616, + "acc_stderr,none": 0.02635515841334941 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.22758620689655173, + "acc_stderr,none": 0.03493950380131183 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.2724867724867725, + "acc_stderr,none": 0.022930973071633345 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.2838709677419355, + "acc_stderr,none": 0.025649381063029254 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.2413793103448276, + "acc_stderr,none": 0.030108330718011625 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.24, + "acc_stderr,none": 0.042923469599092816 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.26666666666666666, + "acc_stderr,none": 0.026962424325073828 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.2582781456953642, + "acc_stderr,none": 0.035737053147634576 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.3194444444444444, + "acc_stderr,none": 0.03179876342176851 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.26785714285714285, + "acc_stderr,none": 0.04203277291467762 + } + } +} \ No newline at end of file