Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-25-38.029337_lambada_openai.json +27 -0
- lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-40-26.868929_hellaswag.json +27 -0
- lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-41-59.115770_piqa.json +27 -0
- lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-45-05.505108_arc_easy.json +27 -0
- lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-46-42.690976_arc_challenge.json +27 -0
- lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-15T23-56-38.085921_glue.json +69 -0
- lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-15T23-57-53.763827_winogrande.json +25 -0
- lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-15T23-59-54.095066_sciq.json +27 -0
- lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T00-22-58.345845_mmlu.json +330 -0
- lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T00-33-26.008450_lambada_multilingual.json +55 -0
- lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T09-56-09.522548_pawsx.json +60 -0
- lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T10-00-53.579856_xcopa.json +80 -0
- lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T11-13-06.581208_xnli.json +100 -0
- lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T11-52-46.021580_xstorycloze.json +80 -0
- lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T11-55-53.206548_xwinograd.json +55 -0
- lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T12-17-19.970767_mmlu.json +330 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-14-42.123700_lambada_openai.json +27 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-39-58.725375_hellaswag.json +27 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-42-10.675354_piqa.json +27 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-46-46.674313_arc_easy.json +27 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-49-11.748710_arc_challenge.json +27 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-08-18.050596_glue.json +69 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-09-43.473147_winogrande.json +25 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-12-44.780584_sciq.json +27 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-52-46.693108_mmlu.json +330 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T17-10-04.238325_lambada_multilingual.json +55 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T17-31-49.237829_pawsx.json +60 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T17-37-40.081757_xcopa.json +80 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T19-11-48.808152_xnli.json +100 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T19-42-29.131682_xstorycloze.json +80 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T19-47-11.016382_xwinograd.json +55 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T20-21-44.743544_lambada_openai.json +27 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T22-02-39.723956_xnli.json +100 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T22-35-32.643193_xstorycloze.json +80 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T22-39-50.695003_xwinograd.json +55 -0
- lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T23-45-11.895035_mmlu.json +330 -0
- lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T09-41-13.419478_lambada_openai.json +27 -0
- lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T10-43-54.649276_hellaswag.json +27 -0
- lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T10-47-38.776674_piqa.json +27 -0
- lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T10-55-41.507473_arc_easy.json +27 -0
- lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T11-00-09.309689_arc_challenge.json +27 -0
- lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T00-45-26.618704_lambada_openai.json +27 -0
- lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T02-36-50.849530_hellaswag.json +27 -0
- lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T02-43-09.170340_piqa.json +27 -0
- lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T02-57-01.574833_arc_easy.json +27 -0
- lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T03-04-40.682395_arc_challenge.json +27 -0
- lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T08-11-37.430416_glue.json +69 -0
- lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T08-14-56.977956_winogrande.json +25 -0
- lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T08-29-25.494712_sciq.json +27 -0
- lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T16-15-02.568125_mmlu.json +330 -0
lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-25-38.029337_lambada_openai.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_openai"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"lambada_openai": {
|
| 20 |
+
"alias": "lambada_openai",
|
| 21 |
+
"perplexity,none": 22.798507649915244,
|
| 22 |
+
"perplexity_stderr,none": 0.7375150682316036,
|
| 23 |
+
"acc,none": 0.38404812730448284,
|
| 24 |
+
"acc_stderr,none": 0.006776076316867708
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-40-26.868929_hellaswag.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"hellaswag"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"hellaswag": {
|
| 20 |
+
"alias": "hellaswag",
|
| 21 |
+
"acc,none": 0.29197371041625175,
|
| 22 |
+
"acc_stderr,none": 0.004537410615572915,
|
| 23 |
+
"acc_norm,none": 0.3192591117307309,
|
| 24 |
+
"acc_norm_stderr,none": 0.004652368273845513
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-41-59.115770_piqa.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"piqa"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"piqa": {
|
| 20 |
+
"alias": "piqa",
|
| 21 |
+
"acc,none": 0.6137105549510338,
|
| 22 |
+
"acc_stderr,none": 0.011360138833823677,
|
| 23 |
+
"acc_norm,none": 0.6186071817192601,
|
| 24 |
+
"acc_norm_stderr,none": 0.011332850406528682
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-45-05.505108_arc_easy.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_easy"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_easy": {
|
| 20 |
+
"alias": "arc_easy",
|
| 21 |
+
"acc,none": 0.44234006734006737,
|
| 22 |
+
"acc_stderr,none": 0.010191334444220846,
|
| 23 |
+
"acc_norm,none": 0.3939393939393939,
|
| 24 |
+
"acc_norm_stderr,none": 0.010026305355981804
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_11/0.4.8_2025-03-15T11-46-42.690976_arc_challenge.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_challenge"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_challenge": {
|
| 20 |
+
"alias": "arc_challenge",
|
| 21 |
+
"acc,none": 0.19880546075085323,
|
| 22 |
+
"acc_stderr,none": 0.011662850198175536,
|
| 23 |
+
"acc_norm,none": 0.22781569965870307,
|
| 24 |
+
"acc_norm_stderr,none": 0.012256708602326919
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-15T23-56-38.085921_glue.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"glue"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"cola": {
|
| 20 |
+
"alias": "cola",
|
| 21 |
+
"mcc,none": 0.05873054109498616,
|
| 22 |
+
"mcc_stderr,none": 0.03406044652133965
|
| 23 |
+
},
|
| 24 |
+
"mnli": {
|
| 25 |
+
"alias": "mnli",
|
| 26 |
+
"acc,none": 0.3186958736627611,
|
| 27 |
+
"acc_stderr,none": 0.004703657632807151
|
| 28 |
+
},
|
| 29 |
+
"mnli_mismatch": {
|
| 30 |
+
"alias": "mnli_mismatch",
|
| 31 |
+
"acc,none": 0.32414564686737185,
|
| 32 |
+
"acc_stderr,none": 0.00472060656953387
|
| 33 |
+
},
|
| 34 |
+
"mrpc": {
|
| 35 |
+
"alias": "mrpc",
|
| 36 |
+
"acc,none": 0.6838235294117647,
|
| 37 |
+
"acc_stderr,none": 0.023048336668420193,
|
| 38 |
+
"f1,none": 0.8122270742358079,
|
| 39 |
+
"f1_stderr,none": 0.01642422915504594
|
| 40 |
+
},
|
| 41 |
+
"qnli": {
|
| 42 |
+
"alias": "qnli",
|
| 43 |
+
"acc,none": 0.5024711696869851,
|
| 44 |
+
"acc_stderr,none": 0.006765327922882504
|
| 45 |
+
},
|
| 46 |
+
"qqp": {
|
| 47 |
+
"alias": "qqp",
|
| 48 |
+
"acc,none": 0.37380657927281724,
|
| 49 |
+
"acc_stderr,none": 0.002406197221605358,
|
| 50 |
+
"f1,none": 0.5285562652464573,
|
| 51 |
+
"f1_stderr,none": 0.0025938199201718647
|
| 52 |
+
},
|
| 53 |
+
"rte": {
|
| 54 |
+
"alias": "rte",
|
| 55 |
+
"acc,none": 0.5018050541516246,
|
| 56 |
+
"acc_stderr,none": 0.030096267148976626
|
| 57 |
+
},
|
| 58 |
+
"sst2": {
|
| 59 |
+
"alias": "sst2",
|
| 60 |
+
"acc,none": 0.5091743119266054,
|
| 61 |
+
"acc_stderr,none": 0.016939001525351532
|
| 62 |
+
},
|
| 63 |
+
"wnli": {
|
| 64 |
+
"alias": "wnli",
|
| 65 |
+
"acc,none": 0.4647887323943662,
|
| 66 |
+
"acc_stderr,none": 0.05961305784972239
|
| 67 |
+
}
|
| 68 |
+
}
|
| 69 |
+
}
|
lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-15T23-57-53.763827_winogrande.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"winogrande"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"winogrande": {
|
| 20 |
+
"alias": "winogrande",
|
| 21 |
+
"acc,none": 0.5098658247829518,
|
| 22 |
+
"acc_stderr,none": 0.014049749833367589
|
| 23 |
+
}
|
| 24 |
+
}
|
| 25 |
+
}
|
lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-15T23-59-54.095066_sciq.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"sciq"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"sciq": {
|
| 20 |
+
"alias": "sciq",
|
| 21 |
+
"acc,none": 0.756,
|
| 22 |
+
"acc_stderr,none": 0.013588548437881431,
|
| 23 |
+
"acc_norm,none": 0.672,
|
| 24 |
+
"acc_norm_stderr,none": 0.014853842487270334
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T00-22-58.345845_mmlu.json
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"mmlu"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"mmlu": {
|
| 20 |
+
"acc,none": 0.2315909414613303,
|
| 21 |
+
"acc_stderr,none": 0.0035530263924027183,
|
| 22 |
+
"alias": "mmlu"
|
| 23 |
+
},
|
| 24 |
+
"mmlu_humanities": {
|
| 25 |
+
"acc,none": 0.2452709883103082,
|
| 26 |
+
"acc_stderr,none": 0.006271012587014002,
|
| 27 |
+
"alias": " - humanities"
|
| 28 |
+
},
|
| 29 |
+
"mmlu_formal_logic": {
|
| 30 |
+
"alias": " - formal_logic",
|
| 31 |
+
"acc,none": 0.31746031746031744,
|
| 32 |
+
"acc_stderr,none": 0.04163453031302859
|
| 33 |
+
},
|
| 34 |
+
"mmlu_high_school_european_history": {
|
| 35 |
+
"alias": " - high_school_european_history",
|
| 36 |
+
"acc,none": 0.23030303030303031,
|
| 37 |
+
"acc_stderr,none": 0.0328766675860349
|
| 38 |
+
},
|
| 39 |
+
"mmlu_high_school_us_history": {
|
| 40 |
+
"alias": " - high_school_us_history",
|
| 41 |
+
"acc,none": 0.27450980392156865,
|
| 42 |
+
"acc_stderr,none": 0.03132179803083292
|
| 43 |
+
},
|
| 44 |
+
"mmlu_high_school_world_history": {
|
| 45 |
+
"alias": " - high_school_world_history",
|
| 46 |
+
"acc,none": 0.270042194092827,
|
| 47 |
+
"acc_stderr,none": 0.028900721906293426
|
| 48 |
+
},
|
| 49 |
+
"mmlu_international_law": {
|
| 50 |
+
"alias": " - international_law",
|
| 51 |
+
"acc,none": 0.2396694214876033,
|
| 52 |
+
"acc_stderr,none": 0.03896878985070417
|
| 53 |
+
},
|
| 54 |
+
"mmlu_jurisprudence": {
|
| 55 |
+
"alias": " - jurisprudence",
|
| 56 |
+
"acc,none": 0.25,
|
| 57 |
+
"acc_stderr,none": 0.04186091791394607
|
| 58 |
+
},
|
| 59 |
+
"mmlu_logical_fallacies": {
|
| 60 |
+
"alias": " - logical_fallacies",
|
| 61 |
+
"acc,none": 0.22085889570552147,
|
| 62 |
+
"acc_stderr,none": 0.032591773927421776
|
| 63 |
+
},
|
| 64 |
+
"mmlu_moral_disputes": {
|
| 65 |
+
"alias": " - moral_disputes",
|
| 66 |
+
"acc,none": 0.24855491329479767,
|
| 67 |
+
"acc_stderr,none": 0.023267528432100174
|
| 68 |
+
},
|
| 69 |
+
"mmlu_moral_scenarios": {
|
| 70 |
+
"alias": " - moral_scenarios",
|
| 71 |
+
"acc,none": 0.23798882681564246,
|
| 72 |
+
"acc_stderr,none": 0.014242630070574885
|
| 73 |
+
},
|
| 74 |
+
"mmlu_philosophy": {
|
| 75 |
+
"alias": " - philosophy",
|
| 76 |
+
"acc,none": 0.19614147909967847,
|
| 77 |
+
"acc_stderr,none": 0.022552447780478026
|
| 78 |
+
},
|
| 79 |
+
"mmlu_prehistory": {
|
| 80 |
+
"alias": " - prehistory",
|
| 81 |
+
"acc,none": 0.2222222222222222,
|
| 82 |
+
"acc_stderr,none": 0.023132376234543325
|
| 83 |
+
},
|
| 84 |
+
"mmlu_professional_law": {
|
| 85 |
+
"alias": " - professional_law",
|
| 86 |
+
"acc,none": 0.2470664928292047,
|
| 87 |
+
"acc_stderr,none": 0.011015752255279338
|
| 88 |
+
},
|
| 89 |
+
"mmlu_world_religions": {
|
| 90 |
+
"alias": " - world_religions",
|
| 91 |
+
"acc,none": 0.30994152046783624,
|
| 92 |
+
"acc_stderr,none": 0.03546976959393161
|
| 93 |
+
},
|
| 94 |
+
"mmlu_other": {
|
| 95 |
+
"acc,none": 0.24589636305117477,
|
| 96 |
+
"acc_stderr,none": 0.0077088095866843925,
|
| 97 |
+
"alias": " - other"
|
| 98 |
+
},
|
| 99 |
+
"mmlu_business_ethics": {
|
| 100 |
+
"alias": " - business_ethics",
|
| 101 |
+
"acc,none": 0.3,
|
| 102 |
+
"acc_stderr,none": 0.046056618647183814
|
| 103 |
+
},
|
| 104 |
+
"mmlu_clinical_knowledge": {
|
| 105 |
+
"alias": " - clinical_knowledge",
|
| 106 |
+
"acc,none": 0.20754716981132076,
|
| 107 |
+
"acc_stderr,none": 0.02495991802891127
|
| 108 |
+
},
|
| 109 |
+
"mmlu_college_medicine": {
|
| 110 |
+
"alias": " - college_medicine",
|
| 111 |
+
"acc,none": 0.24277456647398843,
|
| 112 |
+
"acc_stderr,none": 0.0326926380614177
|
| 113 |
+
},
|
| 114 |
+
"mmlu_global_facts": {
|
| 115 |
+
"alias": " - global_facts",
|
| 116 |
+
"acc,none": 0.19,
|
| 117 |
+
"acc_stderr,none": 0.039427724440366234
|
| 118 |
+
},
|
| 119 |
+
"mmlu_human_aging": {
|
| 120 |
+
"alias": " - human_aging",
|
| 121 |
+
"acc,none": 0.31390134529147984,
|
| 122 |
+
"acc_stderr,none": 0.03114679648297246
|
| 123 |
+
},
|
| 124 |
+
"mmlu_management": {
|
| 125 |
+
"alias": " - management",
|
| 126 |
+
"acc,none": 0.1650485436893204,
|
| 127 |
+
"acc_stderr,none": 0.036756688322331886
|
| 128 |
+
},
|
| 129 |
+
"mmlu_marketing": {
|
| 130 |
+
"alias": " - marketing",
|
| 131 |
+
"acc,none": 0.2905982905982906,
|
| 132 |
+
"acc_stderr,none": 0.029745048572674064
|
| 133 |
+
},
|
| 134 |
+
"mmlu_medical_genetics": {
|
| 135 |
+
"alias": " - medical_genetics",
|
| 136 |
+
"acc,none": 0.33,
|
| 137 |
+
"acc_stderr,none": 0.04725815626252604
|
| 138 |
+
},
|
| 139 |
+
"mmlu_miscellaneous": {
|
| 140 |
+
"alias": " - miscellaneous",
|
| 141 |
+
"acc,none": 0.2388250319284802,
|
| 142 |
+
"acc_stderr,none": 0.015246803197398682
|
| 143 |
+
},
|
| 144 |
+
"mmlu_nutrition": {
|
| 145 |
+
"alias": " - nutrition",
|
| 146 |
+
"acc,none": 0.238562091503268,
|
| 147 |
+
"acc_stderr,none": 0.02440439492808787
|
| 148 |
+
},
|
| 149 |
+
"mmlu_professional_accounting": {
|
| 150 |
+
"alias": " - professional_accounting",
|
| 151 |
+
"acc,none": 0.24822695035460993,
|
| 152 |
+
"acc_stderr,none": 0.025770015644290406
|
| 153 |
+
},
|
| 154 |
+
"mmlu_professional_medicine": {
|
| 155 |
+
"alias": " - professional_medicine",
|
| 156 |
+
"acc,none": 0.1948529411764706,
|
| 157 |
+
"acc_stderr,none": 0.024060599423487424
|
| 158 |
+
},
|
| 159 |
+
"mmlu_virology": {
|
| 160 |
+
"alias": " - virology",
|
| 161 |
+
"acc,none": 0.28313253012048195,
|
| 162 |
+
"acc_stderr,none": 0.03507295431370518
|
| 163 |
+
},
|
| 164 |
+
"mmlu_social_sciences": {
|
| 165 |
+
"acc,none": 0.2183945401364966,
|
| 166 |
+
"acc_stderr,none": 0.007444613745005196,
|
| 167 |
+
"alias": " - social sciences"
|
| 168 |
+
},
|
| 169 |
+
"mmlu_econometrics": {
|
| 170 |
+
"alias": " - econometrics",
|
| 171 |
+
"acc,none": 0.22807017543859648,
|
| 172 |
+
"acc_stderr,none": 0.03947152782669415
|
| 173 |
+
},
|
| 174 |
+
"mmlu_high_school_geography": {
|
| 175 |
+
"alias": " - high_school_geography",
|
| 176 |
+
"acc,none": 0.17676767676767677,
|
| 177 |
+
"acc_stderr,none": 0.027178752639044915
|
| 178 |
+
},
|
| 179 |
+
"mmlu_high_school_government_and_politics": {
|
| 180 |
+
"alias": " - high_school_government_and_politics",
|
| 181 |
+
"acc,none": 0.20207253886010362,
|
| 182 |
+
"acc_stderr,none": 0.02897908979429673
|
| 183 |
+
},
|
| 184 |
+
"mmlu_high_school_macroeconomics": {
|
| 185 |
+
"alias": " - high_school_macroeconomics",
|
| 186 |
+
"acc,none": 0.2076923076923077,
|
| 187 |
+
"acc_stderr,none": 0.02056753956724681
|
| 188 |
+
},
|
| 189 |
+
"mmlu_high_school_microeconomics": {
|
| 190 |
+
"alias": " - high_school_microeconomics",
|
| 191 |
+
"acc,none": 0.2184873949579832,
|
| 192 |
+
"acc_stderr,none": 0.026841514322958938
|
| 193 |
+
},
|
| 194 |
+
"mmlu_high_school_psychology": {
|
| 195 |
+
"alias": " - high_school_psychology",
|
| 196 |
+
"acc,none": 0.2,
|
| 197 |
+
"acc_stderr,none": 0.017149858514250958
|
| 198 |
+
},
|
| 199 |
+
"mmlu_human_sexuality": {
|
| 200 |
+
"alias": " - human_sexuality",
|
| 201 |
+
"acc,none": 0.2595419847328244,
|
| 202 |
+
"acc_stderr,none": 0.0384487613978527
|
| 203 |
+
},
|
| 204 |
+
"mmlu_professional_psychology": {
|
| 205 |
+
"alias": " - professional_psychology",
|
| 206 |
+
"acc,none": 0.2549019607843137,
|
| 207 |
+
"acc_stderr,none": 0.017630827375148383
|
| 208 |
+
},
|
| 209 |
+
"mmlu_public_relations": {
|
| 210 |
+
"alias": " - public_relations",
|
| 211 |
+
"acc,none": 0.19090909090909092,
|
| 212 |
+
"acc_stderr,none": 0.03764425585984924
|
| 213 |
+
},
|
| 214 |
+
"mmlu_security_studies": {
|
| 215 |
+
"alias": " - security_studies",
|
| 216 |
+
"acc,none": 0.17959183673469387,
|
| 217 |
+
"acc_stderr,none": 0.024573293589585637
|
| 218 |
+
},
|
| 219 |
+
"mmlu_sociology": {
|
| 220 |
+
"alias": " - sociology",
|
| 221 |
+
"acc,none": 0.23383084577114427,
|
| 222 |
+
"acc_stderr,none": 0.0299294154083484
|
| 223 |
+
},
|
| 224 |
+
"mmlu_us_foreign_policy": {
|
| 225 |
+
"alias": " - us_foreign_policy",
|
| 226 |
+
"acc,none": 0.28,
|
| 227 |
+
"acc_stderr,none": 0.045126085985421276
|
| 228 |
+
},
|
| 229 |
+
"mmlu_stem": {
|
| 230 |
+
"acc,none": 0.20995876942594355,
|
| 231 |
+
"acc_stderr,none": 0.007233045009100066,
|
| 232 |
+
"alias": " - stem"
|
| 233 |
+
},
|
| 234 |
+
"mmlu_abstract_algebra": {
|
| 235 |
+
"alias": " - abstract_algebra",
|
| 236 |
+
"acc,none": 0.23,
|
| 237 |
+
"acc_stderr,none": 0.04229525846816506
|
| 238 |
+
},
|
| 239 |
+
"mmlu_anatomy": {
|
| 240 |
+
"alias": " - anatomy",
|
| 241 |
+
"acc,none": 0.18518518518518517,
|
| 242 |
+
"acc_stderr,none": 0.03355677216313142
|
| 243 |
+
},
|
| 244 |
+
"mmlu_astronomy": {
|
| 245 |
+
"alias": " - astronomy",
|
| 246 |
+
"acc,none": 0.18421052631578946,
|
| 247 |
+
"acc_stderr,none": 0.0315469804508223
|
| 248 |
+
},
|
| 249 |
+
"mmlu_college_biology": {
|
| 250 |
+
"alias": " - college_biology",
|
| 251 |
+
"acc,none": 0.24305555555555555,
|
| 252 |
+
"acc_stderr,none": 0.03586879280080342
|
| 253 |
+
},
|
| 254 |
+
"mmlu_college_chemistry": {
|
| 255 |
+
"alias": " - college_chemistry",
|
| 256 |
+
"acc,none": 0.14,
|
| 257 |
+
"acc_stderr,none": 0.034873508801977725
|
| 258 |
+
},
|
| 259 |
+
"mmlu_college_computer_science": {
|
| 260 |
+
"alias": " - college_computer_science",
|
| 261 |
+
"acc,none": 0.25,
|
| 262 |
+
"acc_stderr,none": 0.04351941398892446
|
| 263 |
+
},
|
| 264 |
+
"mmlu_college_mathematics": {
|
| 265 |
+
"alias": " - college_mathematics",
|
| 266 |
+
"acc,none": 0.22,
|
| 267 |
+
"acc_stderr,none": 0.0416333199893227
|
| 268 |
+
},
|
| 269 |
+
"mmlu_college_physics": {
|
| 270 |
+
"alias": " - college_physics",
|
| 271 |
+
"acc,none": 0.21568627450980393,
|
| 272 |
+
"acc_stderr,none": 0.04092563958237655
|
| 273 |
+
},
|
| 274 |
+
"mmlu_computer_security": {
|
| 275 |
+
"alias": " - computer_security",
|
| 276 |
+
"acc,none": 0.26,
|
| 277 |
+
"acc_stderr,none": 0.044084400227680794
|
| 278 |
+
},
|
| 279 |
+
"mmlu_conceptual_physics": {
|
| 280 |
+
"alias": " - conceptual_physics",
|
| 281 |
+
"acc,none": 0.26382978723404255,
|
| 282 |
+
"acc_stderr,none": 0.02880998985410298
|
| 283 |
+
},
|
| 284 |
+
"mmlu_electrical_engineering": {
|
| 285 |
+
"alias": " - electrical_engineering",
|
| 286 |
+
"acc,none": 0.23448275862068965,
|
| 287 |
+
"acc_stderr,none": 0.035306258743465914
|
| 288 |
+
},
|
| 289 |
+
"mmlu_elementary_mathematics": {
|
| 290 |
+
"alias": " - elementary_mathematics",
|
| 291 |
+
"acc,none": 0.21693121693121692,
|
| 292 |
+
"acc_stderr,none": 0.021227082449445045
|
| 293 |
+
},
|
| 294 |
+
"mmlu_high_school_biology": {
|
| 295 |
+
"alias": " - high_school_biology",
|
| 296 |
+
"acc,none": 0.17096774193548386,
|
| 297 |
+
"acc_stderr,none": 0.02141724293632157
|
| 298 |
+
},
|
| 299 |
+
"mmlu_high_school_chemistry": {
|
| 300 |
+
"alias": " - high_school_chemistry",
|
| 301 |
+
"acc,none": 0.1625615763546798,
|
| 302 |
+
"acc_stderr,none": 0.02596030006460558
|
| 303 |
+
},
|
| 304 |
+
"mmlu_high_school_computer_science": {
|
| 305 |
+
"alias": " - high_school_computer_science",
|
| 306 |
+
"acc,none": 0.3,
|
| 307 |
+
"acc_stderr,none": 0.046056618647183814
|
| 308 |
+
},
|
| 309 |
+
"mmlu_high_school_mathematics": {
|
| 310 |
+
"alias": " - high_school_mathematics",
|
| 311 |
+
"acc,none": 0.2111111111111111,
|
| 312 |
+
"acc_stderr,none": 0.024882116857655078
|
| 313 |
+
},
|
| 314 |
+
"mmlu_high_school_physics": {
|
| 315 |
+
"alias": " - high_school_physics",
|
| 316 |
+
"acc,none": 0.1986754966887417,
|
| 317 |
+
"acc_stderr,none": 0.032578473844367746
|
| 318 |
+
},
|
| 319 |
+
"mmlu_high_school_statistics": {
|
| 320 |
+
"alias": " - high_school_statistics",
|
| 321 |
+
"acc,none": 0.12962962962962962,
|
| 322 |
+
"acc_stderr,none": 0.022907883151288597
|
| 323 |
+
},
|
| 324 |
+
"mmlu_machine_learning": {
|
| 325 |
+
"alias": " - machine_learning",
|
| 326 |
+
"acc,none": 0.29464285714285715,
|
| 327 |
+
"acc_stderr,none": 0.04327040932578728
|
| 328 |
+
}
|
| 329 |
+
}
|
| 330 |
+
}
|
lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T00-33-26.008450_lambada_multilingual.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_multilingual"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"lambada_openai_mt_de": {
|
| 20 |
+
"alias": "lambada_openai_mt_de",
|
| 21 |
+
"perplexity,none": 438.1123448025884,
|
| 22 |
+
"perplexity_stderr,none": 27.631396537461868,
|
| 23 |
+
"acc,none": 0.17970114496409859,
|
| 24 |
+
"acc_stderr,none": 0.005349011697308402
|
| 25 |
+
},
|
| 26 |
+
"lambada_openai_mt_en": {
|
| 27 |
+
"alias": "lambada_openai_mt_en",
|
| 28 |
+
"perplexity,none": 21.873502960243776,
|
| 29 |
+
"perplexity_stderr,none": 0.7063940196221459,
|
| 30 |
+
"acc,none": 0.39239278090432755,
|
| 31 |
+
"acc_stderr,none": 0.006802742619162039
|
| 32 |
+
},
|
| 33 |
+
"lambada_openai_mt_es": {
|
| 34 |
+
"alias": "lambada_openai_mt_es",
|
| 35 |
+
"perplexity,none": 520.7083247325162,
|
| 36 |
+
"perplexity_stderr,none": 31.60312563024137,
|
| 37 |
+
"acc,none": 0.17581991073161266,
|
| 38 |
+
"acc_stderr,none": 0.005303433892159959
|
| 39 |
+
},
|
| 40 |
+
"lambada_openai_mt_fr": {
|
| 41 |
+
"alias": "lambada_openai_mt_fr",
|
| 42 |
+
"perplexity,none": 315.69320350745926,
|
| 43 |
+
"perplexity_stderr,none": 19.029382127916357,
|
| 44 |
+
"acc,none": 0.21075101882398603,
|
| 45 |
+
"acc_stderr,none": 0.005682035322435383
|
| 46 |
+
},
|
| 47 |
+
"lambada_openai_mt_it": {
|
| 48 |
+
"alias": "lambada_openai_mt_it",
|
| 49 |
+
"perplexity,none": 753.7215739042271,
|
| 50 |
+
"perplexity_stderr,none": 49.15367277380386,
|
| 51 |
+
"acc,none": 0.16475839316902774,
|
| 52 |
+
"acc_stderr,none": 0.005168230882239359
|
| 53 |
+
}
|
| 54 |
+
}
|
| 55 |
+
}
|
lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T09-56-09.522548_pawsx.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"pawsx"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"pawsx": {
|
| 20 |
+
"acc,none": 0.48514285714285715,
|
| 21 |
+
"acc_stderr,none": 0.0042164415074101135,
|
| 22 |
+
"alias": "pawsx"
|
| 23 |
+
},
|
| 24 |
+
"paws_de": {
|
| 25 |
+
"alias": " - paws_de",
|
| 26 |
+
"acc,none": 0.4985,
|
| 27 |
+
"acc_stderr,none": 0.011183085696839198
|
| 28 |
+
},
|
| 29 |
+
"paws_en": {
|
| 30 |
+
"alias": " - paws_en",
|
| 31 |
+
"acc,none": 0.5015,
|
| 32 |
+
"acc_stderr,none": 0.011183085696839203
|
| 33 |
+
},
|
| 34 |
+
"paws_es": {
|
| 35 |
+
"alias": " - paws_es",
|
| 36 |
+
"acc,none": 0.532,
|
| 37 |
+
"acc_stderr,none": 0.011160209457602887
|
| 38 |
+
},
|
| 39 |
+
"paws_fr": {
|
| 40 |
+
"alias": " - paws_fr",
|
| 41 |
+
"acc,none": 0.4715,
|
| 42 |
+
"acc_stderr,none": 0.0111649542364288
|
| 43 |
+
},
|
| 44 |
+
"paws_ja": {
|
| 45 |
+
"alias": " - paws_ja",
|
| 46 |
+
"acc,none": 0.4395,
|
| 47 |
+
"acc_stderr,none": 0.011100968009384213
|
| 48 |
+
},
|
| 49 |
+
"paws_ko": {
|
| 50 |
+
"alias": " - paws_ko",
|
| 51 |
+
"acc,none": 0.445,
|
| 52 |
+
"acc_stderr,none": 0.011115272135099217
|
| 53 |
+
},
|
| 54 |
+
"paws_zh": {
|
| 55 |
+
"alias": " - paws_zh",
|
| 56 |
+
"acc,none": 0.508,
|
| 57 |
+
"acc_stderr,none": 0.011181704488030004
|
| 58 |
+
}
|
| 59 |
+
}
|
| 60 |
+
}
|
lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T10-00-53.579856_xcopa.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xcopa"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xcopa": {
|
| 20 |
+
"acc,none": 0.5310909090909091,
|
| 21 |
+
"acc_stderr,none": 0.006728483885990303,
|
| 22 |
+
"alias": "xcopa"
|
| 23 |
+
},
|
| 24 |
+
"xcopa_et": {
|
| 25 |
+
"alias": " - xcopa_et",
|
| 26 |
+
"acc,none": 0.504,
|
| 27 |
+
"acc_stderr,none": 0.022382357781962132
|
| 28 |
+
},
|
| 29 |
+
"xcopa_ht": {
|
| 30 |
+
"alias": " - xcopa_ht",
|
| 31 |
+
"acc,none": 0.51,
|
| 32 |
+
"acc_stderr,none": 0.02237859698923078
|
| 33 |
+
},
|
| 34 |
+
"xcopa_id": {
|
| 35 |
+
"alias": " - xcopa_id",
|
| 36 |
+
"acc,none": 0.544,
|
| 37 |
+
"acc_stderr,none": 0.022296238348407056
|
| 38 |
+
},
|
| 39 |
+
"xcopa_it": {
|
| 40 |
+
"alias": " - xcopa_it",
|
| 41 |
+
"acc,none": 0.514,
|
| 42 |
+
"acc_stderr,none": 0.022374298166353185
|
| 43 |
+
},
|
| 44 |
+
"xcopa_qu": {
|
| 45 |
+
"alias": " - xcopa_qu",
|
| 46 |
+
"acc,none": 0.502,
|
| 47 |
+
"acc_stderr,none": 0.02238289498648352
|
| 48 |
+
},
|
| 49 |
+
"xcopa_sw": {
|
| 50 |
+
"alias": " - xcopa_sw",
|
| 51 |
+
"acc,none": 0.544,
|
| 52 |
+
"acc_stderr,none": 0.022296238348407056
|
| 53 |
+
},
|
| 54 |
+
"xcopa_ta": {
|
| 55 |
+
"alias": " - xcopa_ta",
|
| 56 |
+
"acc,none": 0.53,
|
| 57 |
+
"acc_stderr,none": 0.022342748192502846
|
| 58 |
+
},
|
| 59 |
+
"xcopa_th": {
|
| 60 |
+
"alias": " - xcopa_th",
|
| 61 |
+
"acc,none": 0.578,
|
| 62 |
+
"acc_stderr,none": 0.022109039310618556
|
| 63 |
+
},
|
| 64 |
+
"xcopa_tr": {
|
| 65 |
+
"alias": " - xcopa_tr",
|
| 66 |
+
"acc,none": 0.562,
|
| 67 |
+
"acc_stderr,none": 0.022210326363977417
|
| 68 |
+
},
|
| 69 |
+
"xcopa_vi": {
|
| 70 |
+
"alias": " - xcopa_vi",
|
| 71 |
+
"acc,none": 0.53,
|
| 72 |
+
"acc_stderr,none": 0.022342748192502843
|
| 73 |
+
},
|
| 74 |
+
"xcopa_zh": {
|
| 75 |
+
"alias": " - xcopa_zh",
|
| 76 |
+
"acc,none": 0.524,
|
| 77 |
+
"acc_stderr,none": 0.0223572738810164
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
}
|
lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T11-13-06.581208_xnli.json
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xnli"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xnli": {
|
| 20 |
+
"acc,none": 0.3610709504685408,
|
| 21 |
+
"acc_stderr,none": 0.002478789488090545,
|
| 22 |
+
"alias": "xnli"
|
| 23 |
+
},
|
| 24 |
+
"xnli_ar": {
|
| 25 |
+
"alias": " - xnli_ar",
|
| 26 |
+
"acc,none": 0.3433734939759036,
|
| 27 |
+
"acc_stderr,none": 0.009517658993060705
|
| 28 |
+
},
|
| 29 |
+
"xnli_bg": {
|
| 30 |
+
"alias": " - xnli_bg",
|
| 31 |
+
"acc,none": 0.3457831325301205,
|
| 32 |
+
"acc_stderr,none": 0.009533455033752768
|
| 33 |
+
},
|
| 34 |
+
"xnli_de": {
|
| 35 |
+
"alias": " - xnli_de",
|
| 36 |
+
"acc,none": 0.39718875502008033,
|
| 37 |
+
"acc_stderr,none": 0.009807915070677289
|
| 38 |
+
},
|
| 39 |
+
"xnli_el": {
|
| 40 |
+
"alias": " - xnli_el",
|
| 41 |
+
"acc,none": 0.3353413654618474,
|
| 42 |
+
"acc_stderr,none": 0.009463034891512703
|
| 43 |
+
},
|
| 44 |
+
"xnli_en": {
|
| 45 |
+
"alias": " - xnli_en",
|
| 46 |
+
"acc,none": 0.46224899598393576,
|
| 47 |
+
"acc_stderr,none": 0.009993466360872784
|
| 48 |
+
},
|
| 49 |
+
"xnli_es": {
|
| 50 |
+
"alias": " - xnli_es",
|
| 51 |
+
"acc,none": 0.344578313253012,
|
| 52 |
+
"acc_stderr,none": 0.009525590900110655
|
| 53 |
+
},
|
| 54 |
+
"xnli_fr": {
|
| 55 |
+
"alias": " - xnli_fr",
|
| 56 |
+
"acc,none": 0.41044176706827307,
|
| 57 |
+
"acc_stderr,none": 0.00985999467258512
|
| 58 |
+
},
|
| 59 |
+
"xnli_hi": {
|
| 60 |
+
"alias": " - xnli_hi",
|
| 61 |
+
"acc,none": 0.348995983935743,
|
| 62 |
+
"acc_stderr,none": 0.009554095988300674
|
| 63 |
+
},
|
| 64 |
+
"xnli_ru": {
|
| 65 |
+
"alias": " - xnli_ru",
|
| 66 |
+
"acc,none": 0.3795180722891566,
|
| 67 |
+
"acc_stderr,none": 0.009726763372837137
|
| 68 |
+
},
|
| 69 |
+
"xnli_sw": {
|
| 70 |
+
"alias": " - xnli_sw",
|
| 71 |
+
"acc,none": 0.3285140562248996,
|
| 72 |
+
"acc_stderr,none": 0.009414190734131762
|
| 73 |
+
},
|
| 74 |
+
"xnli_th": {
|
| 75 |
+
"alias": " - xnli_th",
|
| 76 |
+
"acc,none": 0.3285140562248996,
|
| 77 |
+
"acc_stderr,none": 0.009414190734131762
|
| 78 |
+
},
|
| 79 |
+
"xnli_tr": {
|
| 80 |
+
"alias": " - xnli_tr",
|
| 81 |
+
"acc,none": 0.35903614457831323,
|
| 82 |
+
"acc_stderr,none": 0.00961553399241459
|
| 83 |
+
},
|
| 84 |
+
"xnli_ur": {
|
| 85 |
+
"alias": " - xnli_ur",
|
| 86 |
+
"acc,none": 0.3333333333333333,
|
| 87 |
+
"acc_stderr,none": 0.009448900914617614
|
| 88 |
+
},
|
| 89 |
+
"xnli_vi": {
|
| 90 |
+
"alias": " - xnli_vi",
|
| 91 |
+
"acc,none": 0.36305220883534134,
|
| 92 |
+
"acc_stderr,none": 0.00963882313398499
|
| 93 |
+
},
|
| 94 |
+
"xnli_zh": {
|
| 95 |
+
"alias": " - xnli_zh",
|
| 96 |
+
"acc,none": 0.336144578313253,
|
| 97 |
+
"acc_stderr,none": 0.00946863466929353
|
| 98 |
+
}
|
| 99 |
+
}
|
| 100 |
+
}
|
lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T11-52-46.021580_xstorycloze.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xstorycloze"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xstorycloze": {
|
| 20 |
+
"acc,none": 0.5168762409000662,
|
| 21 |
+
"acc_stderr,none": 0.0038693051060829046,
|
| 22 |
+
"alias": "xstorycloze"
|
| 23 |
+
},
|
| 24 |
+
"xstorycloze_ar": {
|
| 25 |
+
"alias": " - xstorycloze_ar",
|
| 26 |
+
"acc,none": 0.47253474520185307,
|
| 27 |
+
"acc_stderr,none": 0.012847698270388213
|
| 28 |
+
},
|
| 29 |
+
"xstorycloze_en": {
|
| 30 |
+
"alias": " - xstorycloze_en",
|
| 31 |
+
"acc,none": 0.6029119788219722,
|
| 32 |
+
"acc_stderr,none": 0.012591627740247462
|
| 33 |
+
},
|
| 34 |
+
"xstorycloze_es": {
|
| 35 |
+
"alias": " - xstorycloze_es",
|
| 36 |
+
"acc,none": 0.5241561879549967,
|
| 37 |
+
"acc_stderr,none": 0.01285210005730961
|
| 38 |
+
},
|
| 39 |
+
"xstorycloze_eu": {
|
| 40 |
+
"alias": " - xstorycloze_eu",
|
| 41 |
+
"acc,none": 0.5168762409000662,
|
| 42 |
+
"acc_stderr,none": 0.012859793919977602
|
| 43 |
+
},
|
| 44 |
+
"xstorycloze_hi": {
|
| 45 |
+
"alias": " - xstorycloze_hi",
|
| 46 |
+
"acc,none": 0.514890800794176,
|
| 47 |
+
"acc_stderr,none": 0.012861417842074004
|
| 48 |
+
},
|
| 49 |
+
"xstorycloze_id": {
|
| 50 |
+
"alias": " - xstorycloze_id",
|
| 51 |
+
"acc,none": 0.500992720052945,
|
| 52 |
+
"acc_stderr,none": 0.012867099955422933
|
| 53 |
+
},
|
| 54 |
+
"xstorycloze_my": {
|
| 55 |
+
"alias": " - xstorycloze_my",
|
| 56 |
+
"acc,none": 0.49503639973527463,
|
| 57 |
+
"acc_stderr,none": 0.012866491277589943
|
| 58 |
+
},
|
| 59 |
+
"xstorycloze_ru": {
|
| 60 |
+
"alias": " - xstorycloze_ru",
|
| 61 |
+
"acc,none": 0.4943745863666446,
|
| 62 |
+
"acc_stderr,none": 0.01286631092307252
|
| 63 |
+
},
|
| 64 |
+
"xstorycloze_sw": {
|
| 65 |
+
"alias": " - xstorycloze_sw",
|
| 66 |
+
"acc,none": 0.5056254136333554,
|
| 67 |
+
"acc_stderr,none": 0.012866310923072511
|
| 68 |
+
},
|
| 69 |
+
"xstorycloze_te": {
|
| 70 |
+
"alias": " - xstorycloze_te",
|
| 71 |
+
"acc,none": 0.5420251489080079,
|
| 72 |
+
"acc_stderr,none": 0.012821595164245275
|
| 73 |
+
},
|
| 74 |
+
"xstorycloze_zh": {
|
| 75 |
+
"alias": " - xstorycloze_zh",
|
| 76 |
+
"acc,none": 0.5162144275314361,
|
| 77 |
+
"acc_stderr,none": 0.012860357805055855
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
}
|
lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T11-55-53.206548_xwinograd.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xwinograd"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xwinograd": {
|
| 20 |
+
"acc,none": 0.5927174645987863,
|
| 21 |
+
"acc_stderr,none": 0.007316470702661161,
|
| 22 |
+
"alias": "xwinograd"
|
| 23 |
+
},
|
| 24 |
+
"xwinograd_en": {
|
| 25 |
+
"alias": " - xwinograd_en",
|
| 26 |
+
"acc,none": 0.6468817204301075,
|
| 27 |
+
"acc_stderr,none": 0.009914126992783658
|
| 28 |
+
},
|
| 29 |
+
"xwinograd_fr": {
|
| 30 |
+
"alias": " - xwinograd_fr",
|
| 31 |
+
"acc,none": 0.5421686746987951,
|
| 32 |
+
"acc_stderr,none": 0.05501904358494246
|
| 33 |
+
},
|
| 34 |
+
"xwinograd_jp": {
|
| 35 |
+
"alias": " - xwinograd_jp",
|
| 36 |
+
"acc,none": 0.5078206465067779,
|
| 37 |
+
"acc_stderr,none": 0.01615229055184455
|
| 38 |
+
},
|
| 39 |
+
"xwinograd_pt": {
|
| 40 |
+
"alias": " - xwinograd_pt",
|
| 41 |
+
"acc,none": 0.5285171102661597,
|
| 42 |
+
"acc_stderr,none": 0.030839820992717426
|
| 43 |
+
},
|
| 44 |
+
"xwinograd_ru": {
|
| 45 |
+
"alias": " - xwinograd_ru",
|
| 46 |
+
"acc,none": 0.546031746031746,
|
| 47 |
+
"acc_stderr,none": 0.028096800277810533
|
| 48 |
+
},
|
| 49 |
+
"xwinograd_zh": {
|
| 50 |
+
"alias": " - xwinograd_zh",
|
| 51 |
+
"acc,none": 0.5753968253968254,
|
| 52 |
+
"acc_stderr,none": 0.022038973193044563
|
| 53 |
+
}
|
| 54 |
+
}
|
| 55 |
+
}
|
lm_eval/RWKV-5-World-0.1B-v1-20230803-ctx4096/pad_261/0.4.8_2025-03-16T12-17-19.970767_mmlu.json
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.1B-v1-20230803-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"mmlu"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 5,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"mmlu": {
|
| 20 |
+
"acc,none": 0.2315909414613303,
|
| 21 |
+
"acc_stderr,none": 0.0035530263924027183,
|
| 22 |
+
"alias": "mmlu"
|
| 23 |
+
},
|
| 24 |
+
"mmlu_humanities": {
|
| 25 |
+
"acc,none": 0.2452709883103082,
|
| 26 |
+
"acc_stderr,none": 0.006271012587014002,
|
| 27 |
+
"alias": " - humanities"
|
| 28 |
+
},
|
| 29 |
+
"mmlu_formal_logic": {
|
| 30 |
+
"alias": " - formal_logic",
|
| 31 |
+
"acc,none": 0.31746031746031744,
|
| 32 |
+
"acc_stderr,none": 0.04163453031302859
|
| 33 |
+
},
|
| 34 |
+
"mmlu_high_school_european_history": {
|
| 35 |
+
"alias": " - high_school_european_history",
|
| 36 |
+
"acc,none": 0.23030303030303031,
|
| 37 |
+
"acc_stderr,none": 0.0328766675860349
|
| 38 |
+
},
|
| 39 |
+
"mmlu_high_school_us_history": {
|
| 40 |
+
"alias": " - high_school_us_history",
|
| 41 |
+
"acc,none": 0.27450980392156865,
|
| 42 |
+
"acc_stderr,none": 0.03132179803083292
|
| 43 |
+
},
|
| 44 |
+
"mmlu_high_school_world_history": {
|
| 45 |
+
"alias": " - high_school_world_history",
|
| 46 |
+
"acc,none": 0.270042194092827,
|
| 47 |
+
"acc_stderr,none": 0.028900721906293426
|
| 48 |
+
},
|
| 49 |
+
"mmlu_international_law": {
|
| 50 |
+
"alias": " - international_law",
|
| 51 |
+
"acc,none": 0.2396694214876033,
|
| 52 |
+
"acc_stderr,none": 0.03896878985070417
|
| 53 |
+
},
|
| 54 |
+
"mmlu_jurisprudence": {
|
| 55 |
+
"alias": " - jurisprudence",
|
| 56 |
+
"acc,none": 0.25,
|
| 57 |
+
"acc_stderr,none": 0.04186091791394607
|
| 58 |
+
},
|
| 59 |
+
"mmlu_logical_fallacies": {
|
| 60 |
+
"alias": " - logical_fallacies",
|
| 61 |
+
"acc,none": 0.22085889570552147,
|
| 62 |
+
"acc_stderr,none": 0.032591773927421776
|
| 63 |
+
},
|
| 64 |
+
"mmlu_moral_disputes": {
|
| 65 |
+
"alias": " - moral_disputes",
|
| 66 |
+
"acc,none": 0.24855491329479767,
|
| 67 |
+
"acc_stderr,none": 0.023267528432100174
|
| 68 |
+
},
|
| 69 |
+
"mmlu_moral_scenarios": {
|
| 70 |
+
"alias": " - moral_scenarios",
|
| 71 |
+
"acc,none": 0.23798882681564246,
|
| 72 |
+
"acc_stderr,none": 0.014242630070574885
|
| 73 |
+
},
|
| 74 |
+
"mmlu_philosophy": {
|
| 75 |
+
"alias": " - philosophy",
|
| 76 |
+
"acc,none": 0.19614147909967847,
|
| 77 |
+
"acc_stderr,none": 0.022552447780478026
|
| 78 |
+
},
|
| 79 |
+
"mmlu_prehistory": {
|
| 80 |
+
"alias": " - prehistory",
|
| 81 |
+
"acc,none": 0.2222222222222222,
|
| 82 |
+
"acc_stderr,none": 0.023132376234543325
|
| 83 |
+
},
|
| 84 |
+
"mmlu_professional_law": {
|
| 85 |
+
"alias": " - professional_law",
|
| 86 |
+
"acc,none": 0.2470664928292047,
|
| 87 |
+
"acc_stderr,none": 0.011015752255279338
|
| 88 |
+
},
|
| 89 |
+
"mmlu_world_religions": {
|
| 90 |
+
"alias": " - world_religions",
|
| 91 |
+
"acc,none": 0.30994152046783624,
|
| 92 |
+
"acc_stderr,none": 0.03546976959393161
|
| 93 |
+
},
|
| 94 |
+
"mmlu_other": {
|
| 95 |
+
"acc,none": 0.24589636305117477,
|
| 96 |
+
"acc_stderr,none": 0.0077088095866843925,
|
| 97 |
+
"alias": " - other"
|
| 98 |
+
},
|
| 99 |
+
"mmlu_business_ethics": {
|
| 100 |
+
"alias": " - business_ethics",
|
| 101 |
+
"acc,none": 0.3,
|
| 102 |
+
"acc_stderr,none": 0.046056618647183814
|
| 103 |
+
},
|
| 104 |
+
"mmlu_clinical_knowledge": {
|
| 105 |
+
"alias": " - clinical_knowledge",
|
| 106 |
+
"acc,none": 0.20754716981132076,
|
| 107 |
+
"acc_stderr,none": 0.02495991802891127
|
| 108 |
+
},
|
| 109 |
+
"mmlu_college_medicine": {
|
| 110 |
+
"alias": " - college_medicine",
|
| 111 |
+
"acc,none": 0.24277456647398843,
|
| 112 |
+
"acc_stderr,none": 0.0326926380614177
|
| 113 |
+
},
|
| 114 |
+
"mmlu_global_facts": {
|
| 115 |
+
"alias": " - global_facts",
|
| 116 |
+
"acc,none": 0.19,
|
| 117 |
+
"acc_stderr,none": 0.039427724440366234
|
| 118 |
+
},
|
| 119 |
+
"mmlu_human_aging": {
|
| 120 |
+
"alias": " - human_aging",
|
| 121 |
+
"acc,none": 0.31390134529147984,
|
| 122 |
+
"acc_stderr,none": 0.03114679648297246
|
| 123 |
+
},
|
| 124 |
+
"mmlu_management": {
|
| 125 |
+
"alias": " - management",
|
| 126 |
+
"acc,none": 0.1650485436893204,
|
| 127 |
+
"acc_stderr,none": 0.036756688322331886
|
| 128 |
+
},
|
| 129 |
+
"mmlu_marketing": {
|
| 130 |
+
"alias": " - marketing",
|
| 131 |
+
"acc,none": 0.2905982905982906,
|
| 132 |
+
"acc_stderr,none": 0.029745048572674064
|
| 133 |
+
},
|
| 134 |
+
"mmlu_medical_genetics": {
|
| 135 |
+
"alias": " - medical_genetics",
|
| 136 |
+
"acc,none": 0.33,
|
| 137 |
+
"acc_stderr,none": 0.04725815626252604
|
| 138 |
+
},
|
| 139 |
+
"mmlu_miscellaneous": {
|
| 140 |
+
"alias": " - miscellaneous",
|
| 141 |
+
"acc,none": 0.2388250319284802,
|
| 142 |
+
"acc_stderr,none": 0.015246803197398682
|
| 143 |
+
},
|
| 144 |
+
"mmlu_nutrition": {
|
| 145 |
+
"alias": " - nutrition",
|
| 146 |
+
"acc,none": 0.238562091503268,
|
| 147 |
+
"acc_stderr,none": 0.02440439492808787
|
| 148 |
+
},
|
| 149 |
+
"mmlu_professional_accounting": {
|
| 150 |
+
"alias": " - professional_accounting",
|
| 151 |
+
"acc,none": 0.24822695035460993,
|
| 152 |
+
"acc_stderr,none": 0.025770015644290406
|
| 153 |
+
},
|
| 154 |
+
"mmlu_professional_medicine": {
|
| 155 |
+
"alias": " - professional_medicine",
|
| 156 |
+
"acc,none": 0.1948529411764706,
|
| 157 |
+
"acc_stderr,none": 0.024060599423487424
|
| 158 |
+
},
|
| 159 |
+
"mmlu_virology": {
|
| 160 |
+
"alias": " - virology",
|
| 161 |
+
"acc,none": 0.28313253012048195,
|
| 162 |
+
"acc_stderr,none": 0.03507295431370518
|
| 163 |
+
},
|
| 164 |
+
"mmlu_social_sciences": {
|
| 165 |
+
"acc,none": 0.2183945401364966,
|
| 166 |
+
"acc_stderr,none": 0.007444613745005196,
|
| 167 |
+
"alias": " - social sciences"
|
| 168 |
+
},
|
| 169 |
+
"mmlu_econometrics": {
|
| 170 |
+
"alias": " - econometrics",
|
| 171 |
+
"acc,none": 0.22807017543859648,
|
| 172 |
+
"acc_stderr,none": 0.03947152782669415
|
| 173 |
+
},
|
| 174 |
+
"mmlu_high_school_geography": {
|
| 175 |
+
"alias": " - high_school_geography",
|
| 176 |
+
"acc,none": 0.17676767676767677,
|
| 177 |
+
"acc_stderr,none": 0.027178752639044915
|
| 178 |
+
},
|
| 179 |
+
"mmlu_high_school_government_and_politics": {
|
| 180 |
+
"alias": " - high_school_government_and_politics",
|
| 181 |
+
"acc,none": 0.20207253886010362,
|
| 182 |
+
"acc_stderr,none": 0.02897908979429673
|
| 183 |
+
},
|
| 184 |
+
"mmlu_high_school_macroeconomics": {
|
| 185 |
+
"alias": " - high_school_macroeconomics",
|
| 186 |
+
"acc,none": 0.2076923076923077,
|
| 187 |
+
"acc_stderr,none": 0.02056753956724681
|
| 188 |
+
},
|
| 189 |
+
"mmlu_high_school_microeconomics": {
|
| 190 |
+
"alias": " - high_school_microeconomics",
|
| 191 |
+
"acc,none": 0.2184873949579832,
|
| 192 |
+
"acc_stderr,none": 0.026841514322958938
|
| 193 |
+
},
|
| 194 |
+
"mmlu_high_school_psychology": {
|
| 195 |
+
"alias": " - high_school_psychology",
|
| 196 |
+
"acc,none": 0.2,
|
| 197 |
+
"acc_stderr,none": 0.017149858514250958
|
| 198 |
+
},
|
| 199 |
+
"mmlu_human_sexuality": {
|
| 200 |
+
"alias": " - human_sexuality",
|
| 201 |
+
"acc,none": 0.2595419847328244,
|
| 202 |
+
"acc_stderr,none": 0.0384487613978527
|
| 203 |
+
},
|
| 204 |
+
"mmlu_professional_psychology": {
|
| 205 |
+
"alias": " - professional_psychology",
|
| 206 |
+
"acc,none": 0.2549019607843137,
|
| 207 |
+
"acc_stderr,none": 0.017630827375148383
|
| 208 |
+
},
|
| 209 |
+
"mmlu_public_relations": {
|
| 210 |
+
"alias": " - public_relations",
|
| 211 |
+
"acc,none": 0.19090909090909092,
|
| 212 |
+
"acc_stderr,none": 0.03764425585984924
|
| 213 |
+
},
|
| 214 |
+
"mmlu_security_studies": {
|
| 215 |
+
"alias": " - security_studies",
|
| 216 |
+
"acc,none": 0.17959183673469387,
|
| 217 |
+
"acc_stderr,none": 0.024573293589585637
|
| 218 |
+
},
|
| 219 |
+
"mmlu_sociology": {
|
| 220 |
+
"alias": " - sociology",
|
| 221 |
+
"acc,none": 0.23383084577114427,
|
| 222 |
+
"acc_stderr,none": 0.0299294154083484
|
| 223 |
+
},
|
| 224 |
+
"mmlu_us_foreign_policy": {
|
| 225 |
+
"alias": " - us_foreign_policy",
|
| 226 |
+
"acc,none": 0.28,
|
| 227 |
+
"acc_stderr,none": 0.045126085985421276
|
| 228 |
+
},
|
| 229 |
+
"mmlu_stem": {
|
| 230 |
+
"acc,none": 0.20995876942594355,
|
| 231 |
+
"acc_stderr,none": 0.007233045009100066,
|
| 232 |
+
"alias": " - stem"
|
| 233 |
+
},
|
| 234 |
+
"mmlu_abstract_algebra": {
|
| 235 |
+
"alias": " - abstract_algebra",
|
| 236 |
+
"acc,none": 0.23,
|
| 237 |
+
"acc_stderr,none": 0.04229525846816506
|
| 238 |
+
},
|
| 239 |
+
"mmlu_anatomy": {
|
| 240 |
+
"alias": " - anatomy",
|
| 241 |
+
"acc,none": 0.18518518518518517,
|
| 242 |
+
"acc_stderr,none": 0.03355677216313142
|
| 243 |
+
},
|
| 244 |
+
"mmlu_astronomy": {
|
| 245 |
+
"alias": " - astronomy",
|
| 246 |
+
"acc,none": 0.18421052631578946,
|
| 247 |
+
"acc_stderr,none": 0.0315469804508223
|
| 248 |
+
},
|
| 249 |
+
"mmlu_college_biology": {
|
| 250 |
+
"alias": " - college_biology",
|
| 251 |
+
"acc,none": 0.24305555555555555,
|
| 252 |
+
"acc_stderr,none": 0.03586879280080342
|
| 253 |
+
},
|
| 254 |
+
"mmlu_college_chemistry": {
|
| 255 |
+
"alias": " - college_chemistry",
|
| 256 |
+
"acc,none": 0.14,
|
| 257 |
+
"acc_stderr,none": 0.034873508801977725
|
| 258 |
+
},
|
| 259 |
+
"mmlu_college_computer_science": {
|
| 260 |
+
"alias": " - college_computer_science",
|
| 261 |
+
"acc,none": 0.25,
|
| 262 |
+
"acc_stderr,none": 0.04351941398892446
|
| 263 |
+
},
|
| 264 |
+
"mmlu_college_mathematics": {
|
| 265 |
+
"alias": " - college_mathematics",
|
| 266 |
+
"acc,none": 0.22,
|
| 267 |
+
"acc_stderr,none": 0.0416333199893227
|
| 268 |
+
},
|
| 269 |
+
"mmlu_college_physics": {
|
| 270 |
+
"alias": " - college_physics",
|
| 271 |
+
"acc,none": 0.21568627450980393,
|
| 272 |
+
"acc_stderr,none": 0.04092563958237655
|
| 273 |
+
},
|
| 274 |
+
"mmlu_computer_security": {
|
| 275 |
+
"alias": " - computer_security",
|
| 276 |
+
"acc,none": 0.26,
|
| 277 |
+
"acc_stderr,none": 0.044084400227680794
|
| 278 |
+
},
|
| 279 |
+
"mmlu_conceptual_physics": {
|
| 280 |
+
"alias": " - conceptual_physics",
|
| 281 |
+
"acc,none": 0.26382978723404255,
|
| 282 |
+
"acc_stderr,none": 0.02880998985410298
|
| 283 |
+
},
|
| 284 |
+
"mmlu_electrical_engineering": {
|
| 285 |
+
"alias": " - electrical_engineering",
|
| 286 |
+
"acc,none": 0.23448275862068965,
|
| 287 |
+
"acc_stderr,none": 0.035306258743465914
|
| 288 |
+
},
|
| 289 |
+
"mmlu_elementary_mathematics": {
|
| 290 |
+
"alias": " - elementary_mathematics",
|
| 291 |
+
"acc,none": 0.21693121693121692,
|
| 292 |
+
"acc_stderr,none": 0.021227082449445045
|
| 293 |
+
},
|
| 294 |
+
"mmlu_high_school_biology": {
|
| 295 |
+
"alias": " - high_school_biology",
|
| 296 |
+
"acc,none": 0.17096774193548386,
|
| 297 |
+
"acc_stderr,none": 0.02141724293632157
|
| 298 |
+
},
|
| 299 |
+
"mmlu_high_school_chemistry": {
|
| 300 |
+
"alias": " - high_school_chemistry",
|
| 301 |
+
"acc,none": 0.1625615763546798,
|
| 302 |
+
"acc_stderr,none": 0.02596030006460558
|
| 303 |
+
},
|
| 304 |
+
"mmlu_high_school_computer_science": {
|
| 305 |
+
"alias": " - high_school_computer_science",
|
| 306 |
+
"acc,none": 0.3,
|
| 307 |
+
"acc_stderr,none": 0.046056618647183814
|
| 308 |
+
},
|
| 309 |
+
"mmlu_high_school_mathematics": {
|
| 310 |
+
"alias": " - high_school_mathematics",
|
| 311 |
+
"acc,none": 0.2111111111111111,
|
| 312 |
+
"acc_stderr,none": 0.024882116857655078
|
| 313 |
+
},
|
| 314 |
+
"mmlu_high_school_physics": {
|
| 315 |
+
"alias": " - high_school_physics",
|
| 316 |
+
"acc,none": 0.1986754966887417,
|
| 317 |
+
"acc_stderr,none": 0.032578473844367746
|
| 318 |
+
},
|
| 319 |
+
"mmlu_high_school_statistics": {
|
| 320 |
+
"alias": " - high_school_statistics",
|
| 321 |
+
"acc,none": 0.12962962962962962,
|
| 322 |
+
"acc_stderr,none": 0.022907883151288597
|
| 323 |
+
},
|
| 324 |
+
"mmlu_machine_learning": {
|
| 325 |
+
"alias": " - machine_learning",
|
| 326 |
+
"acc,none": 0.29464285714285715,
|
| 327 |
+
"acc_stderr,none": 0.04327040932578728
|
| 328 |
+
}
|
| 329 |
+
}
|
| 330 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-14-42.123700_lambada_openai.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_openai"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"lambada_openai": {
|
| 20 |
+
"alias": "lambada_openai",
|
| 21 |
+
"perplexity,none": 8.874438771398895,
|
| 22 |
+
"perplexity_stderr,none": 0.25110180223575285,
|
| 23 |
+
"acc,none": 0.5402678051620415,
|
| 24 |
+
"acc_stderr,none": 0.0069433502956647445
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-39-58.725375_hellaswag.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"hellaswag"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"hellaswag": {
|
| 20 |
+
"alias": "hellaswag",
|
| 21 |
+
"acc,none": 0.34176458872734516,
|
| 22 |
+
"acc_stderr,none": 0.004733317847006545,
|
| 23 |
+
"acc_norm,none": 0.40928101971718783,
|
| 24 |
+
"acc_norm_stderr,none": 0.004906962980328275
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-42-10.675354_piqa.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"piqa"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"piqa": {
|
| 20 |
+
"alias": "piqa",
|
| 21 |
+
"acc,none": 0.6653971708378672,
|
| 22 |
+
"acc_stderr,none": 0.011009071725162505,
|
| 23 |
+
"acc_norm,none": 0.6724700761697497,
|
| 24 |
+
"acc_norm_stderr,none": 0.010949830482825478
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-46-46.674313_arc_easy.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_easy"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_easy": {
|
| 20 |
+
"alias": "arc_easy",
|
| 21 |
+
"acc,none": 0.5395622895622896,
|
| 22 |
+
"acc_stderr,none": 0.010227616386289006,
|
| 23 |
+
"acc_norm,none": 0.4877946127946128,
|
| 24 |
+
"acc_norm_stderr,none": 0.010256726235129009
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T14-49-11.748710_arc_challenge.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_challenge"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_challenge": {
|
| 20 |
+
"alias": "arc_challenge",
|
| 21 |
+
"acc,none": 0.23976109215017063,
|
| 22 |
+
"acc_stderr,none": 0.012476304127453932,
|
| 23 |
+
"acc_norm,none": 0.2627986348122867,
|
| 24 |
+
"acc_norm_stderr,none": 0.012862523175351331
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-08-18.050596_glue.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"glue"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"cola": {
|
| 20 |
+
"alias": "cola",
|
| 21 |
+
"mcc,none": 0.0024763761493444563,
|
| 22 |
+
"mcc_stderr,none": 0.030693706731630372
|
| 23 |
+
},
|
| 24 |
+
"mnli": {
|
| 25 |
+
"alias": "mnli",
|
| 26 |
+
"acc,none": 0.34212939378502294,
|
| 27 |
+
"acc_stderr,none": 0.004788973218637058
|
| 28 |
+
},
|
| 29 |
+
"mnli_mismatch": {
|
| 30 |
+
"alias": "mnli_mismatch",
|
| 31 |
+
"acc,none": 0.34662327095199347,
|
| 32 |
+
"acc_stderr,none": 0.004799675113044456
|
| 33 |
+
},
|
| 34 |
+
"mrpc": {
|
| 35 |
+
"alias": "mrpc",
|
| 36 |
+
"acc,none": 0.6838235294117647,
|
| 37 |
+
"acc_stderr,none": 0.023048336668420193,
|
| 38 |
+
"f1,none": 0.8122270742358079,
|
| 39 |
+
"f1_stderr,none": 0.01642422915504585
|
| 40 |
+
},
|
| 41 |
+
"qnli": {
|
| 42 |
+
"alias": "qnli",
|
| 43 |
+
"acc,none": 0.5066813106351822,
|
| 44 |
+
"acc_stderr,none": 0.006764806510150313
|
| 45 |
+
},
|
| 46 |
+
"qqp": {
|
| 47 |
+
"alias": "qqp",
|
| 48 |
+
"acc,none": 0.610907741775909,
|
| 49 |
+
"acc_stderr,none": 0.0024247535821838137,
|
| 50 |
+
"f1,none": 0.057628946264901455,
|
| 51 |
+
"f1_stderr,none": 0.002516531840072046
|
| 52 |
+
},
|
| 53 |
+
"rte": {
|
| 54 |
+
"alias": "rte",
|
| 55 |
+
"acc,none": 0.5270758122743683,
|
| 56 |
+
"acc_stderr,none": 0.030052303463143706
|
| 57 |
+
},
|
| 58 |
+
"sst2": {
|
| 59 |
+
"alias": "sst2",
|
| 60 |
+
"acc,none": 0.5149082568807339,
|
| 61 |
+
"acc_stderr,none": 0.0169343211533256
|
| 62 |
+
},
|
| 63 |
+
"wnli": {
|
| 64 |
+
"alias": "wnli",
|
| 65 |
+
"acc,none": 0.4647887323943662,
|
| 66 |
+
"acc_stderr,none": 0.0596130578497224
|
| 67 |
+
}
|
| 68 |
+
}
|
| 69 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-09-43.473147_winogrande.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"winogrande"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"winogrande": {
|
| 20 |
+
"alias": "winogrande",
|
| 21 |
+
"acc,none": 0.531965272296764,
|
| 22 |
+
"acc_stderr,none": 0.01402373922116638
|
| 23 |
+
}
|
| 24 |
+
}
|
| 25 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-12-44.780584_sciq.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"sciq"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"sciq": {
|
| 20 |
+
"alias": "sciq",
|
| 21 |
+
"acc,none": 0.869,
|
| 22 |
+
"acc_stderr,none": 0.010674874844837957,
|
| 23 |
+
"acc_norm,none": 0.809,
|
| 24 |
+
"acc_norm_stderr,none": 0.012436787112179517
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T16-52-46.693108_mmlu.json
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"mmlu"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"mmlu": {
|
| 20 |
+
"acc,none": 0.23764421022646345,
|
| 21 |
+
"acc_stderr,none": 0.003587790530224594,
|
| 22 |
+
"alias": "mmlu"
|
| 23 |
+
},
|
| 24 |
+
"mmlu_humanities": {
|
| 25 |
+
"acc,none": 0.24782146652497344,
|
| 26 |
+
"acc_stderr,none": 0.0062902893350153324,
|
| 27 |
+
"alias": " - humanities"
|
| 28 |
+
},
|
| 29 |
+
"mmlu_formal_logic": {
|
| 30 |
+
"alias": " - formal_logic",
|
| 31 |
+
"acc,none": 0.3253968253968254,
|
| 32 |
+
"acc_stderr,none": 0.041905964388711366
|
| 33 |
+
},
|
| 34 |
+
"mmlu_high_school_european_history": {
|
| 35 |
+
"alias": " - high_school_european_history",
|
| 36 |
+
"acc,none": 0.22424242424242424,
|
| 37 |
+
"acc_stderr,none": 0.03256866661681102
|
| 38 |
+
},
|
| 39 |
+
"mmlu_high_school_us_history": {
|
| 40 |
+
"alias": " - high_school_us_history",
|
| 41 |
+
"acc,none": 0.2107843137254902,
|
| 42 |
+
"acc_stderr,none": 0.028626547912437388
|
| 43 |
+
},
|
| 44 |
+
"mmlu_high_school_world_history": {
|
| 45 |
+
"alias": " - high_school_world_history",
|
| 46 |
+
"acc,none": 0.270042194092827,
|
| 47 |
+
"acc_stderr,none": 0.028900721906293433
|
| 48 |
+
},
|
| 49 |
+
"mmlu_international_law": {
|
| 50 |
+
"alias": " - international_law",
|
| 51 |
+
"acc,none": 0.2396694214876033,
|
| 52 |
+
"acc_stderr,none": 0.038968789850704164
|
| 53 |
+
},
|
| 54 |
+
"mmlu_jurisprudence": {
|
| 55 |
+
"alias": " - jurisprudence",
|
| 56 |
+
"acc,none": 0.32407407407407407,
|
| 57 |
+
"acc_stderr,none": 0.04524596007030048
|
| 58 |
+
},
|
| 59 |
+
"mmlu_logical_fallacies": {
|
| 60 |
+
"alias": " - logical_fallacies",
|
| 61 |
+
"acc,none": 0.20245398773006135,
|
| 62 |
+
"acc_stderr,none": 0.03157065078911902
|
| 63 |
+
},
|
| 64 |
+
"mmlu_moral_disputes": {
|
| 65 |
+
"alias": " - moral_disputes",
|
| 66 |
+
"acc,none": 0.2398843930635838,
|
| 67 |
+
"acc_stderr,none": 0.022989592543123567
|
| 68 |
+
},
|
| 69 |
+
"mmlu_moral_scenarios": {
|
| 70 |
+
"alias": " - moral_scenarios",
|
| 71 |
+
"acc,none": 0.25027932960893856,
|
| 72 |
+
"acc_stderr,none": 0.014487500852850412
|
| 73 |
+
},
|
| 74 |
+
"mmlu_philosophy": {
|
| 75 |
+
"alias": " - philosophy",
|
| 76 |
+
"acc,none": 0.19935691318327975,
|
| 77 |
+
"acc_stderr,none": 0.022691033780549656
|
| 78 |
+
},
|
| 79 |
+
"mmlu_prehistory": {
|
| 80 |
+
"alias": " - prehistory",
|
| 81 |
+
"acc,none": 0.23148148148148148,
|
| 82 |
+
"acc_stderr,none": 0.023468429832451152
|
| 83 |
+
},
|
| 84 |
+
"mmlu_professional_law": {
|
| 85 |
+
"alias": " - professional_law",
|
| 86 |
+
"acc,none": 0.25358539765319427,
|
| 87 |
+
"acc_stderr,none": 0.01111171533610114
|
| 88 |
+
},
|
| 89 |
+
"mmlu_world_religions": {
|
| 90 |
+
"alias": " - world_religions",
|
| 91 |
+
"acc,none": 0.2982456140350877,
|
| 92 |
+
"acc_stderr,none": 0.035087719298245654
|
| 93 |
+
},
|
| 94 |
+
"mmlu_other": {
|
| 95 |
+
"acc,none": 0.24460894753781784,
|
| 96 |
+
"acc_stderr,none": 0.007691719275922855,
|
| 97 |
+
"alias": " - other"
|
| 98 |
+
},
|
| 99 |
+
"mmlu_business_ethics": {
|
| 100 |
+
"alias": " - business_ethics",
|
| 101 |
+
"acc,none": 0.31,
|
| 102 |
+
"acc_stderr,none": 0.04648231987117316
|
| 103 |
+
},
|
| 104 |
+
"mmlu_clinical_knowledge": {
|
| 105 |
+
"alias": " - clinical_knowledge",
|
| 106 |
+
"acc,none": 0.19245283018867926,
|
| 107 |
+
"acc_stderr,none": 0.02426297983937226
|
| 108 |
+
},
|
| 109 |
+
"mmlu_college_medicine": {
|
| 110 |
+
"alias": " - college_medicine",
|
| 111 |
+
"acc,none": 0.2023121387283237,
|
| 112 |
+
"acc_stderr,none": 0.03063114553919882
|
| 113 |
+
},
|
| 114 |
+
"mmlu_global_facts": {
|
| 115 |
+
"alias": " - global_facts",
|
| 116 |
+
"acc,none": 0.24,
|
| 117 |
+
"acc_stderr,none": 0.042923469599092816
|
| 118 |
+
},
|
| 119 |
+
"mmlu_human_aging": {
|
| 120 |
+
"alias": " - human_aging",
|
| 121 |
+
"acc,none": 0.3273542600896861,
|
| 122 |
+
"acc_stderr,none": 0.03149384670994132
|
| 123 |
+
},
|
| 124 |
+
"mmlu_management": {
|
| 125 |
+
"alias": " - management",
|
| 126 |
+
"acc,none": 0.20388349514563106,
|
| 127 |
+
"acc_stderr,none": 0.03989139859531771
|
| 128 |
+
},
|
| 129 |
+
"mmlu_marketing": {
|
| 130 |
+
"alias": " - marketing",
|
| 131 |
+
"acc,none": 0.2905982905982906,
|
| 132 |
+
"acc_stderr,none": 0.029745048572674064
|
| 133 |
+
},
|
| 134 |
+
"mmlu_medical_genetics": {
|
| 135 |
+
"alias": " - medical_genetics",
|
| 136 |
+
"acc,none": 0.29,
|
| 137 |
+
"acc_stderr,none": 0.04560480215720684
|
| 138 |
+
},
|
| 139 |
+
"mmlu_miscellaneous": {
|
| 140 |
+
"alias": " - miscellaneous",
|
| 141 |
+
"acc,none": 0.23243933588761176,
|
| 142 |
+
"acc_stderr,none": 0.015104550008905704
|
| 143 |
+
},
|
| 144 |
+
"mmlu_nutrition": {
|
| 145 |
+
"alias": " - nutrition",
|
| 146 |
+
"acc,none": 0.25163398692810457,
|
| 147 |
+
"acc_stderr,none": 0.024848018263875195
|
| 148 |
+
},
|
| 149 |
+
"mmlu_professional_accounting": {
|
| 150 |
+
"alias": " - professional_accounting",
|
| 151 |
+
"acc,none": 0.2695035460992908,
|
| 152 |
+
"acc_stderr,none": 0.026469036818590627
|
| 153 |
+
},
|
| 154 |
+
"mmlu_professional_medicine": {
|
| 155 |
+
"alias": " - professional_medicine",
|
| 156 |
+
"acc,none": 0.1801470588235294,
|
| 157 |
+
"acc_stderr,none": 0.02334516361654486
|
| 158 |
+
},
|
| 159 |
+
"mmlu_virology": {
|
| 160 |
+
"alias": " - virology",
|
| 161 |
+
"acc,none": 0.26506024096385544,
|
| 162 |
+
"acc_stderr,none": 0.03436024037944967
|
| 163 |
+
},
|
| 164 |
+
"mmlu_social_sciences": {
|
| 165 |
+
"acc,none": 0.22099447513812154,
|
| 166 |
+
"acc_stderr,none": 0.007486677746029817,
|
| 167 |
+
"alias": " - social sciences"
|
| 168 |
+
},
|
| 169 |
+
"mmlu_econometrics": {
|
| 170 |
+
"alias": " - econometrics",
|
| 171 |
+
"acc,none": 0.21929824561403508,
|
| 172 |
+
"acc_stderr,none": 0.0389243110651875
|
| 173 |
+
},
|
| 174 |
+
"mmlu_high_school_geography": {
|
| 175 |
+
"alias": " - high_school_geography",
|
| 176 |
+
"acc,none": 0.19696969696969696,
|
| 177 |
+
"acc_stderr,none": 0.028335609732463355
|
| 178 |
+
},
|
| 179 |
+
"mmlu_high_school_government_and_politics": {
|
| 180 |
+
"alias": " - high_school_government_and_politics",
|
| 181 |
+
"acc,none": 0.23316062176165803,
|
| 182 |
+
"acc_stderr,none": 0.030516111371476008
|
| 183 |
+
},
|
| 184 |
+
"mmlu_high_school_macroeconomics": {
|
| 185 |
+
"alias": " - high_school_macroeconomics",
|
| 186 |
+
"acc,none": 0.2,
|
| 187 |
+
"acc_stderr,none": 0.020280805062535726
|
| 188 |
+
},
|
| 189 |
+
"mmlu_high_school_microeconomics": {
|
| 190 |
+
"alias": " - high_school_microeconomics",
|
| 191 |
+
"acc,none": 0.20168067226890757,
|
| 192 |
+
"acc_stderr,none": 0.026064313406304534
|
| 193 |
+
},
|
| 194 |
+
"mmlu_high_school_psychology": {
|
| 195 |
+
"alias": " - high_school_psychology",
|
| 196 |
+
"acc,none": 0.21467889908256882,
|
| 197 |
+
"acc_stderr,none": 0.01760430414925649
|
| 198 |
+
},
|
| 199 |
+
"mmlu_human_sexuality": {
|
| 200 |
+
"alias": " - human_sexuality",
|
| 201 |
+
"acc,none": 0.22900763358778625,
|
| 202 |
+
"acc_stderr,none": 0.036853466317118506
|
| 203 |
+
},
|
| 204 |
+
"mmlu_professional_psychology": {
|
| 205 |
+
"alias": " - professional_psychology",
|
| 206 |
+
"acc,none": 0.24673202614379086,
|
| 207 |
+
"acc_stderr,none": 0.0174408203674025
|
| 208 |
+
},
|
| 209 |
+
"mmlu_public_relations": {
|
| 210 |
+
"alias": " - public_relations",
|
| 211 |
+
"acc,none": 0.22727272727272727,
|
| 212 |
+
"acc_stderr,none": 0.04013964554072775
|
| 213 |
+
},
|
| 214 |
+
"mmlu_security_studies": {
|
| 215 |
+
"alias": " - security_studies",
|
| 216 |
+
"acc,none": 0.21224489795918366,
|
| 217 |
+
"acc_stderr,none": 0.026176967197866767
|
| 218 |
+
},
|
| 219 |
+
"mmlu_sociology": {
|
| 220 |
+
"alias": " - sociology",
|
| 221 |
+
"acc,none": 0.21393034825870647,
|
| 222 |
+
"acc_stderr,none": 0.0289969096933289
|
| 223 |
+
},
|
| 224 |
+
"mmlu_us_foreign_policy": {
|
| 225 |
+
"alias": " - us_foreign_policy",
|
| 226 |
+
"acc,none": 0.27,
|
| 227 |
+
"acc_stderr,none": 0.0446196043338474
|
| 228 |
+
},
|
| 229 |
+
"mmlu_stem": {
|
| 230 |
+
"acc,none": 0.23184268950206152,
|
| 231 |
+
"acc_stderr,none": 0.00750773023393252,
|
| 232 |
+
"alias": " - stem"
|
| 233 |
+
},
|
| 234 |
+
"mmlu_abstract_algebra": {
|
| 235 |
+
"alias": " - abstract_algebra",
|
| 236 |
+
"acc,none": 0.22,
|
| 237 |
+
"acc_stderr,none": 0.04163331998932269
|
| 238 |
+
},
|
| 239 |
+
"mmlu_anatomy": {
|
| 240 |
+
"alias": " - anatomy",
|
| 241 |
+
"acc,none": 0.23703703703703705,
|
| 242 |
+
"acc_stderr,none": 0.03673731683969506
|
| 243 |
+
},
|
| 244 |
+
"mmlu_astronomy": {
|
| 245 |
+
"alias": " - astronomy",
|
| 246 |
+
"acc,none": 0.21052631578947367,
|
| 247 |
+
"acc_stderr,none": 0.033176727875331574
|
| 248 |
+
},
|
| 249 |
+
"mmlu_college_biology": {
|
| 250 |
+
"alias": " - college_biology",
|
| 251 |
+
"acc,none": 0.2916666666666667,
|
| 252 |
+
"acc_stderr,none": 0.03800968060554859
|
| 253 |
+
},
|
| 254 |
+
"mmlu_college_chemistry": {
|
| 255 |
+
"alias": " - college_chemistry",
|
| 256 |
+
"acc,none": 0.21,
|
| 257 |
+
"acc_stderr,none": 0.040936018074033256
|
| 258 |
+
},
|
| 259 |
+
"mmlu_college_computer_science": {
|
| 260 |
+
"alias": " - college_computer_science",
|
| 261 |
+
"acc,none": 0.25,
|
| 262 |
+
"acc_stderr,none": 0.04351941398892446
|
| 263 |
+
},
|
| 264 |
+
"mmlu_college_mathematics": {
|
| 265 |
+
"alias": " - college_mathematics",
|
| 266 |
+
"acc,none": 0.24,
|
| 267 |
+
"acc_stderr,none": 0.04292346959909284
|
| 268 |
+
},
|
| 269 |
+
"mmlu_college_physics": {
|
| 270 |
+
"alias": " - college_physics",
|
| 271 |
+
"acc,none": 0.21568627450980393,
|
| 272 |
+
"acc_stderr,none": 0.04092563958237655
|
| 273 |
+
},
|
| 274 |
+
"mmlu_computer_security": {
|
| 275 |
+
"alias": " - computer_security",
|
| 276 |
+
"acc,none": 0.3,
|
| 277 |
+
"acc_stderr,none": 0.046056618647183814
|
| 278 |
+
},
|
| 279 |
+
"mmlu_conceptual_physics": {
|
| 280 |
+
"alias": " - conceptual_physics",
|
| 281 |
+
"acc,none": 0.28936170212765955,
|
| 282 |
+
"acc_stderr,none": 0.02964400657700962
|
| 283 |
+
},
|
| 284 |
+
"mmlu_electrical_engineering": {
|
| 285 |
+
"alias": " - electrical_engineering",
|
| 286 |
+
"acc,none": 0.25517241379310346,
|
| 287 |
+
"acc_stderr,none": 0.03632984052707842
|
| 288 |
+
},
|
| 289 |
+
"mmlu_elementary_mathematics": {
|
| 290 |
+
"alias": " - elementary_mathematics",
|
| 291 |
+
"acc,none": 0.21693121693121692,
|
| 292 |
+
"acc_stderr,none": 0.02122708244944506
|
| 293 |
+
},
|
| 294 |
+
"mmlu_high_school_biology": {
|
| 295 |
+
"alias": " - high_school_biology",
|
| 296 |
+
"acc,none": 0.1935483870967742,
|
| 297 |
+
"acc_stderr,none": 0.022475258525536057
|
| 298 |
+
},
|
| 299 |
+
"mmlu_high_school_chemistry": {
|
| 300 |
+
"alias": " - high_school_chemistry",
|
| 301 |
+
"acc,none": 0.1921182266009852,
|
| 302 |
+
"acc_stderr,none": 0.027719315709614778
|
| 303 |
+
},
|
| 304 |
+
"mmlu_high_school_computer_science": {
|
| 305 |
+
"alias": " - high_school_computer_science",
|
| 306 |
+
"acc,none": 0.23,
|
| 307 |
+
"acc_stderr,none": 0.04229525846816506
|
| 308 |
+
},
|
| 309 |
+
"mmlu_high_school_mathematics": {
|
| 310 |
+
"alias": " - high_school_mathematics",
|
| 311 |
+
"acc,none": 0.26296296296296295,
|
| 312 |
+
"acc_stderr,none": 0.026842057873833706
|
| 313 |
+
},
|
| 314 |
+
"mmlu_high_school_physics": {
|
| 315 |
+
"alias": " - high_school_physics",
|
| 316 |
+
"acc,none": 0.2251655629139073,
|
| 317 |
+
"acc_stderr,none": 0.03410435282008937
|
| 318 |
+
},
|
| 319 |
+
"mmlu_high_school_statistics": {
|
| 320 |
+
"alias": " - high_school_statistics",
|
| 321 |
+
"acc,none": 0.16203703703703703,
|
| 322 |
+
"acc_stderr,none": 0.02513045365226846
|
| 323 |
+
},
|
| 324 |
+
"mmlu_machine_learning": {
|
| 325 |
+
"alias": " - machine_learning",
|
| 326 |
+
"acc,none": 0.2857142857142857,
|
| 327 |
+
"acc_stderr,none": 0.04287858751340456
|
| 328 |
+
}
|
| 329 |
+
}
|
| 330 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T17-10-04.238325_lambada_multilingual.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_multilingual"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"lambada_openai_mt_de": {
|
| 20 |
+
"alias": "lambada_openai_mt_de",
|
| 21 |
+
"perplexity,none": 122.582684053421,
|
| 22 |
+
"perplexity_stderr,none": 7.611410508434413,
|
| 23 |
+
"acc,none": 0.2905103823015719,
|
| 24 |
+
"acc_stderr,none": 0.006325078561068121
|
| 25 |
+
},
|
| 26 |
+
"lambada_openai_mt_en": {
|
| 27 |
+
"alias": "lambada_openai_mt_en",
|
| 28 |
+
"perplexity,none": 8.874438778769504,
|
| 29 |
+
"perplexity_stderr,none": 0.2511018027503425,
|
| 30 |
+
"acc,none": 0.5402678051620415,
|
| 31 |
+
"acc_stderr,none": 0.0069433502956647445
|
| 32 |
+
},
|
| 33 |
+
"lambada_openai_mt_es": {
|
| 34 |
+
"alias": "lambada_openai_mt_es",
|
| 35 |
+
"perplexity,none": 134.88945576369062,
|
| 36 |
+
"perplexity_stderr,none": 7.962805036133316,
|
| 37 |
+
"acc,none": 0.3062293809431399,
|
| 38 |
+
"acc_stderr,none": 0.006421603000130909
|
| 39 |
+
},
|
| 40 |
+
"lambada_openai_mt_fr": {
|
| 41 |
+
"alias": "lambada_openai_mt_fr",
|
| 42 |
+
"perplexity,none": 73.64587512871607,
|
| 43 |
+
"perplexity_stderr,none": 4.301058908781159,
|
| 44 |
+
"acc,none": 0.3685231903745391,
|
| 45 |
+
"acc_stderr,none": 0.006720834282814074
|
| 46 |
+
},
|
| 47 |
+
"lambada_openai_mt_it": {
|
| 48 |
+
"alias": "lambada_openai_mt_it",
|
| 49 |
+
"perplexity,none": 113.04863122926666,
|
| 50 |
+
"perplexity_stderr,none": 7.0611616362975695,
|
| 51 |
+
"acc,none": 0.33630894624490587,
|
| 52 |
+
"acc_stderr,none": 0.006582096796438631
|
| 53 |
+
}
|
| 54 |
+
}
|
| 55 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T17-31-49.237829_pawsx.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"pawsx"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"pawsx": {
|
| 20 |
+
"acc,none": 0.4947857142857143,
|
| 21 |
+
"acc_stderr,none": 0.0042167946968630675,
|
| 22 |
+
"alias": "pawsx"
|
| 23 |
+
},
|
| 24 |
+
"paws_de": {
|
| 25 |
+
"alias": " - paws_de",
|
| 26 |
+
"acc,none": 0.545,
|
| 27 |
+
"acc_stderr,none": 0.011137752231145222
|
| 28 |
+
},
|
| 29 |
+
"paws_en": {
|
| 30 |
+
"alias": " - paws_en",
|
| 31 |
+
"acc,none": 0.533,
|
| 32 |
+
"acc_stderr,none": 0.011158752568250671
|
| 33 |
+
},
|
| 34 |
+
"paws_es": {
|
| 35 |
+
"alias": " - paws_es",
|
| 36 |
+
"acc,none": 0.515,
|
| 37 |
+
"acc_stderr,none": 0.011178102477052804
|
| 38 |
+
},
|
| 39 |
+
"paws_fr": {
|
| 40 |
+
"alias": " - paws_fr",
|
| 41 |
+
"acc,none": 0.463,
|
| 42 |
+
"acc_stderr,none": 0.011152474561478177
|
| 43 |
+
},
|
| 44 |
+
"paws_ja": {
|
| 45 |
+
"alias": " - paws_ja",
|
| 46 |
+
"acc,none": 0.4795,
|
| 47 |
+
"acc_stderr,none": 0.011173732641806813
|
| 48 |
+
},
|
| 49 |
+
"paws_ko": {
|
| 50 |
+
"alias": " - paws_ko",
|
| 51 |
+
"acc,none": 0.447,
|
| 52 |
+
"acc_stderr,none": 0.011120131683767735
|
| 53 |
+
},
|
| 54 |
+
"paws_zh": {
|
| 55 |
+
"alias": " - paws_zh",
|
| 56 |
+
"acc,none": 0.481,
|
| 57 |
+
"acc_stderr,none": 0.01117505887995606
|
| 58 |
+
}
|
| 59 |
+
}
|
| 60 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T17-37-40.081757_xcopa.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xcopa"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xcopa": {
|
| 20 |
+
"acc,none": 0.5401818181818182,
|
| 21 |
+
"acc_stderr,none": 0.006718472971138079,
|
| 22 |
+
"alias": "xcopa"
|
| 23 |
+
},
|
| 24 |
+
"xcopa_et": {
|
| 25 |
+
"alias": " - xcopa_et",
|
| 26 |
+
"acc,none": 0.506,
|
| 27 |
+
"acc_stderr,none": 0.022381462412439324
|
| 28 |
+
},
|
| 29 |
+
"xcopa_ht": {
|
| 30 |
+
"alias": " - xcopa_ht",
|
| 31 |
+
"acc,none": 0.522,
|
| 32 |
+
"acc_stderr,none": 0.02236139673920788
|
| 33 |
+
},
|
| 34 |
+
"xcopa_id": {
|
| 35 |
+
"alias": " - xcopa_id",
|
| 36 |
+
"acc,none": 0.57,
|
| 37 |
+
"acc_stderr,none": 0.022162634426652835
|
| 38 |
+
},
|
| 39 |
+
"xcopa_it": {
|
| 40 |
+
"alias": " - xcopa_it",
|
| 41 |
+
"acc,none": 0.548,
|
| 42 |
+
"acc_stderr,none": 0.02227969410784342
|
| 43 |
+
},
|
| 44 |
+
"xcopa_qu": {
|
| 45 |
+
"alias": " - xcopa_qu",
|
| 46 |
+
"acc,none": 0.51,
|
| 47 |
+
"acc_stderr,none": 0.02237859698923078
|
| 48 |
+
},
|
| 49 |
+
"xcopa_sw": {
|
| 50 |
+
"alias": " - xcopa_sw",
|
| 51 |
+
"acc,none": 0.55,
|
| 52 |
+
"acc_stderr,none": 0.022270877485360437
|
| 53 |
+
},
|
| 54 |
+
"xcopa_ta": {
|
| 55 |
+
"alias": " - xcopa_ta",
|
| 56 |
+
"acc,none": 0.512,
|
| 57 |
+
"acc_stderr,none": 0.02237662679792717
|
| 58 |
+
},
|
| 59 |
+
"xcopa_th": {
|
| 60 |
+
"alias": " - xcopa_th",
|
| 61 |
+
"acc,none": 0.534,
|
| 62 |
+
"acc_stderr,none": 0.02233126442325838
|
| 63 |
+
},
|
| 64 |
+
"xcopa_tr": {
|
| 65 |
+
"alias": " - xcopa_tr",
|
| 66 |
+
"acc,none": 0.538,
|
| 67 |
+
"acc_stderr,none": 0.02231833811987053
|
| 68 |
+
},
|
| 69 |
+
"xcopa_vi": {
|
| 70 |
+
"alias": " - xcopa_vi",
|
| 71 |
+
"acc,none": 0.576,
|
| 72 |
+
"acc_stderr,none": 0.022122993778135404
|
| 73 |
+
},
|
| 74 |
+
"xcopa_zh": {
|
| 75 |
+
"alias": " - xcopa_zh",
|
| 76 |
+
"acc,none": 0.576,
|
| 77 |
+
"acc_stderr,none": 0.022122993778135404
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T19-11-48.808152_xnli.json
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xnli"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xnli": {
|
| 20 |
+
"acc,none": 0.3847657295850067,
|
| 21 |
+
"acc_stderr,none": 0.0025079690852275174,
|
| 22 |
+
"alias": "xnli"
|
| 23 |
+
},
|
| 24 |
+
"xnli_ar": {
|
| 25 |
+
"alias": " - xnli_ar",
|
| 26 |
+
"acc,none": 0.3333333333333333,
|
| 27 |
+
"acc_stderr,none": 0.009448900914617616
|
| 28 |
+
},
|
| 29 |
+
"xnli_bg": {
|
| 30 |
+
"alias": " - xnli_bg",
|
| 31 |
+
"acc,none": 0.37028112449799194,
|
| 32 |
+
"acc_stderr,none": 0.009678915409840288
|
| 33 |
+
},
|
| 34 |
+
"xnli_de": {
|
| 35 |
+
"alias": " - xnli_de",
|
| 36 |
+
"acc,none": 0.43012048192771085,
|
| 37 |
+
"acc_stderr,none": 0.00992371167540806
|
| 38 |
+
},
|
| 39 |
+
"xnli_el": {
|
| 40 |
+
"alias": " - xnli_el",
|
| 41 |
+
"acc,none": 0.3582329317269076,
|
| 42 |
+
"acc_stderr,none": 0.009610788482973929
|
| 43 |
+
},
|
| 44 |
+
"xnli_en": {
|
| 45 |
+
"alias": " - xnli_en",
|
| 46 |
+
"acc,none": 0.4943775100401606,
|
| 47 |
+
"acc_stderr,none": 0.010021439203777294
|
| 48 |
+
},
|
| 49 |
+
"xnli_es": {
|
| 50 |
+
"alias": " - xnli_es",
|
| 51 |
+
"acc,none": 0.39759036144578314,
|
| 52 |
+
"acc_stderr,none": 0.009809602996075804
|
| 53 |
+
},
|
| 54 |
+
"xnli_fr": {
|
| 55 |
+
"alias": " - xnli_fr",
|
| 56 |
+
"acc,none": 0.42248995983935744,
|
| 57 |
+
"acc_stderr,none": 0.009900919227857794
|
| 58 |
+
},
|
| 59 |
+
"xnli_hi": {
|
| 60 |
+
"alias": " - xnli_hi",
|
| 61 |
+
"acc,none": 0.3650602409638554,
|
| 62 |
+
"acc_stderr,none": 0.009650194822749635
|
| 63 |
+
},
|
| 64 |
+
"xnli_ru": {
|
| 65 |
+
"alias": " - xnli_ru",
|
| 66 |
+
"acc,none": 0.43775100401606426,
|
| 67 |
+
"acc_stderr,none": 0.009944099734290161
|
| 68 |
+
},
|
| 69 |
+
"xnli_sw": {
|
| 70 |
+
"alias": " - xnli_sw",
|
| 71 |
+
"acc,none": 0.3409638554216867,
|
| 72 |
+
"acc_stderr,none": 0.009501591178361544
|
| 73 |
+
},
|
| 74 |
+
"xnli_th": {
|
| 75 |
+
"alias": " - xnli_th",
|
| 76 |
+
"acc,none": 0.348995983935743,
|
| 77 |
+
"acc_stderr,none": 0.009554095988300678
|
| 78 |
+
},
|
| 79 |
+
"xnli_tr": {
|
| 80 |
+
"alias": " - xnli_tr",
|
| 81 |
+
"acc,none": 0.3718875502008032,
|
| 82 |
+
"acc_stderr,none": 0.009687507958631799
|
| 83 |
+
},
|
| 84 |
+
"xnli_ur": {
|
| 85 |
+
"alias": " - xnli_ur",
|
| 86 |
+
"acc,none": 0.3345381526104418,
|
| 87 |
+
"acc_stderr,none": 0.009457404390939166
|
| 88 |
+
},
|
| 89 |
+
"xnli_vi": {
|
| 90 |
+
"alias": " - xnli_vi",
|
| 91 |
+
"acc,none": 0.38393574297188754,
|
| 92 |
+
"acc_stderr,none": 0.009748321202534375
|
| 93 |
+
},
|
| 94 |
+
"xnli_zh": {
|
| 95 |
+
"alias": " - xnli_zh",
|
| 96 |
+
"acc,none": 0.3819277108433735,
|
| 97 |
+
"acc_stderr,none": 0.00973862791451752
|
| 98 |
+
}
|
| 99 |
+
}
|
| 100 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T19-42-29.131682_xstorycloze.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xstorycloze"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xstorycloze": {
|
| 20 |
+
"acc,none": 0.5407616870224415,
|
| 21 |
+
"acc_stderr,none": 0.003849468335294993,
|
| 22 |
+
"alias": "xstorycloze"
|
| 23 |
+
},
|
| 24 |
+
"xstorycloze_ar": {
|
| 25 |
+
"alias": " - xstorycloze_ar",
|
| 26 |
+
"acc,none": 0.48974189278623426,
|
| 27 |
+
"acc_stderr,none": 0.012864417047980472
|
| 28 |
+
},
|
| 29 |
+
"xstorycloze_en": {
|
| 30 |
+
"alias": " - xstorycloze_en",
|
| 31 |
+
"acc,none": 0.6657842488418266,
|
| 32 |
+
"acc_stderr,none": 0.012139246810918228
|
| 33 |
+
},
|
| 34 |
+
"xstorycloze_es": {
|
| 35 |
+
"alias": " - xstorycloze_es",
|
| 36 |
+
"acc,none": 0.5698213103904699,
|
| 37 |
+
"acc_stderr,none": 0.012741052817471078
|
| 38 |
+
},
|
| 39 |
+
"xstorycloze_eu": {
|
| 40 |
+
"alias": " - xstorycloze_eu",
|
| 41 |
+
"acc,none": 0.5188616810059563,
|
| 42 |
+
"acc_stderr,none": 0.012857966762464996
|
| 43 |
+
},
|
| 44 |
+
"xstorycloze_hi": {
|
| 45 |
+
"alias": " - xstorycloze_hi",
|
| 46 |
+
"acc,none": 0.5168762409000662,
|
| 47 |
+
"acc_stderr,none": 0.012859793919977608
|
| 48 |
+
},
|
| 49 |
+
"xstorycloze_id": {
|
| 50 |
+
"alias": " - xstorycloze_id",
|
| 51 |
+
"acc,none": 0.5360688285903376,
|
| 52 |
+
"acc_stderr,none": 0.012833602406620017
|
| 53 |
+
},
|
| 54 |
+
"xstorycloze_my": {
|
| 55 |
+
"alias": " - xstorycloze_my",
|
| 56 |
+
"acc,none": 0.4930509596293845,
|
| 57 |
+
"acc_stderr,none": 0.01286588257096072
|
| 58 |
+
},
|
| 59 |
+
"xstorycloze_ru": {
|
| 60 |
+
"alias": " - xstorycloze_ru",
|
| 61 |
+
"acc,none": 0.5519523494374586,
|
| 62 |
+
"acc_stderr,none": 0.012797478885304733
|
| 63 |
+
},
|
| 64 |
+
"xstorycloze_sw": {
|
| 65 |
+
"alias": " - xstorycloze_sw",
|
| 66 |
+
"acc,none": 0.5029781601588352,
|
| 67 |
+
"acc_stderr,none": 0.012866897066011228
|
| 68 |
+
},
|
| 69 |
+
"xstorycloze_te": {
|
| 70 |
+
"alias": " - xstorycloze_te",
|
| 71 |
+
"acc,none": 0.5473196558570483,
|
| 72 |
+
"acc_stderr,none": 0.012809372866181954
|
| 73 |
+
},
|
| 74 |
+
"xstorycloze_zh": {
|
| 75 |
+
"alias": " - xstorycloze_zh",
|
| 76 |
+
"acc,none": 0.5559232296492389,
|
| 77 |
+
"acc_stderr,none": 0.012786390539820832
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T19-47-11.016382_xwinograd.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xwinograd"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xwinograd": {
|
| 20 |
+
"acc,none": 0.6556529557203866,
|
| 21 |
+
"acc_stderr,none": 0.007021810569184652,
|
| 22 |
+
"alias": "xwinograd"
|
| 23 |
+
},
|
| 24 |
+
"xwinograd_en": {
|
| 25 |
+
"alias": " - xwinograd_en",
|
| 26 |
+
"acc,none": 0.7290322580645161,
|
| 27 |
+
"acc_stderr,none": 0.009219643045448322
|
| 28 |
+
},
|
| 29 |
+
"xwinograd_fr": {
|
| 30 |
+
"alias": " - xwinograd_fr",
|
| 31 |
+
"acc,none": 0.6144578313253012,
|
| 32 |
+
"acc_stderr,none": 0.0537495779731939
|
| 33 |
+
},
|
| 34 |
+
"xwinograd_jp": {
|
| 35 |
+
"alias": " - xwinograd_jp",
|
| 36 |
+
"acc,none": 0.5411887382690302,
|
| 37 |
+
"acc_stderr,none": 0.01609936161806395
|
| 38 |
+
},
|
| 39 |
+
"xwinograd_pt": {
|
| 40 |
+
"alias": " - xwinograd_pt",
|
| 41 |
+
"acc,none": 0.55893536121673,
|
| 42 |
+
"acc_stderr,none": 0.0306747666644263
|
| 43 |
+
},
|
| 44 |
+
"xwinograd_ru": {
|
| 45 |
+
"alias": " - xwinograd_ru",
|
| 46 |
+
"acc,none": 0.5746031746031746,
|
| 47 |
+
"acc_stderr,none": 0.027900777694976245
|
| 48 |
+
},
|
| 49 |
+
"xwinograd_zh": {
|
| 50 |
+
"alias": " - xwinograd_zh",
|
| 51 |
+
"acc,none": 0.6428571428571429,
|
| 52 |
+
"acc_stderr,none": 0.021364573561124416
|
| 53 |
+
}
|
| 54 |
+
}
|
| 55 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T20-21-44.743544_lambada_openai.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_openai"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"lambada_openai": {
|
| 20 |
+
"alias": "lambada_openai",
|
| 21 |
+
"perplexity,none": 8.874438771398895,
|
| 22 |
+
"perplexity_stderr,none": 0.25110180223575285,
|
| 23 |
+
"acc,none": 0.5402678051620415,
|
| 24 |
+
"acc_stderr,none": 0.0069433502956647445
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T22-02-39.723956_xnli.json
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xnli"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xnli": {
|
| 20 |
+
"acc,none": 0.3847657295850067,
|
| 21 |
+
"acc_stderr,none": 0.0025079690852275174,
|
| 22 |
+
"alias": "xnli"
|
| 23 |
+
},
|
| 24 |
+
"xnli_ar": {
|
| 25 |
+
"alias": " - xnli_ar",
|
| 26 |
+
"acc,none": 0.3333333333333333,
|
| 27 |
+
"acc_stderr,none": 0.009448900914617616
|
| 28 |
+
},
|
| 29 |
+
"xnli_bg": {
|
| 30 |
+
"alias": " - xnli_bg",
|
| 31 |
+
"acc,none": 0.37028112449799194,
|
| 32 |
+
"acc_stderr,none": 0.009678915409840288
|
| 33 |
+
},
|
| 34 |
+
"xnli_de": {
|
| 35 |
+
"alias": " - xnli_de",
|
| 36 |
+
"acc,none": 0.43012048192771085,
|
| 37 |
+
"acc_stderr,none": 0.00992371167540806
|
| 38 |
+
},
|
| 39 |
+
"xnli_el": {
|
| 40 |
+
"alias": " - xnli_el",
|
| 41 |
+
"acc,none": 0.3582329317269076,
|
| 42 |
+
"acc_stderr,none": 0.009610788482973929
|
| 43 |
+
},
|
| 44 |
+
"xnli_en": {
|
| 45 |
+
"alias": " - xnli_en",
|
| 46 |
+
"acc,none": 0.4943775100401606,
|
| 47 |
+
"acc_stderr,none": 0.010021439203777294
|
| 48 |
+
},
|
| 49 |
+
"xnli_es": {
|
| 50 |
+
"alias": " - xnli_es",
|
| 51 |
+
"acc,none": 0.39759036144578314,
|
| 52 |
+
"acc_stderr,none": 0.009809602996075804
|
| 53 |
+
},
|
| 54 |
+
"xnli_fr": {
|
| 55 |
+
"alias": " - xnli_fr",
|
| 56 |
+
"acc,none": 0.42248995983935744,
|
| 57 |
+
"acc_stderr,none": 0.009900919227857794
|
| 58 |
+
},
|
| 59 |
+
"xnli_hi": {
|
| 60 |
+
"alias": " - xnli_hi",
|
| 61 |
+
"acc,none": 0.3650602409638554,
|
| 62 |
+
"acc_stderr,none": 0.009650194822749635
|
| 63 |
+
},
|
| 64 |
+
"xnli_ru": {
|
| 65 |
+
"alias": " - xnli_ru",
|
| 66 |
+
"acc,none": 0.43775100401606426,
|
| 67 |
+
"acc_stderr,none": 0.009944099734290161
|
| 68 |
+
},
|
| 69 |
+
"xnli_sw": {
|
| 70 |
+
"alias": " - xnli_sw",
|
| 71 |
+
"acc,none": 0.3409638554216867,
|
| 72 |
+
"acc_stderr,none": 0.009501591178361544
|
| 73 |
+
},
|
| 74 |
+
"xnli_th": {
|
| 75 |
+
"alias": " - xnli_th",
|
| 76 |
+
"acc,none": 0.348995983935743,
|
| 77 |
+
"acc_stderr,none": 0.009554095988300678
|
| 78 |
+
},
|
| 79 |
+
"xnli_tr": {
|
| 80 |
+
"alias": " - xnli_tr",
|
| 81 |
+
"acc,none": 0.3718875502008032,
|
| 82 |
+
"acc_stderr,none": 0.009687507958631799
|
| 83 |
+
},
|
| 84 |
+
"xnli_ur": {
|
| 85 |
+
"alias": " - xnli_ur",
|
| 86 |
+
"acc,none": 0.3345381526104418,
|
| 87 |
+
"acc_stderr,none": 0.009457404390939166
|
| 88 |
+
},
|
| 89 |
+
"xnli_vi": {
|
| 90 |
+
"alias": " - xnli_vi",
|
| 91 |
+
"acc,none": 0.38393574297188754,
|
| 92 |
+
"acc_stderr,none": 0.009748321202534375
|
| 93 |
+
},
|
| 94 |
+
"xnli_zh": {
|
| 95 |
+
"alias": " - xnli_zh",
|
| 96 |
+
"acc,none": 0.3819277108433735,
|
| 97 |
+
"acc_stderr,none": 0.00973862791451752
|
| 98 |
+
}
|
| 99 |
+
}
|
| 100 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T22-35-32.643193_xstorycloze.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xstorycloze"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xstorycloze": {
|
| 20 |
+
"acc,none": 0.5407616870224415,
|
| 21 |
+
"acc_stderr,none": 0.003849468335294993,
|
| 22 |
+
"alias": "xstorycloze"
|
| 23 |
+
},
|
| 24 |
+
"xstorycloze_ar": {
|
| 25 |
+
"alias": " - xstorycloze_ar",
|
| 26 |
+
"acc,none": 0.48974189278623426,
|
| 27 |
+
"acc_stderr,none": 0.012864417047980472
|
| 28 |
+
},
|
| 29 |
+
"xstorycloze_en": {
|
| 30 |
+
"alias": " - xstorycloze_en",
|
| 31 |
+
"acc,none": 0.6657842488418266,
|
| 32 |
+
"acc_stderr,none": 0.012139246810918228
|
| 33 |
+
},
|
| 34 |
+
"xstorycloze_es": {
|
| 35 |
+
"alias": " - xstorycloze_es",
|
| 36 |
+
"acc,none": 0.5698213103904699,
|
| 37 |
+
"acc_stderr,none": 0.012741052817471078
|
| 38 |
+
},
|
| 39 |
+
"xstorycloze_eu": {
|
| 40 |
+
"alias": " - xstorycloze_eu",
|
| 41 |
+
"acc,none": 0.5188616810059563,
|
| 42 |
+
"acc_stderr,none": 0.012857966762464996
|
| 43 |
+
},
|
| 44 |
+
"xstorycloze_hi": {
|
| 45 |
+
"alias": " - xstorycloze_hi",
|
| 46 |
+
"acc,none": 0.5168762409000662,
|
| 47 |
+
"acc_stderr,none": 0.012859793919977608
|
| 48 |
+
},
|
| 49 |
+
"xstorycloze_id": {
|
| 50 |
+
"alias": " - xstorycloze_id",
|
| 51 |
+
"acc,none": 0.5360688285903376,
|
| 52 |
+
"acc_stderr,none": 0.012833602406620017
|
| 53 |
+
},
|
| 54 |
+
"xstorycloze_my": {
|
| 55 |
+
"alias": " - xstorycloze_my",
|
| 56 |
+
"acc,none": 0.4930509596293845,
|
| 57 |
+
"acc_stderr,none": 0.01286588257096072
|
| 58 |
+
},
|
| 59 |
+
"xstorycloze_ru": {
|
| 60 |
+
"alias": " - xstorycloze_ru",
|
| 61 |
+
"acc,none": 0.5519523494374586,
|
| 62 |
+
"acc_stderr,none": 0.012797478885304733
|
| 63 |
+
},
|
| 64 |
+
"xstorycloze_sw": {
|
| 65 |
+
"alias": " - xstorycloze_sw",
|
| 66 |
+
"acc,none": 0.5029781601588352,
|
| 67 |
+
"acc_stderr,none": 0.012866897066011228
|
| 68 |
+
},
|
| 69 |
+
"xstorycloze_te": {
|
| 70 |
+
"alias": " - xstorycloze_te",
|
| 71 |
+
"acc,none": 0.5473196558570483,
|
| 72 |
+
"acc_stderr,none": 0.012809372866181954
|
| 73 |
+
},
|
| 74 |
+
"xstorycloze_zh": {
|
| 75 |
+
"alias": " - xstorycloze_zh",
|
| 76 |
+
"acc,none": 0.5559232296492389,
|
| 77 |
+
"acc_stderr,none": 0.012786390539820832
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T22-39-50.695003_xwinograd.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xwinograd"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xwinograd": {
|
| 20 |
+
"acc,none": 0.6556529557203866,
|
| 21 |
+
"acc_stderr,none": 0.007021810569184652,
|
| 22 |
+
"alias": "xwinograd"
|
| 23 |
+
},
|
| 24 |
+
"xwinograd_en": {
|
| 25 |
+
"alias": " - xwinograd_en",
|
| 26 |
+
"acc,none": 0.7290322580645161,
|
| 27 |
+
"acc_stderr,none": 0.009219643045448322
|
| 28 |
+
},
|
| 29 |
+
"xwinograd_fr": {
|
| 30 |
+
"alias": " - xwinograd_fr",
|
| 31 |
+
"acc,none": 0.6144578313253012,
|
| 32 |
+
"acc_stderr,none": 0.0537495779731939
|
| 33 |
+
},
|
| 34 |
+
"xwinograd_jp": {
|
| 35 |
+
"alias": " - xwinograd_jp",
|
| 36 |
+
"acc,none": 0.5411887382690302,
|
| 37 |
+
"acc_stderr,none": 0.01609936161806395
|
| 38 |
+
},
|
| 39 |
+
"xwinograd_pt": {
|
| 40 |
+
"alias": " - xwinograd_pt",
|
| 41 |
+
"acc,none": 0.55893536121673,
|
| 42 |
+
"acc_stderr,none": 0.0306747666644263
|
| 43 |
+
},
|
| 44 |
+
"xwinograd_ru": {
|
| 45 |
+
"alias": " - xwinograd_ru",
|
| 46 |
+
"acc,none": 0.5746031746031746,
|
| 47 |
+
"acc_stderr,none": 0.027900777694976245
|
| 48 |
+
},
|
| 49 |
+
"xwinograd_zh": {
|
| 50 |
+
"alias": " - xwinograd_zh",
|
| 51 |
+
"acc,none": 0.6428571428571429,
|
| 52 |
+
"acc_stderr,none": 0.021364573561124416
|
| 53 |
+
}
|
| 54 |
+
}
|
| 55 |
+
}
|
lm_eval/RWKV-5-World-0.4B-v2-20231113-ctx4096/pad_11/0.4.8_2025-03-15T23-45-11.895035_mmlu.json
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-5-World-0.4B-v2-20231113-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"mmlu"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 5,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"mmlu": {
|
| 20 |
+
"acc,none": 0.23764421022646345,
|
| 21 |
+
"acc_stderr,none": 0.003587790530224594,
|
| 22 |
+
"alias": "mmlu"
|
| 23 |
+
},
|
| 24 |
+
"mmlu_humanities": {
|
| 25 |
+
"acc,none": 0.24782146652497344,
|
| 26 |
+
"acc_stderr,none": 0.0062902893350153324,
|
| 27 |
+
"alias": " - humanities"
|
| 28 |
+
},
|
| 29 |
+
"mmlu_formal_logic": {
|
| 30 |
+
"alias": " - formal_logic",
|
| 31 |
+
"acc,none": 0.3253968253968254,
|
| 32 |
+
"acc_stderr,none": 0.041905964388711366
|
| 33 |
+
},
|
| 34 |
+
"mmlu_high_school_european_history": {
|
| 35 |
+
"alias": " - high_school_european_history",
|
| 36 |
+
"acc,none": 0.22424242424242424,
|
| 37 |
+
"acc_stderr,none": 0.03256866661681102
|
| 38 |
+
},
|
| 39 |
+
"mmlu_high_school_us_history": {
|
| 40 |
+
"alias": " - high_school_us_history",
|
| 41 |
+
"acc,none": 0.2107843137254902,
|
| 42 |
+
"acc_stderr,none": 0.028626547912437388
|
| 43 |
+
},
|
| 44 |
+
"mmlu_high_school_world_history": {
|
| 45 |
+
"alias": " - high_school_world_history",
|
| 46 |
+
"acc,none": 0.270042194092827,
|
| 47 |
+
"acc_stderr,none": 0.028900721906293433
|
| 48 |
+
},
|
| 49 |
+
"mmlu_international_law": {
|
| 50 |
+
"alias": " - international_law",
|
| 51 |
+
"acc,none": 0.2396694214876033,
|
| 52 |
+
"acc_stderr,none": 0.038968789850704164
|
| 53 |
+
},
|
| 54 |
+
"mmlu_jurisprudence": {
|
| 55 |
+
"alias": " - jurisprudence",
|
| 56 |
+
"acc,none": 0.32407407407407407,
|
| 57 |
+
"acc_stderr,none": 0.04524596007030048
|
| 58 |
+
},
|
| 59 |
+
"mmlu_logical_fallacies": {
|
| 60 |
+
"alias": " - logical_fallacies",
|
| 61 |
+
"acc,none": 0.20245398773006135,
|
| 62 |
+
"acc_stderr,none": 0.03157065078911902
|
| 63 |
+
},
|
| 64 |
+
"mmlu_moral_disputes": {
|
| 65 |
+
"alias": " - moral_disputes",
|
| 66 |
+
"acc,none": 0.2398843930635838,
|
| 67 |
+
"acc_stderr,none": 0.022989592543123567
|
| 68 |
+
},
|
| 69 |
+
"mmlu_moral_scenarios": {
|
| 70 |
+
"alias": " - moral_scenarios",
|
| 71 |
+
"acc,none": 0.25027932960893856,
|
| 72 |
+
"acc_stderr,none": 0.014487500852850412
|
| 73 |
+
},
|
| 74 |
+
"mmlu_philosophy": {
|
| 75 |
+
"alias": " - philosophy",
|
| 76 |
+
"acc,none": 0.19935691318327975,
|
| 77 |
+
"acc_stderr,none": 0.022691033780549656
|
| 78 |
+
},
|
| 79 |
+
"mmlu_prehistory": {
|
| 80 |
+
"alias": " - prehistory",
|
| 81 |
+
"acc,none": 0.23148148148148148,
|
| 82 |
+
"acc_stderr,none": 0.023468429832451152
|
| 83 |
+
},
|
| 84 |
+
"mmlu_professional_law": {
|
| 85 |
+
"alias": " - professional_law",
|
| 86 |
+
"acc,none": 0.25358539765319427,
|
| 87 |
+
"acc_stderr,none": 0.01111171533610114
|
| 88 |
+
},
|
| 89 |
+
"mmlu_world_religions": {
|
| 90 |
+
"alias": " - world_religions",
|
| 91 |
+
"acc,none": 0.2982456140350877,
|
| 92 |
+
"acc_stderr,none": 0.035087719298245654
|
| 93 |
+
},
|
| 94 |
+
"mmlu_other": {
|
| 95 |
+
"acc,none": 0.24460894753781784,
|
| 96 |
+
"acc_stderr,none": 0.007691719275922855,
|
| 97 |
+
"alias": " - other"
|
| 98 |
+
},
|
| 99 |
+
"mmlu_business_ethics": {
|
| 100 |
+
"alias": " - business_ethics",
|
| 101 |
+
"acc,none": 0.31,
|
| 102 |
+
"acc_stderr,none": 0.04648231987117316
|
| 103 |
+
},
|
| 104 |
+
"mmlu_clinical_knowledge": {
|
| 105 |
+
"alias": " - clinical_knowledge",
|
| 106 |
+
"acc,none": 0.19245283018867926,
|
| 107 |
+
"acc_stderr,none": 0.02426297983937226
|
| 108 |
+
},
|
| 109 |
+
"mmlu_college_medicine": {
|
| 110 |
+
"alias": " - college_medicine",
|
| 111 |
+
"acc,none": 0.2023121387283237,
|
| 112 |
+
"acc_stderr,none": 0.03063114553919882
|
| 113 |
+
},
|
| 114 |
+
"mmlu_global_facts": {
|
| 115 |
+
"alias": " - global_facts",
|
| 116 |
+
"acc,none": 0.24,
|
| 117 |
+
"acc_stderr,none": 0.042923469599092816
|
| 118 |
+
},
|
| 119 |
+
"mmlu_human_aging": {
|
| 120 |
+
"alias": " - human_aging",
|
| 121 |
+
"acc,none": 0.3273542600896861,
|
| 122 |
+
"acc_stderr,none": 0.03149384670994132
|
| 123 |
+
},
|
| 124 |
+
"mmlu_management": {
|
| 125 |
+
"alias": " - management",
|
| 126 |
+
"acc,none": 0.20388349514563106,
|
| 127 |
+
"acc_stderr,none": 0.03989139859531771
|
| 128 |
+
},
|
| 129 |
+
"mmlu_marketing": {
|
| 130 |
+
"alias": " - marketing",
|
| 131 |
+
"acc,none": 0.2905982905982906,
|
| 132 |
+
"acc_stderr,none": 0.029745048572674064
|
| 133 |
+
},
|
| 134 |
+
"mmlu_medical_genetics": {
|
| 135 |
+
"alias": " - medical_genetics",
|
| 136 |
+
"acc,none": 0.29,
|
| 137 |
+
"acc_stderr,none": 0.04560480215720684
|
| 138 |
+
},
|
| 139 |
+
"mmlu_miscellaneous": {
|
| 140 |
+
"alias": " - miscellaneous",
|
| 141 |
+
"acc,none": 0.23243933588761176,
|
| 142 |
+
"acc_stderr,none": 0.015104550008905704
|
| 143 |
+
},
|
| 144 |
+
"mmlu_nutrition": {
|
| 145 |
+
"alias": " - nutrition",
|
| 146 |
+
"acc,none": 0.25163398692810457,
|
| 147 |
+
"acc_stderr,none": 0.024848018263875195
|
| 148 |
+
},
|
| 149 |
+
"mmlu_professional_accounting": {
|
| 150 |
+
"alias": " - professional_accounting",
|
| 151 |
+
"acc,none": 0.2695035460992908,
|
| 152 |
+
"acc_stderr,none": 0.026469036818590627
|
| 153 |
+
},
|
| 154 |
+
"mmlu_professional_medicine": {
|
| 155 |
+
"alias": " - professional_medicine",
|
| 156 |
+
"acc,none": 0.1801470588235294,
|
| 157 |
+
"acc_stderr,none": 0.02334516361654486
|
| 158 |
+
},
|
| 159 |
+
"mmlu_virology": {
|
| 160 |
+
"alias": " - virology",
|
| 161 |
+
"acc,none": 0.26506024096385544,
|
| 162 |
+
"acc_stderr,none": 0.03436024037944967
|
| 163 |
+
},
|
| 164 |
+
"mmlu_social_sciences": {
|
| 165 |
+
"acc,none": 0.22099447513812154,
|
| 166 |
+
"acc_stderr,none": 0.007486677746029817,
|
| 167 |
+
"alias": " - social sciences"
|
| 168 |
+
},
|
| 169 |
+
"mmlu_econometrics": {
|
| 170 |
+
"alias": " - econometrics",
|
| 171 |
+
"acc,none": 0.21929824561403508,
|
| 172 |
+
"acc_stderr,none": 0.0389243110651875
|
| 173 |
+
},
|
| 174 |
+
"mmlu_high_school_geography": {
|
| 175 |
+
"alias": " - high_school_geography",
|
| 176 |
+
"acc,none": 0.19696969696969696,
|
| 177 |
+
"acc_stderr,none": 0.028335609732463355
|
| 178 |
+
},
|
| 179 |
+
"mmlu_high_school_government_and_politics": {
|
| 180 |
+
"alias": " - high_school_government_and_politics",
|
| 181 |
+
"acc,none": 0.23316062176165803,
|
| 182 |
+
"acc_stderr,none": 0.030516111371476008
|
| 183 |
+
},
|
| 184 |
+
"mmlu_high_school_macroeconomics": {
|
| 185 |
+
"alias": " - high_school_macroeconomics",
|
| 186 |
+
"acc,none": 0.2,
|
| 187 |
+
"acc_stderr,none": 0.020280805062535726
|
| 188 |
+
},
|
| 189 |
+
"mmlu_high_school_microeconomics": {
|
| 190 |
+
"alias": " - high_school_microeconomics",
|
| 191 |
+
"acc,none": 0.20168067226890757,
|
| 192 |
+
"acc_stderr,none": 0.026064313406304534
|
| 193 |
+
},
|
| 194 |
+
"mmlu_high_school_psychology": {
|
| 195 |
+
"alias": " - high_school_psychology",
|
| 196 |
+
"acc,none": 0.21467889908256882,
|
| 197 |
+
"acc_stderr,none": 0.01760430414925649
|
| 198 |
+
},
|
| 199 |
+
"mmlu_human_sexuality": {
|
| 200 |
+
"alias": " - human_sexuality",
|
| 201 |
+
"acc,none": 0.22900763358778625,
|
| 202 |
+
"acc_stderr,none": 0.036853466317118506
|
| 203 |
+
},
|
| 204 |
+
"mmlu_professional_psychology": {
|
| 205 |
+
"alias": " - professional_psychology",
|
| 206 |
+
"acc,none": 0.24673202614379086,
|
| 207 |
+
"acc_stderr,none": 0.0174408203674025
|
| 208 |
+
},
|
| 209 |
+
"mmlu_public_relations": {
|
| 210 |
+
"alias": " - public_relations",
|
| 211 |
+
"acc,none": 0.22727272727272727,
|
| 212 |
+
"acc_stderr,none": 0.04013964554072775
|
| 213 |
+
},
|
| 214 |
+
"mmlu_security_studies": {
|
| 215 |
+
"alias": " - security_studies",
|
| 216 |
+
"acc,none": 0.21224489795918366,
|
| 217 |
+
"acc_stderr,none": 0.026176967197866767
|
| 218 |
+
},
|
| 219 |
+
"mmlu_sociology": {
|
| 220 |
+
"alias": " - sociology",
|
| 221 |
+
"acc,none": 0.21393034825870647,
|
| 222 |
+
"acc_stderr,none": 0.0289969096933289
|
| 223 |
+
},
|
| 224 |
+
"mmlu_us_foreign_policy": {
|
| 225 |
+
"alias": " - us_foreign_policy",
|
| 226 |
+
"acc,none": 0.27,
|
| 227 |
+
"acc_stderr,none": 0.0446196043338474
|
| 228 |
+
},
|
| 229 |
+
"mmlu_stem": {
|
| 230 |
+
"acc,none": 0.23184268950206152,
|
| 231 |
+
"acc_stderr,none": 0.00750773023393252,
|
| 232 |
+
"alias": " - stem"
|
| 233 |
+
},
|
| 234 |
+
"mmlu_abstract_algebra": {
|
| 235 |
+
"alias": " - abstract_algebra",
|
| 236 |
+
"acc,none": 0.22,
|
| 237 |
+
"acc_stderr,none": 0.04163331998932269
|
| 238 |
+
},
|
| 239 |
+
"mmlu_anatomy": {
|
| 240 |
+
"alias": " - anatomy",
|
| 241 |
+
"acc,none": 0.23703703703703705,
|
| 242 |
+
"acc_stderr,none": 0.03673731683969506
|
| 243 |
+
},
|
| 244 |
+
"mmlu_astronomy": {
|
| 245 |
+
"alias": " - astronomy",
|
| 246 |
+
"acc,none": 0.21052631578947367,
|
| 247 |
+
"acc_stderr,none": 0.033176727875331574
|
| 248 |
+
},
|
| 249 |
+
"mmlu_college_biology": {
|
| 250 |
+
"alias": " - college_biology",
|
| 251 |
+
"acc,none": 0.2916666666666667,
|
| 252 |
+
"acc_stderr,none": 0.03800968060554859
|
| 253 |
+
},
|
| 254 |
+
"mmlu_college_chemistry": {
|
| 255 |
+
"alias": " - college_chemistry",
|
| 256 |
+
"acc,none": 0.21,
|
| 257 |
+
"acc_stderr,none": 0.040936018074033256
|
| 258 |
+
},
|
| 259 |
+
"mmlu_college_computer_science": {
|
| 260 |
+
"alias": " - college_computer_science",
|
| 261 |
+
"acc,none": 0.25,
|
| 262 |
+
"acc_stderr,none": 0.04351941398892446
|
| 263 |
+
},
|
| 264 |
+
"mmlu_college_mathematics": {
|
| 265 |
+
"alias": " - college_mathematics",
|
| 266 |
+
"acc,none": 0.24,
|
| 267 |
+
"acc_stderr,none": 0.04292346959909284
|
| 268 |
+
},
|
| 269 |
+
"mmlu_college_physics": {
|
| 270 |
+
"alias": " - college_physics",
|
| 271 |
+
"acc,none": 0.21568627450980393,
|
| 272 |
+
"acc_stderr,none": 0.04092563958237655
|
| 273 |
+
},
|
| 274 |
+
"mmlu_computer_security": {
|
| 275 |
+
"alias": " - computer_security",
|
| 276 |
+
"acc,none": 0.3,
|
| 277 |
+
"acc_stderr,none": 0.046056618647183814
|
| 278 |
+
},
|
| 279 |
+
"mmlu_conceptual_physics": {
|
| 280 |
+
"alias": " - conceptual_physics",
|
| 281 |
+
"acc,none": 0.28936170212765955,
|
| 282 |
+
"acc_stderr,none": 0.02964400657700962
|
| 283 |
+
},
|
| 284 |
+
"mmlu_electrical_engineering": {
|
| 285 |
+
"alias": " - electrical_engineering",
|
| 286 |
+
"acc,none": 0.25517241379310346,
|
| 287 |
+
"acc_stderr,none": 0.03632984052707842
|
| 288 |
+
},
|
| 289 |
+
"mmlu_elementary_mathematics": {
|
| 290 |
+
"alias": " - elementary_mathematics",
|
| 291 |
+
"acc,none": 0.21693121693121692,
|
| 292 |
+
"acc_stderr,none": 0.02122708244944506
|
| 293 |
+
},
|
| 294 |
+
"mmlu_high_school_biology": {
|
| 295 |
+
"alias": " - high_school_biology",
|
| 296 |
+
"acc,none": 0.1935483870967742,
|
| 297 |
+
"acc_stderr,none": 0.022475258525536057
|
| 298 |
+
},
|
| 299 |
+
"mmlu_high_school_chemistry": {
|
| 300 |
+
"alias": " - high_school_chemistry",
|
| 301 |
+
"acc,none": 0.1921182266009852,
|
| 302 |
+
"acc_stderr,none": 0.027719315709614778
|
| 303 |
+
},
|
| 304 |
+
"mmlu_high_school_computer_science": {
|
| 305 |
+
"alias": " - high_school_computer_science",
|
| 306 |
+
"acc,none": 0.23,
|
| 307 |
+
"acc_stderr,none": 0.04229525846816506
|
| 308 |
+
},
|
| 309 |
+
"mmlu_high_school_mathematics": {
|
| 310 |
+
"alias": " - high_school_mathematics",
|
| 311 |
+
"acc,none": 0.26296296296296295,
|
| 312 |
+
"acc_stderr,none": 0.026842057873833706
|
| 313 |
+
},
|
| 314 |
+
"mmlu_high_school_physics": {
|
| 315 |
+
"alias": " - high_school_physics",
|
| 316 |
+
"acc,none": 0.2251655629139073,
|
| 317 |
+
"acc_stderr,none": 0.03410435282008937
|
| 318 |
+
},
|
| 319 |
+
"mmlu_high_school_statistics": {
|
| 320 |
+
"alias": " - high_school_statistics",
|
| 321 |
+
"acc,none": 0.16203703703703703,
|
| 322 |
+
"acc_stderr,none": 0.02513045365226846
|
| 323 |
+
},
|
| 324 |
+
"mmlu_machine_learning": {
|
| 325 |
+
"alias": " - machine_learning",
|
| 326 |
+
"acc,none": 0.2857142857142857,
|
| 327 |
+
"acc_stderr,none": 0.04287858751340456
|
| 328 |
+
}
|
| 329 |
+
}
|
| 330 |
+
}
|
lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T09-41-13.419478_lambada_openai.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x060-World-1B6-v2.1-20240328-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_openai"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"lambada_openai": {
|
| 20 |
+
"alias": "lambada_openai",
|
| 21 |
+
"perplexity,none": 4.637882037692772,
|
| 22 |
+
"perplexity_stderr,none": 0.10579675670275654,
|
| 23 |
+
"acc,none": 0.6741703861828061,
|
| 24 |
+
"acc_stderr,none": 0.0065296843174760975
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T10-43-54.649276_hellaswag.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x060-World-1B6-v2.1-20240328-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"hellaswag"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"hellaswag": {
|
| 20 |
+
"alias": "hellaswag",
|
| 21 |
+
"acc,none": 0.4644493128858793,
|
| 22 |
+
"acc_stderr,none": 0.004977152746478598,
|
| 23 |
+
"acc_norm,none": 0.6107349133638718,
|
| 24 |
+
"acc_norm_stderr,none": 0.004865871290143343
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T10-47-38.776674_piqa.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x060-World-1B6-v2.1-20240328-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"piqa"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"piqa": {
|
| 20 |
+
"alias": "piqa",
|
| 21 |
+
"acc,none": 0.7437431991294886,
|
| 22 |
+
"acc_stderr,none": 0.010185787831565084,
|
| 23 |
+
"acc_norm,none": 0.7453754080522307,
|
| 24 |
+
"acc_norm_stderr,none": 0.010164432237060482
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T10-55-41.507473_arc_easy.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x060-World-1B6-v2.1-20240328-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_easy"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_easy": {
|
| 20 |
+
"alias": "arc_easy",
|
| 21 |
+
"acc,none": 0.6426767676767676,
|
| 22 |
+
"acc_stderr,none": 0.00983320561246312,
|
| 23 |
+
"acc_norm,none": 0.6216329966329966,
|
| 24 |
+
"acc_norm_stderr,none": 0.009951575683331947
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x060-World-1B6-v2.1-20240328-ctx4096/pad_11/0.4.8_2025-03-16T11-00-09.309689_arc_challenge.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x060-World-1B6-v2.1-20240328-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_challenge"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_challenge": {
|
| 20 |
+
"alias": "arc_challenge",
|
| 21 |
+
"acc,none": 0.3097269624573379,
|
| 22 |
+
"acc_stderr,none": 0.01351205841523836,
|
| 23 |
+
"acc_norm,none": 0.3370307167235495,
|
| 24 |
+
"acc_norm_stderr,none": 0.013813476652902274
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T00-45-26.618704_lambada_openai.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_openai"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"lambada_openai": {
|
| 20 |
+
"alias": "lambada_openai",
|
| 21 |
+
"perplexity,none": 3.8612709402913734,
|
| 22 |
+
"perplexity_stderr,none": 0.08105989741229948,
|
| 23 |
+
"acc,none": 0.7172520861634,
|
| 24 |
+
"acc_stderr,none": 0.006274045840971217
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T02-36-50.849530_hellaswag.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"hellaswag"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"hellaswag": {
|
| 20 |
+
"alias": "hellaswag",
|
| 21 |
+
"acc,none": 0.5053774148575981,
|
| 22 |
+
"acc_stderr,none": 0.004989492828168538,
|
| 23 |
+
"acc_norm,none": 0.6835291774546903,
|
| 24 |
+
"acc_norm_stderr,none": 0.004641484273335084
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T02-43-09.170340_piqa.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"piqa"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"piqa": {
|
| 20 |
+
"alias": "piqa",
|
| 21 |
+
"acc,none": 0.763873775843308,
|
| 22 |
+
"acc_stderr,none": 0.009908965890558216,
|
| 23 |
+
"acc_norm,none": 0.7633297062023939,
|
| 24 |
+
"acc_norm_stderr,none": 0.009916841655042806
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T02-57-01.574833_arc_easy.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_easy"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_easy": {
|
| 20 |
+
"alias": "arc_easy",
|
| 21 |
+
"acc,none": 0.7117003367003367,
|
| 22 |
+
"acc_stderr,none": 0.009294774252029625,
|
| 23 |
+
"acc_norm,none": 0.6717171717171717,
|
| 24 |
+
"acc_norm_stderr,none": 0.009635749509262161
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T03-04-40.682395_arc_challenge.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_challenge"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_challenge": {
|
| 20 |
+
"alias": "arc_challenge",
|
| 21 |
+
"acc,none": 0.35580204778157,
|
| 22 |
+
"acc_stderr,none": 0.01399057113791876,
|
| 23 |
+
"acc_norm,none": 0.39078498293515357,
|
| 24 |
+
"acc_norm_stderr,none": 0.014258563880513778
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T08-11-37.430416_glue.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"glue"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"cola": {
|
| 20 |
+
"alias": "cola",
|
| 21 |
+
"mcc,none": 0.07139041138969343,
|
| 22 |
+
"mcc_stderr,none": 0.03374782039086001
|
| 23 |
+
},
|
| 24 |
+
"mnli": {
|
| 25 |
+
"alias": "mnli",
|
| 26 |
+
"acc,none": 0.37748344370860926,
|
| 27 |
+
"acc_stderr,none": 0.004893294795433131
|
| 28 |
+
},
|
| 29 |
+
"mnli_mismatch": {
|
| 30 |
+
"alias": "mnli_mismatch",
|
| 31 |
+
"acc,none": 0.3817127746135069,
|
| 32 |
+
"acc_stderr,none": 0.004899645239995305
|
| 33 |
+
},
|
| 34 |
+
"mrpc": {
|
| 35 |
+
"alias": "mrpc",
|
| 36 |
+
"acc,none": 0.6397058823529411,
|
| 37 |
+
"acc_stderr,none": 0.023796963985532167,
|
| 38 |
+
"f1,none": 0.7111984282907662,
|
| 39 |
+
"f1_stderr,none": 0.02254781842194292
|
| 40 |
+
},
|
| 41 |
+
"qnli": {
|
| 42 |
+
"alias": "qnli",
|
| 43 |
+
"acc,none": 0.5317591067179206,
|
| 44 |
+
"acc_stderr,none": 0.006751749019242126
|
| 45 |
+
},
|
| 46 |
+
"qqp": {
|
| 47 |
+
"alias": "qqp",
|
| 48 |
+
"acc,none": 0.6318080633193174,
|
| 49 |
+
"acc_stderr,none": 0.002398740231240921,
|
| 50 |
+
"f1,none": 0.0,
|
| 51 |
+
"f1_stderr,none": 0.0
|
| 52 |
+
},
|
| 53 |
+
"rte": {
|
| 54 |
+
"alias": "rte",
|
| 55 |
+
"acc,none": 0.6137184115523465,
|
| 56 |
+
"acc_stderr,none": 0.029307720385270512
|
| 57 |
+
},
|
| 58 |
+
"sst2": {
|
| 59 |
+
"alias": "sst2",
|
| 60 |
+
"acc,none": 0.893348623853211,
|
| 61 |
+
"acc_stderr,none": 0.010458867008246879
|
| 62 |
+
},
|
| 63 |
+
"wnli": {
|
| 64 |
+
"alias": "wnli",
|
| 65 |
+
"acc,none": 0.43661971830985913,
|
| 66 |
+
"acc_stderr,none": 0.0592793555841297
|
| 67 |
+
}
|
| 68 |
+
}
|
| 69 |
+
}
|
lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T08-14-56.977956_winogrande.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"winogrande"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"winogrande": {
|
| 20 |
+
"alias": "winogrande",
|
| 21 |
+
"acc,none": 0.6629834254143646,
|
| 22 |
+
"acc_stderr,none": 0.01328495576939525
|
| 23 |
+
}
|
| 24 |
+
}
|
| 25 |
+
}
|
lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T08-29-25.494712_sciq.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"sciq"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"sciq": {
|
| 20 |
+
"alias": "sciq",
|
| 21 |
+
"acc,none": 0.922,
|
| 22 |
+
"acc_stderr,none": 0.008484573530118585,
|
| 23 |
+
"acc_norm,none": 0.886,
|
| 24 |
+
"acc_norm_stderr,none": 0.01005510343582333
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x060-World-3B-v2.1-20240417-ctx4096/pad_11/0.4.8_2025-03-15T16-15-02.568125_mmlu.json
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x060-World-3B-v2.1-20240417-ctx4096.pth",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"mmlu"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"mmlu": {
|
| 20 |
+
"acc,none": 0.28300811850163793,
|
| 21 |
+
"acc_stderr,none": 0.0037926401279525677,
|
| 22 |
+
"alias": "mmlu"
|
| 23 |
+
},
|
| 24 |
+
"mmlu_humanities": {
|
| 25 |
+
"acc,none": 0.29330499468650373,
|
| 26 |
+
"acc_stderr,none": 0.00661831036231422,
|
| 27 |
+
"alias": " - humanities"
|
| 28 |
+
},
|
| 29 |
+
"mmlu_formal_logic": {
|
| 30 |
+
"alias": " - formal_logic",
|
| 31 |
+
"acc,none": 0.23809523809523808,
|
| 32 |
+
"acc_stderr,none": 0.03809523809523811
|
| 33 |
+
},
|
| 34 |
+
"mmlu_high_school_european_history": {
|
| 35 |
+
"alias": " - high_school_european_history",
|
| 36 |
+
"acc,none": 0.37575757575757573,
|
| 37 |
+
"acc_stderr,none": 0.03781887353205982
|
| 38 |
+
},
|
| 39 |
+
"mmlu_high_school_us_history": {
|
| 40 |
+
"alias": " - high_school_us_history",
|
| 41 |
+
"acc,none": 0.29411764705882354,
|
| 42 |
+
"acc_stderr,none": 0.031980016601150726
|
| 43 |
+
},
|
| 44 |
+
"mmlu_high_school_world_history": {
|
| 45 |
+
"alias": " - high_school_world_history",
|
| 46 |
+
"acc,none": 0.2616033755274262,
|
| 47 |
+
"acc_stderr,none": 0.028609516716994934
|
| 48 |
+
},
|
| 49 |
+
"mmlu_international_law": {
|
| 50 |
+
"alias": " - international_law",
|
| 51 |
+
"acc,none": 0.38016528925619836,
|
| 52 |
+
"acc_stderr,none": 0.04431324501968432
|
| 53 |
+
},
|
| 54 |
+
"mmlu_jurisprudence": {
|
| 55 |
+
"alias": " - jurisprudence",
|
| 56 |
+
"acc,none": 0.2777777777777778,
|
| 57 |
+
"acc_stderr,none": 0.04330043749650741
|
| 58 |
+
},
|
| 59 |
+
"mmlu_logical_fallacies": {
|
| 60 |
+
"alias": " - logical_fallacies",
|
| 61 |
+
"acc,none": 0.3312883435582822,
|
| 62 |
+
"acc_stderr,none": 0.03697983910025588
|
| 63 |
+
},
|
| 64 |
+
"mmlu_moral_disputes": {
|
| 65 |
+
"alias": " - moral_disputes",
|
| 66 |
+
"acc,none": 0.3063583815028902,
|
| 67 |
+
"acc_stderr,none": 0.024818350129436596
|
| 68 |
+
},
|
| 69 |
+
"mmlu_moral_scenarios": {
|
| 70 |
+
"alias": " - moral_scenarios",
|
| 71 |
+
"acc,none": 0.2324022346368715,
|
| 72 |
+
"acc_stderr,none": 0.014125968754673403
|
| 73 |
+
},
|
| 74 |
+
"mmlu_philosophy": {
|
| 75 |
+
"alias": " - philosophy",
|
| 76 |
+
"acc,none": 0.33762057877813506,
|
| 77 |
+
"acc_stderr,none": 0.02685882587948854
|
| 78 |
+
},
|
| 79 |
+
"mmlu_prehistory": {
|
| 80 |
+
"alias": " - prehistory",
|
| 81 |
+
"acc,none": 0.3425925925925926,
|
| 82 |
+
"acc_stderr,none": 0.02640614597362568
|
| 83 |
+
},
|
| 84 |
+
"mmlu_professional_law": {
|
| 85 |
+
"alias": " - professional_law",
|
| 86 |
+
"acc,none": 0.2907431551499348,
|
| 87 |
+
"acc_stderr,none": 0.011598062372851981
|
| 88 |
+
},
|
| 89 |
+
"mmlu_world_religions": {
|
| 90 |
+
"alias": " - world_religions",
|
| 91 |
+
"acc,none": 0.3508771929824561,
|
| 92 |
+
"acc_stderr,none": 0.03660298834049163
|
| 93 |
+
},
|
| 94 |
+
"mmlu_other": {
|
| 95 |
+
"acc,none": 0.28451882845188287,
|
| 96 |
+
"acc_stderr,none": 0.008058064756071836,
|
| 97 |
+
"alias": " - other"
|
| 98 |
+
},
|
| 99 |
+
"mmlu_business_ethics": {
|
| 100 |
+
"alias": " - business_ethics",
|
| 101 |
+
"acc,none": 0.32,
|
| 102 |
+
"acc_stderr,none": 0.046882617226215034
|
| 103 |
+
},
|
| 104 |
+
"mmlu_clinical_knowledge": {
|
| 105 |
+
"alias": " - clinical_knowledge",
|
| 106 |
+
"acc,none": 0.24150943396226415,
|
| 107 |
+
"acc_stderr,none": 0.02634148037111836
|
| 108 |
+
},
|
| 109 |
+
"mmlu_college_medicine": {
|
| 110 |
+
"alias": " - college_medicine",
|
| 111 |
+
"acc,none": 0.2543352601156069,
|
| 112 |
+
"acc_stderr,none": 0.0332055644308557
|
| 113 |
+
},
|
| 114 |
+
"mmlu_global_facts": {
|
| 115 |
+
"alias": " - global_facts",
|
| 116 |
+
"acc,none": 0.32,
|
| 117 |
+
"acc_stderr,none": 0.04688261722621505
|
| 118 |
+
},
|
| 119 |
+
"mmlu_human_aging": {
|
| 120 |
+
"alias": " - human_aging",
|
| 121 |
+
"acc,none": 0.2242152466367713,
|
| 122 |
+
"acc_stderr,none": 0.02799153425851952
|
| 123 |
+
},
|
| 124 |
+
"mmlu_management": {
|
| 125 |
+
"alias": " - management",
|
| 126 |
+
"acc,none": 0.20388349514563106,
|
| 127 |
+
"acc_stderr,none": 0.0398913985953177
|
| 128 |
+
},
|
| 129 |
+
"mmlu_marketing": {
|
| 130 |
+
"alias": " - marketing",
|
| 131 |
+
"acc,none": 0.3247863247863248,
|
| 132 |
+
"acc_stderr,none": 0.030679022765498835
|
| 133 |
+
},
|
| 134 |
+
"mmlu_medical_genetics": {
|
| 135 |
+
"alias": " - medical_genetics",
|
| 136 |
+
"acc,none": 0.19,
|
| 137 |
+
"acc_stderr,none": 0.03942772444036623
|
| 138 |
+
},
|
| 139 |
+
"mmlu_miscellaneous": {
|
| 140 |
+
"alias": " - miscellaneous",
|
| 141 |
+
"acc,none": 0.351213282247765,
|
| 142 |
+
"acc_stderr,none": 0.01706998205149943
|
| 143 |
+
},
|
| 144 |
+
"mmlu_nutrition": {
|
| 145 |
+
"alias": " - nutrition",
|
| 146 |
+
"acc,none": 0.29411764705882354,
|
| 147 |
+
"acc_stderr,none": 0.026090162504279046
|
| 148 |
+
},
|
| 149 |
+
"mmlu_professional_accounting": {
|
| 150 |
+
"alias": " - professional_accounting",
|
| 151 |
+
"acc,none": 0.2765957446808511,
|
| 152 |
+
"acc_stderr,none": 0.026684564340461
|
| 153 |
+
},
|
| 154 |
+
"mmlu_professional_medicine": {
|
| 155 |
+
"alias": " - professional_medicine",
|
| 156 |
+
"acc,none": 0.22794117647058823,
|
| 157 |
+
"acc_stderr,none": 0.025483081468029804
|
| 158 |
+
},
|
| 159 |
+
"mmlu_virology": {
|
| 160 |
+
"alias": " - virology",
|
| 161 |
+
"acc,none": 0.2469879518072289,
|
| 162 |
+
"acc_stderr,none": 0.03357351982064536
|
| 163 |
+
},
|
| 164 |
+
"mmlu_social_sciences": {
|
| 165 |
+
"acc,none": 0.27494312642183943,
|
| 166 |
+
"acc_stderr,none": 0.008044129845426675,
|
| 167 |
+
"alias": " - social sciences"
|
| 168 |
+
},
|
| 169 |
+
"mmlu_econometrics": {
|
| 170 |
+
"alias": " - econometrics",
|
| 171 |
+
"acc,none": 0.24561403508771928,
|
| 172 |
+
"acc_stderr,none": 0.040493392977481425
|
| 173 |
+
},
|
| 174 |
+
"mmlu_high_school_geography": {
|
| 175 |
+
"alias": " - high_school_geography",
|
| 176 |
+
"acc,none": 0.29797979797979796,
|
| 177 |
+
"acc_stderr,none": 0.03258630383836556
|
| 178 |
+
},
|
| 179 |
+
"mmlu_high_school_government_and_politics": {
|
| 180 |
+
"alias": " - high_school_government_and_politics",
|
| 181 |
+
"acc,none": 0.31088082901554404,
|
| 182 |
+
"acc_stderr,none": 0.03340361906276586
|
| 183 |
+
},
|
| 184 |
+
"mmlu_high_school_macroeconomics": {
|
| 185 |
+
"alias": " - high_school_macroeconomics",
|
| 186 |
+
"acc,none": 0.22564102564102564,
|
| 187 |
+
"acc_stderr,none": 0.021193632525148533
|
| 188 |
+
},
|
| 189 |
+
"mmlu_high_school_microeconomics": {
|
| 190 |
+
"alias": " - high_school_microeconomics",
|
| 191 |
+
"acc,none": 0.23529411764705882,
|
| 192 |
+
"acc_stderr,none": 0.02755361446786379
|
| 193 |
+
},
|
| 194 |
+
"mmlu_high_school_psychology": {
|
| 195 |
+
"alias": " - high_school_psychology",
|
| 196 |
+
"acc,none": 0.27155963302752295,
|
| 197 |
+
"acc_stderr,none": 0.019069098363191445
|
| 198 |
+
},
|
| 199 |
+
"mmlu_human_sexuality": {
|
| 200 |
+
"alias": " - human_sexuality",
|
| 201 |
+
"acc,none": 0.2900763358778626,
|
| 202 |
+
"acc_stderr,none": 0.03980066246467766
|
| 203 |
+
},
|
| 204 |
+
"mmlu_professional_psychology": {
|
| 205 |
+
"alias": " - professional_psychology",
|
| 206 |
+
"acc,none": 0.29248366013071897,
|
| 207 |
+
"acc_stderr,none": 0.01840341571010978
|
| 208 |
+
},
|
| 209 |
+
"mmlu_public_relations": {
|
| 210 |
+
"alias": " - public_relations",
|
| 211 |
+
"acc,none": 0.36363636363636365,
|
| 212 |
+
"acc_stderr,none": 0.046075820907199756
|
| 213 |
+
},
|
| 214 |
+
"mmlu_security_studies": {
|
| 215 |
+
"alias": " - security_studies",
|
| 216 |
+
"acc,none": 0.2612244897959184,
|
| 217 |
+
"acc_stderr,none": 0.028123429335142787
|
| 218 |
+
},
|
| 219 |
+
"mmlu_sociology": {
|
| 220 |
+
"alias": " - sociology",
|
| 221 |
+
"acc,none": 0.3034825870646766,
|
| 222 |
+
"acc_stderr,none": 0.03251006816458619
|
| 223 |
+
},
|
| 224 |
+
"mmlu_us_foreign_policy": {
|
| 225 |
+
"alias": " - us_foreign_policy",
|
| 226 |
+
"acc,none": 0.25,
|
| 227 |
+
"acc_stderr,none": 0.04351941398892446
|
| 228 |
+
},
|
| 229 |
+
"mmlu_stem": {
|
| 230 |
+
"acc,none": 0.27402473834443386,
|
| 231 |
+
"acc_stderr,none": 0.007942050435080712,
|
| 232 |
+
"alias": " - stem"
|
| 233 |
+
},
|
| 234 |
+
"mmlu_abstract_algebra": {
|
| 235 |
+
"alias": " - abstract_algebra",
|
| 236 |
+
"acc,none": 0.23,
|
| 237 |
+
"acc_stderr,none": 0.042295258468165065
|
| 238 |
+
},
|
| 239 |
+
"mmlu_anatomy": {
|
| 240 |
+
"alias": " - anatomy",
|
| 241 |
+
"acc,none": 0.35555555555555557,
|
| 242 |
+
"acc_stderr,none": 0.04135176749720386
|
| 243 |
+
},
|
| 244 |
+
"mmlu_astronomy": {
|
| 245 |
+
"alias": " - astronomy",
|
| 246 |
+
"acc,none": 0.2894736842105263,
|
| 247 |
+
"acc_stderr,none": 0.03690677986137282
|
| 248 |
+
},
|
| 249 |
+
"mmlu_college_biology": {
|
| 250 |
+
"alias": " - college_biology",
|
| 251 |
+
"acc,none": 0.2777777777777778,
|
| 252 |
+
"acc_stderr,none": 0.03745554791462457
|
| 253 |
+
},
|
| 254 |
+
"mmlu_college_chemistry": {
|
| 255 |
+
"alias": " - college_chemistry",
|
| 256 |
+
"acc,none": 0.19,
|
| 257 |
+
"acc_stderr,none": 0.039427724440366234
|
| 258 |
+
},
|
| 259 |
+
"mmlu_college_computer_science": {
|
| 260 |
+
"alias": " - college_computer_science",
|
| 261 |
+
"acc,none": 0.27,
|
| 262 |
+
"acc_stderr,none": 0.0446196043338474
|
| 263 |
+
},
|
| 264 |
+
"mmlu_college_mathematics": {
|
| 265 |
+
"alias": " - college_mathematics",
|
| 266 |
+
"acc,none": 0.26,
|
| 267 |
+
"acc_stderr,none": 0.0440844002276808
|
| 268 |
+
},
|
| 269 |
+
"mmlu_college_physics": {
|
| 270 |
+
"alias": " - college_physics",
|
| 271 |
+
"acc,none": 0.21568627450980393,
|
| 272 |
+
"acc_stderr,none": 0.04092563958237656
|
| 273 |
+
},
|
| 274 |
+
"mmlu_computer_security": {
|
| 275 |
+
"alias": " - computer_security",
|
| 276 |
+
"acc,none": 0.31,
|
| 277 |
+
"acc_stderr,none": 0.04648231987117316
|
| 278 |
+
},
|
| 279 |
+
"mmlu_conceptual_physics": {
|
| 280 |
+
"alias": " - conceptual_physics",
|
| 281 |
+
"acc,none": 0.2425531914893617,
|
| 282 |
+
"acc_stderr,none": 0.028020226271200217
|
| 283 |
+
},
|
| 284 |
+
"mmlu_electrical_engineering": {
|
| 285 |
+
"alias": " - electrical_engineering",
|
| 286 |
+
"acc,none": 0.3103448275862069,
|
| 287 |
+
"acc_stderr,none": 0.038552896163789485
|
| 288 |
+
},
|
| 289 |
+
"mmlu_elementary_mathematics": {
|
| 290 |
+
"alias": " - elementary_mathematics",
|
| 291 |
+
"acc,none": 0.2698412698412698,
|
| 292 |
+
"acc_stderr,none": 0.022860838309232072
|
| 293 |
+
},
|
| 294 |
+
"mmlu_high_school_biology": {
|
| 295 |
+
"alias": " - high_school_biology",
|
| 296 |
+
"acc,none": 0.2870967741935484,
|
| 297 |
+
"acc_stderr,none": 0.02573654274559452
|
| 298 |
+
},
|
| 299 |
+
"mmlu_high_school_chemistry": {
|
| 300 |
+
"alias": " - high_school_chemistry",
|
| 301 |
+
"acc,none": 0.30049261083743845,
|
| 302 |
+
"acc_stderr,none": 0.03225799476233485
|
| 303 |
+
},
|
| 304 |
+
"mmlu_high_school_computer_science": {
|
| 305 |
+
"alias": " - high_school_computer_science",
|
| 306 |
+
"acc,none": 0.33,
|
| 307 |
+
"acc_stderr,none": 0.047258156262526045
|
| 308 |
+
},
|
| 309 |
+
"mmlu_high_school_mathematics": {
|
| 310 |
+
"alias": " - high_school_mathematics",
|
| 311 |
+
"acc,none": 0.25925925925925924,
|
| 312 |
+
"acc_stderr,none": 0.026719240783712184
|
| 313 |
+
},
|
| 314 |
+
"mmlu_high_school_physics": {
|
| 315 |
+
"alias": " - high_school_physics",
|
| 316 |
+
"acc,none": 0.2913907284768212,
|
| 317 |
+
"acc_stderr,none": 0.03710185726119994
|
| 318 |
+
},
|
| 319 |
+
"mmlu_high_school_statistics": {
|
| 320 |
+
"alias": " - high_school_statistics",
|
| 321 |
+
"acc,none": 0.22685185185185186,
|
| 322 |
+
"acc_stderr,none": 0.028561650102422263
|
| 323 |
+
},
|
| 324 |
+
"mmlu_machine_learning": {
|
| 325 |
+
"alias": " - machine_learning",
|
| 326 |
+
"acc,none": 0.30357142857142855,
|
| 327 |
+
"acc_stderr,none": 0.04364226155841044
|
| 328 |
+
}
|
| 329 |
+
}
|
| 330 |
+
}
|