Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-06-20.531501_lambada_openai.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-21-44.119690_hellaswag.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-23-17.145963_piqa.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-26-14.237824_arc_easy.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-27-50.078594_arc_challenge.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-26-56.754045_glue.json +69 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-27-55.978255_winogrande.json +25 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-29-37.701435_sciq.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-50-27.279294_mmlu.json +330 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T21-00-05.534236_lambada_multilingual.json +55 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T21-15-59.241658_pawsx.json +60 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T21-20-10.333493_xcopa.json +80 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T10-31-47.400032_xnli.json +100 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T10-48-34.071254_xstorycloze.json +80 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T10-51-40.216025_xwinograd.json +55 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T11-47-43.816593_mmlu.json +330 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T11-56-41.258584_lambada_openai.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-14-40.804741_hellaswag.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-16-12.146728_piqa.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-19-04.686563_arc_easy.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-20-40.573806_arc_challenge.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-19-33.146311_glue.json +69 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-20-31.009617_winogrande.json +25 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-22-11.575335_sciq.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-41-59.570521_mmlu.json +330 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-51-36.484339_lambada_multilingual.json +55 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T14-07-25.400235_pawsx.json +60 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T14-57-32.308002_xcopa.json +80 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T16-02-57.684170_xnli.json +100 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T16-19-00.595561_xstorycloze.json +80 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T16-21-46.468616_xwinograd.json +55 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T17-22-18.851434_mmlu.json +330 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T18-54-53.797865_lambada_openai_.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T09-57-49.932590_lambada_openai.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-13-21.086739_hellaswag.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-14-54.756945_piqa.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-17-57.209289_arc_easy.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-19-33.710789_arc_challenge.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T12-39-38.019272_xnli.json +100 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T12-56-32.162685_xstorycloze.json +80 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T12-59-30.016094_xwinograd.json +55 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T13-49-01.062852_mmlu.json +330 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-03-10.026654_glue.json +69 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-04-32.038053_winogrande.json +25 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-09-54.775610_sciq.json +27 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-31-43.872563_mmlu.json +330 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T20-07-36.031022_lambada_multilingual.json +55 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T20-24-29.617348_pawsx.json +60 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T20-29-15.015293_xcopa.json +80 -0
- lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T11-51-55.842699_lambada_openai.json +25 -0
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-06-20.531501_lambada_openai.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_openai"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 0,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
0
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"lambada_openai": {
|
| 20 |
+
"alias": "lambada_openai",
|
| 21 |
+
"perplexity,none": 12.407350784089648,
|
| 22 |
+
"perplexity_stderr,none": 0.37458781533549945,
|
| 23 |
+
"acc,none": 0.48981176013972444,
|
| 24 |
+
"acc_stderr,none": 0.006964531366864929
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-21-44.119690_hellaswag.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"hellaswag"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 0,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
0
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"hellaswag": {
|
| 20 |
+
"alias": "hellaswag",
|
| 21 |
+
"acc,none": 0.3467436765584545,
|
| 22 |
+
"acc_stderr,none": 0.004749606196363352,
|
| 23 |
+
"acc_norm,none": 0.4219279028082055,
|
| 24 |
+
"acc_norm_stderr,none": 0.004928578106026366
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-23-17.145963_piqa.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"piqa"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 0,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
0
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"piqa": {
|
| 20 |
+
"alias": "piqa",
|
| 21 |
+
"acc,none": 0.6713819368879217,
|
| 22 |
+
"acc_stderr,none": 0.010959127105167044,
|
| 23 |
+
"acc_norm,none": 0.6637649619151251,
|
| 24 |
+
"acc_norm_stderr,none": 0.01102234670897023
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-26-14.237824_arc_easy.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_easy"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 0,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
0
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_easy": {
|
| 20 |
+
"alias": "arc_easy",
|
| 21 |
+
"acc,none": 0.5660774410774411,
|
| 22 |
+
"acc_stderr,none": 0.010169795770462104,
|
| 23 |
+
"acc_norm,none": 0.4877946127946128,
|
| 24 |
+
"acc_norm_stderr,none": 0.01025672623512901
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T19-27-50.078594_arc_challenge.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_challenge"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 0,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
0
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_challenge": {
|
| 20 |
+
"alias": "arc_challenge",
|
| 21 |
+
"acc,none": 0.2363481228668942,
|
| 22 |
+
"acc_stderr,none": 0.012414960524301853,
|
| 23 |
+
"acc_norm,none": 0.2764505119453925,
|
| 24 |
+
"acc_norm_stderr,none": 0.013069662474252428
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-26-56.754045_glue.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"glue"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 0,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
0
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"cola": {
|
| 20 |
+
"alias": "cola",
|
| 21 |
+
"mcc,none": 0.0,
|
| 22 |
+
"mcc_stderr,none": 0.0
|
| 23 |
+
},
|
| 24 |
+
"mnli": {
|
| 25 |
+
"alias": "mnli",
|
| 26 |
+
"acc,none": 0.31818644931227713,
|
| 27 |
+
"acc_stderr,none": 0.004701653585969689
|
| 28 |
+
},
|
| 29 |
+
"mnli_mismatch": {
|
| 30 |
+
"alias": "mnli_mismatch",
|
| 31 |
+
"acc,none": 0.31834825061025224,
|
| 32 |
+
"acc_stderr,none": 0.004698223389253123
|
| 33 |
+
},
|
| 34 |
+
"mrpc": {
|
| 35 |
+
"alias": "mrpc",
|
| 36 |
+
"acc,none": 0.31862745098039214,
|
| 37 |
+
"acc_stderr,none": 0.023095996571841474,
|
| 38 |
+
"f1,none": 0.027972027972027972,
|
| 39 |
+
"f1_stderr,none": 0.013657795743166324
|
| 40 |
+
},
|
| 41 |
+
"qnli": {
|
| 42 |
+
"alias": "qnli",
|
| 43 |
+
"acc,none": 0.49203734211971445,
|
| 44 |
+
"acc_stderr,none": 0.006764552590269392
|
| 45 |
+
},
|
| 46 |
+
"qqp": {
|
| 47 |
+
"alias": "qqp",
|
| 48 |
+
"acc,none": 0.4093742270591145,
|
| 49 |
+
"acc_stderr,none": 0.0024455128327978796,
|
| 50 |
+
"f1,none": 0.5085108572604713,
|
| 51 |
+
"f1_stderr,none": 0.002764972824540013
|
| 52 |
+
},
|
| 53 |
+
"rte": {
|
| 54 |
+
"alias": "rte",
|
| 55 |
+
"acc,none": 0.5631768953068592,
|
| 56 |
+
"acc_stderr,none": 0.029855247390314938
|
| 57 |
+
},
|
| 58 |
+
"sst2": {
|
| 59 |
+
"alias": "sst2",
|
| 60 |
+
"acc,none": 0.8188073394495413,
|
| 61 |
+
"acc_stderr,none": 0.013051249343626403
|
| 62 |
+
},
|
| 63 |
+
"wnli": {
|
| 64 |
+
"alias": "wnli",
|
| 65 |
+
"acc,none": 0.4647887323943662,
|
| 66 |
+
"acc_stderr,none": 0.05961305784972239
|
| 67 |
+
}
|
| 68 |
+
}
|
| 69 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-27-55.978255_winogrande.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"winogrande"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 0,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
0
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"winogrande": {
|
| 20 |
+
"alias": "winogrande",
|
| 21 |
+
"acc,none": 0.5256511444356748,
|
| 22 |
+
"acc_stderr,none": 0.014033980956108558
|
| 23 |
+
}
|
| 24 |
+
}
|
| 25 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-29-37.701435_sciq.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"sciq"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 0,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
0
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"sciq": {
|
| 20 |
+
"alias": "sciq",
|
| 21 |
+
"acc,none": 0.862,
|
| 22 |
+
"acc_stderr,none": 0.010912152632504417,
|
| 23 |
+
"acc_norm,none": 0.79,
|
| 24 |
+
"acc_norm_stderr,none": 0.012886662332274527
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T20-50-27.279294_mmlu.json
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"mmlu"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 0,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
0
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"mmlu": {
|
| 20 |
+
"acc,none": 0.2577980344680245,
|
| 21 |
+
"acc_stderr,none": 0.003687241789650295,
|
| 22 |
+
"alias": "mmlu"
|
| 23 |
+
},
|
| 24 |
+
"mmlu_humanities": {
|
| 25 |
+
"acc,none": 0.2459086078639745,
|
| 26 |
+
"acc_stderr,none": 0.006272632557690886,
|
| 27 |
+
"alias": " - humanities"
|
| 28 |
+
},
|
| 29 |
+
"mmlu_formal_logic": {
|
| 30 |
+
"alias": " - formal_logic",
|
| 31 |
+
"acc,none": 0.3333333333333333,
|
| 32 |
+
"acc_stderr,none": 0.04216370213557836
|
| 33 |
+
},
|
| 34 |
+
"mmlu_high_school_european_history": {
|
| 35 |
+
"alias": " - high_school_european_history",
|
| 36 |
+
"acc,none": 0.3151515151515151,
|
| 37 |
+
"acc_stderr,none": 0.0362773057502241
|
| 38 |
+
},
|
| 39 |
+
"mmlu_high_school_us_history": {
|
| 40 |
+
"alias": " - high_school_us_history",
|
| 41 |
+
"acc,none": 0.25,
|
| 42 |
+
"acc_stderr,none": 0.03039153369274154
|
| 43 |
+
},
|
| 44 |
+
"mmlu_high_school_world_history": {
|
| 45 |
+
"alias": " - high_school_world_history",
|
| 46 |
+
"acc,none": 0.2742616033755274,
|
| 47 |
+
"acc_stderr,none": 0.02904133351059801
|
| 48 |
+
},
|
| 49 |
+
"mmlu_international_law": {
|
| 50 |
+
"alias": " - international_law",
|
| 51 |
+
"acc,none": 0.2396694214876033,
|
| 52 |
+
"acc_stderr,none": 0.03896878985070417
|
| 53 |
+
},
|
| 54 |
+
"mmlu_jurisprudence": {
|
| 55 |
+
"alias": " - jurisprudence",
|
| 56 |
+
"acc,none": 0.21296296296296297,
|
| 57 |
+
"acc_stderr,none": 0.0395783547198098
|
| 58 |
+
},
|
| 59 |
+
"mmlu_logical_fallacies": {
|
| 60 |
+
"alias": " - logical_fallacies",
|
| 61 |
+
"acc,none": 0.2392638036809816,
|
| 62 |
+
"acc_stderr,none": 0.03351953879521271
|
| 63 |
+
},
|
| 64 |
+
"mmlu_moral_disputes": {
|
| 65 |
+
"alias": " - moral_disputes",
|
| 66 |
+
"acc,none": 0.22832369942196531,
|
| 67 |
+
"acc_stderr,none": 0.022598703804321635
|
| 68 |
+
},
|
| 69 |
+
"mmlu_moral_scenarios": {
|
| 70 |
+
"alias": " - moral_scenarios",
|
| 71 |
+
"acc,none": 0.2547486033519553,
|
| 72 |
+
"acc_stderr,none": 0.014572650383409158
|
| 73 |
+
},
|
| 74 |
+
"mmlu_philosophy": {
|
| 75 |
+
"alias": " - philosophy",
|
| 76 |
+
"acc,none": 0.1832797427652733,
|
| 77 |
+
"acc_stderr,none": 0.021974198848265805
|
| 78 |
+
},
|
| 79 |
+
"mmlu_prehistory": {
|
| 80 |
+
"alias": " - prehistory",
|
| 81 |
+
"acc,none": 0.21604938271604937,
|
| 82 |
+
"acc_stderr,none": 0.022899162918445785
|
| 83 |
+
},
|
| 84 |
+
"mmlu_professional_law": {
|
| 85 |
+
"alias": " - professional_law",
|
| 86 |
+
"acc,none": 0.2503259452411995,
|
| 87 |
+
"acc_stderr,none": 0.011064151027165441
|
| 88 |
+
},
|
| 89 |
+
"mmlu_world_religions": {
|
| 90 |
+
"alias": " - world_religions",
|
| 91 |
+
"acc,none": 0.2222222222222222,
|
| 92 |
+
"acc_stderr,none": 0.031885780176863984
|
| 93 |
+
},
|
| 94 |
+
"mmlu_other": {
|
| 95 |
+
"acc,none": 0.2581268104280657,
|
| 96 |
+
"acc_stderr,none": 0.007836786127741097,
|
| 97 |
+
"alias": " - other"
|
| 98 |
+
},
|
| 99 |
+
"mmlu_business_ethics": {
|
| 100 |
+
"alias": " - business_ethics",
|
| 101 |
+
"acc,none": 0.24,
|
| 102 |
+
"acc_stderr,none": 0.04292346959909283
|
| 103 |
+
},
|
| 104 |
+
"mmlu_clinical_knowledge": {
|
| 105 |
+
"alias": " - clinical_knowledge",
|
| 106 |
+
"acc,none": 0.2943396226415094,
|
| 107 |
+
"acc_stderr,none": 0.028049186315695245
|
| 108 |
+
},
|
| 109 |
+
"mmlu_college_medicine": {
|
| 110 |
+
"alias": " - college_medicine",
|
| 111 |
+
"acc,none": 0.2543352601156069,
|
| 112 |
+
"acc_stderr,none": 0.0332055644308557
|
| 113 |
+
},
|
| 114 |
+
"mmlu_global_facts": {
|
| 115 |
+
"alias": " - global_facts",
|
| 116 |
+
"acc,none": 0.2,
|
| 117 |
+
"acc_stderr,none": 0.04020151261036843
|
| 118 |
+
},
|
| 119 |
+
"mmlu_human_aging": {
|
| 120 |
+
"alias": " - human_aging",
|
| 121 |
+
"acc,none": 0.28699551569506726,
|
| 122 |
+
"acc_stderr,none": 0.03036037971029195
|
| 123 |
+
},
|
| 124 |
+
"mmlu_management": {
|
| 125 |
+
"alias": " - management",
|
| 126 |
+
"acc,none": 0.33980582524271846,
|
| 127 |
+
"acc_stderr,none": 0.046897659372781356
|
| 128 |
+
},
|
| 129 |
+
"mmlu_marketing": {
|
| 130 |
+
"alias": " - marketing",
|
| 131 |
+
"acc,none": 0.2606837606837607,
|
| 132 |
+
"acc_stderr,none": 0.028760348956523414
|
| 133 |
+
},
|
| 134 |
+
"mmlu_medical_genetics": {
|
| 135 |
+
"alias": " - medical_genetics",
|
| 136 |
+
"acc,none": 0.22,
|
| 137 |
+
"acc_stderr,none": 0.04163331998932269
|
| 138 |
+
},
|
| 139 |
+
"mmlu_miscellaneous": {
|
| 140 |
+
"alias": " - miscellaneous",
|
| 141 |
+
"acc,none": 0.21966794380587484,
|
| 142 |
+
"acc_stderr,none": 0.014805384478371158
|
| 143 |
+
},
|
| 144 |
+
"mmlu_nutrition": {
|
| 145 |
+
"alias": " - nutrition",
|
| 146 |
+
"acc,none": 0.28104575163398693,
|
| 147 |
+
"acc_stderr,none": 0.025738854797818705
|
| 148 |
+
},
|
| 149 |
+
"mmlu_professional_accounting": {
|
| 150 |
+
"alias": " - professional_accounting",
|
| 151 |
+
"acc,none": 0.25886524822695034,
|
| 152 |
+
"acc_stderr,none": 0.026129572527180848
|
| 153 |
+
},
|
| 154 |
+
"mmlu_professional_medicine": {
|
| 155 |
+
"alias": " - professional_medicine",
|
| 156 |
+
"acc,none": 0.3235294117647059,
|
| 157 |
+
"acc_stderr,none": 0.028418208619406794
|
| 158 |
+
},
|
| 159 |
+
"mmlu_virology": {
|
| 160 |
+
"alias": " - virology",
|
| 161 |
+
"acc,none": 0.21084337349397592,
|
| 162 |
+
"acc_stderr,none": 0.031755547866299194
|
| 163 |
+
},
|
| 164 |
+
"mmlu_social_sciences": {
|
| 165 |
+
"acc,none": 0.2612934676633084,
|
| 166 |
+
"acc_stderr,none": 0.007931489440202161,
|
| 167 |
+
"alias": " - social sciences"
|
| 168 |
+
},
|
| 169 |
+
"mmlu_econometrics": {
|
| 170 |
+
"alias": " - econometrics",
|
| 171 |
+
"acc,none": 0.23684210526315788,
|
| 172 |
+
"acc_stderr,none": 0.039994238792813344
|
| 173 |
+
},
|
| 174 |
+
"mmlu_high_school_geography": {
|
| 175 |
+
"alias": " - high_school_geography",
|
| 176 |
+
"acc,none": 0.2878787878787879,
|
| 177 |
+
"acc_stderr,none": 0.03225883512300992
|
| 178 |
+
},
|
| 179 |
+
"mmlu_high_school_government_and_politics": {
|
| 180 |
+
"alias": " - high_school_government_and_politics",
|
| 181 |
+
"acc,none": 0.25906735751295334,
|
| 182 |
+
"acc_stderr,none": 0.0316187791793541
|
| 183 |
+
},
|
| 184 |
+
"mmlu_high_school_macroeconomics": {
|
| 185 |
+
"alias": " - high_school_macroeconomics",
|
| 186 |
+
"acc,none": 0.26153846153846155,
|
| 187 |
+
"acc_stderr,none": 0.022282141204204426
|
| 188 |
+
},
|
| 189 |
+
"mmlu_high_school_microeconomics": {
|
| 190 |
+
"alias": " - high_school_microeconomics",
|
| 191 |
+
"acc,none": 0.2857142857142857,
|
| 192 |
+
"acc_stderr,none": 0.029344572500634335
|
| 193 |
+
},
|
| 194 |
+
"mmlu_high_school_psychology": {
|
| 195 |
+
"alias": " - high_school_psychology",
|
| 196 |
+
"acc,none": 0.25504587155963304,
|
| 197 |
+
"acc_stderr,none": 0.018688500856535832
|
| 198 |
+
},
|
| 199 |
+
"mmlu_human_sexuality": {
|
| 200 |
+
"alias": " - human_sexuality",
|
| 201 |
+
"acc,none": 0.2748091603053435,
|
| 202 |
+
"acc_stderr,none": 0.03915345408847836
|
| 203 |
+
},
|
| 204 |
+
"mmlu_professional_psychology": {
|
| 205 |
+
"alias": " - professional_psychology",
|
| 206 |
+
"acc,none": 0.25980392156862747,
|
| 207 |
+
"acc_stderr,none": 0.017740899509177795
|
| 208 |
+
},
|
| 209 |
+
"mmlu_public_relations": {
|
| 210 |
+
"alias": " - public_relations",
|
| 211 |
+
"acc,none": 0.2636363636363636,
|
| 212 |
+
"acc_stderr,none": 0.04220224692971987
|
| 213 |
+
},
|
| 214 |
+
"mmlu_security_studies": {
|
| 215 |
+
"alias": " - security_studies",
|
| 216 |
+
"acc,none": 0.24081632653061225,
|
| 217 |
+
"acc_stderr,none": 0.027372942201788163
|
| 218 |
+
},
|
| 219 |
+
"mmlu_sociology": {
|
| 220 |
+
"alias": " - sociology",
|
| 221 |
+
"acc,none": 0.2736318407960199,
|
| 222 |
+
"acc_stderr,none": 0.031524391865554016
|
| 223 |
+
},
|
| 224 |
+
"mmlu_us_foreign_policy": {
|
| 225 |
+
"alias": " - us_foreign_policy",
|
| 226 |
+
"acc,none": 0.23,
|
| 227 |
+
"acc_stderr,none": 0.04229525846816505
|
| 228 |
+
},
|
| 229 |
+
"mmlu_stem": {
|
| 230 |
+
"acc,none": 0.2718046305106248,
|
| 231 |
+
"acc_stderr,none": 0.00790588931133328,
|
| 232 |
+
"alias": " - stem"
|
| 233 |
+
},
|
| 234 |
+
"mmlu_abstract_algebra": {
|
| 235 |
+
"alias": " - abstract_algebra",
|
| 236 |
+
"acc,none": 0.22,
|
| 237 |
+
"acc_stderr,none": 0.04163331998932269
|
| 238 |
+
},
|
| 239 |
+
"mmlu_anatomy": {
|
| 240 |
+
"alias": " - anatomy",
|
| 241 |
+
"acc,none": 0.2222222222222222,
|
| 242 |
+
"acc_stderr,none": 0.03591444084196969
|
| 243 |
+
},
|
| 244 |
+
"mmlu_astronomy": {
|
| 245 |
+
"alias": " - astronomy",
|
| 246 |
+
"acc,none": 0.27631578947368424,
|
| 247 |
+
"acc_stderr,none": 0.03639057569952924
|
| 248 |
+
},
|
| 249 |
+
"mmlu_college_biology": {
|
| 250 |
+
"alias": " - college_biology",
|
| 251 |
+
"acc,none": 0.2569444444444444,
|
| 252 |
+
"acc_stderr,none": 0.03653946969442099
|
| 253 |
+
},
|
| 254 |
+
"mmlu_college_chemistry": {
|
| 255 |
+
"alias": " - college_chemistry",
|
| 256 |
+
"acc,none": 0.36,
|
| 257 |
+
"acc_stderr,none": 0.04824181513244218
|
| 258 |
+
},
|
| 259 |
+
"mmlu_college_computer_science": {
|
| 260 |
+
"alias": " - college_computer_science",
|
| 261 |
+
"acc,none": 0.27,
|
| 262 |
+
"acc_stderr,none": 0.04461960433384741
|
| 263 |
+
},
|
| 264 |
+
"mmlu_college_mathematics": {
|
| 265 |
+
"alias": " - college_mathematics",
|
| 266 |
+
"acc,none": 0.3,
|
| 267 |
+
"acc_stderr,none": 0.046056618647183814
|
| 268 |
+
},
|
| 269 |
+
"mmlu_college_physics": {
|
| 270 |
+
"alias": " - college_physics",
|
| 271 |
+
"acc,none": 0.3431372549019608,
|
| 272 |
+
"acc_stderr,none": 0.047240073523838896
|
| 273 |
+
},
|
| 274 |
+
"mmlu_computer_security": {
|
| 275 |
+
"alias": " - computer_security",
|
| 276 |
+
"acc,none": 0.23,
|
| 277 |
+
"acc_stderr,none": 0.04229525846816506
|
| 278 |
+
},
|
| 279 |
+
"mmlu_conceptual_physics": {
|
| 280 |
+
"alias": " - conceptual_physics",
|
| 281 |
+
"acc,none": 0.20851063829787234,
|
| 282 |
+
"acc_stderr,none": 0.026556982117838725
|
| 283 |
+
},
|
| 284 |
+
"mmlu_electrical_engineering": {
|
| 285 |
+
"alias": " - electrical_engineering",
|
| 286 |
+
"acc,none": 0.2,
|
| 287 |
+
"acc_stderr,none": 0.033333333333333284
|
| 288 |
+
},
|
| 289 |
+
"mmlu_elementary_mathematics": {
|
| 290 |
+
"alias": " - elementary_mathematics",
|
| 291 |
+
"acc,none": 0.2724867724867725,
|
| 292 |
+
"acc_stderr,none": 0.022930973071633356
|
| 293 |
+
},
|
| 294 |
+
"mmlu_high_school_biology": {
|
| 295 |
+
"alias": " - high_school_biology",
|
| 296 |
+
"acc,none": 0.2903225806451613,
|
| 297 |
+
"acc_stderr,none": 0.025822106119415898
|
| 298 |
+
},
|
| 299 |
+
"mmlu_high_school_chemistry": {
|
| 300 |
+
"alias": " - high_school_chemistry",
|
| 301 |
+
"acc,none": 0.2561576354679803,
|
| 302 |
+
"acc_stderr,none": 0.030712730070982592
|
| 303 |
+
},
|
| 304 |
+
"mmlu_high_school_computer_science": {
|
| 305 |
+
"alias": " - high_school_computer_science",
|
| 306 |
+
"acc,none": 0.23,
|
| 307 |
+
"acc_stderr,none": 0.04229525846816508
|
| 308 |
+
},
|
| 309 |
+
"mmlu_high_school_mathematics": {
|
| 310 |
+
"alias": " - high_school_mathematics",
|
| 311 |
+
"acc,none": 0.3,
|
| 312 |
+
"acc_stderr,none": 0.027940457136228416
|
| 313 |
+
},
|
| 314 |
+
"mmlu_high_school_physics": {
|
| 315 |
+
"alias": " - high_school_physics",
|
| 316 |
+
"acc,none": 0.2781456953642384,
|
| 317 |
+
"acc_stderr,none": 0.03658603262763743
|
| 318 |
+
},
|
| 319 |
+
"mmlu_high_school_statistics": {
|
| 320 |
+
"alias": " - high_school_statistics",
|
| 321 |
+
"acc,none": 0.36574074074074076,
|
| 322 |
+
"acc_stderr,none": 0.032847388576472056
|
| 323 |
+
},
|
| 324 |
+
"mmlu_machine_learning": {
|
| 325 |
+
"alias": " - machine_learning",
|
| 326 |
+
"acc,none": 0.24107142857142858,
|
| 327 |
+
"acc_stderr,none": 0.04059867246952687
|
| 328 |
+
}
|
| 329 |
+
}
|
| 330 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T21-00-05.534236_lambada_multilingual.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_multilingual"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 0,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
0
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"lambada_openai_mt_de": {
|
| 20 |
+
"alias": "lambada_openai_mt_de",
|
| 21 |
+
"perplexity,none": 217.60668191767158,
|
| 22 |
+
"perplexity_stderr,none": 13.386879667864363,
|
| 23 |
+
"acc,none": 0.24238307781874635,
|
| 24 |
+
"acc_stderr,none": 0.00597018864415414
|
| 25 |
+
},
|
| 26 |
+
"lambada_openai_mt_en": {
|
| 27 |
+
"alias": "lambada_openai_mt_en",
|
| 28 |
+
"perplexity,none": 12.40735077022245,
|
| 29 |
+
"perplexity_stderr,none": 0.37458781378621286,
|
| 30 |
+
"acc,none": 0.48981176013972444,
|
| 31 |
+
"acc_stderr,none": 0.006964531366864929
|
| 32 |
+
},
|
| 33 |
+
"lambada_openai_mt_es": {
|
| 34 |
+
"alias": "lambada_openai_mt_es",
|
| 35 |
+
"perplexity,none": 266.1166411896921,
|
| 36 |
+
"perplexity_stderr,none": 15.92044089608634,
|
| 37 |
+
"acc,none": 0.2501455462837182,
|
| 38 |
+
"acc_stderr,none": 0.006033883877757193
|
| 39 |
+
},
|
| 40 |
+
"lambada_openai_mt_fr": {
|
| 41 |
+
"alias": "lambada_openai_mt_fr",
|
| 42 |
+
"perplexity,none": 128.38541768361372,
|
| 43 |
+
"perplexity_stderr,none": 7.512376165987988,
|
| 44 |
+
"acc,none": 0.31476809625460894,
|
| 45 |
+
"acc_stderr,none": 0.0064703267662255814
|
| 46 |
+
},
|
| 47 |
+
"lambada_openai_mt_it": {
|
| 48 |
+
"alias": "lambada_openai_mt_it",
|
| 49 |
+
"perplexity,none": 209.98639759161097,
|
| 50 |
+
"perplexity_stderr,none": 13.315525534037729,
|
| 51 |
+
"acc,none": 0.2850766543760916,
|
| 52 |
+
"acc_stderr,none": 0.006289594388741717
|
| 53 |
+
}
|
| 54 |
+
}
|
| 55 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T21-15-59.241658_pawsx.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"pawsx"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 0,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
0
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"pawsx": {
|
| 20 |
+
"acc,none": 0.4645714285714286,
|
| 21 |
+
"acc_stderr,none": 0.004212978865599906,
|
| 22 |
+
"alias": "pawsx"
|
| 23 |
+
},
|
| 24 |
+
"paws_de": {
|
| 25 |
+
"alias": " - paws_de",
|
| 26 |
+
"acc,none": 0.4535,
|
| 27 |
+
"acc_stderr,none": 0.011134669525078671
|
| 28 |
+
},
|
| 29 |
+
"paws_en": {
|
| 30 |
+
"alias": " - paws_en",
|
| 31 |
+
"acc,none": 0.466,
|
| 32 |
+
"acc_stderr,none": 0.01115725065242577
|
| 33 |
+
},
|
| 34 |
+
"paws_es": {
|
| 35 |
+
"alias": " - paws_es",
|
| 36 |
+
"acc,none": 0.508,
|
| 37 |
+
"acc_stderr,none": 0.01118170448803001
|
| 38 |
+
},
|
| 39 |
+
"paws_fr": {
|
| 40 |
+
"alias": " - paws_fr",
|
| 41 |
+
"acc,none": 0.4715,
|
| 42 |
+
"acc_stderr,none": 0.011164954236428791
|
| 43 |
+
},
|
| 44 |
+
"paws_ja": {
|
| 45 |
+
"alias": " - paws_ja",
|
| 46 |
+
"acc,none": 0.4455,
|
| 47 |
+
"acc_stderr,none": 0.011116504096687392
|
| 48 |
+
},
|
| 49 |
+
"paws_ko": {
|
| 50 |
+
"alias": " - paws_ko",
|
| 51 |
+
"acc,none": 0.4555,
|
| 52 |
+
"acc_stderr,none": 0.011138757154883975
|
| 53 |
+
},
|
| 54 |
+
"paws_zh": {
|
| 55 |
+
"alias": " - paws_zh",
|
| 56 |
+
"acc,none": 0.452,
|
| 57 |
+
"acc_stderr,none": 0.01113148485052578
|
| 58 |
+
}
|
| 59 |
+
}
|
| 60 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-13T21-20-10.333493_xcopa.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xcopa"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 0,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
0
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xcopa": {
|
| 20 |
+
"acc,none": 0.5296363636363637,
|
| 21 |
+
"acc_stderr,none": 0.006732007807068195,
|
| 22 |
+
"alias": "xcopa"
|
| 23 |
+
},
|
| 24 |
+
"xcopa_et": {
|
| 25 |
+
"alias": " - xcopa_et",
|
| 26 |
+
"acc,none": 0.496,
|
| 27 |
+
"acc_stderr,none": 0.02238235778196214
|
| 28 |
+
},
|
| 29 |
+
"xcopa_ht": {
|
| 30 |
+
"alias": " - xcopa_ht",
|
| 31 |
+
"acc,none": 0.526,
|
| 32 |
+
"acc_stderr,none": 0.02235279165091416
|
| 33 |
+
},
|
| 34 |
+
"xcopa_id": {
|
| 35 |
+
"alias": " - xcopa_id",
|
| 36 |
+
"acc,none": 0.56,
|
| 37 |
+
"acc_stderr,none": 0.02222133153414303
|
| 38 |
+
},
|
| 39 |
+
"xcopa_it": {
|
| 40 |
+
"alias": " - xcopa_it",
|
| 41 |
+
"acc,none": 0.524,
|
| 42 |
+
"acc_stderr,none": 0.022357273881016403
|
| 43 |
+
},
|
| 44 |
+
"xcopa_qu": {
|
| 45 |
+
"alias": " - xcopa_qu",
|
| 46 |
+
"acc,none": 0.5,
|
| 47 |
+
"acc_stderr,none": 0.022383074051792257
|
| 48 |
+
},
|
| 49 |
+
"xcopa_sw": {
|
| 50 |
+
"alias": " - xcopa_sw",
|
| 51 |
+
"acc,none": 0.528,
|
| 52 |
+
"acc_stderr,none": 0.022347949832668093
|
| 53 |
+
},
|
| 54 |
+
"xcopa_ta": {
|
| 55 |
+
"alias": " - xcopa_ta",
|
| 56 |
+
"acc,none": 0.534,
|
| 57 |
+
"acc_stderr,none": 0.02233126442325838
|
| 58 |
+
},
|
| 59 |
+
"xcopa_th": {
|
| 60 |
+
"alias": " - xcopa_th",
|
| 61 |
+
"acc,none": 0.554,
|
| 62 |
+
"acc_stderr,none": 0.022252153078595897
|
| 63 |
+
},
|
| 64 |
+
"xcopa_tr": {
|
| 65 |
+
"alias": " - xcopa_tr",
|
| 66 |
+
"acc,none": 0.524,
|
| 67 |
+
"acc_stderr,none": 0.0223572738810164
|
| 68 |
+
},
|
| 69 |
+
"xcopa_vi": {
|
| 70 |
+
"alias": " - xcopa_vi",
|
| 71 |
+
"acc,none": 0.532,
|
| 72 |
+
"acc_stderr,none": 0.022337186479044296
|
| 73 |
+
},
|
| 74 |
+
"xcopa_zh": {
|
| 75 |
+
"alias": " - xcopa_zh",
|
| 76 |
+
"acc,none": 0.548,
|
| 77 |
+
"acc_stderr,none": 0.02227969410784342
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T10-31-47.400032_xnli.json
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xnli"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 0,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
0
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xnli": {
|
| 20 |
+
"acc,none": 0.3738152610441767,
|
| 21 |
+
"acc_stderr,none": 0.0024958379883944465,
|
| 22 |
+
"alias": "xnli"
|
| 23 |
+
},
|
| 24 |
+
"xnli_ar": {
|
| 25 |
+
"alias": " - xnli_ar",
|
| 26 |
+
"acc,none": 0.3385542168674699,
|
| 27 |
+
"acc_stderr,none": 0.009485250208516881
|
| 28 |
+
},
|
| 29 |
+
"xnli_bg": {
|
| 30 |
+
"alias": " - xnli_bg",
|
| 31 |
+
"acc,none": 0.3646586345381526,
|
| 32 |
+
"acc_stderr,none": 0.009647934990250467
|
| 33 |
+
},
|
| 34 |
+
"xnli_de": {
|
| 35 |
+
"alias": " - xnli_de",
|
| 36 |
+
"acc,none": 0.41405622489959837,
|
| 37 |
+
"acc_stderr,none": 0.009872910116421196
|
| 38 |
+
},
|
| 39 |
+
"xnli_el": {
|
| 40 |
+
"alias": " - xnli_el",
|
| 41 |
+
"acc,none": 0.3417670682730924,
|
| 42 |
+
"acc_stderr,none": 0.009506977398287627
|
| 43 |
+
},
|
| 44 |
+
"xnli_en": {
|
| 45 |
+
"alias": " - xnli_en",
|
| 46 |
+
"acc,none": 0.4682730923694779,
|
| 47 |
+
"acc_stderr,none": 0.010001876146466708
|
| 48 |
+
},
|
| 49 |
+
"xnli_es": {
|
| 50 |
+
"alias": " - xnli_es",
|
| 51 |
+
"acc,none": 0.37751004016064255,
|
| 52 |
+
"acc_stderr,none": 0.009716681793584016
|
| 53 |
+
},
|
| 54 |
+
"xnli_fr": {
|
| 55 |
+
"alias": " - xnli_fr",
|
| 56 |
+
"acc,none": 0.42610441767068274,
|
| 57 |
+
"acc_stderr,none": 0.00991201637745908
|
| 58 |
+
},
|
| 59 |
+
"xnli_hi": {
|
| 60 |
+
"alias": " - xnli_hi",
|
| 61 |
+
"acc,none": 0.3542168674698795,
|
| 62 |
+
"acc_stderr,none": 0.009586620142951845
|
| 63 |
+
},
|
| 64 |
+
"xnli_ru": {
|
| 65 |
+
"alias": " - xnli_ru",
|
| 66 |
+
"acc,none": 0.42289156626506025,
|
| 67 |
+
"acc_stderr,none": 0.009902179034797433
|
| 68 |
+
},
|
| 69 |
+
"xnli_sw": {
|
| 70 |
+
"alias": " - xnli_sw",
|
| 71 |
+
"acc,none": 0.351004016064257,
|
| 72 |
+
"acc_stderr,none": 0.0095667538348033
|
| 73 |
+
},
|
| 74 |
+
"xnli_th": {
|
| 75 |
+
"alias": " - xnli_th",
|
| 76 |
+
"acc,none": 0.3485943775100402,
|
| 77 |
+
"acc_stderr,none": 0.009551542053301817
|
| 78 |
+
},
|
| 79 |
+
"xnli_tr": {
|
| 80 |
+
"alias": " - xnli_tr",
|
| 81 |
+
"acc,none": 0.3550200803212851,
|
| 82 |
+
"acc_stderr,none": 0.009591512730974291
|
| 83 |
+
},
|
| 84 |
+
"xnli_ur": {
|
| 85 |
+
"alias": " - xnli_ur",
|
| 86 |
+
"acc,none": 0.336144578313253,
|
| 87 |
+
"acc_stderr,none": 0.009468634669293527
|
| 88 |
+
},
|
| 89 |
+
"xnli_vi": {
|
| 90 |
+
"alias": " - xnli_vi",
|
| 91 |
+
"acc,none": 0.37028112449799194,
|
| 92 |
+
"acc_stderr,none": 0.009678915409840288
|
| 93 |
+
},
|
| 94 |
+
"xnli_zh": {
|
| 95 |
+
"alias": " - xnli_zh",
|
| 96 |
+
"acc,none": 0.3381526104417671,
|
| 97 |
+
"acc_stderr,none": 0.009482500057981024
|
| 98 |
+
}
|
| 99 |
+
}
|
| 100 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T10-48-34.071254_xstorycloze.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xstorycloze"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 0,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
0
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xstorycloze": {
|
| 20 |
+
"acc,none": 0.5253594849888695,
|
| 21 |
+
"acc_stderr,none": 0.0038631877983218758,
|
| 22 |
+
"alias": "xstorycloze"
|
| 23 |
+
},
|
| 24 |
+
"xstorycloze_ar": {
|
| 25 |
+
"alias": " - xstorycloze_ar",
|
| 26 |
+
"acc,none": 0.47650562541363334,
|
| 27 |
+
"acc_stderr,none": 0.012852912530051752
|
| 28 |
+
},
|
| 29 |
+
"xstorycloze_en": {
|
| 30 |
+
"alias": " - xstorycloze_en",
|
| 31 |
+
"acc,none": 0.6307081403044341,
|
| 32 |
+
"acc_stderr,none": 0.01241968588127358
|
| 33 |
+
},
|
| 34 |
+
"xstorycloze_es": {
|
| 35 |
+
"alias": " - xstorycloze_es",
|
| 36 |
+
"acc,none": 0.5433487756452681,
|
| 37 |
+
"acc_stderr,none": 0.01281867645248196
|
| 38 |
+
},
|
| 39 |
+
"xstorycloze_eu": {
|
| 40 |
+
"alias": " - xstorycloze_eu",
|
| 41 |
+
"acc,none": 0.5201853077432164,
|
| 42 |
+
"acc_stderr,none": 0.012856635706498292
|
| 43 |
+
},
|
| 44 |
+
"xstorycloze_hi": {
|
| 45 |
+
"alias": " - xstorycloze_hi",
|
| 46 |
+
"acc,none": 0.5129053606882858,
|
| 47 |
+
"acc_stderr,none": 0.012862838605728474
|
| 48 |
+
},
|
| 49 |
+
"xstorycloze_id": {
|
| 50 |
+
"alias": " - xstorycloze_id",
|
| 51 |
+
"acc,none": 0.5069490403706155,
|
| 52 |
+
"acc_stderr,none": 0.01286588257096072
|
| 53 |
+
},
|
| 54 |
+
"xstorycloze_my": {
|
| 55 |
+
"alias": " - xstorycloze_my",
|
| 56 |
+
"acc,none": 0.5016545334215751,
|
| 57 |
+
"acc_stderr,none": 0.012867054869163341
|
| 58 |
+
},
|
| 59 |
+
"xstorycloze_ru": {
|
| 60 |
+
"alias": " - xstorycloze_ru",
|
| 61 |
+
"acc,none": 0.5155526141628061,
|
| 62 |
+
"acc_stderr,none": 0.012860899111470784
|
| 63 |
+
},
|
| 64 |
+
"xstorycloze_sw": {
|
| 65 |
+
"alias": " - xstorycloze_sw",
|
| 66 |
+
"acc,none": 0.49636002647253474,
|
| 67 |
+
"acc_stderr,none": 0.01286678434828923
|
| 68 |
+
},
|
| 69 |
+
"xstorycloze_te": {
|
| 70 |
+
"alias": " - xstorycloze_te",
|
| 71 |
+
"acc,none": 0.5360688285903376,
|
| 72 |
+
"acc_stderr,none": 0.012833602406620017
|
| 73 |
+
},
|
| 74 |
+
"xstorycloze_zh": {
|
| 75 |
+
"alias": " - xstorycloze_zh",
|
| 76 |
+
"acc,none": 0.5387160820648577,
|
| 77 |
+
"acc_stderr,none": 0.012828493353271535
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T10-51-40.216025_xwinograd.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xwinograd"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 0,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
0
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xwinograd": {
|
| 20 |
+
"acc,none": 0.640143852551135,
|
| 21 |
+
"acc_stderr,none": 0.007122015764479138,
|
| 22 |
+
"alias": "xwinograd"
|
| 23 |
+
},
|
| 24 |
+
"xwinograd_en": {
|
| 25 |
+
"alias": " - xwinograd_en",
|
| 26 |
+
"acc,none": 0.7002150537634408,
|
| 27 |
+
"acc_stderr,none": 0.00950391558599881
|
| 28 |
+
},
|
| 29 |
+
"xwinograd_fr": {
|
| 30 |
+
"alias": " - xwinograd_fr",
|
| 31 |
+
"acc,none": 0.6144578313253012,
|
| 32 |
+
"acc_stderr,none": 0.0537495779731939
|
| 33 |
+
},
|
| 34 |
+
"xwinograd_jp": {
|
| 35 |
+
"alias": " - xwinograd_jp",
|
| 36 |
+
"acc,none": 0.5464025026068822,
|
| 37 |
+
"acc_stderr,none": 0.016084549821364785
|
| 38 |
+
},
|
| 39 |
+
"xwinograd_pt": {
|
| 40 |
+
"alias": " - xwinograd_pt",
|
| 41 |
+
"acc,none": 0.5247148288973384,
|
| 42 |
+
"acc_stderr,none": 0.030852343325490784
|
| 43 |
+
},
|
| 44 |
+
"xwinograd_ru": {
|
| 45 |
+
"alias": " - xwinograd_ru",
|
| 46 |
+
"acc,none": 0.5650793650793651,
|
| 47 |
+
"acc_stderr,none": 0.027976598287184684
|
| 48 |
+
},
|
| 49 |
+
"xwinograd_zh": {
|
| 50 |
+
"alias": " - xwinograd_zh",
|
| 51 |
+
"acc,none": 0.6527777777777778,
|
| 52 |
+
"acc_stderr,none": 0.021227675707409237
|
| 53 |
+
}
|
| 54 |
+
}
|
| 55 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_0/0.4.8_2025-03-14T11-47-43.816593_mmlu.json
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"mmlu"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 5,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 0,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
0
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"mmlu": {
|
| 20 |
+
"acc,none": 0.2577980344680245,
|
| 21 |
+
"acc_stderr,none": 0.003687241789650295,
|
| 22 |
+
"alias": "mmlu"
|
| 23 |
+
},
|
| 24 |
+
"mmlu_humanities": {
|
| 25 |
+
"acc,none": 0.2459086078639745,
|
| 26 |
+
"acc_stderr,none": 0.006272632557690886,
|
| 27 |
+
"alias": " - humanities"
|
| 28 |
+
},
|
| 29 |
+
"mmlu_formal_logic": {
|
| 30 |
+
"alias": " - formal_logic",
|
| 31 |
+
"acc,none": 0.3333333333333333,
|
| 32 |
+
"acc_stderr,none": 0.04216370213557836
|
| 33 |
+
},
|
| 34 |
+
"mmlu_high_school_european_history": {
|
| 35 |
+
"alias": " - high_school_european_history",
|
| 36 |
+
"acc,none": 0.3151515151515151,
|
| 37 |
+
"acc_stderr,none": 0.0362773057502241
|
| 38 |
+
},
|
| 39 |
+
"mmlu_high_school_us_history": {
|
| 40 |
+
"alias": " - high_school_us_history",
|
| 41 |
+
"acc,none": 0.25,
|
| 42 |
+
"acc_stderr,none": 0.03039153369274154
|
| 43 |
+
},
|
| 44 |
+
"mmlu_high_school_world_history": {
|
| 45 |
+
"alias": " - high_school_world_history",
|
| 46 |
+
"acc,none": 0.2742616033755274,
|
| 47 |
+
"acc_stderr,none": 0.02904133351059801
|
| 48 |
+
},
|
| 49 |
+
"mmlu_international_law": {
|
| 50 |
+
"alias": " - international_law",
|
| 51 |
+
"acc,none": 0.2396694214876033,
|
| 52 |
+
"acc_stderr,none": 0.03896878985070417
|
| 53 |
+
},
|
| 54 |
+
"mmlu_jurisprudence": {
|
| 55 |
+
"alias": " - jurisprudence",
|
| 56 |
+
"acc,none": 0.21296296296296297,
|
| 57 |
+
"acc_stderr,none": 0.0395783547198098
|
| 58 |
+
},
|
| 59 |
+
"mmlu_logical_fallacies": {
|
| 60 |
+
"alias": " - logical_fallacies",
|
| 61 |
+
"acc,none": 0.2392638036809816,
|
| 62 |
+
"acc_stderr,none": 0.03351953879521271
|
| 63 |
+
},
|
| 64 |
+
"mmlu_moral_disputes": {
|
| 65 |
+
"alias": " - moral_disputes",
|
| 66 |
+
"acc,none": 0.22832369942196531,
|
| 67 |
+
"acc_stderr,none": 0.022598703804321635
|
| 68 |
+
},
|
| 69 |
+
"mmlu_moral_scenarios": {
|
| 70 |
+
"alias": " - moral_scenarios",
|
| 71 |
+
"acc,none": 0.2547486033519553,
|
| 72 |
+
"acc_stderr,none": 0.014572650383409158
|
| 73 |
+
},
|
| 74 |
+
"mmlu_philosophy": {
|
| 75 |
+
"alias": " - philosophy",
|
| 76 |
+
"acc,none": 0.1832797427652733,
|
| 77 |
+
"acc_stderr,none": 0.021974198848265805
|
| 78 |
+
},
|
| 79 |
+
"mmlu_prehistory": {
|
| 80 |
+
"alias": " - prehistory",
|
| 81 |
+
"acc,none": 0.21604938271604937,
|
| 82 |
+
"acc_stderr,none": 0.022899162918445785
|
| 83 |
+
},
|
| 84 |
+
"mmlu_professional_law": {
|
| 85 |
+
"alias": " - professional_law",
|
| 86 |
+
"acc,none": 0.2503259452411995,
|
| 87 |
+
"acc_stderr,none": 0.011064151027165441
|
| 88 |
+
},
|
| 89 |
+
"mmlu_world_religions": {
|
| 90 |
+
"alias": " - world_religions",
|
| 91 |
+
"acc,none": 0.2222222222222222,
|
| 92 |
+
"acc_stderr,none": 0.031885780176863984
|
| 93 |
+
},
|
| 94 |
+
"mmlu_other": {
|
| 95 |
+
"acc,none": 0.2581268104280657,
|
| 96 |
+
"acc_stderr,none": 0.007836786127741097,
|
| 97 |
+
"alias": " - other"
|
| 98 |
+
},
|
| 99 |
+
"mmlu_business_ethics": {
|
| 100 |
+
"alias": " - business_ethics",
|
| 101 |
+
"acc,none": 0.24,
|
| 102 |
+
"acc_stderr,none": 0.04292346959909283
|
| 103 |
+
},
|
| 104 |
+
"mmlu_clinical_knowledge": {
|
| 105 |
+
"alias": " - clinical_knowledge",
|
| 106 |
+
"acc,none": 0.2943396226415094,
|
| 107 |
+
"acc_stderr,none": 0.028049186315695245
|
| 108 |
+
},
|
| 109 |
+
"mmlu_college_medicine": {
|
| 110 |
+
"alias": " - college_medicine",
|
| 111 |
+
"acc,none": 0.2543352601156069,
|
| 112 |
+
"acc_stderr,none": 0.0332055644308557
|
| 113 |
+
},
|
| 114 |
+
"mmlu_global_facts": {
|
| 115 |
+
"alias": " - global_facts",
|
| 116 |
+
"acc,none": 0.2,
|
| 117 |
+
"acc_stderr,none": 0.04020151261036843
|
| 118 |
+
},
|
| 119 |
+
"mmlu_human_aging": {
|
| 120 |
+
"alias": " - human_aging",
|
| 121 |
+
"acc,none": 0.28699551569506726,
|
| 122 |
+
"acc_stderr,none": 0.03036037971029195
|
| 123 |
+
},
|
| 124 |
+
"mmlu_management": {
|
| 125 |
+
"alias": " - management",
|
| 126 |
+
"acc,none": 0.33980582524271846,
|
| 127 |
+
"acc_stderr,none": 0.046897659372781356
|
| 128 |
+
},
|
| 129 |
+
"mmlu_marketing": {
|
| 130 |
+
"alias": " - marketing",
|
| 131 |
+
"acc,none": 0.2606837606837607,
|
| 132 |
+
"acc_stderr,none": 0.028760348956523414
|
| 133 |
+
},
|
| 134 |
+
"mmlu_medical_genetics": {
|
| 135 |
+
"alias": " - medical_genetics",
|
| 136 |
+
"acc,none": 0.22,
|
| 137 |
+
"acc_stderr,none": 0.04163331998932269
|
| 138 |
+
},
|
| 139 |
+
"mmlu_miscellaneous": {
|
| 140 |
+
"alias": " - miscellaneous",
|
| 141 |
+
"acc,none": 0.21966794380587484,
|
| 142 |
+
"acc_stderr,none": 0.014805384478371158
|
| 143 |
+
},
|
| 144 |
+
"mmlu_nutrition": {
|
| 145 |
+
"alias": " - nutrition",
|
| 146 |
+
"acc,none": 0.28104575163398693,
|
| 147 |
+
"acc_stderr,none": 0.025738854797818705
|
| 148 |
+
},
|
| 149 |
+
"mmlu_professional_accounting": {
|
| 150 |
+
"alias": " - professional_accounting",
|
| 151 |
+
"acc,none": 0.25886524822695034,
|
| 152 |
+
"acc_stderr,none": 0.026129572527180848
|
| 153 |
+
},
|
| 154 |
+
"mmlu_professional_medicine": {
|
| 155 |
+
"alias": " - professional_medicine",
|
| 156 |
+
"acc,none": 0.3235294117647059,
|
| 157 |
+
"acc_stderr,none": 0.028418208619406794
|
| 158 |
+
},
|
| 159 |
+
"mmlu_virology": {
|
| 160 |
+
"alias": " - virology",
|
| 161 |
+
"acc,none": 0.21084337349397592,
|
| 162 |
+
"acc_stderr,none": 0.031755547866299194
|
| 163 |
+
},
|
| 164 |
+
"mmlu_social_sciences": {
|
| 165 |
+
"acc,none": 0.2612934676633084,
|
| 166 |
+
"acc_stderr,none": 0.007931489440202161,
|
| 167 |
+
"alias": " - social sciences"
|
| 168 |
+
},
|
| 169 |
+
"mmlu_econometrics": {
|
| 170 |
+
"alias": " - econometrics",
|
| 171 |
+
"acc,none": 0.23684210526315788,
|
| 172 |
+
"acc_stderr,none": 0.039994238792813344
|
| 173 |
+
},
|
| 174 |
+
"mmlu_high_school_geography": {
|
| 175 |
+
"alias": " - high_school_geography",
|
| 176 |
+
"acc,none": 0.2878787878787879,
|
| 177 |
+
"acc_stderr,none": 0.03225883512300992
|
| 178 |
+
},
|
| 179 |
+
"mmlu_high_school_government_and_politics": {
|
| 180 |
+
"alias": " - high_school_government_and_politics",
|
| 181 |
+
"acc,none": 0.25906735751295334,
|
| 182 |
+
"acc_stderr,none": 0.0316187791793541
|
| 183 |
+
},
|
| 184 |
+
"mmlu_high_school_macroeconomics": {
|
| 185 |
+
"alias": " - high_school_macroeconomics",
|
| 186 |
+
"acc,none": 0.26153846153846155,
|
| 187 |
+
"acc_stderr,none": 0.022282141204204426
|
| 188 |
+
},
|
| 189 |
+
"mmlu_high_school_microeconomics": {
|
| 190 |
+
"alias": " - high_school_microeconomics",
|
| 191 |
+
"acc,none": 0.2857142857142857,
|
| 192 |
+
"acc_stderr,none": 0.029344572500634335
|
| 193 |
+
},
|
| 194 |
+
"mmlu_high_school_psychology": {
|
| 195 |
+
"alias": " - high_school_psychology",
|
| 196 |
+
"acc,none": 0.25504587155963304,
|
| 197 |
+
"acc_stderr,none": 0.018688500856535832
|
| 198 |
+
},
|
| 199 |
+
"mmlu_human_sexuality": {
|
| 200 |
+
"alias": " - human_sexuality",
|
| 201 |
+
"acc,none": 0.2748091603053435,
|
| 202 |
+
"acc_stderr,none": 0.03915345408847836
|
| 203 |
+
},
|
| 204 |
+
"mmlu_professional_psychology": {
|
| 205 |
+
"alias": " - professional_psychology",
|
| 206 |
+
"acc,none": 0.25980392156862747,
|
| 207 |
+
"acc_stderr,none": 0.017740899509177795
|
| 208 |
+
},
|
| 209 |
+
"mmlu_public_relations": {
|
| 210 |
+
"alias": " - public_relations",
|
| 211 |
+
"acc,none": 0.2636363636363636,
|
| 212 |
+
"acc_stderr,none": 0.04220224692971987
|
| 213 |
+
},
|
| 214 |
+
"mmlu_security_studies": {
|
| 215 |
+
"alias": " - security_studies",
|
| 216 |
+
"acc,none": 0.24081632653061225,
|
| 217 |
+
"acc_stderr,none": 0.027372942201788163
|
| 218 |
+
},
|
| 219 |
+
"mmlu_sociology": {
|
| 220 |
+
"alias": " - sociology",
|
| 221 |
+
"acc,none": 0.2736318407960199,
|
| 222 |
+
"acc_stderr,none": 0.031524391865554016
|
| 223 |
+
},
|
| 224 |
+
"mmlu_us_foreign_policy": {
|
| 225 |
+
"alias": " - us_foreign_policy",
|
| 226 |
+
"acc,none": 0.23,
|
| 227 |
+
"acc_stderr,none": 0.04229525846816505
|
| 228 |
+
},
|
| 229 |
+
"mmlu_stem": {
|
| 230 |
+
"acc,none": 0.2718046305106248,
|
| 231 |
+
"acc_stderr,none": 0.00790588931133328,
|
| 232 |
+
"alias": " - stem"
|
| 233 |
+
},
|
| 234 |
+
"mmlu_abstract_algebra": {
|
| 235 |
+
"alias": " - abstract_algebra",
|
| 236 |
+
"acc,none": 0.22,
|
| 237 |
+
"acc_stderr,none": 0.04163331998932269
|
| 238 |
+
},
|
| 239 |
+
"mmlu_anatomy": {
|
| 240 |
+
"alias": " - anatomy",
|
| 241 |
+
"acc,none": 0.2222222222222222,
|
| 242 |
+
"acc_stderr,none": 0.03591444084196969
|
| 243 |
+
},
|
| 244 |
+
"mmlu_astronomy": {
|
| 245 |
+
"alias": " - astronomy",
|
| 246 |
+
"acc,none": 0.27631578947368424,
|
| 247 |
+
"acc_stderr,none": 0.03639057569952924
|
| 248 |
+
},
|
| 249 |
+
"mmlu_college_biology": {
|
| 250 |
+
"alias": " - college_biology",
|
| 251 |
+
"acc,none": 0.2569444444444444,
|
| 252 |
+
"acc_stderr,none": 0.03653946969442099
|
| 253 |
+
},
|
| 254 |
+
"mmlu_college_chemistry": {
|
| 255 |
+
"alias": " - college_chemistry",
|
| 256 |
+
"acc,none": 0.36,
|
| 257 |
+
"acc_stderr,none": 0.04824181513244218
|
| 258 |
+
},
|
| 259 |
+
"mmlu_college_computer_science": {
|
| 260 |
+
"alias": " - college_computer_science",
|
| 261 |
+
"acc,none": 0.27,
|
| 262 |
+
"acc_stderr,none": 0.04461960433384741
|
| 263 |
+
},
|
| 264 |
+
"mmlu_college_mathematics": {
|
| 265 |
+
"alias": " - college_mathematics",
|
| 266 |
+
"acc,none": 0.3,
|
| 267 |
+
"acc_stderr,none": 0.046056618647183814
|
| 268 |
+
},
|
| 269 |
+
"mmlu_college_physics": {
|
| 270 |
+
"alias": " - college_physics",
|
| 271 |
+
"acc,none": 0.3431372549019608,
|
| 272 |
+
"acc_stderr,none": 0.047240073523838896
|
| 273 |
+
},
|
| 274 |
+
"mmlu_computer_security": {
|
| 275 |
+
"alias": " - computer_security",
|
| 276 |
+
"acc,none": 0.23,
|
| 277 |
+
"acc_stderr,none": 0.04229525846816506
|
| 278 |
+
},
|
| 279 |
+
"mmlu_conceptual_physics": {
|
| 280 |
+
"alias": " - conceptual_physics",
|
| 281 |
+
"acc,none": 0.20851063829787234,
|
| 282 |
+
"acc_stderr,none": 0.026556982117838725
|
| 283 |
+
},
|
| 284 |
+
"mmlu_electrical_engineering": {
|
| 285 |
+
"alias": " - electrical_engineering",
|
| 286 |
+
"acc,none": 0.2,
|
| 287 |
+
"acc_stderr,none": 0.033333333333333284
|
| 288 |
+
},
|
| 289 |
+
"mmlu_elementary_mathematics": {
|
| 290 |
+
"alias": " - elementary_mathematics",
|
| 291 |
+
"acc,none": 0.2724867724867725,
|
| 292 |
+
"acc_stderr,none": 0.022930973071633356
|
| 293 |
+
},
|
| 294 |
+
"mmlu_high_school_biology": {
|
| 295 |
+
"alias": " - high_school_biology",
|
| 296 |
+
"acc,none": 0.2903225806451613,
|
| 297 |
+
"acc_stderr,none": 0.025822106119415898
|
| 298 |
+
},
|
| 299 |
+
"mmlu_high_school_chemistry": {
|
| 300 |
+
"alias": " - high_school_chemistry",
|
| 301 |
+
"acc,none": 0.2561576354679803,
|
| 302 |
+
"acc_stderr,none": 0.030712730070982592
|
| 303 |
+
},
|
| 304 |
+
"mmlu_high_school_computer_science": {
|
| 305 |
+
"alias": " - high_school_computer_science",
|
| 306 |
+
"acc,none": 0.23,
|
| 307 |
+
"acc_stderr,none": 0.04229525846816508
|
| 308 |
+
},
|
| 309 |
+
"mmlu_high_school_mathematics": {
|
| 310 |
+
"alias": " - high_school_mathematics",
|
| 311 |
+
"acc,none": 0.3,
|
| 312 |
+
"acc_stderr,none": 0.027940457136228416
|
| 313 |
+
},
|
| 314 |
+
"mmlu_high_school_physics": {
|
| 315 |
+
"alias": " - high_school_physics",
|
| 316 |
+
"acc,none": 0.2781456953642384,
|
| 317 |
+
"acc_stderr,none": 0.03658603262763743
|
| 318 |
+
},
|
| 319 |
+
"mmlu_high_school_statistics": {
|
| 320 |
+
"alias": " - high_school_statistics",
|
| 321 |
+
"acc,none": 0.36574074074074076,
|
| 322 |
+
"acc_stderr,none": 0.032847388576472056
|
| 323 |
+
},
|
| 324 |
+
"mmlu_machine_learning": {
|
| 325 |
+
"alias": " - machine_learning",
|
| 326 |
+
"acc,none": 0.24107142857142858,
|
| 327 |
+
"acc_stderr,none": 0.04059867246952687
|
| 328 |
+
}
|
| 329 |
+
}
|
| 330 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T11-56-41.258584_lambada_openai.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_openai"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"lambada_openai": {
|
| 20 |
+
"alias": "lambada_openai",
|
| 21 |
+
"perplexity,none": 12.642543091460501,
|
| 22 |
+
"perplexity_stderr,none": 0.38123879021241674,
|
| 23 |
+
"acc,none": 0.4814671065398797,
|
| 24 |
+
"acc_stderr,none": 0.006961190829726007
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-14-40.804741_hellaswag.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"hellaswag"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"hellaswag": {
|
| 20 |
+
"alias": "hellaswag",
|
| 21 |
+
"acc,none": 0.34475204142601074,
|
| 22 |
+
"acc_stderr,none": 0.00474316003427116,
|
| 23 |
+
"acc_norm,none": 0.4213304122684724,
|
| 24 |
+
"acc_norm_stderr,none": 0.0049276318064775575
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-16-12.146728_piqa.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"piqa"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"piqa": {
|
| 20 |
+
"alias": "piqa",
|
| 21 |
+
"acc,none": 0.6730141458106638,
|
| 22 |
+
"acc_stderr,none": 0.010945157126978227,
|
| 23 |
+
"acc_norm,none": 0.6653971708378672,
|
| 24 |
+
"acc_norm_stderr,none": 0.011009071725162503
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-19-04.686563_arc_easy.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_easy"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_easy": {
|
| 20 |
+
"alias": "arc_easy",
|
| 21 |
+
"acc,none": 0.593013468013468,
|
| 22 |
+
"acc_stderr,none": 0.010080695355466593,
|
| 23 |
+
"acc_norm,none": 0.5437710437710438,
|
| 24 |
+
"acc_norm_stderr,none": 0.010220394383722018
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T12-20-40.573806_arc_challenge.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_challenge"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_challenge": {
|
| 20 |
+
"alias": "arc_challenge",
|
| 21 |
+
"acc,none": 0.2551194539249147,
|
| 22 |
+
"acc_stderr,none": 0.0127390386952021,
|
| 23 |
+
"acc_norm,none": 0.2909556313993174,
|
| 24 |
+
"acc_norm_stderr,none": 0.013273077865907593
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-19-33.146311_glue.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"glue"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"cola": {
|
| 20 |
+
"alias": "cola",
|
| 21 |
+
"mcc,none": 0.0,
|
| 22 |
+
"mcc_stderr,none": 0.0
|
| 23 |
+
},
|
| 24 |
+
"mnli": {
|
| 25 |
+
"alias": "mnli",
|
| 26 |
+
"acc,none": 0.31900152827305145,
|
| 27 |
+
"acc_stderr,none": 0.00470485695337838
|
| 28 |
+
},
|
| 29 |
+
"mnli_mismatch": {
|
| 30 |
+
"alias": "mnli_mismatch",
|
| 31 |
+
"acc,none": 0.31682262001627337,
|
| 32 |
+
"acc_stderr,none": 0.0046921942364258435
|
| 33 |
+
},
|
| 34 |
+
"mrpc": {
|
| 35 |
+
"alias": "mrpc",
|
| 36 |
+
"acc,none": 0.3799019607843137,
|
| 37 |
+
"acc_stderr,none": 0.024058510831539842,
|
| 38 |
+
"f1,none": 0.24925816023738873,
|
| 39 |
+
"f1_stderr,none": 0.031299043218421745
|
| 40 |
+
},
|
| 41 |
+
"qnli": {
|
| 42 |
+
"alias": "qnli",
|
| 43 |
+
"acc,none": 0.48892549881017755,
|
| 44 |
+
"acc_stderr,none": 0.006763750866374647
|
| 45 |
+
},
|
| 46 |
+
"qqp": {
|
| 47 |
+
"alias": "qqp",
|
| 48 |
+
"acc,none": 0.4644076181053673,
|
| 49 |
+
"acc_stderr,none": 0.002480392237462389,
|
| 50 |
+
"f1,none": 0.46043057908900625,
|
| 51 |
+
"f1_stderr,none": 0.0030958101543491746
|
| 52 |
+
},
|
| 53 |
+
"rte": {
|
| 54 |
+
"alias": "rte",
|
| 55 |
+
"acc,none": 0.5523465703971119,
|
| 56 |
+
"acc_stderr,none": 0.02993107036293953
|
| 57 |
+
},
|
| 58 |
+
"sst2": {
|
| 59 |
+
"alias": "sst2",
|
| 60 |
+
"acc,none": 0.801605504587156,
|
| 61 |
+
"acc_stderr,none": 0.013512511513295078
|
| 62 |
+
},
|
| 63 |
+
"wnli": {
|
| 64 |
+
"alias": "wnli",
|
| 65 |
+
"acc,none": 0.5211267605633803,
|
| 66 |
+
"acc_stderr,none": 0.05970805879899504
|
| 67 |
+
}
|
| 68 |
+
}
|
| 69 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-20-31.009617_winogrande.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"winogrande"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"winogrande": {
|
| 20 |
+
"alias": "winogrande",
|
| 21 |
+
"acc,none": 0.5272296764009471,
|
| 22 |
+
"acc_stderr,none": 0.014031631629827698
|
| 23 |
+
}
|
| 24 |
+
}
|
| 25 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-22-11.575335_sciq.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"sciq"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"sciq": {
|
| 20 |
+
"alias": "sciq",
|
| 21 |
+
"acc,none": 0.863,
|
| 22 |
+
"acc_stderr,none": 0.010878848714333299,
|
| 23 |
+
"acc_norm,none": 0.81,
|
| 24 |
+
"acc_norm_stderr,none": 0.012411851354816324
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-41-59.570521_mmlu.json
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"mmlu"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"mmlu": {
|
| 20 |
+
"acc,none": 0.25409485828229594,
|
| 21 |
+
"acc_stderr,none": 0.003673285974361475,
|
| 22 |
+
"alias": "mmlu"
|
| 23 |
+
},
|
| 24 |
+
"mmlu_humanities": {
|
| 25 |
+
"acc,none": 0.2454835281615303,
|
| 26 |
+
"acc_stderr,none": 0.006269554905683639,
|
| 27 |
+
"alias": " - humanities"
|
| 28 |
+
},
|
| 29 |
+
"mmlu_formal_logic": {
|
| 30 |
+
"alias": " - formal_logic",
|
| 31 |
+
"acc,none": 0.3412698412698413,
|
| 32 |
+
"acc_stderr,none": 0.04240799327574924
|
| 33 |
+
},
|
| 34 |
+
"mmlu_high_school_european_history": {
|
| 35 |
+
"alias": " - high_school_european_history",
|
| 36 |
+
"acc,none": 0.3151515151515151,
|
| 37 |
+
"acc_stderr,none": 0.0362773057502241
|
| 38 |
+
},
|
| 39 |
+
"mmlu_high_school_us_history": {
|
| 40 |
+
"alias": " - high_school_us_history",
|
| 41 |
+
"acc,none": 0.24509803921568626,
|
| 42 |
+
"acc_stderr,none": 0.030190282453501943
|
| 43 |
+
},
|
| 44 |
+
"mmlu_high_school_world_history": {
|
| 45 |
+
"alias": " - high_school_world_history",
|
| 46 |
+
"acc,none": 0.2742616033755274,
|
| 47 |
+
"acc_stderr,none": 0.02904133351059801
|
| 48 |
+
},
|
| 49 |
+
"mmlu_international_law": {
|
| 50 |
+
"alias": " - international_law",
|
| 51 |
+
"acc,none": 0.23140495867768596,
|
| 52 |
+
"acc_stderr,none": 0.03849856098794088
|
| 53 |
+
},
|
| 54 |
+
"mmlu_jurisprudence": {
|
| 55 |
+
"alias": " - jurisprudence",
|
| 56 |
+
"acc,none": 0.21296296296296297,
|
| 57 |
+
"acc_stderr,none": 0.039578354719809805
|
| 58 |
+
},
|
| 59 |
+
"mmlu_logical_fallacies": {
|
| 60 |
+
"alias": " - logical_fallacies",
|
| 61 |
+
"acc,none": 0.26380368098159507,
|
| 62 |
+
"acc_stderr,none": 0.03462419931615624
|
| 63 |
+
},
|
| 64 |
+
"mmlu_moral_disputes": {
|
| 65 |
+
"alias": " - moral_disputes",
|
| 66 |
+
"acc,none": 0.2398843930635838,
|
| 67 |
+
"acc_stderr,none": 0.022989592543123567
|
| 68 |
+
},
|
| 69 |
+
"mmlu_moral_scenarios": {
|
| 70 |
+
"alias": " - moral_scenarios",
|
| 71 |
+
"acc,none": 0.24581005586592178,
|
| 72 |
+
"acc_stderr,none": 0.01440029642922559
|
| 73 |
+
},
|
| 74 |
+
"mmlu_philosophy": {
|
| 75 |
+
"alias": " - philosophy",
|
| 76 |
+
"acc,none": 0.1864951768488746,
|
| 77 |
+
"acc_stderr,none": 0.022122439772480764
|
| 78 |
+
},
|
| 79 |
+
"mmlu_prehistory": {
|
| 80 |
+
"alias": " - prehistory",
|
| 81 |
+
"acc,none": 0.2191358024691358,
|
| 82 |
+
"acc_stderr,none": 0.023016705640262175
|
| 83 |
+
},
|
| 84 |
+
"mmlu_professional_law": {
|
| 85 |
+
"alias": " - professional_law",
|
| 86 |
+
"acc,none": 0.24771838331160365,
|
| 87 |
+
"acc_stderr,none": 0.011025499291443735
|
| 88 |
+
},
|
| 89 |
+
"mmlu_world_religions": {
|
| 90 |
+
"alias": " - world_religions",
|
| 91 |
+
"acc,none": 0.22807017543859648,
|
| 92 |
+
"acc_stderr,none": 0.03218093795602357
|
| 93 |
+
},
|
| 94 |
+
"mmlu_other": {
|
| 95 |
+
"acc,none": 0.25523012552301255,
|
| 96 |
+
"acc_stderr,none": 0.00781936823628676,
|
| 97 |
+
"alias": " - other"
|
| 98 |
+
},
|
| 99 |
+
"mmlu_business_ethics": {
|
| 100 |
+
"alias": " - business_ethics",
|
| 101 |
+
"acc,none": 0.22,
|
| 102 |
+
"acc_stderr,none": 0.04163331998932269
|
| 103 |
+
},
|
| 104 |
+
"mmlu_clinical_knowledge": {
|
| 105 |
+
"alias": " - clinical_knowledge",
|
| 106 |
+
"acc,none": 0.26037735849056604,
|
| 107 |
+
"acc_stderr,none": 0.02700876609070809
|
| 108 |
+
},
|
| 109 |
+
"mmlu_college_medicine": {
|
| 110 |
+
"alias": " - college_medicine",
|
| 111 |
+
"acc,none": 0.24277456647398843,
|
| 112 |
+
"acc_stderr,none": 0.0326926380614177
|
| 113 |
+
},
|
| 114 |
+
"mmlu_global_facts": {
|
| 115 |
+
"alias": " - global_facts",
|
| 116 |
+
"acc,none": 0.22,
|
| 117 |
+
"acc_stderr,none": 0.0416333199893227
|
| 118 |
+
},
|
| 119 |
+
"mmlu_human_aging": {
|
| 120 |
+
"alias": " - human_aging",
|
| 121 |
+
"acc,none": 0.32286995515695066,
|
| 122 |
+
"acc_stderr,none": 0.03138147637575498
|
| 123 |
+
},
|
| 124 |
+
"mmlu_management": {
|
| 125 |
+
"alias": " - management",
|
| 126 |
+
"acc,none": 0.2912621359223301,
|
| 127 |
+
"acc_stderr,none": 0.04498676320572924
|
| 128 |
+
},
|
| 129 |
+
"mmlu_marketing": {
|
| 130 |
+
"alias": " - marketing",
|
| 131 |
+
"acc,none": 0.2863247863247863,
|
| 132 |
+
"acc_stderr,none": 0.029614323690456648
|
| 133 |
+
},
|
| 134 |
+
"mmlu_medical_genetics": {
|
| 135 |
+
"alias": " - medical_genetics",
|
| 136 |
+
"acc,none": 0.24,
|
| 137 |
+
"acc_stderr,none": 0.04292346959909283
|
| 138 |
+
},
|
| 139 |
+
"mmlu_miscellaneous": {
|
| 140 |
+
"alias": " - miscellaneous",
|
| 141 |
+
"acc,none": 0.23371647509578544,
|
| 142 |
+
"acc_stderr,none": 0.015133383278988832
|
| 143 |
+
},
|
| 144 |
+
"mmlu_nutrition": {
|
| 145 |
+
"alias": " - nutrition",
|
| 146 |
+
"acc,none": 0.23529411764705882,
|
| 147 |
+
"acc_stderr,none": 0.024288619466046116
|
| 148 |
+
},
|
| 149 |
+
"mmlu_professional_accounting": {
|
| 150 |
+
"alias": " - professional_accounting",
|
| 151 |
+
"acc,none": 0.2553191489361702,
|
| 152 |
+
"acc_stderr,none": 0.02601199293090201
|
| 153 |
+
},
|
| 154 |
+
"mmlu_professional_medicine": {
|
| 155 |
+
"alias": " - professional_medicine",
|
| 156 |
+
"acc,none": 0.2977941176470588,
|
| 157 |
+
"acc_stderr,none": 0.027778298701545443
|
| 158 |
+
},
|
| 159 |
+
"mmlu_virology": {
|
| 160 |
+
"alias": " - virology",
|
| 161 |
+
"acc,none": 0.22289156626506024,
|
| 162 |
+
"acc_stderr,none": 0.03240004825594688
|
| 163 |
+
},
|
| 164 |
+
"mmlu_social_sciences": {
|
| 165 |
+
"acc,none": 0.25901852453688656,
|
| 166 |
+
"acc_stderr,none": 0.007907951917759793,
|
| 167 |
+
"alias": " - social sciences"
|
| 168 |
+
},
|
| 169 |
+
"mmlu_econometrics": {
|
| 170 |
+
"alias": " - econometrics",
|
| 171 |
+
"acc,none": 0.2719298245614035,
|
| 172 |
+
"acc_stderr,none": 0.041857744240220575
|
| 173 |
+
},
|
| 174 |
+
"mmlu_high_school_geography": {
|
| 175 |
+
"alias": " - high_school_geography",
|
| 176 |
+
"acc,none": 0.2828282828282828,
|
| 177 |
+
"acc_stderr,none": 0.032087795587867514
|
| 178 |
+
},
|
| 179 |
+
"mmlu_high_school_government_and_politics": {
|
| 180 |
+
"alias": " - high_school_government_and_politics",
|
| 181 |
+
"acc,none": 0.26424870466321243,
|
| 182 |
+
"acc_stderr,none": 0.03182155050916647
|
| 183 |
+
},
|
| 184 |
+
"mmlu_high_school_macroeconomics": {
|
| 185 |
+
"alias": " - high_school_macroeconomics",
|
| 186 |
+
"acc,none": 0.25384615384615383,
|
| 187 |
+
"acc_stderr,none": 0.022066054378726253
|
| 188 |
+
},
|
| 189 |
+
"mmlu_high_school_microeconomics": {
|
| 190 |
+
"alias": " - high_school_microeconomics",
|
| 191 |
+
"acc,none": 0.2689075630252101,
|
| 192 |
+
"acc_stderr,none": 0.028801392193631276
|
| 193 |
+
},
|
| 194 |
+
"mmlu_high_school_psychology": {
|
| 195 |
+
"alias": " - high_school_psychology",
|
| 196 |
+
"acc,none": 0.24403669724770644,
|
| 197 |
+
"acc_stderr,none": 0.018415286351416406
|
| 198 |
+
},
|
| 199 |
+
"mmlu_human_sexuality": {
|
| 200 |
+
"alias": " - human_sexuality",
|
| 201 |
+
"acc,none": 0.2900763358778626,
|
| 202 |
+
"acc_stderr,none": 0.03980066246467765
|
| 203 |
+
},
|
| 204 |
+
"mmlu_professional_psychology": {
|
| 205 |
+
"alias": " - professional_psychology",
|
| 206 |
+
"acc,none": 0.26143790849673204,
|
| 207 |
+
"acc_stderr,none": 0.017776947157528027
|
| 208 |
+
},
|
| 209 |
+
"mmlu_public_relations": {
|
| 210 |
+
"alias": " - public_relations",
|
| 211 |
+
"acc,none": 0.22727272727272727,
|
| 212 |
+
"acc_stderr,none": 0.04013964554072774
|
| 213 |
+
},
|
| 214 |
+
"mmlu_security_studies": {
|
| 215 |
+
"alias": " - security_studies",
|
| 216 |
+
"acc,none": 0.24489795918367346,
|
| 217 |
+
"acc_stderr,none": 0.027529637440174923
|
| 218 |
+
},
|
| 219 |
+
"mmlu_sociology": {
|
| 220 |
+
"alias": " - sociology",
|
| 221 |
+
"acc,none": 0.2835820895522388,
|
| 222 |
+
"acc_stderr,none": 0.03187187537919797
|
| 223 |
+
},
|
| 224 |
+
"mmlu_us_foreign_policy": {
|
| 225 |
+
"alias": " - us_foreign_policy",
|
| 226 |
+
"acc,none": 0.23,
|
| 227 |
+
"acc_stderr,none": 0.042295258468165065
|
| 228 |
+
},
|
| 229 |
+
"mmlu_stem": {
|
| 230 |
+
"acc,none": 0.26102124960355216,
|
| 231 |
+
"acc_stderr,none": 0.007821062178349759,
|
| 232 |
+
"alias": " - stem"
|
| 233 |
+
},
|
| 234 |
+
"mmlu_abstract_algebra": {
|
| 235 |
+
"alias": " - abstract_algebra",
|
| 236 |
+
"acc,none": 0.19,
|
| 237 |
+
"acc_stderr,none": 0.039427724440366234
|
| 238 |
+
},
|
| 239 |
+
"mmlu_anatomy": {
|
| 240 |
+
"alias": " - anatomy",
|
| 241 |
+
"acc,none": 0.21481481481481482,
|
| 242 |
+
"acc_stderr,none": 0.035478541985608236
|
| 243 |
+
},
|
| 244 |
+
"mmlu_astronomy": {
|
| 245 |
+
"alias": " - astronomy",
|
| 246 |
+
"acc,none": 0.2894736842105263,
|
| 247 |
+
"acc_stderr,none": 0.036906779861372814
|
| 248 |
+
},
|
| 249 |
+
"mmlu_college_biology": {
|
| 250 |
+
"alias": " - college_biology",
|
| 251 |
+
"acc,none": 0.2361111111111111,
|
| 252 |
+
"acc_stderr,none": 0.03551446610810826
|
| 253 |
+
},
|
| 254 |
+
"mmlu_college_chemistry": {
|
| 255 |
+
"alias": " - college_chemistry",
|
| 256 |
+
"acc,none": 0.34,
|
| 257 |
+
"acc_stderr,none": 0.047609522856952365
|
| 258 |
+
},
|
| 259 |
+
"mmlu_college_computer_science": {
|
| 260 |
+
"alias": " - college_computer_science",
|
| 261 |
+
"acc,none": 0.28,
|
| 262 |
+
"acc_stderr,none": 0.04512608598542128
|
| 263 |
+
},
|
| 264 |
+
"mmlu_college_mathematics": {
|
| 265 |
+
"alias": " - college_mathematics",
|
| 266 |
+
"acc,none": 0.25,
|
| 267 |
+
"acc_stderr,none": 0.04351941398892446
|
| 268 |
+
},
|
| 269 |
+
"mmlu_college_physics": {
|
| 270 |
+
"alias": " - college_physics",
|
| 271 |
+
"acc,none": 0.28431372549019607,
|
| 272 |
+
"acc_stderr,none": 0.04488482852329017
|
| 273 |
+
},
|
| 274 |
+
"mmlu_computer_security": {
|
| 275 |
+
"alias": " - computer_security",
|
| 276 |
+
"acc,none": 0.24,
|
| 277 |
+
"acc_stderr,none": 0.042923469599092816
|
| 278 |
+
},
|
| 279 |
+
"mmlu_conceptual_physics": {
|
| 280 |
+
"alias": " - conceptual_physics",
|
| 281 |
+
"acc,none": 0.2170212765957447,
|
| 282 |
+
"acc_stderr,none": 0.02694748312149622
|
| 283 |
+
},
|
| 284 |
+
"mmlu_electrical_engineering": {
|
| 285 |
+
"alias": " - electrical_engineering",
|
| 286 |
+
"acc,none": 0.2206896551724138,
|
| 287 |
+
"acc_stderr,none": 0.03455930201924813
|
| 288 |
+
},
|
| 289 |
+
"mmlu_elementary_mathematics": {
|
| 290 |
+
"alias": " - elementary_mathematics",
|
| 291 |
+
"acc,none": 0.2671957671957672,
|
| 292 |
+
"acc_stderr,none": 0.022789673145776575
|
| 293 |
+
},
|
| 294 |
+
"mmlu_high_school_biology": {
|
| 295 |
+
"alias": " - high_school_biology",
|
| 296 |
+
"acc,none": 0.2806451612903226,
|
| 297 |
+
"acc_stderr,none": 0.025560604721022888
|
| 298 |
+
},
|
| 299 |
+
"mmlu_high_school_chemistry": {
|
| 300 |
+
"alias": " - high_school_chemistry",
|
| 301 |
+
"acc,none": 0.2561576354679803,
|
| 302 |
+
"acc_stderr,none": 0.030712730070982592
|
| 303 |
+
},
|
| 304 |
+
"mmlu_high_school_computer_science": {
|
| 305 |
+
"alias": " - high_school_computer_science",
|
| 306 |
+
"acc,none": 0.24,
|
| 307 |
+
"acc_stderr,none": 0.04292346959909282
|
| 308 |
+
},
|
| 309 |
+
"mmlu_high_school_mathematics": {
|
| 310 |
+
"alias": " - high_school_mathematics",
|
| 311 |
+
"acc,none": 0.2740740740740741,
|
| 312 |
+
"acc_stderr,none": 0.027195934804085622
|
| 313 |
+
},
|
| 314 |
+
"mmlu_high_school_physics": {
|
| 315 |
+
"alias": " - high_school_physics",
|
| 316 |
+
"acc,none": 0.24503311258278146,
|
| 317 |
+
"acc_stderr,none": 0.03511807571804726
|
| 318 |
+
},
|
| 319 |
+
"mmlu_high_school_statistics": {
|
| 320 |
+
"alias": " - high_school_statistics",
|
| 321 |
+
"acc,none": 0.3287037037037037,
|
| 322 |
+
"acc_stderr,none": 0.03203614084670058
|
| 323 |
+
},
|
| 324 |
+
"mmlu_machine_learning": {
|
| 325 |
+
"alias": " - machine_learning",
|
| 326 |
+
"acc,none": 0.25,
|
| 327 |
+
"acc_stderr,none": 0.04109974682633932
|
| 328 |
+
}
|
| 329 |
+
}
|
| 330 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T13-51-36.484339_lambada_multilingual.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_multilingual"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"lambada_openai_mt_de": {
|
| 20 |
+
"alias": "lambada_openai_mt_de",
|
| 21 |
+
"perplexity,none": 212.25983991266492,
|
| 22 |
+
"perplexity_stderr,none": 13.036191156227552,
|
| 23 |
+
"acc,none": 0.2456821269163594,
|
| 24 |
+
"acc_stderr,none": 0.0059975800540142
|
| 25 |
+
},
|
| 26 |
+
"lambada_openai_mt_en": {
|
| 27 |
+
"alias": "lambada_openai_mt_en",
|
| 28 |
+
"perplexity,none": 12.64254308002806,
|
| 29 |
+
"perplexity_stderr,none": 0.3812387886051039,
|
| 30 |
+
"acc,none": 0.4814671065398797,
|
| 31 |
+
"acc_stderr,none": 0.006961190829726007
|
| 32 |
+
},
|
| 33 |
+
"lambada_openai_mt_es": {
|
| 34 |
+
"alias": "lambada_openai_mt_es",
|
| 35 |
+
"perplexity,none": 266.87295366430163,
|
| 36 |
+
"perplexity_stderr,none": 15.932942020346013,
|
| 37 |
+
"acc,none": 0.2536386570929556,
|
| 38 |
+
"acc_stderr,none": 0.006061698956508257
|
| 39 |
+
},
|
| 40 |
+
"lambada_openai_mt_fr": {
|
| 41 |
+
"alias": "lambada_openai_mt_fr",
|
| 42 |
+
"perplexity,none": 129.37466016136514,
|
| 43 |
+
"perplexity_stderr,none": 7.571204962328844,
|
| 44 |
+
"acc,none": 0.31593246652435475,
|
| 45 |
+
"acc_stderr,none": 0.0064767732277837935
|
| 46 |
+
},
|
| 47 |
+
"lambada_openai_mt_it": {
|
| 48 |
+
"alias": "lambada_openai_mt_it",
|
| 49 |
+
"perplexity,none": 209.52126390403453,
|
| 50 |
+
"perplexity_stderr,none": 13.299710278444714,
|
| 51 |
+
"acc,none": 0.2856588395109645,
|
| 52 |
+
"acc_stderr,none": 0.00629344939005611
|
| 53 |
+
}
|
| 54 |
+
}
|
| 55 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T14-07-25.400235_pawsx.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"pawsx"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"pawsx": {
|
| 20 |
+
"acc,none": 0.4612857142857143,
|
| 21 |
+
"acc_stderr,none": 0.004212518419096327,
|
| 22 |
+
"alias": "pawsx"
|
| 23 |
+
},
|
| 24 |
+
"paws_de": {
|
| 25 |
+
"alias": " - paws_de",
|
| 26 |
+
"acc,none": 0.451,
|
| 27 |
+
"acc_stderr,none": 0.011129305041886329
|
| 28 |
+
},
|
| 29 |
+
"paws_en": {
|
| 30 |
+
"alias": " - paws_en",
|
| 31 |
+
"acc,none": 0.4705,
|
| 32 |
+
"acc_stderr,none": 0.011163654804511655
|
| 33 |
+
},
|
| 34 |
+
"paws_es": {
|
| 35 |
+
"alias": " - paws_es",
|
| 36 |
+
"acc,none": 0.4885,
|
| 37 |
+
"acc_stderr,none": 0.011180177690296084
|
| 38 |
+
},
|
| 39 |
+
"paws_fr": {
|
| 40 |
+
"alias": " - paws_fr",
|
| 41 |
+
"acc,none": 0.4585,
|
| 42 |
+
"acc_stderr,none": 0.011144549137930344
|
| 43 |
+
},
|
| 44 |
+
"paws_ja": {
|
| 45 |
+
"alias": " - paws_ja",
|
| 46 |
+
"acc,none": 0.4415,
|
| 47 |
+
"acc_stderr,none": 0.011106329288974695
|
| 48 |
+
},
|
| 49 |
+
"paws_ko": {
|
| 50 |
+
"alias": " - paws_ko",
|
| 51 |
+
"acc,none": 0.459,
|
| 52 |
+
"acc_stderr,none": 0.011145474902641254
|
| 53 |
+
},
|
| 54 |
+
"paws_zh": {
|
| 55 |
+
"alias": " - paws_zh",
|
| 56 |
+
"acc,none": 0.46,
|
| 57 |
+
"acc_stderr,none": 0.01114729254418001
|
| 58 |
+
}
|
| 59 |
+
}
|
| 60 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T14-57-32.308002_xcopa.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xcopa"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xcopa": {
|
| 20 |
+
"acc,none": 0.5330909090909091,
|
| 21 |
+
"acc_stderr,none": 0.0067289909324144215,
|
| 22 |
+
"alias": "xcopa"
|
| 23 |
+
},
|
| 24 |
+
"xcopa_et": {
|
| 25 |
+
"alias": " - xcopa_et",
|
| 26 |
+
"acc,none": 0.496,
|
| 27 |
+
"acc_stderr,none": 0.02238235778196214
|
| 28 |
+
},
|
| 29 |
+
"xcopa_ht": {
|
| 30 |
+
"alias": " - xcopa_ht",
|
| 31 |
+
"acc,none": 0.544,
|
| 32 |
+
"acc_stderr,none": 0.022296238348407053
|
| 33 |
+
},
|
| 34 |
+
"xcopa_id": {
|
| 35 |
+
"alias": " - xcopa_id",
|
| 36 |
+
"acc,none": 0.556,
|
| 37 |
+
"acc_stderr,none": 0.022242244375731024
|
| 38 |
+
},
|
| 39 |
+
"xcopa_it": {
|
| 40 |
+
"alias": " - xcopa_it",
|
| 41 |
+
"acc,none": 0.532,
|
| 42 |
+
"acc_stderr,none": 0.022337186479044292
|
| 43 |
+
},
|
| 44 |
+
"xcopa_qu": {
|
| 45 |
+
"alias": " - xcopa_qu",
|
| 46 |
+
"acc,none": 0.502,
|
| 47 |
+
"acc_stderr,none": 0.022382894986483524
|
| 48 |
+
},
|
| 49 |
+
"xcopa_sw": {
|
| 50 |
+
"alias": " - xcopa_sw",
|
| 51 |
+
"acc,none": 0.528,
|
| 52 |
+
"acc_stderr,none": 0.022347949832668093
|
| 53 |
+
},
|
| 54 |
+
"xcopa_ta": {
|
| 55 |
+
"alias": " - xcopa_ta",
|
| 56 |
+
"acc,none": 0.538,
|
| 57 |
+
"acc_stderr,none": 0.022318338119870523
|
| 58 |
+
},
|
| 59 |
+
"xcopa_th": {
|
| 60 |
+
"alias": " - xcopa_th",
|
| 61 |
+
"acc,none": 0.56,
|
| 62 |
+
"acc_stderr,none": 0.022221331534143015
|
| 63 |
+
},
|
| 64 |
+
"xcopa_tr": {
|
| 65 |
+
"alias": " - xcopa_tr",
|
| 66 |
+
"acc,none": 0.526,
|
| 67 |
+
"acc_stderr,none": 0.022352791650914167
|
| 68 |
+
},
|
| 69 |
+
"xcopa_vi": {
|
| 70 |
+
"alias": " - xcopa_vi",
|
| 71 |
+
"acc,none": 0.534,
|
| 72 |
+
"acc_stderr,none": 0.022331264423258383
|
| 73 |
+
},
|
| 74 |
+
"xcopa_zh": {
|
| 75 |
+
"alias": " - xcopa_zh",
|
| 76 |
+
"acc,none": 0.548,
|
| 77 |
+
"acc_stderr,none": 0.02227969410784342
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T16-02-57.684170_xnli.json
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xnli"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xnli": {
|
| 20 |
+
"acc,none": 0.37566265060240966,
|
| 21 |
+
"acc_stderr,none": 0.0024964839295963676,
|
| 22 |
+
"alias": "xnli"
|
| 23 |
+
},
|
| 24 |
+
"xnli_ar": {
|
| 25 |
+
"alias": " - xnli_ar",
|
| 26 |
+
"acc,none": 0.3405622489959839,
|
| 27 |
+
"acc_stderr,none": 0.009498886690274442
|
| 28 |
+
},
|
| 29 |
+
"xnli_bg": {
|
| 30 |
+
"alias": " - xnli_bg",
|
| 31 |
+
"acc,none": 0.37269076305220883,
|
| 32 |
+
"acc_stderr,none": 0.009691761259693463
|
| 33 |
+
},
|
| 34 |
+
"xnli_de": {
|
| 35 |
+
"alias": " - xnli_de",
|
| 36 |
+
"acc,none": 0.41686746987951806,
|
| 37 |
+
"acc_stderr,none": 0.009882576606533236
|
| 38 |
+
},
|
| 39 |
+
"xnli_el": {
|
| 40 |
+
"alias": " - xnli_el",
|
| 41 |
+
"acc,none": 0.3369477911646586,
|
| 42 |
+
"acc_stderr,none": 0.009474203778757712
|
| 43 |
+
},
|
| 44 |
+
"xnli_en": {
|
| 45 |
+
"alias": " - xnli_en",
|
| 46 |
+
"acc,none": 0.4827309236947791,
|
| 47 |
+
"acc_stderr,none": 0.010016093498409703
|
| 48 |
+
},
|
| 49 |
+
"xnli_es": {
|
| 50 |
+
"alias": " - xnli_es",
|
| 51 |
+
"acc,none": 0.3815261044176707,
|
| 52 |
+
"acc_stderr,none": 0.00973666813309817
|
| 53 |
+
},
|
| 54 |
+
"xnli_fr": {
|
| 55 |
+
"alias": " - xnli_fr",
|
| 56 |
+
"acc,none": 0.42730923694779116,
|
| 57 |
+
"acc_stderr,none": 0.009915595034908124
|
| 58 |
+
},
|
| 59 |
+
"xnli_hi": {
|
| 60 |
+
"alias": " - xnli_hi",
|
| 61 |
+
"acc,none": 0.3514056224899598,
|
| 62 |
+
"acc_stderr,none": 0.00956926307982396
|
| 63 |
+
},
|
| 64 |
+
"xnli_ru": {
|
| 65 |
+
"alias": " - xnli_ru",
|
| 66 |
+
"acc,none": 0.43333333333333335,
|
| 67 |
+
"acc_stderr,none": 0.009932588282324236
|
| 68 |
+
},
|
| 69 |
+
"xnli_sw": {
|
| 70 |
+
"alias": " - xnli_sw",
|
| 71 |
+
"acc,none": 0.3465863453815261,
|
| 72 |
+
"acc_stderr,none": 0.009538660220459
|
| 73 |
+
},
|
| 74 |
+
"xnli_th": {
|
| 75 |
+
"alias": " - xnli_th",
|
| 76 |
+
"acc,none": 0.3397590361445783,
|
| 77 |
+
"acc_stderr,none": 0.009493454925438249
|
| 78 |
+
},
|
| 79 |
+
"xnli_tr": {
|
| 80 |
+
"alias": " - xnli_tr",
|
| 81 |
+
"acc,none": 0.3598393574297189,
|
| 82 |
+
"acc_stderr,none": 0.009620250217765997
|
| 83 |
+
},
|
| 84 |
+
"xnli_ur": {
|
| 85 |
+
"alias": " - xnli_ur",
|
| 86 |
+
"acc,none": 0.3345381526104418,
|
| 87 |
+
"acc_stderr,none": 0.009457404390939166
|
| 88 |
+
},
|
| 89 |
+
"xnli_vi": {
|
| 90 |
+
"alias": " - xnli_vi",
|
| 91 |
+
"acc,none": 0.3690763052208835,
|
| 92 |
+
"acc_stderr,none": 0.00967239564447043
|
| 93 |
+
},
|
| 94 |
+
"xnli_zh": {
|
| 95 |
+
"alias": " - xnli_zh",
|
| 96 |
+
"acc,none": 0.3417670682730924,
|
| 97 |
+
"acc_stderr,none": 0.009506977398287621
|
| 98 |
+
}
|
| 99 |
+
}
|
| 100 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T16-19-00.595561_xstorycloze.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xstorycloze"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xstorycloze": {
|
| 20 |
+
"acc,none": 0.5257204740990313,
|
| 21 |
+
"acc_stderr,none": 0.003863789045312555,
|
| 22 |
+
"alias": "xstorycloze"
|
| 23 |
+
},
|
| 24 |
+
"xstorycloze_ar": {
|
| 25 |
+
"alias": " - xstorycloze_ar",
|
| 26 |
+
"acc,none": 0.4818001323626737,
|
| 27 |
+
"acc_stderr,none": 0.012858598401831848
|
| 28 |
+
},
|
| 29 |
+
"xstorycloze_en": {
|
| 30 |
+
"alias": " - xstorycloze_en",
|
| 31 |
+
"acc,none": 0.6293845135671741,
|
| 32 |
+
"acc_stderr,none": 0.012428861084065903
|
| 33 |
+
},
|
| 34 |
+
"xstorycloze_es": {
|
| 35 |
+
"alias": " - xstorycloze_es",
|
| 36 |
+
"acc,none": 0.5400397088021178,
|
| 37 |
+
"acc_stderr,none": 0.012825802370083987
|
| 38 |
+
},
|
| 39 |
+
"xstorycloze_eu": {
|
| 40 |
+
"alias": " - xstorycloze_eu",
|
| 41 |
+
"acc,none": 0.5201853077432164,
|
| 42 |
+
"acc_stderr,none": 0.012856635706498292
|
| 43 |
+
},
|
| 44 |
+
"xstorycloze_hi": {
|
| 45 |
+
"alias": " - xstorycloze_hi",
|
| 46 |
+
"acc,none": 0.514890800794176,
|
| 47 |
+
"acc_stderr,none": 0.012861417842074004
|
| 48 |
+
},
|
| 49 |
+
"xstorycloze_id": {
|
| 50 |
+
"alias": " - xstorycloze_id",
|
| 51 |
+
"acc,none": 0.5069490403706155,
|
| 52 |
+
"acc_stderr,none": 0.012865882570960722
|
| 53 |
+
},
|
| 54 |
+
"xstorycloze_my": {
|
| 55 |
+
"alias": " - xstorycloze_my",
|
| 56 |
+
"acc,none": 0.5029781601588352,
|
| 57 |
+
"acc_stderr,none": 0.012866897066011233
|
| 58 |
+
},
|
| 59 |
+
"xstorycloze_ru": {
|
| 60 |
+
"alias": " - xstorycloze_ru",
|
| 61 |
+
"acc,none": 0.5129053606882858,
|
| 62 |
+
"acc_stderr,none": 0.012862838605728476
|
| 63 |
+
},
|
| 64 |
+
"xstorycloze_sw": {
|
| 65 |
+
"alias": " - xstorycloze_sw",
|
| 66 |
+
"acc,none": 0.4990072799470549,
|
| 67 |
+
"acc_stderr,none": 0.012867099955422926
|
| 68 |
+
},
|
| 69 |
+
"xstorycloze_te": {
|
| 70 |
+
"alias": " - xstorycloze_te",
|
| 71 |
+
"acc,none": 0.5340833884844474,
|
| 72 |
+
"acc_stderr,none": 0.012837195610619434
|
| 73 |
+
},
|
| 74 |
+
"xstorycloze_zh": {
|
| 75 |
+
"alias": " - xstorycloze_zh",
|
| 76 |
+
"acc,none": 0.5407015221707479,
|
| 77 |
+
"acc_stderr,none": 0.012824422739625578
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T16-21-46.468616_xwinograd.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xwinograd"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xwinograd": {
|
| 20 |
+
"acc,none": 0.6414924702180266,
|
| 21 |
+
"acc_stderr,none": 0.007097604851218048,
|
| 22 |
+
"alias": "xwinograd"
|
| 23 |
+
},
|
| 24 |
+
"xwinograd_en": {
|
| 25 |
+
"alias": " - xwinograd_en",
|
| 26 |
+
"acc,none": 0.7088172043010753,
|
| 27 |
+
"acc_stderr,none": 0.009423927122193903
|
| 28 |
+
},
|
| 29 |
+
"xwinograd_fr": {
|
| 30 |
+
"alias": " - xwinograd_fr",
|
| 31 |
+
"acc,none": 0.6144578313253012,
|
| 32 |
+
"acc_stderr,none": 0.0537495779731939
|
| 33 |
+
},
|
| 34 |
+
"xwinograd_jp": {
|
| 35 |
+
"alias": " - xwinograd_jp",
|
| 36 |
+
"acc,none": 0.5338894681960376,
|
| 37 |
+
"acc_stderr,none": 0.016117117806017902
|
| 38 |
+
},
|
| 39 |
+
"xwinograd_pt": {
|
| 40 |
+
"alias": " - xwinograd_pt",
|
| 41 |
+
"acc,none": 0.532319391634981,
|
| 42 |
+
"acc_stderr,none": 0.030825503526303782
|
| 43 |
+
},
|
| 44 |
+
"xwinograd_ru": {
|
| 45 |
+
"alias": " - xwinograd_ru",
|
| 46 |
+
"acc,none": 0.5523809523809524,
|
| 47 |
+
"acc_stderr,none": 0.0280613656383537
|
| 48 |
+
},
|
| 49 |
+
"xwinograd_zh": {
|
| 50 |
+
"alias": " - xwinograd_zh",
|
| 51 |
+
"acc,none": 0.6527777777777778,
|
| 52 |
+
"acc_stderr,none": 0.021227675707409233
|
| 53 |
+
}
|
| 54 |
+
}
|
| 55 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T17-22-18.851434_mmlu.json
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"mmlu"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 5,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"mmlu": {
|
| 20 |
+
"acc,none": 0.25409485828229594,
|
| 21 |
+
"acc_stderr,none": 0.003673285974361475,
|
| 22 |
+
"alias": "mmlu"
|
| 23 |
+
},
|
| 24 |
+
"mmlu_humanities": {
|
| 25 |
+
"acc,none": 0.2454835281615303,
|
| 26 |
+
"acc_stderr,none": 0.006269554905683639,
|
| 27 |
+
"alias": " - humanities"
|
| 28 |
+
},
|
| 29 |
+
"mmlu_formal_logic": {
|
| 30 |
+
"alias": " - formal_logic",
|
| 31 |
+
"acc,none": 0.3412698412698413,
|
| 32 |
+
"acc_stderr,none": 0.04240799327574924
|
| 33 |
+
},
|
| 34 |
+
"mmlu_high_school_european_history": {
|
| 35 |
+
"alias": " - high_school_european_history",
|
| 36 |
+
"acc,none": 0.3151515151515151,
|
| 37 |
+
"acc_stderr,none": 0.0362773057502241
|
| 38 |
+
},
|
| 39 |
+
"mmlu_high_school_us_history": {
|
| 40 |
+
"alias": " - high_school_us_history",
|
| 41 |
+
"acc,none": 0.24509803921568626,
|
| 42 |
+
"acc_stderr,none": 0.030190282453501943
|
| 43 |
+
},
|
| 44 |
+
"mmlu_high_school_world_history": {
|
| 45 |
+
"alias": " - high_school_world_history",
|
| 46 |
+
"acc,none": 0.2742616033755274,
|
| 47 |
+
"acc_stderr,none": 0.02904133351059801
|
| 48 |
+
},
|
| 49 |
+
"mmlu_international_law": {
|
| 50 |
+
"alias": " - international_law",
|
| 51 |
+
"acc,none": 0.23140495867768596,
|
| 52 |
+
"acc_stderr,none": 0.03849856098794088
|
| 53 |
+
},
|
| 54 |
+
"mmlu_jurisprudence": {
|
| 55 |
+
"alias": " - jurisprudence",
|
| 56 |
+
"acc,none": 0.21296296296296297,
|
| 57 |
+
"acc_stderr,none": 0.039578354719809805
|
| 58 |
+
},
|
| 59 |
+
"mmlu_logical_fallacies": {
|
| 60 |
+
"alias": " - logical_fallacies",
|
| 61 |
+
"acc,none": 0.26380368098159507,
|
| 62 |
+
"acc_stderr,none": 0.03462419931615624
|
| 63 |
+
},
|
| 64 |
+
"mmlu_moral_disputes": {
|
| 65 |
+
"alias": " - moral_disputes",
|
| 66 |
+
"acc,none": 0.2398843930635838,
|
| 67 |
+
"acc_stderr,none": 0.022989592543123567
|
| 68 |
+
},
|
| 69 |
+
"mmlu_moral_scenarios": {
|
| 70 |
+
"alias": " - moral_scenarios",
|
| 71 |
+
"acc,none": 0.24581005586592178,
|
| 72 |
+
"acc_stderr,none": 0.01440029642922559
|
| 73 |
+
},
|
| 74 |
+
"mmlu_philosophy": {
|
| 75 |
+
"alias": " - philosophy",
|
| 76 |
+
"acc,none": 0.1864951768488746,
|
| 77 |
+
"acc_stderr,none": 0.022122439772480764
|
| 78 |
+
},
|
| 79 |
+
"mmlu_prehistory": {
|
| 80 |
+
"alias": " - prehistory",
|
| 81 |
+
"acc,none": 0.2191358024691358,
|
| 82 |
+
"acc_stderr,none": 0.023016705640262175
|
| 83 |
+
},
|
| 84 |
+
"mmlu_professional_law": {
|
| 85 |
+
"alias": " - professional_law",
|
| 86 |
+
"acc,none": 0.24771838331160365,
|
| 87 |
+
"acc_stderr,none": 0.011025499291443735
|
| 88 |
+
},
|
| 89 |
+
"mmlu_world_religions": {
|
| 90 |
+
"alias": " - world_religions",
|
| 91 |
+
"acc,none": 0.22807017543859648,
|
| 92 |
+
"acc_stderr,none": 0.03218093795602357
|
| 93 |
+
},
|
| 94 |
+
"mmlu_other": {
|
| 95 |
+
"acc,none": 0.25523012552301255,
|
| 96 |
+
"acc_stderr,none": 0.00781936823628676,
|
| 97 |
+
"alias": " - other"
|
| 98 |
+
},
|
| 99 |
+
"mmlu_business_ethics": {
|
| 100 |
+
"alias": " - business_ethics",
|
| 101 |
+
"acc,none": 0.22,
|
| 102 |
+
"acc_stderr,none": 0.04163331998932269
|
| 103 |
+
},
|
| 104 |
+
"mmlu_clinical_knowledge": {
|
| 105 |
+
"alias": " - clinical_knowledge",
|
| 106 |
+
"acc,none": 0.26037735849056604,
|
| 107 |
+
"acc_stderr,none": 0.02700876609070809
|
| 108 |
+
},
|
| 109 |
+
"mmlu_college_medicine": {
|
| 110 |
+
"alias": " - college_medicine",
|
| 111 |
+
"acc,none": 0.24277456647398843,
|
| 112 |
+
"acc_stderr,none": 0.0326926380614177
|
| 113 |
+
},
|
| 114 |
+
"mmlu_global_facts": {
|
| 115 |
+
"alias": " - global_facts",
|
| 116 |
+
"acc,none": 0.22,
|
| 117 |
+
"acc_stderr,none": 0.0416333199893227
|
| 118 |
+
},
|
| 119 |
+
"mmlu_human_aging": {
|
| 120 |
+
"alias": " - human_aging",
|
| 121 |
+
"acc,none": 0.32286995515695066,
|
| 122 |
+
"acc_stderr,none": 0.03138147637575498
|
| 123 |
+
},
|
| 124 |
+
"mmlu_management": {
|
| 125 |
+
"alias": " - management",
|
| 126 |
+
"acc,none": 0.2912621359223301,
|
| 127 |
+
"acc_stderr,none": 0.04498676320572924
|
| 128 |
+
},
|
| 129 |
+
"mmlu_marketing": {
|
| 130 |
+
"alias": " - marketing",
|
| 131 |
+
"acc,none": 0.2863247863247863,
|
| 132 |
+
"acc_stderr,none": 0.029614323690456648
|
| 133 |
+
},
|
| 134 |
+
"mmlu_medical_genetics": {
|
| 135 |
+
"alias": " - medical_genetics",
|
| 136 |
+
"acc,none": 0.24,
|
| 137 |
+
"acc_stderr,none": 0.04292346959909283
|
| 138 |
+
},
|
| 139 |
+
"mmlu_miscellaneous": {
|
| 140 |
+
"alias": " - miscellaneous",
|
| 141 |
+
"acc,none": 0.23371647509578544,
|
| 142 |
+
"acc_stderr,none": 0.015133383278988832
|
| 143 |
+
},
|
| 144 |
+
"mmlu_nutrition": {
|
| 145 |
+
"alias": " - nutrition",
|
| 146 |
+
"acc,none": 0.23529411764705882,
|
| 147 |
+
"acc_stderr,none": 0.024288619466046116
|
| 148 |
+
},
|
| 149 |
+
"mmlu_professional_accounting": {
|
| 150 |
+
"alias": " - professional_accounting",
|
| 151 |
+
"acc,none": 0.2553191489361702,
|
| 152 |
+
"acc_stderr,none": 0.02601199293090201
|
| 153 |
+
},
|
| 154 |
+
"mmlu_professional_medicine": {
|
| 155 |
+
"alias": " - professional_medicine",
|
| 156 |
+
"acc,none": 0.2977941176470588,
|
| 157 |
+
"acc_stderr,none": 0.027778298701545443
|
| 158 |
+
},
|
| 159 |
+
"mmlu_virology": {
|
| 160 |
+
"alias": " - virology",
|
| 161 |
+
"acc,none": 0.22289156626506024,
|
| 162 |
+
"acc_stderr,none": 0.03240004825594688
|
| 163 |
+
},
|
| 164 |
+
"mmlu_social_sciences": {
|
| 165 |
+
"acc,none": 0.25901852453688656,
|
| 166 |
+
"acc_stderr,none": 0.007907951917759795,
|
| 167 |
+
"alias": " - social sciences"
|
| 168 |
+
},
|
| 169 |
+
"mmlu_econometrics": {
|
| 170 |
+
"alias": " - econometrics",
|
| 171 |
+
"acc,none": 0.2719298245614035,
|
| 172 |
+
"acc_stderr,none": 0.041857744240220575
|
| 173 |
+
},
|
| 174 |
+
"mmlu_high_school_geography": {
|
| 175 |
+
"alias": " - high_school_geography",
|
| 176 |
+
"acc,none": 0.2828282828282828,
|
| 177 |
+
"acc_stderr,none": 0.032087795587867514
|
| 178 |
+
},
|
| 179 |
+
"mmlu_high_school_government_and_politics": {
|
| 180 |
+
"alias": " - high_school_government_and_politics",
|
| 181 |
+
"acc,none": 0.26424870466321243,
|
| 182 |
+
"acc_stderr,none": 0.03182155050916647
|
| 183 |
+
},
|
| 184 |
+
"mmlu_high_school_macroeconomics": {
|
| 185 |
+
"alias": " - high_school_macroeconomics",
|
| 186 |
+
"acc,none": 0.25384615384615383,
|
| 187 |
+
"acc_stderr,none": 0.022066054378726253
|
| 188 |
+
},
|
| 189 |
+
"mmlu_high_school_microeconomics": {
|
| 190 |
+
"alias": " - high_school_microeconomics",
|
| 191 |
+
"acc,none": 0.2689075630252101,
|
| 192 |
+
"acc_stderr,none": 0.028801392193631276
|
| 193 |
+
},
|
| 194 |
+
"mmlu_high_school_psychology": {
|
| 195 |
+
"alias": " - high_school_psychology",
|
| 196 |
+
"acc,none": 0.24403669724770644,
|
| 197 |
+
"acc_stderr,none": 0.018415286351416406
|
| 198 |
+
},
|
| 199 |
+
"mmlu_human_sexuality": {
|
| 200 |
+
"alias": " - human_sexuality",
|
| 201 |
+
"acc,none": 0.2900763358778626,
|
| 202 |
+
"acc_stderr,none": 0.03980066246467765
|
| 203 |
+
},
|
| 204 |
+
"mmlu_professional_psychology": {
|
| 205 |
+
"alias": " - professional_psychology",
|
| 206 |
+
"acc,none": 0.26143790849673204,
|
| 207 |
+
"acc_stderr,none": 0.017776947157528027
|
| 208 |
+
},
|
| 209 |
+
"mmlu_public_relations": {
|
| 210 |
+
"alias": " - public_relations",
|
| 211 |
+
"acc,none": 0.22727272727272727,
|
| 212 |
+
"acc_stderr,none": 0.04013964554072774
|
| 213 |
+
},
|
| 214 |
+
"mmlu_security_studies": {
|
| 215 |
+
"alias": " - security_studies",
|
| 216 |
+
"acc,none": 0.24489795918367346,
|
| 217 |
+
"acc_stderr,none": 0.027529637440174923
|
| 218 |
+
},
|
| 219 |
+
"mmlu_sociology": {
|
| 220 |
+
"alias": " - sociology",
|
| 221 |
+
"acc,none": 0.2835820895522388,
|
| 222 |
+
"acc_stderr,none": 0.03187187537919797
|
| 223 |
+
},
|
| 224 |
+
"mmlu_us_foreign_policy": {
|
| 225 |
+
"alias": " - us_foreign_policy",
|
| 226 |
+
"acc,none": 0.23,
|
| 227 |
+
"acc_stderr,none": 0.042295258468165065
|
| 228 |
+
},
|
| 229 |
+
"mmlu_stem": {
|
| 230 |
+
"acc,none": 0.26102124960355216,
|
| 231 |
+
"acc_stderr,none": 0.007821062178349759,
|
| 232 |
+
"alias": " - stem"
|
| 233 |
+
},
|
| 234 |
+
"mmlu_abstract_algebra": {
|
| 235 |
+
"alias": " - abstract_algebra",
|
| 236 |
+
"acc,none": 0.19,
|
| 237 |
+
"acc_stderr,none": 0.039427724440366234
|
| 238 |
+
},
|
| 239 |
+
"mmlu_anatomy": {
|
| 240 |
+
"alias": " - anatomy",
|
| 241 |
+
"acc,none": 0.21481481481481482,
|
| 242 |
+
"acc_stderr,none": 0.035478541985608236
|
| 243 |
+
},
|
| 244 |
+
"mmlu_astronomy": {
|
| 245 |
+
"alias": " - astronomy",
|
| 246 |
+
"acc,none": 0.2894736842105263,
|
| 247 |
+
"acc_stderr,none": 0.036906779861372814
|
| 248 |
+
},
|
| 249 |
+
"mmlu_college_biology": {
|
| 250 |
+
"alias": " - college_biology",
|
| 251 |
+
"acc,none": 0.2361111111111111,
|
| 252 |
+
"acc_stderr,none": 0.03551446610810826
|
| 253 |
+
},
|
| 254 |
+
"mmlu_college_chemistry": {
|
| 255 |
+
"alias": " - college_chemistry",
|
| 256 |
+
"acc,none": 0.34,
|
| 257 |
+
"acc_stderr,none": 0.047609522856952365
|
| 258 |
+
},
|
| 259 |
+
"mmlu_college_computer_science": {
|
| 260 |
+
"alias": " - college_computer_science",
|
| 261 |
+
"acc,none": 0.28,
|
| 262 |
+
"acc_stderr,none": 0.04512608598542128
|
| 263 |
+
},
|
| 264 |
+
"mmlu_college_mathematics": {
|
| 265 |
+
"alias": " - college_mathematics",
|
| 266 |
+
"acc,none": 0.25,
|
| 267 |
+
"acc_stderr,none": 0.04351941398892446
|
| 268 |
+
},
|
| 269 |
+
"mmlu_college_physics": {
|
| 270 |
+
"alias": " - college_physics",
|
| 271 |
+
"acc,none": 0.28431372549019607,
|
| 272 |
+
"acc_stderr,none": 0.04488482852329017
|
| 273 |
+
},
|
| 274 |
+
"mmlu_computer_security": {
|
| 275 |
+
"alias": " - computer_security",
|
| 276 |
+
"acc,none": 0.24,
|
| 277 |
+
"acc_stderr,none": 0.042923469599092816
|
| 278 |
+
},
|
| 279 |
+
"mmlu_conceptual_physics": {
|
| 280 |
+
"alias": " - conceptual_physics",
|
| 281 |
+
"acc,none": 0.2170212765957447,
|
| 282 |
+
"acc_stderr,none": 0.02694748312149622
|
| 283 |
+
},
|
| 284 |
+
"mmlu_electrical_engineering": {
|
| 285 |
+
"alias": " - electrical_engineering",
|
| 286 |
+
"acc,none": 0.2206896551724138,
|
| 287 |
+
"acc_stderr,none": 0.03455930201924813
|
| 288 |
+
},
|
| 289 |
+
"mmlu_elementary_mathematics": {
|
| 290 |
+
"alias": " - elementary_mathematics",
|
| 291 |
+
"acc,none": 0.2671957671957672,
|
| 292 |
+
"acc_stderr,none": 0.022789673145776575
|
| 293 |
+
},
|
| 294 |
+
"mmlu_high_school_biology": {
|
| 295 |
+
"alias": " - high_school_biology",
|
| 296 |
+
"acc,none": 0.2806451612903226,
|
| 297 |
+
"acc_stderr,none": 0.025560604721022888
|
| 298 |
+
},
|
| 299 |
+
"mmlu_high_school_chemistry": {
|
| 300 |
+
"alias": " - high_school_chemistry",
|
| 301 |
+
"acc,none": 0.2561576354679803,
|
| 302 |
+
"acc_stderr,none": 0.030712730070982592
|
| 303 |
+
},
|
| 304 |
+
"mmlu_high_school_computer_science": {
|
| 305 |
+
"alias": " - high_school_computer_science",
|
| 306 |
+
"acc,none": 0.24,
|
| 307 |
+
"acc_stderr,none": 0.04292346959909282
|
| 308 |
+
},
|
| 309 |
+
"mmlu_high_school_mathematics": {
|
| 310 |
+
"alias": " - high_school_mathematics",
|
| 311 |
+
"acc,none": 0.2740740740740741,
|
| 312 |
+
"acc_stderr,none": 0.027195934804085622
|
| 313 |
+
},
|
| 314 |
+
"mmlu_high_school_physics": {
|
| 315 |
+
"alias": " - high_school_physics",
|
| 316 |
+
"acc,none": 0.24503311258278146,
|
| 317 |
+
"acc_stderr,none": 0.03511807571804726
|
| 318 |
+
},
|
| 319 |
+
"mmlu_high_school_statistics": {
|
| 320 |
+
"alias": " - high_school_statistics",
|
| 321 |
+
"acc,none": 0.3287037037037037,
|
| 322 |
+
"acc_stderr,none": 0.03203614084670058
|
| 323 |
+
},
|
| 324 |
+
"mmlu_machine_learning": {
|
| 325 |
+
"alias": " - machine_learning",
|
| 326 |
+
"acc,none": 0.25,
|
| 327 |
+
"acc_stderr,none": 0.04109974682633932
|
| 328 |
+
}
|
| 329 |
+
}
|
| 330 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_11/0.4.8_2025-03-13T18-54-53.797865_lambada_openai_.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_openai"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"lambada_openai": {
|
| 20 |
+
"alias": "lambada_openai",
|
| 21 |
+
"perplexity,none": 12.642543877737143,
|
| 22 |
+
"perplexity_stderr,none": 0.3812387951513916,
|
| 23 |
+
"acc,none": 0.4814671065398797,
|
| 24 |
+
"acc_stderr,none": 0.006961190829726007
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T09-57-49.932590_lambada_openai.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_openai"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"lambada_openai": {
|
| 20 |
+
"alias": "lambada_openai",
|
| 21 |
+
"perplexity,none": 12.62972464596384,
|
| 22 |
+
"perplexity_stderr,none": 0.3806139731054351,
|
| 23 |
+
"acc,none": 0.48282553852124976,
|
| 24 |
+
"acc_stderr,none": 0.006961867045185065
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-13-21.086739_hellaswag.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"hellaswag"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"hellaswag": {
|
| 20 |
+
"alias": "hellaswag",
|
| 21 |
+
"acc,none": 0.3451503684524995,
|
| 22 |
+
"acc_stderr,none": 0.004744456628455124,
|
| 23 |
+
"acc_norm,none": 0.4197371041625174,
|
| 24 |
+
"acc_norm_stderr,none": 0.004925072159723834
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-14-54.756945_piqa.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"piqa"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"piqa": {
|
| 20 |
+
"alias": "piqa",
|
| 21 |
+
"acc,none": 0.6735582154515778,
|
| 22 |
+
"acc_stderr,none": 0.010940467046177306,
|
| 23 |
+
"acc_norm,none": 0.6664853101196954,
|
| 24 |
+
"acc_norm_stderr,none": 0.011000139592184566
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-17-57.209289_arc_easy.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_easy"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_easy": {
|
| 20 |
+
"alias": "arc_easy",
|
| 21 |
+
"acc,none": 0.5854377104377104,
|
| 22 |
+
"acc_stderr,none": 0.010108889212447767,
|
| 23 |
+
"acc_norm,none": 0.5420875420875421,
|
| 24 |
+
"acc_norm_stderr,none": 0.010223371342195895
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T10-19-33.710789_arc_challenge.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_challenge"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_challenge": {
|
| 20 |
+
"alias": "arc_challenge",
|
| 21 |
+
"acc,none": 0.2551194539249147,
|
| 22 |
+
"acc_stderr,none": 0.012739038695202102,
|
| 23 |
+
"acc_norm,none": 0.2935153583617747,
|
| 24 |
+
"acc_norm_stderr,none": 0.013307250444941113
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T12-39-38.019272_xnli.json
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xnli"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xnli": {
|
| 20 |
+
"acc,none": 0.3731191432396252,
|
| 21 |
+
"acc_stderr,none": 0.0024949681237011402,
|
| 22 |
+
"alias": "xnli"
|
| 23 |
+
},
|
| 24 |
+
"xnli_ar": {
|
| 25 |
+
"alias": " - xnli_ar",
|
| 26 |
+
"acc,none": 0.3373493975903614,
|
| 27 |
+
"acc_stderr,none": 0.009476976849778591
|
| 28 |
+
},
|
| 29 |
+
"xnli_bg": {
|
| 30 |
+
"alias": " - xnli_bg",
|
| 31 |
+
"acc,none": 0.3682730923694779,
|
| 32 |
+
"acc_stderr,none": 0.009668013178998446
|
| 33 |
+
},
|
| 34 |
+
"xnli_de": {
|
| 35 |
+
"alias": " - xnli_de",
|
| 36 |
+
"acc,none": 0.40602409638554215,
|
| 37 |
+
"acc_stderr,none": 0.00984346200738422
|
| 38 |
+
},
|
| 39 |
+
"xnli_el": {
|
| 40 |
+
"alias": " - xnli_el",
|
| 41 |
+
"acc,none": 0.3369477911646586,
|
| 42 |
+
"acc_stderr,none": 0.009474203778757713
|
| 43 |
+
},
|
| 44 |
+
"xnli_en": {
|
| 45 |
+
"alias": " - xnli_en",
|
| 46 |
+
"acc,none": 0.4674698795180723,
|
| 47 |
+
"acc_stderr,none": 0.010000839483876027
|
| 48 |
+
},
|
| 49 |
+
"xnli_es": {
|
| 50 |
+
"alias": " - xnli_es",
|
| 51 |
+
"acc,none": 0.3923694779116466,
|
| 52 |
+
"acc_stderr,none": 0.009787120838990105
|
| 53 |
+
},
|
| 54 |
+
"xnli_fr": {
|
| 55 |
+
"alias": " - xnli_fr",
|
| 56 |
+
"acc,none": 0.42208835341365464,
|
| 57 |
+
"acc_stderr,none": 0.009899652714895416
|
| 58 |
+
},
|
| 59 |
+
"xnli_hi": {
|
| 60 |
+
"alias": " - xnli_hi",
|
| 61 |
+
"acc,none": 0.351004016064257,
|
| 62 |
+
"acc_stderr,none": 0.009566753834803288
|
| 63 |
+
},
|
| 64 |
+
"xnli_ru": {
|
| 65 |
+
"alias": " - xnli_ru",
|
| 66 |
+
"acc,none": 0.42208835341365464,
|
| 67 |
+
"acc_stderr,none": 0.009899652714895424
|
| 68 |
+
},
|
| 69 |
+
"xnli_sw": {
|
| 70 |
+
"alias": " - xnli_sw",
|
| 71 |
+
"acc,none": 0.3481927710843373,
|
| 72 |
+
"acc_stderr,none": 0.009548980649153391
|
| 73 |
+
},
|
| 74 |
+
"xnli_th": {
|
| 75 |
+
"alias": " - xnli_th",
|
| 76 |
+
"acc,none": 0.35180722891566263,
|
| 77 |
+
"acc_stderr,none": 0.009571764897113621
|
| 78 |
+
},
|
| 79 |
+
"xnli_tr": {
|
| 80 |
+
"alias": " - xnli_tr",
|
| 81 |
+
"acc,none": 0.35180722891566263,
|
| 82 |
+
"acc_stderr,none": 0.009571764897113625
|
| 83 |
+
},
|
| 84 |
+
"xnli_ur": {
|
| 85 |
+
"alias": " - xnli_ur",
|
| 86 |
+
"acc,none": 0.3349397590361446,
|
| 87 |
+
"acc_stderr,none": 0.009460223484996469
|
| 88 |
+
},
|
| 89 |
+
"xnli_vi": {
|
| 90 |
+
"alias": " - xnli_vi",
|
| 91 |
+
"acc,none": 0.3674698795180723,
|
| 92 |
+
"acc_stderr,none": 0.009663601903728034
|
| 93 |
+
},
|
| 94 |
+
"xnli_zh": {
|
| 95 |
+
"alias": " - xnli_zh",
|
| 96 |
+
"acc,none": 0.3389558232931727,
|
| 97 |
+
"acc_stderr,none": 0.009487992732201522
|
| 98 |
+
}
|
| 99 |
+
}
|
| 100 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T12-56-32.162685_xstorycloze.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xstorycloze"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xstorycloze": {
|
| 20 |
+
"acc,none": 0.5243366825100776,
|
| 21 |
+
"acc_stderr,none": 0.0038648541830817408,
|
| 22 |
+
"alias": "xstorycloze"
|
| 23 |
+
},
|
| 24 |
+
"xstorycloze_ar": {
|
| 25 |
+
"alias": " - xstorycloze_ar",
|
| 26 |
+
"acc,none": 0.4824619457313038,
|
| 27 |
+
"acc_stderr,none": 0.012859207453266304
|
| 28 |
+
},
|
| 29 |
+
"xstorycloze_en": {
|
| 30 |
+
"alias": " - xstorycloze_en",
|
| 31 |
+
"acc,none": 0.6260754467240238,
|
| 32 |
+
"acc_stderr,none": 0.012451361842944465
|
| 33 |
+
},
|
| 34 |
+
"xstorycloze_es": {
|
| 35 |
+
"alias": " - xstorycloze_es",
|
| 36 |
+
"acc,none": 0.5367306419589676,
|
| 37 |
+
"acc_stderr,none": 0.012832359240206969
|
| 38 |
+
},
|
| 39 |
+
"xstorycloze_eu": {
|
| 40 |
+
"alias": " - xstorycloze_eu",
|
| 41 |
+
"acc,none": 0.5195234943745863,
|
| 42 |
+
"acc_stderr,none": 0.01285731253183686
|
| 43 |
+
},
|
| 44 |
+
"xstorycloze_hi": {
|
| 45 |
+
"alias": " - xstorycloze_hi",
|
| 46 |
+
"acc,none": 0.514228987425546,
|
| 47 |
+
"acc_stderr,none": 0.012861913999596129
|
| 48 |
+
},
|
| 49 |
+
"xstorycloze_id": {
|
| 50 |
+
"alias": " - xstorycloze_id",
|
| 51 |
+
"acc,none": 0.5049636002647253,
|
| 52 |
+
"acc_stderr,none": 0.012866491277589943
|
| 53 |
+
},
|
| 54 |
+
"xstorycloze_my": {
|
| 55 |
+
"alias": " - xstorycloze_my",
|
| 56 |
+
"acc,none": 0.5036399735274653,
|
| 57 |
+
"acc_stderr,none": 0.01286678434828923
|
| 58 |
+
},
|
| 59 |
+
"xstorycloze_ru": {
|
| 60 |
+
"alias": " - xstorycloze_ru",
|
| 61 |
+
"acc,none": 0.5122435473196558,
|
| 62 |
+
"acc_stderr,none": 0.012863267059205548
|
| 63 |
+
},
|
| 64 |
+
"xstorycloze_sw": {
|
| 65 |
+
"alias": " - xstorycloze_sw",
|
| 66 |
+
"acc,none": 0.49702183984116477,
|
| 67 |
+
"acc_stderr,none": 0.012866897066011233
|
| 68 |
+
},
|
| 69 |
+
"xstorycloze_te": {
|
| 70 |
+
"alias": " - xstorycloze_te",
|
| 71 |
+
"acc,none": 0.5340833884844474,
|
| 72 |
+
"acc_stderr,none": 0.012837195610619434
|
| 73 |
+
},
|
| 74 |
+
"xstorycloze_zh": {
|
| 75 |
+
"alias": " - xstorycloze_zh",
|
| 76 |
+
"acc,none": 0.5367306419589676,
|
| 77 |
+
"acc_stderr,none": 0.01283235924020697
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T12-59-30.016094_xwinograd.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xwinograd"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xwinograd": {
|
| 20 |
+
"acc,none": 0.6439649359406608,
|
| 21 |
+
"acc_stderr,none": 0.007096653855893872,
|
| 22 |
+
"alias": "xwinograd"
|
| 23 |
+
},
|
| 24 |
+
"xwinograd_en": {
|
| 25 |
+
"alias": " - xwinograd_en",
|
| 26 |
+
"acc,none": 0.7066666666666667,
|
| 27 |
+
"acc_stderr,none": 0.00944430382490117
|
| 28 |
+
},
|
| 29 |
+
"xwinograd_fr": {
|
| 30 |
+
"alias": " - xwinograd_fr",
|
| 31 |
+
"acc,none": 0.6626506024096386,
|
| 32 |
+
"acc_stderr,none": 0.052212602620321284
|
| 33 |
+
},
|
| 34 |
+
"xwinograd_jp": {
|
| 35 |
+
"alias": " - xwinograd_jp",
|
| 36 |
+
"acc,none": 0.5380604796663191,
|
| 37 |
+
"acc_stderr,none": 0.016107396603808045
|
| 38 |
+
},
|
| 39 |
+
"xwinograd_pt": {
|
| 40 |
+
"alias": " - xwinograd_pt",
|
| 41 |
+
"acc,none": 0.5513307984790875,
|
| 42 |
+
"acc_stderr,none": 0.030726890349707915
|
| 43 |
+
},
|
| 44 |
+
"xwinograd_ru": {
|
| 45 |
+
"alias": " - xwinograd_ru",
|
| 46 |
+
"acc,none": 0.5587301587301587,
|
| 47 |
+
"acc_stderr,none": 0.02802130493237513
|
| 48 |
+
},
|
| 49 |
+
"xwinograd_zh": {
|
| 50 |
+
"alias": " - xwinograd_zh",
|
| 51 |
+
"acc,none": 0.6547619047619048,
|
| 52 |
+
"acc_stderr,none": 0.021199082505488055
|
| 53 |
+
}
|
| 54 |
+
}
|
| 55 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T13-49-01.062852_mmlu.json
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"mmlu"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 5,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"mmlu": {
|
| 20 |
+
"acc,none": 0.2547357926221336,
|
| 21 |
+
"acc_stderr,none": 0.0036750666019376636,
|
| 22 |
+
"alias": "mmlu"
|
| 23 |
+
},
|
| 24 |
+
"mmlu_humanities": {
|
| 25 |
+
"acc,none": 0.2454835281615303,
|
| 26 |
+
"acc_stderr,none": 0.006267346149858851,
|
| 27 |
+
"alias": " - humanities"
|
| 28 |
+
},
|
| 29 |
+
"mmlu_formal_logic": {
|
| 30 |
+
"alias": " - formal_logic",
|
| 31 |
+
"acc,none": 0.3412698412698413,
|
| 32 |
+
"acc_stderr,none": 0.04240799327574925
|
| 33 |
+
},
|
| 34 |
+
"mmlu_high_school_european_history": {
|
| 35 |
+
"alias": " - high_school_european_history",
|
| 36 |
+
"acc,none": 0.3151515151515151,
|
| 37 |
+
"acc_stderr,none": 0.0362773057502241
|
| 38 |
+
},
|
| 39 |
+
"mmlu_high_school_us_history": {
|
| 40 |
+
"alias": " - high_school_us_history",
|
| 41 |
+
"acc,none": 0.24509803921568626,
|
| 42 |
+
"acc_stderr,none": 0.03019028245350194
|
| 43 |
+
},
|
| 44 |
+
"mmlu_high_school_world_history": {
|
| 45 |
+
"alias": " - high_school_world_history",
|
| 46 |
+
"acc,none": 0.29535864978902954,
|
| 47 |
+
"acc_stderr,none": 0.02969633871342286
|
| 48 |
+
},
|
| 49 |
+
"mmlu_international_law": {
|
| 50 |
+
"alias": " - international_law",
|
| 51 |
+
"acc,none": 0.2644628099173554,
|
| 52 |
+
"acc_stderr,none": 0.04026187527591206
|
| 53 |
+
},
|
| 54 |
+
"mmlu_jurisprudence": {
|
| 55 |
+
"alias": " - jurisprudence",
|
| 56 |
+
"acc,none": 0.2222222222222222,
|
| 57 |
+
"acc_stderr,none": 0.0401910747255735
|
| 58 |
+
},
|
| 59 |
+
"mmlu_logical_fallacies": {
|
| 60 |
+
"alias": " - logical_fallacies",
|
| 61 |
+
"acc,none": 0.24539877300613497,
|
| 62 |
+
"acc_stderr,none": 0.03380939813943354
|
| 63 |
+
},
|
| 64 |
+
"mmlu_moral_disputes": {
|
| 65 |
+
"alias": " - moral_disputes",
|
| 66 |
+
"acc,none": 0.23410404624277456,
|
| 67 |
+
"acc_stderr,none": 0.022797110278071128
|
| 68 |
+
},
|
| 69 |
+
"mmlu_moral_scenarios": {
|
| 70 |
+
"alias": " - moral_scenarios",
|
| 71 |
+
"acc,none": 0.2435754189944134,
|
| 72 |
+
"acc_stderr,none": 0.014355911964767864
|
| 73 |
+
},
|
| 74 |
+
"mmlu_philosophy": {
|
| 75 |
+
"alias": " - philosophy",
|
| 76 |
+
"acc,none": 0.18006430868167203,
|
| 77 |
+
"acc_stderr,none": 0.02182342285774495
|
| 78 |
+
},
|
| 79 |
+
"mmlu_prehistory": {
|
| 80 |
+
"alias": " - prehistory",
|
| 81 |
+
"acc,none": 0.21604938271604937,
|
| 82 |
+
"acc_stderr,none": 0.022899162918445785
|
| 83 |
+
},
|
| 84 |
+
"mmlu_professional_law": {
|
| 85 |
+
"alias": " - professional_law",
|
| 86 |
+
"acc,none": 0.2470664928292047,
|
| 87 |
+
"acc_stderr,none": 0.011015752255279338
|
| 88 |
+
},
|
| 89 |
+
"mmlu_world_religions": {
|
| 90 |
+
"alias": " - world_religions",
|
| 91 |
+
"acc,none": 0.23391812865497075,
|
| 92 |
+
"acc_stderr,none": 0.03246721765117827
|
| 93 |
+
},
|
| 94 |
+
"mmlu_other": {
|
| 95 |
+
"acc,none": 0.2603797875764403,
|
| 96 |
+
"acc_stderr,none": 0.007871165717920162,
|
| 97 |
+
"alias": " - other"
|
| 98 |
+
},
|
| 99 |
+
"mmlu_business_ethics": {
|
| 100 |
+
"alias": " - business_ethics",
|
| 101 |
+
"acc,none": 0.23,
|
| 102 |
+
"acc_stderr,none": 0.04229525846816505
|
| 103 |
+
},
|
| 104 |
+
"mmlu_clinical_knowledge": {
|
| 105 |
+
"alias": " - clinical_knowledge",
|
| 106 |
+
"acc,none": 0.2830188679245283,
|
| 107 |
+
"acc_stderr,none": 0.027724236492700904
|
| 108 |
+
},
|
| 109 |
+
"mmlu_college_medicine": {
|
| 110 |
+
"alias": " - college_medicine",
|
| 111 |
+
"acc,none": 0.24277456647398843,
|
| 112 |
+
"acc_stderr,none": 0.0326926380614177
|
| 113 |
+
},
|
| 114 |
+
"mmlu_global_facts": {
|
| 115 |
+
"alias": " - global_facts",
|
| 116 |
+
"acc,none": 0.2,
|
| 117 |
+
"acc_stderr,none": 0.04020151261036843
|
| 118 |
+
},
|
| 119 |
+
"mmlu_human_aging": {
|
| 120 |
+
"alias": " - human_aging",
|
| 121 |
+
"acc,none": 0.31390134529147984,
|
| 122 |
+
"acc_stderr,none": 0.031146796482972465
|
| 123 |
+
},
|
| 124 |
+
"mmlu_management": {
|
| 125 |
+
"alias": " - management",
|
| 126 |
+
"acc,none": 0.2912621359223301,
|
| 127 |
+
"acc_stderr,none": 0.04498676320572924
|
| 128 |
+
},
|
| 129 |
+
"mmlu_marketing": {
|
| 130 |
+
"alias": " - marketing",
|
| 131 |
+
"acc,none": 0.29914529914529914,
|
| 132 |
+
"acc_stderr,none": 0.029996951858349483
|
| 133 |
+
},
|
| 134 |
+
"mmlu_medical_genetics": {
|
| 135 |
+
"alias": " - medical_genetics",
|
| 136 |
+
"acc,none": 0.22,
|
| 137 |
+
"acc_stderr,none": 0.04163331998932269
|
| 138 |
+
},
|
| 139 |
+
"mmlu_miscellaneous": {
|
| 140 |
+
"alias": " - miscellaneous",
|
| 141 |
+
"acc,none": 0.23754789272030652,
|
| 142 |
+
"acc_stderr,none": 0.015218733046150195
|
| 143 |
+
},
|
| 144 |
+
"mmlu_nutrition": {
|
| 145 |
+
"alias": " - nutrition",
|
| 146 |
+
"acc,none": 0.27124183006535946,
|
| 147 |
+
"acc_stderr,none": 0.025457756696667864
|
| 148 |
+
},
|
| 149 |
+
"mmlu_professional_accounting": {
|
| 150 |
+
"alias": " - professional_accounting",
|
| 151 |
+
"acc,none": 0.25177304964539005,
|
| 152 |
+
"acc_stderr,none": 0.025892151156709405
|
| 153 |
+
},
|
| 154 |
+
"mmlu_professional_medicine": {
|
| 155 |
+
"alias": " - professional_medicine",
|
| 156 |
+
"acc,none": 0.29044117647058826,
|
| 157 |
+
"acc_stderr,none": 0.027576468622740526
|
| 158 |
+
},
|
| 159 |
+
"mmlu_virology": {
|
| 160 |
+
"alias": " - virology",
|
| 161 |
+
"acc,none": 0.2289156626506024,
|
| 162 |
+
"acc_stderr,none": 0.03270745277352477
|
| 163 |
+
},
|
| 164 |
+
"mmlu_social_sciences": {
|
| 165 |
+
"acc,none": 0.2577185570360741,
|
| 166 |
+
"acc_stderr,none": 0.007892283265392744,
|
| 167 |
+
"alias": " - social sciences"
|
| 168 |
+
},
|
| 169 |
+
"mmlu_econometrics": {
|
| 170 |
+
"alias": " - econometrics",
|
| 171 |
+
"acc,none": 0.2543859649122807,
|
| 172 |
+
"acc_stderr,none": 0.040969851398436716
|
| 173 |
+
},
|
| 174 |
+
"mmlu_high_school_geography": {
|
| 175 |
+
"alias": " - high_school_geography",
|
| 176 |
+
"acc,none": 0.2727272727272727,
|
| 177 |
+
"acc_stderr,none": 0.03173071239071724
|
| 178 |
+
},
|
| 179 |
+
"mmlu_high_school_government_and_politics": {
|
| 180 |
+
"alias": " - high_school_government_and_politics",
|
| 181 |
+
"acc,none": 0.27461139896373055,
|
| 182 |
+
"acc_stderr,none": 0.03221024508041154
|
| 183 |
+
},
|
| 184 |
+
"mmlu_high_school_macroeconomics": {
|
| 185 |
+
"alias": " - high_school_macroeconomics",
|
| 186 |
+
"acc,none": 0.2512820512820513,
|
| 187 |
+
"acc_stderr,none": 0.021992016662370564
|
| 188 |
+
},
|
| 189 |
+
"mmlu_high_school_microeconomics": {
|
| 190 |
+
"alias": " - high_school_microeconomics",
|
| 191 |
+
"acc,none": 0.2605042016806723,
|
| 192 |
+
"acc_stderr,none": 0.028510251512341937
|
| 193 |
+
},
|
| 194 |
+
"mmlu_high_school_psychology": {
|
| 195 |
+
"alias": " - high_school_psychology",
|
| 196 |
+
"acc,none": 0.24403669724770644,
|
| 197 |
+
"acc_stderr,none": 0.018415286351416413
|
| 198 |
+
},
|
| 199 |
+
"mmlu_human_sexuality": {
|
| 200 |
+
"alias": " - human_sexuality",
|
| 201 |
+
"acc,none": 0.26717557251908397,
|
| 202 |
+
"acc_stderr,none": 0.038808483010823944
|
| 203 |
+
},
|
| 204 |
+
"mmlu_professional_psychology": {
|
| 205 |
+
"alias": " - professional_psychology",
|
| 206 |
+
"acc,none": 0.2696078431372549,
|
| 207 |
+
"acc_stderr,none": 0.017952449196987862
|
| 208 |
+
},
|
| 209 |
+
"mmlu_public_relations": {
|
| 210 |
+
"alias": " - public_relations",
|
| 211 |
+
"acc,none": 0.22727272727272727,
|
| 212 |
+
"acc_stderr,none": 0.04013964554072775
|
| 213 |
+
},
|
| 214 |
+
"mmlu_security_studies": {
|
| 215 |
+
"alias": " - security_studies",
|
| 216 |
+
"acc,none": 0.22448979591836735,
|
| 217 |
+
"acc_stderr,none": 0.02671143055553841
|
| 218 |
+
},
|
| 219 |
+
"mmlu_sociology": {
|
| 220 |
+
"alias": " - sociology",
|
| 221 |
+
"acc,none": 0.3034825870646766,
|
| 222 |
+
"acc_stderr,none": 0.032510068164586174
|
| 223 |
+
},
|
| 224 |
+
"mmlu_us_foreign_policy": {
|
| 225 |
+
"alias": " - us_foreign_policy",
|
| 226 |
+
"acc,none": 0.23,
|
| 227 |
+
"acc_stderr,none": 0.04229525846816506
|
| 228 |
+
},
|
| 229 |
+
"mmlu_stem": {
|
| 230 |
+
"acc,none": 0.260069774817634,
|
| 231 |
+
"acc_stderr,none": 0.0078061861482534595,
|
| 232 |
+
"alias": " - stem"
|
| 233 |
+
},
|
| 234 |
+
"mmlu_abstract_algebra": {
|
| 235 |
+
"alias": " - abstract_algebra",
|
| 236 |
+
"acc,none": 0.2,
|
| 237 |
+
"acc_stderr,none": 0.04020151261036845
|
| 238 |
+
},
|
| 239 |
+
"mmlu_anatomy": {
|
| 240 |
+
"alias": " - anatomy",
|
| 241 |
+
"acc,none": 0.2074074074074074,
|
| 242 |
+
"acc_stderr,none": 0.03502553170678318
|
| 243 |
+
},
|
| 244 |
+
"mmlu_astronomy": {
|
| 245 |
+
"alias": " - astronomy",
|
| 246 |
+
"acc,none": 0.27631578947368424,
|
| 247 |
+
"acc_stderr,none": 0.03639057569952925
|
| 248 |
+
},
|
| 249 |
+
"mmlu_college_biology": {
|
| 250 |
+
"alias": " - college_biology",
|
| 251 |
+
"acc,none": 0.24305555555555555,
|
| 252 |
+
"acc_stderr,none": 0.03586879280080342
|
| 253 |
+
},
|
| 254 |
+
"mmlu_college_chemistry": {
|
| 255 |
+
"alias": " - college_chemistry",
|
| 256 |
+
"acc,none": 0.35,
|
| 257 |
+
"acc_stderr,none": 0.047937248544110196
|
| 258 |
+
},
|
| 259 |
+
"mmlu_college_computer_science": {
|
| 260 |
+
"alias": " - college_computer_science",
|
| 261 |
+
"acc,none": 0.29,
|
| 262 |
+
"acc_stderr,none": 0.04560480215720684
|
| 263 |
+
},
|
| 264 |
+
"mmlu_college_mathematics": {
|
| 265 |
+
"alias": " - college_mathematics",
|
| 266 |
+
"acc,none": 0.26,
|
| 267 |
+
"acc_stderr,none": 0.0440844002276808
|
| 268 |
+
},
|
| 269 |
+
"mmlu_college_physics": {
|
| 270 |
+
"alias": " - college_physics",
|
| 271 |
+
"acc,none": 0.3333333333333333,
|
| 272 |
+
"acc_stderr,none": 0.04690650298201943
|
| 273 |
+
},
|
| 274 |
+
"mmlu_computer_security": {
|
| 275 |
+
"alias": " - computer_security",
|
| 276 |
+
"acc,none": 0.22,
|
| 277 |
+
"acc_stderr,none": 0.04163331998932269
|
| 278 |
+
},
|
| 279 |
+
"mmlu_conceptual_physics": {
|
| 280 |
+
"alias": " - conceptual_physics",
|
| 281 |
+
"acc,none": 0.2170212765957447,
|
| 282 |
+
"acc_stderr,none": 0.02694748312149622
|
| 283 |
+
},
|
| 284 |
+
"mmlu_electrical_engineering": {
|
| 285 |
+
"alias": " - electrical_engineering",
|
| 286 |
+
"acc,none": 0.19310344827586207,
|
| 287 |
+
"acc_stderr,none": 0.032894455221274
|
| 288 |
+
},
|
| 289 |
+
"mmlu_elementary_mathematics": {
|
| 290 |
+
"alias": " - elementary_mathematics",
|
| 291 |
+
"acc,none": 0.2566137566137566,
|
| 292 |
+
"acc_stderr,none": 0.022494510767503154
|
| 293 |
+
},
|
| 294 |
+
"mmlu_high_school_biology": {
|
| 295 |
+
"alias": " - high_school_biology",
|
| 296 |
+
"acc,none": 0.2870967741935484,
|
| 297 |
+
"acc_stderr,none": 0.025736542745594525
|
| 298 |
+
},
|
| 299 |
+
"mmlu_high_school_chemistry": {
|
| 300 |
+
"alias": " - high_school_chemistry",
|
| 301 |
+
"acc,none": 0.2512315270935961,
|
| 302 |
+
"acc_stderr,none": 0.030516530732694436
|
| 303 |
+
},
|
| 304 |
+
"mmlu_high_school_computer_science": {
|
| 305 |
+
"alias": " - high_school_computer_science",
|
| 306 |
+
"acc,none": 0.24,
|
| 307 |
+
"acc_stderr,none": 0.04292346959909283
|
| 308 |
+
},
|
| 309 |
+
"mmlu_high_school_mathematics": {
|
| 310 |
+
"alias": " - high_school_mathematics",
|
| 311 |
+
"acc,none": 0.27037037037037037,
|
| 312 |
+
"acc_stderr,none": 0.027080372815145658
|
| 313 |
+
},
|
| 314 |
+
"mmlu_high_school_physics": {
|
| 315 |
+
"alias": " - high_school_physics",
|
| 316 |
+
"acc,none": 0.2582781456953642,
|
| 317 |
+
"acc_stderr,none": 0.035737053147634576
|
| 318 |
+
},
|
| 319 |
+
"mmlu_high_school_statistics": {
|
| 320 |
+
"alias": " - high_school_statistics",
|
| 321 |
+
"acc,none": 0.3194444444444444,
|
| 322 |
+
"acc_stderr,none": 0.03179876342176851
|
| 323 |
+
},
|
| 324 |
+
"mmlu_machine_learning": {
|
| 325 |
+
"alias": " - machine_learning",
|
| 326 |
+
"acc,none": 0.25,
|
| 327 |
+
"acc_stderr,none": 0.04109974682633932
|
| 328 |
+
}
|
| 329 |
+
}
|
| 330 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-03-10.026654_glue.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"glue"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 5,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"cola": {
|
| 20 |
+
"alias": "cola",
|
| 21 |
+
"mcc,none": -0.08703457213373123,
|
| 22 |
+
"mcc_stderr,none": 0.030163539085824434
|
| 23 |
+
},
|
| 24 |
+
"mnli": {
|
| 25 |
+
"alias": "mnli",
|
| 26 |
+
"acc,none": 0.3307182883341824,
|
| 27 |
+
"acc_stderr,none": 0.0047490917271714875
|
| 28 |
+
},
|
| 29 |
+
"mnli_mismatch": {
|
| 30 |
+
"alias": "mnli_mismatch",
|
| 31 |
+
"acc,none": 0.3360455655004068,
|
| 32 |
+
"acc_stderr,none": 0.004763973908606825
|
| 33 |
+
},
|
| 34 |
+
"mrpc": {
|
| 35 |
+
"alias": "mrpc",
|
| 36 |
+
"acc,none": 0.4019607843137255,
|
| 37 |
+
"acc_stderr,none": 0.024302976642371528,
|
| 38 |
+
"f1,none": 0.34759358288770054,
|
| 39 |
+
"f1_stderr,none": 0.0313439979522004
|
| 40 |
+
},
|
| 41 |
+
"qnli": {
|
| 42 |
+
"alias": "qnli",
|
| 43 |
+
"acc,none": 0.4962474830679114,
|
| 44 |
+
"acc_stderr,none": 0.006765220016415222
|
| 45 |
+
},
|
| 46 |
+
"qqp": {
|
| 47 |
+
"alias": "qqp",
|
| 48 |
+
"acc,none": 0.5283452881523621,
|
| 49 |
+
"acc_stderr,none": 0.002482701510086862,
|
| 50 |
+
"f1,none": 0.41103252308737687,
|
| 51 |
+
"f1_stderr,none": 0.0034173131749060475
|
| 52 |
+
},
|
| 53 |
+
"rte": {
|
| 54 |
+
"alias": "rte",
|
| 55 |
+
"acc,none": 0.48736462093862815,
|
| 56 |
+
"acc_stderr,none": 0.030086851767188564
|
| 57 |
+
},
|
| 58 |
+
"sst2": {
|
| 59 |
+
"alias": "sst2",
|
| 60 |
+
"acc,none": 0.6284403669724771,
|
| 61 |
+
"acc_stderr,none": 0.016373337800737308
|
| 62 |
+
},
|
| 63 |
+
"wnli": {
|
| 64 |
+
"alias": "wnli",
|
| 65 |
+
"acc,none": 0.5774647887323944,
|
| 66 |
+
"acc_stderr,none": 0.05903984205682581
|
| 67 |
+
}
|
| 68 |
+
}
|
| 69 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-04-32.038053_winogrande.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"winogrande"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 5,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"winogrande": {
|
| 20 |
+
"alias": "winogrande",
|
| 21 |
+
"acc,none": 0.5374901341752171,
|
| 22 |
+
"acc_stderr,none": 0.014012928183336573
|
| 23 |
+
}
|
| 24 |
+
}
|
| 25 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-09-54.775610_sciq.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"sciq"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 5,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"sciq": {
|
| 20 |
+
"alias": "sciq",
|
| 21 |
+
"acc,none": 0.92,
|
| 22 |
+
"acc_stderr,none": 0.00858333697775365,
|
| 23 |
+
"acc_norm,none": 0.908,
|
| 24 |
+
"acc_norm_stderr,none": 0.00914437639315114
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T19-31-43.872563_mmlu.json
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"mmlu"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 5,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"mmlu": {
|
| 20 |
+
"acc,none": 0.2547357926221336,
|
| 21 |
+
"acc_stderr,none": 0.0036750666019376636,
|
| 22 |
+
"alias": "mmlu"
|
| 23 |
+
},
|
| 24 |
+
"mmlu_humanities": {
|
| 25 |
+
"acc,none": 0.2454835281615303,
|
| 26 |
+
"acc_stderr,none": 0.006267346149858851,
|
| 27 |
+
"alias": " - humanities"
|
| 28 |
+
},
|
| 29 |
+
"mmlu_formal_logic": {
|
| 30 |
+
"alias": " - formal_logic",
|
| 31 |
+
"acc,none": 0.3412698412698413,
|
| 32 |
+
"acc_stderr,none": 0.04240799327574925
|
| 33 |
+
},
|
| 34 |
+
"mmlu_high_school_european_history": {
|
| 35 |
+
"alias": " - high_school_european_history",
|
| 36 |
+
"acc,none": 0.3151515151515151,
|
| 37 |
+
"acc_stderr,none": 0.0362773057502241
|
| 38 |
+
},
|
| 39 |
+
"mmlu_high_school_us_history": {
|
| 40 |
+
"alias": " - high_school_us_history",
|
| 41 |
+
"acc,none": 0.24509803921568626,
|
| 42 |
+
"acc_stderr,none": 0.03019028245350194
|
| 43 |
+
},
|
| 44 |
+
"mmlu_high_school_world_history": {
|
| 45 |
+
"alias": " - high_school_world_history",
|
| 46 |
+
"acc,none": 0.29535864978902954,
|
| 47 |
+
"acc_stderr,none": 0.02969633871342286
|
| 48 |
+
},
|
| 49 |
+
"mmlu_international_law": {
|
| 50 |
+
"alias": " - international_law",
|
| 51 |
+
"acc,none": 0.2644628099173554,
|
| 52 |
+
"acc_stderr,none": 0.04026187527591206
|
| 53 |
+
},
|
| 54 |
+
"mmlu_jurisprudence": {
|
| 55 |
+
"alias": " - jurisprudence",
|
| 56 |
+
"acc,none": 0.2222222222222222,
|
| 57 |
+
"acc_stderr,none": 0.0401910747255735
|
| 58 |
+
},
|
| 59 |
+
"mmlu_logical_fallacies": {
|
| 60 |
+
"alias": " - logical_fallacies",
|
| 61 |
+
"acc,none": 0.24539877300613497,
|
| 62 |
+
"acc_stderr,none": 0.03380939813943354
|
| 63 |
+
},
|
| 64 |
+
"mmlu_moral_disputes": {
|
| 65 |
+
"alias": " - moral_disputes",
|
| 66 |
+
"acc,none": 0.23410404624277456,
|
| 67 |
+
"acc_stderr,none": 0.022797110278071128
|
| 68 |
+
},
|
| 69 |
+
"mmlu_moral_scenarios": {
|
| 70 |
+
"alias": " - moral_scenarios",
|
| 71 |
+
"acc,none": 0.2435754189944134,
|
| 72 |
+
"acc_stderr,none": 0.014355911964767864
|
| 73 |
+
},
|
| 74 |
+
"mmlu_philosophy": {
|
| 75 |
+
"alias": " - philosophy",
|
| 76 |
+
"acc,none": 0.18006430868167203,
|
| 77 |
+
"acc_stderr,none": 0.02182342285774495
|
| 78 |
+
},
|
| 79 |
+
"mmlu_prehistory": {
|
| 80 |
+
"alias": " - prehistory",
|
| 81 |
+
"acc,none": 0.21604938271604937,
|
| 82 |
+
"acc_stderr,none": 0.022899162918445785
|
| 83 |
+
},
|
| 84 |
+
"mmlu_professional_law": {
|
| 85 |
+
"alias": " - professional_law",
|
| 86 |
+
"acc,none": 0.2470664928292047,
|
| 87 |
+
"acc_stderr,none": 0.011015752255279338
|
| 88 |
+
},
|
| 89 |
+
"mmlu_world_religions": {
|
| 90 |
+
"alias": " - world_religions",
|
| 91 |
+
"acc,none": 0.23391812865497075,
|
| 92 |
+
"acc_stderr,none": 0.03246721765117827
|
| 93 |
+
},
|
| 94 |
+
"mmlu_other": {
|
| 95 |
+
"acc,none": 0.2603797875764403,
|
| 96 |
+
"acc_stderr,none": 0.007871165717920162,
|
| 97 |
+
"alias": " - other"
|
| 98 |
+
},
|
| 99 |
+
"mmlu_business_ethics": {
|
| 100 |
+
"alias": " - business_ethics",
|
| 101 |
+
"acc,none": 0.23,
|
| 102 |
+
"acc_stderr,none": 0.04229525846816505
|
| 103 |
+
},
|
| 104 |
+
"mmlu_clinical_knowledge": {
|
| 105 |
+
"alias": " - clinical_knowledge",
|
| 106 |
+
"acc,none": 0.2830188679245283,
|
| 107 |
+
"acc_stderr,none": 0.027724236492700904
|
| 108 |
+
},
|
| 109 |
+
"mmlu_college_medicine": {
|
| 110 |
+
"alias": " - college_medicine",
|
| 111 |
+
"acc,none": 0.24277456647398843,
|
| 112 |
+
"acc_stderr,none": 0.0326926380614177
|
| 113 |
+
},
|
| 114 |
+
"mmlu_global_facts": {
|
| 115 |
+
"alias": " - global_facts",
|
| 116 |
+
"acc,none": 0.2,
|
| 117 |
+
"acc_stderr,none": 0.04020151261036843
|
| 118 |
+
},
|
| 119 |
+
"mmlu_human_aging": {
|
| 120 |
+
"alias": " - human_aging",
|
| 121 |
+
"acc,none": 0.31390134529147984,
|
| 122 |
+
"acc_stderr,none": 0.031146796482972465
|
| 123 |
+
},
|
| 124 |
+
"mmlu_management": {
|
| 125 |
+
"alias": " - management",
|
| 126 |
+
"acc,none": 0.2912621359223301,
|
| 127 |
+
"acc_stderr,none": 0.04498676320572924
|
| 128 |
+
},
|
| 129 |
+
"mmlu_marketing": {
|
| 130 |
+
"alias": " - marketing",
|
| 131 |
+
"acc,none": 0.29914529914529914,
|
| 132 |
+
"acc_stderr,none": 0.029996951858349483
|
| 133 |
+
},
|
| 134 |
+
"mmlu_medical_genetics": {
|
| 135 |
+
"alias": " - medical_genetics",
|
| 136 |
+
"acc,none": 0.22,
|
| 137 |
+
"acc_stderr,none": 0.04163331998932269
|
| 138 |
+
},
|
| 139 |
+
"mmlu_miscellaneous": {
|
| 140 |
+
"alias": " - miscellaneous",
|
| 141 |
+
"acc,none": 0.23754789272030652,
|
| 142 |
+
"acc_stderr,none": 0.015218733046150195
|
| 143 |
+
},
|
| 144 |
+
"mmlu_nutrition": {
|
| 145 |
+
"alias": " - nutrition",
|
| 146 |
+
"acc,none": 0.27124183006535946,
|
| 147 |
+
"acc_stderr,none": 0.025457756696667864
|
| 148 |
+
},
|
| 149 |
+
"mmlu_professional_accounting": {
|
| 150 |
+
"alias": " - professional_accounting",
|
| 151 |
+
"acc,none": 0.25177304964539005,
|
| 152 |
+
"acc_stderr,none": 0.025892151156709405
|
| 153 |
+
},
|
| 154 |
+
"mmlu_professional_medicine": {
|
| 155 |
+
"alias": " - professional_medicine",
|
| 156 |
+
"acc,none": 0.29044117647058826,
|
| 157 |
+
"acc_stderr,none": 0.027576468622740526
|
| 158 |
+
},
|
| 159 |
+
"mmlu_virology": {
|
| 160 |
+
"alias": " - virology",
|
| 161 |
+
"acc,none": 0.2289156626506024,
|
| 162 |
+
"acc_stderr,none": 0.03270745277352477
|
| 163 |
+
},
|
| 164 |
+
"mmlu_social_sciences": {
|
| 165 |
+
"acc,none": 0.2577185570360741,
|
| 166 |
+
"acc_stderr,none": 0.007892283265392744,
|
| 167 |
+
"alias": " - social sciences"
|
| 168 |
+
},
|
| 169 |
+
"mmlu_econometrics": {
|
| 170 |
+
"alias": " - econometrics",
|
| 171 |
+
"acc,none": 0.2543859649122807,
|
| 172 |
+
"acc_stderr,none": 0.040969851398436716
|
| 173 |
+
},
|
| 174 |
+
"mmlu_high_school_geography": {
|
| 175 |
+
"alias": " - high_school_geography",
|
| 176 |
+
"acc,none": 0.2727272727272727,
|
| 177 |
+
"acc_stderr,none": 0.03173071239071724
|
| 178 |
+
},
|
| 179 |
+
"mmlu_high_school_government_and_politics": {
|
| 180 |
+
"alias": " - high_school_government_and_politics",
|
| 181 |
+
"acc,none": 0.27461139896373055,
|
| 182 |
+
"acc_stderr,none": 0.03221024508041154
|
| 183 |
+
},
|
| 184 |
+
"mmlu_high_school_macroeconomics": {
|
| 185 |
+
"alias": " - high_school_macroeconomics",
|
| 186 |
+
"acc,none": 0.2512820512820513,
|
| 187 |
+
"acc_stderr,none": 0.021992016662370564
|
| 188 |
+
},
|
| 189 |
+
"mmlu_high_school_microeconomics": {
|
| 190 |
+
"alias": " - high_school_microeconomics",
|
| 191 |
+
"acc,none": 0.2605042016806723,
|
| 192 |
+
"acc_stderr,none": 0.028510251512341937
|
| 193 |
+
},
|
| 194 |
+
"mmlu_high_school_psychology": {
|
| 195 |
+
"alias": " - high_school_psychology",
|
| 196 |
+
"acc,none": 0.24403669724770644,
|
| 197 |
+
"acc_stderr,none": 0.018415286351416413
|
| 198 |
+
},
|
| 199 |
+
"mmlu_human_sexuality": {
|
| 200 |
+
"alias": " - human_sexuality",
|
| 201 |
+
"acc,none": 0.26717557251908397,
|
| 202 |
+
"acc_stderr,none": 0.038808483010823944
|
| 203 |
+
},
|
| 204 |
+
"mmlu_professional_psychology": {
|
| 205 |
+
"alias": " - professional_psychology",
|
| 206 |
+
"acc,none": 0.2696078431372549,
|
| 207 |
+
"acc_stderr,none": 0.017952449196987862
|
| 208 |
+
},
|
| 209 |
+
"mmlu_public_relations": {
|
| 210 |
+
"alias": " - public_relations",
|
| 211 |
+
"acc,none": 0.22727272727272727,
|
| 212 |
+
"acc_stderr,none": 0.04013964554072775
|
| 213 |
+
},
|
| 214 |
+
"mmlu_security_studies": {
|
| 215 |
+
"alias": " - security_studies",
|
| 216 |
+
"acc,none": 0.22448979591836735,
|
| 217 |
+
"acc_stderr,none": 0.02671143055553841
|
| 218 |
+
},
|
| 219 |
+
"mmlu_sociology": {
|
| 220 |
+
"alias": " - sociology",
|
| 221 |
+
"acc,none": 0.3034825870646766,
|
| 222 |
+
"acc_stderr,none": 0.032510068164586174
|
| 223 |
+
},
|
| 224 |
+
"mmlu_us_foreign_policy": {
|
| 225 |
+
"alias": " - us_foreign_policy",
|
| 226 |
+
"acc,none": 0.23,
|
| 227 |
+
"acc_stderr,none": 0.04229525846816506
|
| 228 |
+
},
|
| 229 |
+
"mmlu_stem": {
|
| 230 |
+
"acc,none": 0.260069774817634,
|
| 231 |
+
"acc_stderr,none": 0.0078061861482534595,
|
| 232 |
+
"alias": " - stem"
|
| 233 |
+
},
|
| 234 |
+
"mmlu_abstract_algebra": {
|
| 235 |
+
"alias": " - abstract_algebra",
|
| 236 |
+
"acc,none": 0.2,
|
| 237 |
+
"acc_stderr,none": 0.04020151261036845
|
| 238 |
+
},
|
| 239 |
+
"mmlu_anatomy": {
|
| 240 |
+
"alias": " - anatomy",
|
| 241 |
+
"acc,none": 0.2074074074074074,
|
| 242 |
+
"acc_stderr,none": 0.03502553170678318
|
| 243 |
+
},
|
| 244 |
+
"mmlu_astronomy": {
|
| 245 |
+
"alias": " - astronomy",
|
| 246 |
+
"acc,none": 0.27631578947368424,
|
| 247 |
+
"acc_stderr,none": 0.03639057569952925
|
| 248 |
+
},
|
| 249 |
+
"mmlu_college_biology": {
|
| 250 |
+
"alias": " - college_biology",
|
| 251 |
+
"acc,none": 0.24305555555555555,
|
| 252 |
+
"acc_stderr,none": 0.03586879280080342
|
| 253 |
+
},
|
| 254 |
+
"mmlu_college_chemistry": {
|
| 255 |
+
"alias": " - college_chemistry",
|
| 256 |
+
"acc,none": 0.35,
|
| 257 |
+
"acc_stderr,none": 0.047937248544110196
|
| 258 |
+
},
|
| 259 |
+
"mmlu_college_computer_science": {
|
| 260 |
+
"alias": " - college_computer_science",
|
| 261 |
+
"acc,none": 0.29,
|
| 262 |
+
"acc_stderr,none": 0.04560480215720684
|
| 263 |
+
},
|
| 264 |
+
"mmlu_college_mathematics": {
|
| 265 |
+
"alias": " - college_mathematics",
|
| 266 |
+
"acc,none": 0.26,
|
| 267 |
+
"acc_stderr,none": 0.0440844002276808
|
| 268 |
+
},
|
| 269 |
+
"mmlu_college_physics": {
|
| 270 |
+
"alias": " - college_physics",
|
| 271 |
+
"acc,none": 0.3333333333333333,
|
| 272 |
+
"acc_stderr,none": 0.04690650298201943
|
| 273 |
+
},
|
| 274 |
+
"mmlu_computer_security": {
|
| 275 |
+
"alias": " - computer_security",
|
| 276 |
+
"acc,none": 0.22,
|
| 277 |
+
"acc_stderr,none": 0.04163331998932269
|
| 278 |
+
},
|
| 279 |
+
"mmlu_conceptual_physics": {
|
| 280 |
+
"alias": " - conceptual_physics",
|
| 281 |
+
"acc,none": 0.2170212765957447,
|
| 282 |
+
"acc_stderr,none": 0.02694748312149622
|
| 283 |
+
},
|
| 284 |
+
"mmlu_electrical_engineering": {
|
| 285 |
+
"alias": " - electrical_engineering",
|
| 286 |
+
"acc,none": 0.19310344827586207,
|
| 287 |
+
"acc_stderr,none": 0.032894455221274
|
| 288 |
+
},
|
| 289 |
+
"mmlu_elementary_mathematics": {
|
| 290 |
+
"alias": " - elementary_mathematics",
|
| 291 |
+
"acc,none": 0.2566137566137566,
|
| 292 |
+
"acc_stderr,none": 0.022494510767503154
|
| 293 |
+
},
|
| 294 |
+
"mmlu_high_school_biology": {
|
| 295 |
+
"alias": " - high_school_biology",
|
| 296 |
+
"acc,none": 0.2870967741935484,
|
| 297 |
+
"acc_stderr,none": 0.025736542745594525
|
| 298 |
+
},
|
| 299 |
+
"mmlu_high_school_chemistry": {
|
| 300 |
+
"alias": " - high_school_chemistry",
|
| 301 |
+
"acc,none": 0.2512315270935961,
|
| 302 |
+
"acc_stderr,none": 0.030516530732694436
|
| 303 |
+
},
|
| 304 |
+
"mmlu_high_school_computer_science": {
|
| 305 |
+
"alias": " - high_school_computer_science",
|
| 306 |
+
"acc,none": 0.24,
|
| 307 |
+
"acc_stderr,none": 0.04292346959909283
|
| 308 |
+
},
|
| 309 |
+
"mmlu_high_school_mathematics": {
|
| 310 |
+
"alias": " - high_school_mathematics",
|
| 311 |
+
"acc,none": 0.27037037037037037,
|
| 312 |
+
"acc_stderr,none": 0.027080372815145658
|
| 313 |
+
},
|
| 314 |
+
"mmlu_high_school_physics": {
|
| 315 |
+
"alias": " - high_school_physics",
|
| 316 |
+
"acc,none": 0.2582781456953642,
|
| 317 |
+
"acc_stderr,none": 0.035737053147634576
|
| 318 |
+
},
|
| 319 |
+
"mmlu_high_school_statistics": {
|
| 320 |
+
"alias": " - high_school_statistics",
|
| 321 |
+
"acc,none": 0.3194444444444444,
|
| 322 |
+
"acc_stderr,none": 0.03179876342176851
|
| 323 |
+
},
|
| 324 |
+
"mmlu_machine_learning": {
|
| 325 |
+
"alias": " - machine_learning",
|
| 326 |
+
"acc,none": 0.25,
|
| 327 |
+
"acc_stderr,none": 0.04109974682633932
|
| 328 |
+
}
|
| 329 |
+
}
|
| 330 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T20-07-36.031022_lambada_multilingual.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_multilingual"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 5,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"lambada_openai_mt_de": {
|
| 20 |
+
"alias": "lambada_openai_mt_de",
|
| 21 |
+
"perplexity,none": 267.28408723088717,
|
| 22 |
+
"perplexity_stderr,none": 16.35740283931253,
|
| 23 |
+
"acc,none": 0.20958664855424025,
|
| 24 |
+
"acc_stderr,none": 0.005670495539426846
|
| 25 |
+
},
|
| 26 |
+
"lambada_openai_mt_en": {
|
| 27 |
+
"alias": "lambada_openai_mt_en",
|
| 28 |
+
"perplexity,none": 21.37572561784123,
|
| 29 |
+
"perplexity_stderr,none": 0.6808300296181946,
|
| 30 |
+
"acc,none": 0.38676499126722297,
|
| 31 |
+
"acc_stderr,none": 0.006784988579985175
|
| 32 |
+
},
|
| 33 |
+
"lambada_openai_mt_es": {
|
| 34 |
+
"alias": "lambada_openai_mt_es",
|
| 35 |
+
"perplexity,none": 370.8411076274339,
|
| 36 |
+
"perplexity_stderr,none": 21.595221630946366,
|
| 37 |
+
"acc,none": 0.21424412963322337,
|
| 38 |
+
"acc_stderr,none": 0.005716238694447705
|
| 39 |
+
},
|
| 40 |
+
"lambada_openai_mt_fr": {
|
| 41 |
+
"alias": "lambada_openai_mt_fr",
|
| 42 |
+
"perplexity,none": 196.3477203013756,
|
| 43 |
+
"perplexity_stderr,none": 11.305248298408568,
|
| 44 |
+
"acc,none": 0.2450999417814865,
|
| 45 |
+
"acc_stderr,none": 0.005992780988422183
|
| 46 |
+
},
|
| 47 |
+
"lambada_openai_mt_it": {
|
| 48 |
+
"alias": "lambada_openai_mt_it",
|
| 49 |
+
"perplexity,none": 323.19218684985276,
|
| 50 |
+
"perplexity_stderr,none": 20.29110209885659,
|
| 51 |
+
"acc,none": 0.2109450805356103,
|
| 52 |
+
"acc_stderr,none": 0.005683951840704777
|
| 53 |
+
}
|
| 54 |
+
}
|
| 55 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T20-24-29.617348_pawsx.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"pawsx"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 5,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"pawsx": {
|
| 20 |
+
"acc,none": 0.46485714285714286,
|
| 21 |
+
"acc_stderr,none": 0.004213340523157351,
|
| 22 |
+
"alias": "pawsx"
|
| 23 |
+
},
|
| 24 |
+
"paws_de": {
|
| 25 |
+
"alias": " - paws_de",
|
| 26 |
+
"acc,none": 0.4565,
|
| 27 |
+
"acc_stderr,none": 0.011140733053371418
|
| 28 |
+
},
|
| 29 |
+
"paws_en": {
|
| 30 |
+
"alias": " - paws_en",
|
| 31 |
+
"acc,none": 0.48,
|
| 32 |
+
"acc_stderr,none": 0.011174185930778313
|
| 33 |
+
},
|
| 34 |
+
"paws_es": {
|
| 35 |
+
"alias": " - paws_es",
|
| 36 |
+
"acc,none": 0.503,
|
| 37 |
+
"acc_stderr,none": 0.011182934722804556
|
| 38 |
+
},
|
| 39 |
+
"paws_fr": {
|
| 40 |
+
"alias": " - paws_fr",
|
| 41 |
+
"acc,none": 0.467,
|
| 42 |
+
"acc_stderr,none": 0.011158752568250661
|
| 43 |
+
},
|
| 44 |
+
"paws_ja": {
|
| 45 |
+
"alias": " - paws_ja",
|
| 46 |
+
"acc,none": 0.448,
|
| 47 |
+
"acc_stderr,none": 0.01112249319745629
|
| 48 |
+
},
|
| 49 |
+
"paws_ko": {
|
| 50 |
+
"alias": " - paws_ko",
|
| 51 |
+
"acc,none": 0.4515,
|
| 52 |
+
"acc_stderr,none": 0.011130400617630763
|
| 53 |
+
},
|
| 54 |
+
"paws_zh": {
|
| 55 |
+
"alias": " - paws_zh",
|
| 56 |
+
"acc,none": 0.448,
|
| 57 |
+
"acc_stderr,none": 0.01112249319745629
|
| 58 |
+
}
|
| 59 |
+
}
|
| 60 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_261/0.4.8_2025-03-15T20-29-15.015293_xcopa.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xcopa"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 5,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
261
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xcopa": {
|
| 20 |
+
"acc,none": 0.5294545454545454,
|
| 21 |
+
"acc_stderr,none": 0.00673238666638665,
|
| 22 |
+
"alias": "xcopa"
|
| 23 |
+
},
|
| 24 |
+
"xcopa_et": {
|
| 25 |
+
"alias": " - xcopa_et",
|
| 26 |
+
"acc,none": 0.496,
|
| 27 |
+
"acc_stderr,none": 0.02238235778196214
|
| 28 |
+
},
|
| 29 |
+
"xcopa_ht": {
|
| 30 |
+
"alias": " - xcopa_ht",
|
| 31 |
+
"acc,none": 0.528,
|
| 32 |
+
"acc_stderr,none": 0.02234794983266809
|
| 33 |
+
},
|
| 34 |
+
"xcopa_id": {
|
| 35 |
+
"alias": " - xcopa_id",
|
| 36 |
+
"acc,none": 0.546,
|
| 37 |
+
"acc_stderr,none": 0.02228814759117695
|
| 38 |
+
},
|
| 39 |
+
"xcopa_it": {
|
| 40 |
+
"alias": " - xcopa_it",
|
| 41 |
+
"acc,none": 0.526,
|
| 42 |
+
"acc_stderr,none": 0.02235279165091416
|
| 43 |
+
},
|
| 44 |
+
"xcopa_qu": {
|
| 45 |
+
"alias": " - xcopa_qu",
|
| 46 |
+
"acc,none": 0.5,
|
| 47 |
+
"acc_stderr,none": 0.022383074051792257
|
| 48 |
+
},
|
| 49 |
+
"xcopa_sw": {
|
| 50 |
+
"alias": " - xcopa_sw",
|
| 51 |
+
"acc,none": 0.528,
|
| 52 |
+
"acc_stderr,none": 0.022347949832668093
|
| 53 |
+
},
|
| 54 |
+
"xcopa_ta": {
|
| 55 |
+
"alias": " - xcopa_ta",
|
| 56 |
+
"acc,none": 0.532,
|
| 57 |
+
"acc_stderr,none": 0.0223371864790443
|
| 58 |
+
},
|
| 59 |
+
"xcopa_th": {
|
| 60 |
+
"alias": " - xcopa_th",
|
| 61 |
+
"acc,none": 0.566,
|
| 62 |
+
"acc_stderr,none": 0.02218721580302901
|
| 63 |
+
},
|
| 64 |
+
"xcopa_tr": {
|
| 65 |
+
"alias": " - xcopa_tr",
|
| 66 |
+
"acc,none": 0.526,
|
| 67 |
+
"acc_stderr,none": 0.022352791650914167
|
| 68 |
+
},
|
| 69 |
+
"xcopa_vi": {
|
| 70 |
+
"alias": " - xcopa_vi",
|
| 71 |
+
"acc,none": 0.538,
|
| 72 |
+
"acc_stderr,none": 0.022318338119870527
|
| 73 |
+
},
|
| 74 |
+
"xcopa_zh": {
|
| 75 |
+
"alias": " - xcopa_zh",
|
| 76 |
+
"acc,none": 0.538,
|
| 77 |
+
"acc_stderr,none": 0.02231833811987053
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
}
|
lm_eval/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096/pad_none/0.4.8_2025-03-14T11-51-55.842699_lambada_openai.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/RWKV-x070-World-0.1B-v2.8-20241210-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_openai"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.8",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": null,
|
| 11 |
+
"pad_token_ids": [],
|
| 12 |
+
"stop_token_ids": [
|
| 13 |
+
11,
|
| 14 |
+
261
|
| 15 |
+
],
|
| 16 |
+
"results": {
|
| 17 |
+
"lambada_openai": {
|
| 18 |
+
"alias": "lambada_openai",
|
| 19 |
+
"perplexity,none": 14.00703003680488,
|
| 20 |
+
"perplexity_stderr,none": 0.4392984349795351,
|
| 21 |
+
"acc,none": 0.4737046380749078,
|
| 22 |
+
"acc_stderr,none": 0.006956337791536673
|
| 23 |
+
}
|
| 24 |
+
}
|
| 25 |
+
}
|