Upload folder using huggingface_hub
Browse files- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/README.md +0 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/arc_challenge_2025-03-10T17-01-44.603102.json +27 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/arc_challenge_2025-03-11T22-06-12.454703.json +27 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/arc_easy_2025-03-10T17-00-09.413507.json +27 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/arc_easy_2025-03-11T22-04-44.986766.json +27 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/glue_2025-03-10T18-04-42.259056.json +78 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/glue_2025-03-12T10-56-43.655674.json +78 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/hellaswag_2025-03-10T16-52-48.967622.json +27 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/hellaswag_2025-03-11T22-00-37.986152.json +27 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/lambada_multilingual_2025-03-10T18-36-31.528678.json +62 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/lambada_multilingual_2025-03-12T11-43-36.700098.json +62 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/lambada_openai_2025-03-10T11-51-01.125594.json +16 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/lambada_openai_2025-03-10T11-56-47.764011.json +16 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/lambada_openai_2025-03-10T12-29-06.749140.json +16 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/lambada_openai_2025-03-10T13-05-32.078254.json +16 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/lambada_openai_2025-03-10T13-34-48.078208.json +16 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/lambada_openai_2025-03-11T21-36-27.701706.json +27 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/mmlu_2025-03-10T18-27-09.707005.json +330 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/mmlu_2025-03-12T11-34-06.812808.json +330 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/mmlu_2025-03-12T18-34-12.292504.json +330 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/pawsx_2025-03-10T18-52-23.966216.json +60 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/pawsx_2025-03-12T12-00-24.813841.json +60 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/piqa_2025-03-10T16-57-25.214478.json +27 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/piqa_2025-03-11T22-02-02.648211.json +27 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/rwkv7-g1-0.1b-20250307-ctx4096_blimp.json +360 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/sciq_2025-03-10T18-07-00.999754.json +27 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/sciq_2025-03-12T11-12-57.165000.json +27 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/winogrande_2025-03-10T18-05-31.048143.json +25 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/winogrande_2025-03-12T11-11-17.816730.json +25 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xcopa_2025-03-10T18-56-23.087378.json +80 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xcopa_2025-03-12T12-05-27.136379.json +80 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xnli_2025-03-10T22-52-22.630412.json +105 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xnli_2025-03-12T16-39-24.982478.json +105 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xstorycloze_2025-03-10T23-08-53.855464.json +80 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xstorycloze_2025-03-12T17-01-39.345687.json +80 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xwinograd_2025-03-10T23-11-45.464852.json +55 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xwinograd_2025-03-11T21-17-31.358555.json +55 -0
- lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xwinograd_2025-03-12T17-04-51.338405.json +55 -0
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/README.md
ADDED
|
File without changes
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/arc_challenge_2025-03-10T17-01-44.603102.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_challenge"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"pad_token_ids": [
|
| 11 |
+
0
|
| 12 |
+
],
|
| 13 |
+
"stop_token_ids": [
|
| 14 |
+
0,
|
| 15 |
+
261
|
| 16 |
+
],
|
| 17 |
+
"custom_prefix_token_id": 0,
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_challenge": {
|
| 20 |
+
"acc,none": 0.25,
|
| 21 |
+
"acc_stderr,none": 0.012653835621466646,
|
| 22 |
+
"acc_norm,none": 0.2815699658703072,
|
| 23 |
+
"acc_norm_stderr,none": 0.013143376735009022,
|
| 24 |
+
"alias": "arc_challenge"
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/arc_challenge_2025-03-11T22-06-12.454703.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_challenge"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_challenge": {
|
| 20 |
+
"acc,none": 0.2636518771331058,
|
| 21 |
+
"acc_stderr,none": 0.012875929151297044,
|
| 22 |
+
"acc_norm,none": 0.29436860068259385,
|
| 23 |
+
"acc_norm_stderr,none": 0.013318528460539422,
|
| 24 |
+
"alias": "arc_challenge"
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/arc_easy_2025-03-10T17-00-09.413507.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_easy"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"pad_token_ids": [
|
| 11 |
+
0
|
| 12 |
+
],
|
| 13 |
+
"stop_token_ids": [
|
| 14 |
+
0,
|
| 15 |
+
261
|
| 16 |
+
],
|
| 17 |
+
"custom_prefix_token_id": 0,
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_easy": {
|
| 20 |
+
"acc,none": 0.5597643097643098,
|
| 21 |
+
"acc_stderr,none": 0.010186228624515655,
|
| 22 |
+
"acc_norm,none": 0.47895622895622897,
|
| 23 |
+
"acc_norm_stderr,none": 0.010250692602022576,
|
| 24 |
+
"alias": "arc_easy"
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/arc_easy_2025-03-11T22-04-44.986766.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"arc_easy"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"arc_easy": {
|
| 20 |
+
"acc,none": 0.5841750841750841,
|
| 21 |
+
"acc_stderr,none": 0.010113348244647876,
|
| 22 |
+
"acc_norm,none": 0.5294612794612794,
|
| 23 |
+
"acc_norm_stderr,none": 0.010241957728409676,
|
| 24 |
+
"alias": "arc_easy"
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/glue_2025-03-10T18-04-42.259056.json
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"glue"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"pad_token_ids": [
|
| 11 |
+
0
|
| 12 |
+
],
|
| 13 |
+
"stop_token_ids": [
|
| 14 |
+
0,
|
| 15 |
+
261
|
| 16 |
+
],
|
| 17 |
+
"custom_prefix_token_id": 0,
|
| 18 |
+
"results": {
|
| 19 |
+
"glue": {
|
| 20 |
+
"acc,none": 0.37153108623153885,
|
| 21 |
+
"acc_stderr,none": 0.001850330650439922,
|
| 22 |
+
"f1,none": 0.535035415960775,
|
| 23 |
+
"f1_stderr,none": 0.002562079856834694,
|
| 24 |
+
"mcc,none": 0.03494082007962207,
|
| 25 |
+
"mcc_stderr,none": 0.031408601572730226,
|
| 26 |
+
"alias": "glue"
|
| 27 |
+
},
|
| 28 |
+
"cola": {
|
| 29 |
+
"mcc,none": 0.03494082007962207,
|
| 30 |
+
"mcc_stderr,none": 0.031408601572730226,
|
| 31 |
+
"alias": " - cola"
|
| 32 |
+
},
|
| 33 |
+
"mnli": {
|
| 34 |
+
"acc,none": 0.3182883341823739,
|
| 35 |
+
"acc_stderr,none": 0.004702054913568257,
|
| 36 |
+
"alias": " - mnli"
|
| 37 |
+
},
|
| 38 |
+
"mnli_mismatch": {
|
| 39 |
+
"acc,none": 0.31814483319772174,
|
| 40 |
+
"acc_stderr,none": 0.004697422861392529,
|
| 41 |
+
"alias": " - mnli_mismatch"
|
| 42 |
+
},
|
| 43 |
+
"mrpc": {
|
| 44 |
+
"acc,none": 0.4117647058823529,
|
| 45 |
+
"acc_stderr,none": 0.024395116363488303,
|
| 46 |
+
"f1,none": 0.38461538461538464,
|
| 47 |
+
"f1_stderr,none": 0.031397994005677686,
|
| 48 |
+
"alias": " - mrpc"
|
| 49 |
+
},
|
| 50 |
+
"qnli": {
|
| 51 |
+
"acc,none": 0.5167490389895661,
|
| 52 |
+
"acc_stderr,none": 0.006761613680941321,
|
| 53 |
+
"alias": " - qnli"
|
| 54 |
+
},
|
| 55 |
+
"qqp": {
|
| 56 |
+
"acc,none": 0.3706158792975513,
|
| 57 |
+
"acc_stderr,none": 0.002402002209823611,
|
| 58 |
+
"f1,none": 0.5365533821440279,
|
| 59 |
+
"f1_stderr,none": 0.002568480759283663,
|
| 60 |
+
"alias": " - qqp"
|
| 61 |
+
},
|
| 62 |
+
"rte": {
|
| 63 |
+
"acc,none": 0.49097472924187724,
|
| 64 |
+
"acc_stderr,none": 0.030091559826331334,
|
| 65 |
+
"alias": " - rte"
|
| 66 |
+
},
|
| 67 |
+
"sst2": {
|
| 68 |
+
"acc,none": 0.6387614678899083,
|
| 69 |
+
"acc_stderr,none": 0.01627636093868883,
|
| 70 |
+
"alias": " - sst2"
|
| 71 |
+
},
|
| 72 |
+
"wnli": {
|
| 73 |
+
"acc,none": 0.49295774647887325,
|
| 74 |
+
"acc_stderr,none": 0.05975550263548289,
|
| 75 |
+
"alias": " - wnli"
|
| 76 |
+
}
|
| 77 |
+
}
|
| 78 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/glue_2025-03-12T10-56-43.655674.json
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"glue"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"glue": {
|
| 20 |
+
"f1,none": 0.5346106647322794,
|
| 21 |
+
"f1_stderr,none": 0.002567335583973214,
|
| 22 |
+
"acc,none": 0.37517865650309673,
|
| 23 |
+
"acc_stderr,none": 0.0018509720446141336,
|
| 24 |
+
"mcc,none": 0.0024285045843052164,
|
| 25 |
+
"mcc_stderr,none": 0.030955008759027437,
|
| 26 |
+
"alias": "glue"
|
| 27 |
+
},
|
| 28 |
+
"cola": {
|
| 29 |
+
"mcc,none": 0.0024285045843052164,
|
| 30 |
+
"mcc_stderr,none": 0.03095500875902744,
|
| 31 |
+
"alias": " - cola"
|
| 32 |
+
},
|
| 33 |
+
"mnli": {
|
| 34 |
+
"acc,none": 0.31798267957208354,
|
| 35 |
+
"acc_stderr,none": 0.004700850152962885,
|
| 36 |
+
"alias": " - mnli"
|
| 37 |
+
},
|
| 38 |
+
"mnli_mismatch": {
|
| 39 |
+
"acc,none": 0.31814483319772174,
|
| 40 |
+
"acc_stderr,none": 0.004697422861392529,
|
| 41 |
+
"alias": " - mnli_mismatch"
|
| 42 |
+
},
|
| 43 |
+
"mrpc": {
|
| 44 |
+
"acc,none": 0.40441176470588236,
|
| 45 |
+
"acc_stderr,none": 0.024326954407515665,
|
| 46 |
+
"f1,none": 0.3816793893129771,
|
| 47 |
+
"f1_stderr,none": 0.03140466535806043,
|
| 48 |
+
"alias": " - mrpc"
|
| 49 |
+
},
|
| 50 |
+
"qnli": {
|
| 51 |
+
"acc,none": 0.518762584660443,
|
| 52 |
+
"acc_stderr,none": 0.006760645556775843,
|
| 53 |
+
"alias": " - qnli"
|
| 54 |
+
},
|
| 55 |
+
"qqp": {
|
| 56 |
+
"acc,none": 0.3746970071728914,
|
| 57 |
+
"acc_stderr,none": 0.0024073479491393064,
|
| 58 |
+
"f1,none": 0.5361539731757885,
|
| 59 |
+
"f1_stderr,none": 0.0025738212531611635,
|
| 60 |
+
"alias": " - qqp"
|
| 61 |
+
},
|
| 62 |
+
"rte": {
|
| 63 |
+
"acc,none": 0.4620938628158845,
|
| 64 |
+
"acc_stderr,none": 0.030009848912529113,
|
| 65 |
+
"alias": " - rte"
|
| 66 |
+
},
|
| 67 |
+
"sst2": {
|
| 68 |
+
"acc,none": 0.7282110091743119,
|
| 69 |
+
"acc_stderr,none": 0.01507424165684193,
|
| 70 |
+
"alias": " - sst2"
|
| 71 |
+
},
|
| 72 |
+
"wnli": {
|
| 73 |
+
"acc,none": 0.5633802816901409,
|
| 74 |
+
"acc_stderr,none": 0.0592793555841297,
|
| 75 |
+
"alias": " - wnli"
|
| 76 |
+
}
|
| 77 |
+
}
|
| 78 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/hellaswag_2025-03-10T16-52-48.967622.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"hellaswag"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"pad_token_ids": [
|
| 11 |
+
0
|
| 12 |
+
],
|
| 13 |
+
"stop_token_ids": [
|
| 14 |
+
0,
|
| 15 |
+
261
|
| 16 |
+
],
|
| 17 |
+
"custom_prefix_token_id": 0,
|
| 18 |
+
"results": {
|
| 19 |
+
"hellaswag": {
|
| 20 |
+
"acc,none": 0.34793865763792076,
|
| 21 |
+
"acc_stderr,none": 0.0047534298066454345,
|
| 22 |
+
"acc_norm,none": 0.4287990440151364,
|
| 23 |
+
"acc_norm_stderr,none": 0.004938930143234452,
|
| 24 |
+
"alias": "hellaswag"
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/hellaswag_2025-03-11T22-00-37.986152.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"hellaswag"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"hellaswag": {
|
| 20 |
+
"acc,none": 0.34793865763792076,
|
| 21 |
+
"acc_stderr,none": 0.004753429806645434,
|
| 22 |
+
"acc_norm,none": 0.4279028082055367,
|
| 23 |
+
"acc_norm_stderr,none": 0.004937635112830291,
|
| 24 |
+
"alias": "hellaswag"
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/lambada_multilingual_2025-03-10T18-36-31.528678.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_multilingual"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"pad_token_ids": [
|
| 11 |
+
0
|
| 12 |
+
],
|
| 13 |
+
"stop_token_ids": [
|
| 14 |
+
0,
|
| 15 |
+
261
|
| 16 |
+
],
|
| 17 |
+
"custom_prefix_token_id": 0,
|
| 18 |
+
"results": {
|
| 19 |
+
"lambada_multilingual": {
|
| 20 |
+
"acc,none": 0.3149233456239084,
|
| 21 |
+
"acc_stderr,none": 0.0028367730218709125,
|
| 22 |
+
"perplexity,none": 172.520659113721,
|
| 23 |
+
"perplexity_stderr,none": 5.367739924435885,
|
| 24 |
+
"alias": "lambada_multilingual"
|
| 25 |
+
},
|
| 26 |
+
"lambada_openai_mt_de": {
|
| 27 |
+
"perplexity,none": 217.9862646485565,
|
| 28 |
+
"perplexity_stderr,none": 13.330594833675963,
|
| 29 |
+
"acc,none": 0.24180089268387348,
|
| 30 |
+
"acc_stderr,none": 0.005965305048434238,
|
| 31 |
+
"alias": " - lambada_openai_mt_de"
|
| 32 |
+
},
|
| 33 |
+
"lambada_openai_mt_en": {
|
| 34 |
+
"perplexity,none": 12.364936956333898,
|
| 35 |
+
"perplexity_stderr,none": 0.3764505210612126,
|
| 36 |
+
"acc,none": 0.49117019212109453,
|
| 37 |
+
"acc_stderr,none": 0.006964891360529504,
|
| 38 |
+
"alias": " - lambada_openai_mt_en"
|
| 39 |
+
},
|
| 40 |
+
"lambada_openai_mt_es": {
|
| 41 |
+
"perplexity,none": 270.97982131432593,
|
| 42 |
+
"perplexity_stderr,none": 16.264671005675716,
|
| 43 |
+
"acc,none": 0.24936929943722103,
|
| 44 |
+
"acc_stderr,none": 0.006027631959331146,
|
| 45 |
+
"alias": " - lambada_openai_mt_es"
|
| 46 |
+
},
|
| 47 |
+
"lambada_openai_mt_fr": {
|
| 48 |
+
"perplexity,none": 131.45772697351666,
|
| 49 |
+
"perplexity_stderr,none": 7.755992287298453,
|
| 50 |
+
"acc,none": 0.31651465165922765,
|
| 51 |
+
"acc_stderr,none": 0.006479978824925184,
|
| 52 |
+
"alias": " - lambada_openai_mt_fr"
|
| 53 |
+
},
|
| 54 |
+
"lambada_openai_mt_it": {
|
| 55 |
+
"perplexity,none": 229.81454567587204,
|
| 56 |
+
"perplexity_stderr,none": 14.757180777900542,
|
| 57 |
+
"acc,none": 0.27576169221812535,
|
| 58 |
+
"acc_stderr,none": 0.0062261529732960475,
|
| 59 |
+
"alias": " - lambada_openai_mt_it"
|
| 60 |
+
}
|
| 61 |
+
}
|
| 62 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/lambada_multilingual_2025-03-12T11-43-36.700098.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_multilingual"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"lambada_multilingual": {
|
| 20 |
+
"acc,none": 0.3162817776052785,
|
| 21 |
+
"acc_stderr,none": 0.0028416091446470397,
|
| 22 |
+
"perplexity,none": 170.41860290665036,
|
| 23 |
+
"perplexity_stderr,none": 5.292397572601908,
|
| 24 |
+
"alias": "lambada_multilingual"
|
| 25 |
+
},
|
| 26 |
+
"lambada_openai_mt_de": {
|
| 27 |
+
"perplexity,none": 213.62853047491745,
|
| 28 |
+
"perplexity_stderr,none": 13.048252432195138,
|
| 29 |
+
"acc,none": 0.246070250339608,
|
| 30 |
+
"acc_stderr,none": 0.006000771208158281,
|
| 31 |
+
"alias": " - lambada_openai_mt_de"
|
| 32 |
+
},
|
| 33 |
+
"lambada_openai_mt_en": {
|
| 34 |
+
"perplexity,none": 12.595873481361684,
|
| 35 |
+
"perplexity_stderr,none": 0.38225300601901746,
|
| 36 |
+
"acc,none": 0.48981176013972444,
|
| 37 |
+
"acc_stderr,none": 0.006964531366864929,
|
| 38 |
+
"alias": " - lambada_openai_mt_en"
|
| 39 |
+
},
|
| 40 |
+
"lambada_openai_mt_es": {
|
| 41 |
+
"perplexity,none": 270.8065908859296,
|
| 42 |
+
"perplexity_stderr,none": 16.220288193051754,
|
| 43 |
+
"acc,none": 0.24665243547448087,
|
| 44 |
+
"acc_stderr,none": 0.00600554563121515,
|
| 45 |
+
"alias": " - lambada_openai_mt_es"
|
| 46 |
+
},
|
| 47 |
+
"lambada_openai_mt_fr": {
|
| 48 |
+
"perplexity,none": 129.6995246749841,
|
| 49 |
+
"perplexity_stderr,none": 7.629690056718077,
|
| 50 |
+
"acc,none": 0.3188433921987192,
|
| 51 |
+
"acc_stderr,none": 0.006492684061449839,
|
| 52 |
+
"alias": " - lambada_openai_mt_fr"
|
| 53 |
+
},
|
| 54 |
+
"lambada_openai_mt_it": {
|
| 55 |
+
"perplexity,none": 225.36249501605897,
|
| 56 |
+
"perplexity_stderr,none": 14.440355720685359,
|
| 57 |
+
"acc,none": 0.2800310498738599,
|
| 58 |
+
"acc_stderr,none": 0.006255644360929012,
|
| 59 |
+
"alias": " - lambada_openai_mt_it"
|
| 60 |
+
}
|
| 61 |
+
}
|
| 62 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/lambada_openai_2025-03-10T11-51-01.125594.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_openai"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"results": {
|
| 8 |
+
"lambada_openai": {
|
| 9 |
+
"perplexity,none": 12.595873463983008,
|
| 10 |
+
"perplexity_stderr,none": 0.38225300476755747,
|
| 11 |
+
"acc,none": 0.48981176013972444,
|
| 12 |
+
"acc_stderr,none": 0.006964531366864929,
|
| 13 |
+
"alias": "lambada_openai"
|
| 14 |
+
}
|
| 15 |
+
}
|
| 16 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/lambada_openai_2025-03-10T11-56-47.764011.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_openai"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"results": {
|
| 8 |
+
"lambada_openai": {
|
| 9 |
+
"perplexity,none": 12.595873481361684,
|
| 10 |
+
"perplexity_stderr,none": 0.38225300601901746,
|
| 11 |
+
"acc,none": 0.48981176013972444,
|
| 12 |
+
"acc_stderr,none": 0.006964531366864929,
|
| 13 |
+
"alias": "lambada_openai"
|
| 14 |
+
}
|
| 15 |
+
}
|
| 16 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/lambada_openai_2025-03-10T12-29-06.749140.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_openai"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"results": {
|
| 8 |
+
"lambada_openai": {
|
| 9 |
+
"perplexity,none": 12.595873463983008,
|
| 10 |
+
"perplexity_stderr,none": 0.38225300476755747,
|
| 11 |
+
"acc,none": 0.48981176013972444,
|
| 12 |
+
"acc_stderr,none": 0.006964531366864929,
|
| 13 |
+
"alias": "lambada_openai"
|
| 14 |
+
}
|
| 15 |
+
}
|
| 16 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/lambada_openai_2025-03-10T13-05-32.078254.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_openai"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"results": {
|
| 8 |
+
"lambada_openai": {
|
| 9 |
+
"perplexity,none": 12.595873481361684,
|
| 10 |
+
"perplexity_stderr,none": 0.38225300601901746,
|
| 11 |
+
"acc,none": 0.48981176013972444,
|
| 12 |
+
"acc_stderr,none": 0.006964531366864929,
|
| 13 |
+
"alias": "lambada_openai"
|
| 14 |
+
}
|
| 15 |
+
}
|
| 16 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/lambada_openai_2025-03-10T13-34-48.078208.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_openai"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"results": {
|
| 8 |
+
"lambada_openai": {
|
| 9 |
+
"perplexity,none": 12.364936961529702,
|
| 10 |
+
"perplexity_stderr,none": 0.376450521079655,
|
| 11 |
+
"acc,none": 0.49117019212109453,
|
| 12 |
+
"acc_stderr,none": 0.006964891360529504,
|
| 13 |
+
"alias": "lambada_openai"
|
| 14 |
+
}
|
| 15 |
+
}
|
| 16 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/lambada_openai_2025-03-11T21-36-27.701706.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"lambada_openai"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"lambada_openai": {
|
| 20 |
+
"perplexity,none": 12.595873481361684,
|
| 21 |
+
"perplexity_stderr,none": 0.38225300601901746,
|
| 22 |
+
"acc,none": 0.48981176013972444,
|
| 23 |
+
"acc_stderr,none": 0.006964531366864929,
|
| 24 |
+
"alias": "lambada_openai"
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/mmlu_2025-03-10T18-27-09.707005.json
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"mmlu"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"pad_token_ids": [
|
| 11 |
+
0
|
| 12 |
+
],
|
| 13 |
+
"stop_token_ids": [
|
| 14 |
+
0,
|
| 15 |
+
261
|
| 16 |
+
],
|
| 17 |
+
"custom_prefix_token_id": 0,
|
| 18 |
+
"results": {
|
| 19 |
+
"mmlu": {
|
| 20 |
+
"acc,none": 0.2506053268765133,
|
| 21 |
+
"acc_stderr,none": 0.0036568725365202018,
|
| 22 |
+
"alias": "mmlu"
|
| 23 |
+
},
|
| 24 |
+
"mmlu_humanities": {
|
| 25 |
+
"alias": " - humanities",
|
| 26 |
+
"acc,none": 0.2516471838469713,
|
| 27 |
+
"acc_stderr,none": 0.006330269741621689
|
| 28 |
+
},
|
| 29 |
+
"mmlu_formal_logic": {
|
| 30 |
+
"alias": " - formal_logic",
|
| 31 |
+
"acc,none": 0.25396825396825395,
|
| 32 |
+
"acc_stderr,none": 0.03893259610604673
|
| 33 |
+
},
|
| 34 |
+
"mmlu_high_school_european_history": {
|
| 35 |
+
"alias": " - high_school_european_history",
|
| 36 |
+
"acc,none": 0.28484848484848485,
|
| 37 |
+
"acc_stderr,none": 0.03524390844511782
|
| 38 |
+
},
|
| 39 |
+
"mmlu_high_school_us_history": {
|
| 40 |
+
"alias": " - high_school_us_history",
|
| 41 |
+
"acc,none": 0.25980392156862747,
|
| 42 |
+
"acc_stderr,none": 0.030778554678693268
|
| 43 |
+
},
|
| 44 |
+
"mmlu_high_school_world_history": {
|
| 45 |
+
"alias": " - high_school_world_history",
|
| 46 |
+
"acc,none": 0.2616033755274262,
|
| 47 |
+
"acc_stderr,none": 0.028609516716994927
|
| 48 |
+
},
|
| 49 |
+
"mmlu_international_law": {
|
| 50 |
+
"alias": " - international_law",
|
| 51 |
+
"acc,none": 0.256198347107438,
|
| 52 |
+
"acc_stderr,none": 0.03984979653302871
|
| 53 |
+
},
|
| 54 |
+
"mmlu_jurisprudence": {
|
| 55 |
+
"alias": " - jurisprudence",
|
| 56 |
+
"acc,none": 0.26851851851851855,
|
| 57 |
+
"acc_stderr,none": 0.04284467968052191
|
| 58 |
+
},
|
| 59 |
+
"mmlu_logical_fallacies": {
|
| 60 |
+
"alias": " - logical_fallacies",
|
| 61 |
+
"acc,none": 0.2392638036809816,
|
| 62 |
+
"acc_stderr,none": 0.0335195387952127
|
| 63 |
+
},
|
| 64 |
+
"mmlu_moral_disputes": {
|
| 65 |
+
"alias": " - moral_disputes",
|
| 66 |
+
"acc,none": 0.2514450867052023,
|
| 67 |
+
"acc_stderr,none": 0.02335736578587403
|
| 68 |
+
},
|
| 69 |
+
"mmlu_moral_scenarios": {
|
| 70 |
+
"alias": " - moral_scenarios",
|
| 71 |
+
"acc,none": 0.264804469273743,
|
| 72 |
+
"acc_stderr,none": 0.014756906483260666
|
| 73 |
+
},
|
| 74 |
+
"mmlu_philosophy": {
|
| 75 |
+
"alias": " - philosophy",
|
| 76 |
+
"acc,none": 0.20257234726688103,
|
| 77 |
+
"acc_stderr,none": 0.022827317491059672
|
| 78 |
+
},
|
| 79 |
+
"mmlu_prehistory": {
|
| 80 |
+
"alias": " - prehistory",
|
| 81 |
+
"acc,none": 0.23148148148148148,
|
| 82 |
+
"acc_stderr,none": 0.023468429832451166
|
| 83 |
+
},
|
| 84 |
+
"mmlu_professional_law": {
|
| 85 |
+
"alias": " - professional_law",
|
| 86 |
+
"acc,none": 0.2542372881355932,
|
| 87 |
+
"acc_stderr,none": 0.011121129007840671
|
| 88 |
+
},
|
| 89 |
+
"mmlu_world_religions": {
|
| 90 |
+
"alias": " - world_religions",
|
| 91 |
+
"acc,none": 0.22807017543859648,
|
| 92 |
+
"acc_stderr,none": 0.032180937956023566
|
| 93 |
+
},
|
| 94 |
+
"mmlu_other": {
|
| 95 |
+
"alias": " - other",
|
| 96 |
+
"acc,none": 0.26359832635983266,
|
| 97 |
+
"acc_stderr,none": 0.007894767937155992
|
| 98 |
+
},
|
| 99 |
+
"mmlu_business_ethics": {
|
| 100 |
+
"alias": " - business_ethics",
|
| 101 |
+
"acc,none": 0.23,
|
| 102 |
+
"acc_stderr,none": 0.04229525846816506
|
| 103 |
+
},
|
| 104 |
+
"mmlu_clinical_knowledge": {
|
| 105 |
+
"alias": " - clinical_knowledge",
|
| 106 |
+
"acc,none": 0.22641509433962265,
|
| 107 |
+
"acc_stderr,none": 0.025757559893106723
|
| 108 |
+
},
|
| 109 |
+
"mmlu_college_medicine": {
|
| 110 |
+
"alias": " - college_medicine",
|
| 111 |
+
"acc,none": 0.24277456647398843,
|
| 112 |
+
"acc_stderr,none": 0.0326926380614177
|
| 113 |
+
},
|
| 114 |
+
"mmlu_global_facts": {
|
| 115 |
+
"alias": " - global_facts",
|
| 116 |
+
"acc,none": 0.28,
|
| 117 |
+
"acc_stderr,none": 0.04512608598542127
|
| 118 |
+
},
|
| 119 |
+
"mmlu_human_aging": {
|
| 120 |
+
"alias": " - human_aging",
|
| 121 |
+
"acc,none": 0.3273542600896861,
|
| 122 |
+
"acc_stderr,none": 0.03149384670994131
|
| 123 |
+
},
|
| 124 |
+
"mmlu_management": {
|
| 125 |
+
"alias": " - management",
|
| 126 |
+
"acc,none": 0.2912621359223301,
|
| 127 |
+
"acc_stderr,none": 0.044986763205729224
|
| 128 |
+
},
|
| 129 |
+
"mmlu_marketing": {
|
| 130 |
+
"alias": " - marketing",
|
| 131 |
+
"acc,none": 0.27350427350427353,
|
| 132 |
+
"acc_stderr,none": 0.029202540153431166
|
| 133 |
+
},
|
| 134 |
+
"mmlu_medical_genetics": {
|
| 135 |
+
"alias": " - medical_genetics",
|
| 136 |
+
"acc,none": 0.3,
|
| 137 |
+
"acc_stderr,none": 0.046056618647183814
|
| 138 |
+
},
|
| 139 |
+
"mmlu_miscellaneous": {
|
| 140 |
+
"alias": " - miscellaneous",
|
| 141 |
+
"acc,none": 0.27458492975734355,
|
| 142 |
+
"acc_stderr,none": 0.015959829933084032
|
| 143 |
+
},
|
| 144 |
+
"mmlu_nutrition": {
|
| 145 |
+
"alias": " - nutrition",
|
| 146 |
+
"acc,none": 0.2875816993464052,
|
| 147 |
+
"acc_stderr,none": 0.02591780611714716
|
| 148 |
+
},
|
| 149 |
+
"mmlu_professional_accounting": {
|
| 150 |
+
"alias": " - professional_accounting",
|
| 151 |
+
"acc,none": 0.25177304964539005,
|
| 152 |
+
"acc_stderr,none": 0.0258921511567094
|
| 153 |
+
},
|
| 154 |
+
"mmlu_professional_medicine": {
|
| 155 |
+
"alias": " - professional_medicine",
|
| 156 |
+
"acc,none": 0.1801470588235294,
|
| 157 |
+
"acc_stderr,none": 0.02334516361654484
|
| 158 |
+
},
|
| 159 |
+
"mmlu_virology": {
|
| 160 |
+
"alias": " - virology",
|
| 161 |
+
"acc,none": 0.27710843373493976,
|
| 162 |
+
"acc_stderr,none": 0.034843315926805875
|
| 163 |
+
},
|
| 164 |
+
"mmlu_social_sciences": {
|
| 165 |
+
"alias": " - social_sciences",
|
| 166 |
+
"acc,none": 0.24406889827754305,
|
| 167 |
+
"acc_stderr,none": 0.007741968672346153
|
| 168 |
+
},
|
| 169 |
+
"mmlu_econometrics": {
|
| 170 |
+
"alias": " - econometrics",
|
| 171 |
+
"acc,none": 0.21929824561403508,
|
| 172 |
+
"acc_stderr,none": 0.03892431106518752
|
| 173 |
+
},
|
| 174 |
+
"mmlu_high_school_geography": {
|
| 175 |
+
"alias": " - high_school_geography",
|
| 176 |
+
"acc,none": 0.25252525252525254,
|
| 177 |
+
"acc_stderr,none": 0.030954055470365907
|
| 178 |
+
},
|
| 179 |
+
"mmlu_high_school_government_and_politics": {
|
| 180 |
+
"alias": " - high_school_government_and_politics",
|
| 181 |
+
"acc,none": 0.24870466321243523,
|
| 182 |
+
"acc_stderr,none": 0.03119584087770029
|
| 183 |
+
},
|
| 184 |
+
"mmlu_high_school_macroeconomics": {
|
| 185 |
+
"alias": " - high_school_macroeconomics",
|
| 186 |
+
"acc,none": 0.2230769230769231,
|
| 187 |
+
"acc_stderr,none": 0.02110773012724399
|
| 188 |
+
},
|
| 189 |
+
"mmlu_high_school_microeconomics": {
|
| 190 |
+
"alias": " - high_school_microeconomics",
|
| 191 |
+
"acc,none": 0.27310924369747897,
|
| 192 |
+
"acc_stderr,none": 0.028942004040998164
|
| 193 |
+
},
|
| 194 |
+
"mmlu_high_school_psychology": {
|
| 195 |
+
"alias": " - high_school_psychology",
|
| 196 |
+
"acc,none": 0.22752293577981653,
|
| 197 |
+
"acc_stderr,none": 0.0179744635787765
|
| 198 |
+
},
|
| 199 |
+
"mmlu_human_sexuality": {
|
| 200 |
+
"alias": " - human_sexuality",
|
| 201 |
+
"acc,none": 0.19083969465648856,
|
| 202 |
+
"acc_stderr,none": 0.03446513350752598
|
| 203 |
+
},
|
| 204 |
+
"mmlu_professional_psychology": {
|
| 205 |
+
"alias": " - professional_psychology",
|
| 206 |
+
"acc,none": 0.28104575163398693,
|
| 207 |
+
"acc_stderr,none": 0.018185218954318075
|
| 208 |
+
},
|
| 209 |
+
"mmlu_public_relations": {
|
| 210 |
+
"alias": " - public_relations",
|
| 211 |
+
"acc,none": 0.2909090909090909,
|
| 212 |
+
"acc_stderr,none": 0.04350271442923243
|
| 213 |
+
},
|
| 214 |
+
"mmlu_security_studies": {
|
| 215 |
+
"alias": " - security_studies",
|
| 216 |
+
"acc,none": 0.21224489795918366,
|
| 217 |
+
"acc_stderr,none": 0.026176967197866767
|
| 218 |
+
},
|
| 219 |
+
"mmlu_sociology": {
|
| 220 |
+
"alias": " - sociology",
|
| 221 |
+
"acc,none": 0.24875621890547264,
|
| 222 |
+
"acc_stderr,none": 0.030567675938916718
|
| 223 |
+
},
|
| 224 |
+
"mmlu_us_foreign_policy": {
|
| 225 |
+
"alias": " - us_foreign_policy",
|
| 226 |
+
"acc,none": 0.21,
|
| 227 |
+
"acc_stderr,none": 0.040936018074033256
|
| 228 |
+
},
|
| 229 |
+
"mmlu_stem": {
|
| 230 |
+
"alias": " - stem",
|
| 231 |
+
"acc,none": 0.24262607040913417,
|
| 232 |
+
"acc_stderr,none": 0.007641847233625673
|
| 233 |
+
},
|
| 234 |
+
"mmlu_abstract_algebra": {
|
| 235 |
+
"alias": " - abstract_algebra",
|
| 236 |
+
"acc,none": 0.25,
|
| 237 |
+
"acc_stderr,none": 0.04351941398892446
|
| 238 |
+
},
|
| 239 |
+
"mmlu_anatomy": {
|
| 240 |
+
"alias": " - anatomy",
|
| 241 |
+
"acc,none": 0.2074074074074074,
|
| 242 |
+
"acc_stderr,none": 0.03502553170678317
|
| 243 |
+
},
|
| 244 |
+
"mmlu_astronomy": {
|
| 245 |
+
"alias": " - astronomy",
|
| 246 |
+
"acc,none": 0.21052631578947367,
|
| 247 |
+
"acc_stderr,none": 0.03317672787533157
|
| 248 |
+
},
|
| 249 |
+
"mmlu_college_biology": {
|
| 250 |
+
"alias": " - college_biology",
|
| 251 |
+
"acc,none": 0.2986111111111111,
|
| 252 |
+
"acc_stderr,none": 0.03827052357950756
|
| 253 |
+
},
|
| 254 |
+
"mmlu_college_chemistry": {
|
| 255 |
+
"alias": " - college_chemistry",
|
| 256 |
+
"acc,none": 0.2,
|
| 257 |
+
"acc_stderr,none": 0.040201512610368445
|
| 258 |
+
},
|
| 259 |
+
"mmlu_college_computer_science": {
|
| 260 |
+
"alias": " - college_computer_science",
|
| 261 |
+
"acc,none": 0.25,
|
| 262 |
+
"acc_stderr,none": 0.04351941398892446
|
| 263 |
+
},
|
| 264 |
+
"mmlu_college_mathematics": {
|
| 265 |
+
"alias": " - college_mathematics",
|
| 266 |
+
"acc,none": 0.27,
|
| 267 |
+
"acc_stderr,none": 0.044619604333847394
|
| 268 |
+
},
|
| 269 |
+
"mmlu_college_physics": {
|
| 270 |
+
"alias": " - college_physics",
|
| 271 |
+
"acc,none": 0.23529411764705882,
|
| 272 |
+
"acc_stderr,none": 0.04220773659171451
|
| 273 |
+
},
|
| 274 |
+
"mmlu_computer_security": {
|
| 275 |
+
"alias": " - computer_security",
|
| 276 |
+
"acc,none": 0.26,
|
| 277 |
+
"acc_stderr,none": 0.04408440022768079
|
| 278 |
+
},
|
| 279 |
+
"mmlu_conceptual_physics": {
|
| 280 |
+
"alias": " - conceptual_physics",
|
| 281 |
+
"acc,none": 0.2297872340425532,
|
| 282 |
+
"acc_stderr,none": 0.02750175294441242
|
| 283 |
+
},
|
| 284 |
+
"mmlu_electrical_engineering": {
|
| 285 |
+
"alias": " - electrical_engineering",
|
| 286 |
+
"acc,none": 0.25517241379310346,
|
| 287 |
+
"acc_stderr,none": 0.03632984052707841
|
| 288 |
+
},
|
| 289 |
+
"mmlu_elementary_mathematics": {
|
| 290 |
+
"alias": " - elementary_mathematics",
|
| 291 |
+
"acc,none": 0.2671957671957672,
|
| 292 |
+
"acc_stderr,none": 0.02278967314577656
|
| 293 |
+
},
|
| 294 |
+
"mmlu_high_school_biology": {
|
| 295 |
+
"alias": " - high_school_biology",
|
| 296 |
+
"acc,none": 0.24193548387096775,
|
| 297 |
+
"acc_stderr,none": 0.024362599693031093
|
| 298 |
+
},
|
| 299 |
+
"mmlu_high_school_chemistry": {
|
| 300 |
+
"alias": " - high_school_chemistry",
|
| 301 |
+
"acc,none": 0.2315270935960591,
|
| 302 |
+
"acc_stderr,none": 0.029678333141444455
|
| 303 |
+
},
|
| 304 |
+
"mmlu_high_school_computer_science": {
|
| 305 |
+
"alias": " - high_school_computer_science",
|
| 306 |
+
"acc,none": 0.26,
|
| 307 |
+
"acc_stderr,none": 0.04408440022768078
|
| 308 |
+
},
|
| 309 |
+
"mmlu_high_school_mathematics": {
|
| 310 |
+
"alias": " - high_school_mathematics",
|
| 311 |
+
"acc,none": 0.24074074074074073,
|
| 312 |
+
"acc_stderr,none": 0.026067159222275798
|
| 313 |
+
},
|
| 314 |
+
"mmlu_high_school_physics": {
|
| 315 |
+
"alias": " - high_school_physics",
|
| 316 |
+
"acc,none": 0.23841059602649006,
|
| 317 |
+
"acc_stderr,none": 0.0347918557259966
|
| 318 |
+
},
|
| 319 |
+
"mmlu_high_school_statistics": {
|
| 320 |
+
"alias": " - high_school_statistics",
|
| 321 |
+
"acc,none": 0.18981481481481483,
|
| 322 |
+
"acc_stderr,none": 0.026744714834691933
|
| 323 |
+
},
|
| 324 |
+
"mmlu_machine_learning": {
|
| 325 |
+
"alias": " - machine_learning",
|
| 326 |
+
"acc,none": 0.29464285714285715,
|
| 327 |
+
"acc_stderr,none": 0.04327040932578728
|
| 328 |
+
}
|
| 329 |
+
}
|
| 330 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/mmlu_2025-03-12T11-34-06.812808.json
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"mmlu"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"mmlu": {
|
| 20 |
+
"acc,none": 0.25231448511608034,
|
| 21 |
+
"acc_stderr,none": 0.003663630582486529,
|
| 22 |
+
"alias": "mmlu"
|
| 23 |
+
},
|
| 24 |
+
"mmlu_humanities": {
|
| 25 |
+
"alias": " - humanities",
|
| 26 |
+
"acc,none": 0.24930924548352817,
|
| 27 |
+
"acc_stderr,none": 0.006310523103517653
|
| 28 |
+
},
|
| 29 |
+
"mmlu_formal_logic": {
|
| 30 |
+
"alias": " - formal_logic",
|
| 31 |
+
"acc,none": 0.2222222222222222,
|
| 32 |
+
"acc_stderr,none": 0.037184890068181146
|
| 33 |
+
},
|
| 34 |
+
"mmlu_high_school_european_history": {
|
| 35 |
+
"alias": " - high_school_european_history",
|
| 36 |
+
"acc,none": 0.2727272727272727,
|
| 37 |
+
"acc_stderr,none": 0.03477691162163659
|
| 38 |
+
},
|
| 39 |
+
"mmlu_high_school_us_history": {
|
| 40 |
+
"alias": " - high_school_us_history",
|
| 41 |
+
"acc,none": 0.2647058823529412,
|
| 42 |
+
"acc_stderr,none": 0.03096451792692341
|
| 43 |
+
},
|
| 44 |
+
"mmlu_high_school_world_history": {
|
| 45 |
+
"alias": " - high_school_world_history",
|
| 46 |
+
"acc,none": 0.26582278481012656,
|
| 47 |
+
"acc_stderr,none": 0.028756799629658342
|
| 48 |
+
},
|
| 49 |
+
"mmlu_international_law": {
|
| 50 |
+
"alias": " - international_law",
|
| 51 |
+
"acc,none": 0.2644628099173554,
|
| 52 |
+
"acc_stderr,none": 0.04026187527591206
|
| 53 |
+
},
|
| 54 |
+
"mmlu_jurisprudence": {
|
| 55 |
+
"alias": " - jurisprudence",
|
| 56 |
+
"acc,none": 0.2962962962962963,
|
| 57 |
+
"acc_stderr,none": 0.04414343666854933
|
| 58 |
+
},
|
| 59 |
+
"mmlu_logical_fallacies": {
|
| 60 |
+
"alias": " - logical_fallacies",
|
| 61 |
+
"acc,none": 0.26380368098159507,
|
| 62 |
+
"acc_stderr,none": 0.03462419931615624
|
| 63 |
+
},
|
| 64 |
+
"mmlu_moral_disputes": {
|
| 65 |
+
"alias": " - moral_disputes",
|
| 66 |
+
"acc,none": 0.21676300578034682,
|
| 67 |
+
"acc_stderr,none": 0.022183477668412856
|
| 68 |
+
},
|
| 69 |
+
"mmlu_moral_scenarios": {
|
| 70 |
+
"alias": " - moral_scenarios",
|
| 71 |
+
"acc,none": 0.25139664804469275,
|
| 72 |
+
"acc_stderr,none": 0.014508979453553979
|
| 73 |
+
},
|
| 74 |
+
"mmlu_philosophy": {
|
| 75 |
+
"alias": " - philosophy",
|
| 76 |
+
"acc,none": 0.21864951768488747,
|
| 77 |
+
"acc_stderr,none": 0.023475581417861102
|
| 78 |
+
},
|
| 79 |
+
"mmlu_prehistory": {
|
| 80 |
+
"alias": " - prehistory",
|
| 81 |
+
"acc,none": 0.23765432098765432,
|
| 82 |
+
"acc_stderr,none": 0.023683591837008557
|
| 83 |
+
},
|
| 84 |
+
"mmlu_professional_law": {
|
| 85 |
+
"alias": " - professional_law",
|
| 86 |
+
"acc,none": 0.25554106910039115,
|
| 87 |
+
"acc_stderr,none": 0.011139857833598511
|
| 88 |
+
},
|
| 89 |
+
"mmlu_world_religions": {
|
| 90 |
+
"alias": " - world_religions",
|
| 91 |
+
"acc,none": 0.22807017543859648,
|
| 92 |
+
"acc_stderr,none": 0.03218093795602357
|
| 93 |
+
},
|
| 94 |
+
"mmlu_other": {
|
| 95 |
+
"alias": " - other",
|
| 96 |
+
"acc,none": 0.26263276472481495,
|
| 97 |
+
"acc_stderr,none": 0.007885730301877262
|
| 98 |
+
},
|
| 99 |
+
"mmlu_business_ethics": {
|
| 100 |
+
"alias": " - business_ethics",
|
| 101 |
+
"acc,none": 0.27,
|
| 102 |
+
"acc_stderr,none": 0.044619604333847394
|
| 103 |
+
},
|
| 104 |
+
"mmlu_clinical_knowledge": {
|
| 105 |
+
"alias": " - clinical_knowledge",
|
| 106 |
+
"acc,none": 0.2490566037735849,
|
| 107 |
+
"acc_stderr,none": 0.026616482980501704
|
| 108 |
+
},
|
| 109 |
+
"mmlu_college_medicine": {
|
| 110 |
+
"alias": " - college_medicine",
|
| 111 |
+
"acc,none": 0.24855491329479767,
|
| 112 |
+
"acc_stderr,none": 0.03295304696818318
|
| 113 |
+
},
|
| 114 |
+
"mmlu_global_facts": {
|
| 115 |
+
"alias": " - global_facts",
|
| 116 |
+
"acc,none": 0.25,
|
| 117 |
+
"acc_stderr,none": 0.04351941398892446
|
| 118 |
+
},
|
| 119 |
+
"mmlu_human_aging": {
|
| 120 |
+
"alias": " - human_aging",
|
| 121 |
+
"acc,none": 0.34977578475336324,
|
| 122 |
+
"acc_stderr,none": 0.03200736719484503
|
| 123 |
+
},
|
| 124 |
+
"mmlu_management": {
|
| 125 |
+
"alias": " - management",
|
| 126 |
+
"acc,none": 0.3106796116504854,
|
| 127 |
+
"acc_stderr,none": 0.045821241601615506
|
| 128 |
+
},
|
| 129 |
+
"mmlu_marketing": {
|
| 130 |
+
"alias": " - marketing",
|
| 131 |
+
"acc,none": 0.27350427350427353,
|
| 132 |
+
"acc_stderr,none": 0.029202540153431166
|
| 133 |
+
},
|
| 134 |
+
"mmlu_medical_genetics": {
|
| 135 |
+
"alias": " - medical_genetics",
|
| 136 |
+
"acc,none": 0.25,
|
| 137 |
+
"acc_stderr,none": 0.04351941398892446
|
| 138 |
+
},
|
| 139 |
+
"mmlu_miscellaneous": {
|
| 140 |
+
"alias": " - miscellaneous",
|
| 141 |
+
"acc,none": 0.27330779054916987,
|
| 142 |
+
"acc_stderr,none": 0.01593668106262856
|
| 143 |
+
},
|
| 144 |
+
"mmlu_nutrition": {
|
| 145 |
+
"alias": " - nutrition",
|
| 146 |
+
"acc,none": 0.25163398692810457,
|
| 147 |
+
"acc_stderr,none": 0.024848018263875192
|
| 148 |
+
},
|
| 149 |
+
"mmlu_professional_accounting": {
|
| 150 |
+
"alias": " - professional_accounting",
|
| 151 |
+
"acc,none": 0.24113475177304963,
|
| 152 |
+
"acc_stderr,none": 0.025518731049537766
|
| 153 |
+
},
|
| 154 |
+
"mmlu_professional_medicine": {
|
| 155 |
+
"alias": " - professional_medicine",
|
| 156 |
+
"acc,none": 0.1875,
|
| 157 |
+
"acc_stderr,none": 0.023709788253811766
|
| 158 |
+
},
|
| 159 |
+
"mmlu_virology": {
|
| 160 |
+
"alias": " - virology",
|
| 161 |
+
"acc,none": 0.27710843373493976,
|
| 162 |
+
"acc_stderr,none": 0.034843315926805875
|
| 163 |
+
},
|
| 164 |
+
"mmlu_social_sciences": {
|
| 165 |
+
"alias": " - social_sciences",
|
| 166 |
+
"acc,none": 0.2463438414039649,
|
| 167 |
+
"acc_stderr,none": 0.007756497563842694
|
| 168 |
+
},
|
| 169 |
+
"mmlu_econometrics": {
|
| 170 |
+
"alias": " - econometrics",
|
| 171 |
+
"acc,none": 0.23684210526315788,
|
| 172 |
+
"acc_stderr,none": 0.039994238792813365
|
| 173 |
+
},
|
| 174 |
+
"mmlu_high_school_geography": {
|
| 175 |
+
"alias": " - high_school_geography",
|
| 176 |
+
"acc,none": 0.26262626262626265,
|
| 177 |
+
"acc_stderr,none": 0.031353050095330855
|
| 178 |
+
},
|
| 179 |
+
"mmlu_high_school_government_and_politics": {
|
| 180 |
+
"alias": " - high_school_government_and_politics",
|
| 181 |
+
"acc,none": 0.21761658031088082,
|
| 182 |
+
"acc_stderr,none": 0.02977866303775295
|
| 183 |
+
},
|
| 184 |
+
"mmlu_high_school_macroeconomics": {
|
| 185 |
+
"alias": " - high_school_macroeconomics",
|
| 186 |
+
"acc,none": 0.2512820512820513,
|
| 187 |
+
"acc_stderr,none": 0.02199201666237056
|
| 188 |
+
},
|
| 189 |
+
"mmlu_high_school_microeconomics": {
|
| 190 |
+
"alias": " - high_school_microeconomics",
|
| 191 |
+
"acc,none": 0.2184873949579832,
|
| 192 |
+
"acc_stderr,none": 0.026841514322958945
|
| 193 |
+
},
|
| 194 |
+
"mmlu_high_school_psychology": {
|
| 195 |
+
"alias": " - high_school_psychology",
|
| 196 |
+
"acc,none": 0.23669724770642203,
|
| 197 |
+
"acc_stderr,none": 0.01822407811729909
|
| 198 |
+
},
|
| 199 |
+
"mmlu_human_sexuality": {
|
| 200 |
+
"alias": " - human_sexuality",
|
| 201 |
+
"acc,none": 0.1450381679389313,
|
| 202 |
+
"acc_stderr,none": 0.03088466108951539
|
| 203 |
+
},
|
| 204 |
+
"mmlu_professional_psychology": {
|
| 205 |
+
"alias": " - professional_psychology",
|
| 206 |
+
"acc,none": 0.2908496732026144,
|
| 207 |
+
"acc_stderr,none": 0.018373116915903966
|
| 208 |
+
},
|
| 209 |
+
"mmlu_public_relations": {
|
| 210 |
+
"alias": " - public_relations",
|
| 211 |
+
"acc,none": 0.3090909090909091,
|
| 212 |
+
"acc_stderr,none": 0.044262946482000985
|
| 213 |
+
},
|
| 214 |
+
"mmlu_security_studies": {
|
| 215 |
+
"alias": " - security_studies",
|
| 216 |
+
"acc,none": 0.21224489795918366,
|
| 217 |
+
"acc_stderr,none": 0.026176967197866767
|
| 218 |
+
},
|
| 219 |
+
"mmlu_sociology": {
|
| 220 |
+
"alias": " - sociology",
|
| 221 |
+
"acc,none": 0.263681592039801,
|
| 222 |
+
"acc_stderr,none": 0.03115715086935556
|
| 223 |
+
},
|
| 224 |
+
"mmlu_us_foreign_policy": {
|
| 225 |
+
"alias": " - us_foreign_policy",
|
| 226 |
+
"acc,none": 0.22,
|
| 227 |
+
"acc_stderr,none": 0.04163331998932269
|
| 228 |
+
},
|
| 229 |
+
"mmlu_stem": {
|
| 230 |
+
"alias": " - stem",
|
| 231 |
+
"acc,none": 0.2524579765302886,
|
| 232 |
+
"acc_stderr,none": 0.0077368608002292985
|
| 233 |
+
},
|
| 234 |
+
"mmlu_abstract_algebra": {
|
| 235 |
+
"alias": " - abstract_algebra",
|
| 236 |
+
"acc,none": 0.28,
|
| 237 |
+
"acc_stderr,none": 0.04512608598542127
|
| 238 |
+
},
|
| 239 |
+
"mmlu_anatomy": {
|
| 240 |
+
"alias": " - anatomy",
|
| 241 |
+
"acc,none": 0.22962962962962963,
|
| 242 |
+
"acc_stderr,none": 0.036333844140734636
|
| 243 |
+
},
|
| 244 |
+
"mmlu_astronomy": {
|
| 245 |
+
"alias": " - astronomy",
|
| 246 |
+
"acc,none": 0.17105263157894737,
|
| 247 |
+
"acc_stderr,none": 0.030643607071677098
|
| 248 |
+
},
|
| 249 |
+
"mmlu_college_biology": {
|
| 250 |
+
"alias": " - college_biology",
|
| 251 |
+
"acc,none": 0.3194444444444444,
|
| 252 |
+
"acc_stderr,none": 0.03899073687357335
|
| 253 |
+
},
|
| 254 |
+
"mmlu_college_chemistry": {
|
| 255 |
+
"alias": " - college_chemistry",
|
| 256 |
+
"acc,none": 0.22,
|
| 257 |
+
"acc_stderr,none": 0.041633319989322716
|
| 258 |
+
},
|
| 259 |
+
"mmlu_college_computer_science": {
|
| 260 |
+
"alias": " - college_computer_science",
|
| 261 |
+
"acc,none": 0.26,
|
| 262 |
+
"acc_stderr,none": 0.0440844002276808
|
| 263 |
+
},
|
| 264 |
+
"mmlu_college_mathematics": {
|
| 265 |
+
"alias": " - college_mathematics",
|
| 266 |
+
"acc,none": 0.19,
|
| 267 |
+
"acc_stderr,none": 0.03942772444036623
|
| 268 |
+
},
|
| 269 |
+
"mmlu_college_physics": {
|
| 270 |
+
"alias": " - college_physics",
|
| 271 |
+
"acc,none": 0.24509803921568626,
|
| 272 |
+
"acc_stderr,none": 0.042801058373643945
|
| 273 |
+
},
|
| 274 |
+
"mmlu_computer_security": {
|
| 275 |
+
"alias": " - computer_security",
|
| 276 |
+
"acc,none": 0.25,
|
| 277 |
+
"acc_stderr,none": 0.04351941398892446
|
| 278 |
+
},
|
| 279 |
+
"mmlu_conceptual_physics": {
|
| 280 |
+
"alias": " - conceptual_physics",
|
| 281 |
+
"acc,none": 0.24680851063829787,
|
| 282 |
+
"acc_stderr,none": 0.028185441301234106
|
| 283 |
+
},
|
| 284 |
+
"mmlu_electrical_engineering": {
|
| 285 |
+
"alias": " - electrical_engineering",
|
| 286 |
+
"acc,none": 0.2413793103448276,
|
| 287 |
+
"acc_stderr,none": 0.03565998174135303
|
| 288 |
+
},
|
| 289 |
+
"mmlu_elementary_mathematics": {
|
| 290 |
+
"alias": " - elementary_mathematics",
|
| 291 |
+
"acc,none": 0.2804232804232804,
|
| 292 |
+
"acc_stderr,none": 0.023135287974325625
|
| 293 |
+
},
|
| 294 |
+
"mmlu_high_school_biology": {
|
| 295 |
+
"alias": " - high_school_biology",
|
| 296 |
+
"acc,none": 0.24838709677419354,
|
| 297 |
+
"acc_stderr,none": 0.024580028921481006
|
| 298 |
+
},
|
| 299 |
+
"mmlu_high_school_chemistry": {
|
| 300 |
+
"alias": " - high_school_chemistry",
|
| 301 |
+
"acc,none": 0.2857142857142857,
|
| 302 |
+
"acc_stderr,none": 0.03178529710642749
|
| 303 |
+
},
|
| 304 |
+
"mmlu_high_school_computer_science": {
|
| 305 |
+
"alias": " - high_school_computer_science",
|
| 306 |
+
"acc,none": 0.32,
|
| 307 |
+
"acc_stderr,none": 0.04688261722621504
|
| 308 |
+
},
|
| 309 |
+
"mmlu_high_school_mathematics": {
|
| 310 |
+
"alias": " - high_school_mathematics",
|
| 311 |
+
"acc,none": 0.25925925925925924,
|
| 312 |
+
"acc_stderr,none": 0.02671924078371217
|
| 313 |
+
},
|
| 314 |
+
"mmlu_high_school_physics": {
|
| 315 |
+
"alias": " - high_school_physics",
|
| 316 |
+
"acc,none": 0.24503311258278146,
|
| 317 |
+
"acc_stderr,none": 0.03511807571804724
|
| 318 |
+
},
|
| 319 |
+
"mmlu_high_school_statistics": {
|
| 320 |
+
"alias": " - high_school_statistics",
|
| 321 |
+
"acc,none": 0.2222222222222222,
|
| 322 |
+
"acc_stderr,none": 0.02835321286686344
|
| 323 |
+
},
|
| 324 |
+
"mmlu_machine_learning": {
|
| 325 |
+
"alias": " - machine_learning",
|
| 326 |
+
"acc,none": 0.24107142857142858,
|
| 327 |
+
"acc_stderr,none": 0.04059867246952687
|
| 328 |
+
}
|
| 329 |
+
}
|
| 330 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/mmlu_2025-03-12T18-34-12.292504.json
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"mmlu"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 5,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"mmlu": {
|
| 20 |
+
"acc,none": 0.25324027916251246,
|
| 21 |
+
"acc_stderr,none": 0.003666930438461816,
|
| 22 |
+
"alias": "mmlu"
|
| 23 |
+
},
|
| 24 |
+
"mmlu_humanities": {
|
| 25 |
+
"alias": " - humanities",
|
| 26 |
+
"acc,none": 0.2501594048884166,
|
| 27 |
+
"acc_stderr,none": 0.00631067205291355
|
| 28 |
+
},
|
| 29 |
+
"mmlu_formal_logic": {
|
| 30 |
+
"alias": " - formal_logic",
|
| 31 |
+
"acc,none": 0.16666666666666666,
|
| 32 |
+
"acc_stderr,none": 0.03333333333333336
|
| 33 |
+
},
|
| 34 |
+
"mmlu_high_school_european_history": {
|
| 35 |
+
"alias": " - high_school_european_history",
|
| 36 |
+
"acc,none": 0.21212121212121213,
|
| 37 |
+
"acc_stderr,none": 0.031922715695482995
|
| 38 |
+
},
|
| 39 |
+
"mmlu_high_school_us_history": {
|
| 40 |
+
"alias": " - high_school_us_history",
|
| 41 |
+
"acc,none": 0.27941176470588236,
|
| 42 |
+
"acc_stderr,none": 0.031493281045079556
|
| 43 |
+
},
|
| 44 |
+
"mmlu_high_school_world_history": {
|
| 45 |
+
"alias": " - high_school_world_history",
|
| 46 |
+
"acc,none": 0.25316455696202533,
|
| 47 |
+
"acc_stderr,none": 0.028304657943035313
|
| 48 |
+
},
|
| 49 |
+
"mmlu_international_law": {
|
| 50 |
+
"alias": " - international_law",
|
| 51 |
+
"acc,none": 0.2644628099173554,
|
| 52 |
+
"acc_stderr,none": 0.04026187527591204
|
| 53 |
+
},
|
| 54 |
+
"mmlu_jurisprudence": {
|
| 55 |
+
"alias": " - jurisprudence",
|
| 56 |
+
"acc,none": 0.3425925925925926,
|
| 57 |
+
"acc_stderr,none": 0.04587904741301812
|
| 58 |
+
},
|
| 59 |
+
"mmlu_logical_fallacies": {
|
| 60 |
+
"alias": " - logical_fallacies",
|
| 61 |
+
"acc,none": 0.3006134969325153,
|
| 62 |
+
"acc_stderr,none": 0.03602511318806771
|
| 63 |
+
},
|
| 64 |
+
"mmlu_moral_disputes": {
|
| 65 |
+
"alias": " - moral_disputes",
|
| 66 |
+
"acc,none": 0.24855491329479767,
|
| 67 |
+
"acc_stderr,none": 0.023267528432100174
|
| 68 |
+
},
|
| 69 |
+
"mmlu_moral_scenarios": {
|
| 70 |
+
"alias": " - moral_scenarios",
|
| 71 |
+
"acc,none": 0.23016759776536314,
|
| 72 |
+
"acc_stderr,none": 0.014078339253425819
|
| 73 |
+
},
|
| 74 |
+
"mmlu_philosophy": {
|
| 75 |
+
"alias": " - philosophy",
|
| 76 |
+
"acc,none": 0.24758842443729903,
|
| 77 |
+
"acc_stderr,none": 0.024513879973621967
|
| 78 |
+
},
|
| 79 |
+
"mmlu_prehistory": {
|
| 80 |
+
"alias": " - prehistory",
|
| 81 |
+
"acc,none": 0.2839506172839506,
|
| 82 |
+
"acc_stderr,none": 0.025089478523765127
|
| 83 |
+
},
|
| 84 |
+
"mmlu_professional_law": {
|
| 85 |
+
"alias": " - professional_law",
|
| 86 |
+
"acc,none": 0.24837027379400262,
|
| 87 |
+
"acc_stderr,none": 0.011035212598034503
|
| 88 |
+
},
|
| 89 |
+
"mmlu_world_religions": {
|
| 90 |
+
"alias": " - world_religions",
|
| 91 |
+
"acc,none": 0.2573099415204678,
|
| 92 |
+
"acc_stderr,none": 0.03352799844161865
|
| 93 |
+
},
|
| 94 |
+
"mmlu_other": {
|
| 95 |
+
"alias": " - other",
|
| 96 |
+
"acc,none": 0.271000965561635,
|
| 97 |
+
"acc_stderr,none": 0.007979305318220201
|
| 98 |
+
},
|
| 99 |
+
"mmlu_business_ethics": {
|
| 100 |
+
"alias": " - business_ethics",
|
| 101 |
+
"acc,none": 0.26,
|
| 102 |
+
"acc_stderr,none": 0.0440844002276808
|
| 103 |
+
},
|
| 104 |
+
"mmlu_clinical_knowledge": {
|
| 105 |
+
"alias": " - clinical_knowledge",
|
| 106 |
+
"acc,none": 0.23018867924528302,
|
| 107 |
+
"acc_stderr,none": 0.025907897122408173
|
| 108 |
+
},
|
| 109 |
+
"mmlu_college_medicine": {
|
| 110 |
+
"alias": " - college_medicine",
|
| 111 |
+
"acc,none": 0.2543352601156069,
|
| 112 |
+
"acc_stderr,none": 0.0332055644308557
|
| 113 |
+
},
|
| 114 |
+
"mmlu_global_facts": {
|
| 115 |
+
"alias": " - global_facts",
|
| 116 |
+
"acc,none": 0.27,
|
| 117 |
+
"acc_stderr,none": 0.04461960433384739
|
| 118 |
+
},
|
| 119 |
+
"mmlu_human_aging": {
|
| 120 |
+
"alias": " - human_aging",
|
| 121 |
+
"acc,none": 0.32286995515695066,
|
| 122 |
+
"acc_stderr,none": 0.03138147637575498
|
| 123 |
+
},
|
| 124 |
+
"mmlu_management": {
|
| 125 |
+
"alias": " - management",
|
| 126 |
+
"acc,none": 0.2524271844660194,
|
| 127 |
+
"acc_stderr,none": 0.04301250399690877
|
| 128 |
+
},
|
| 129 |
+
"mmlu_marketing": {
|
| 130 |
+
"alias": " - marketing",
|
| 131 |
+
"acc,none": 0.2606837606837607,
|
| 132 |
+
"acc_stderr,none": 0.028760348956523414
|
| 133 |
+
},
|
| 134 |
+
"mmlu_medical_genetics": {
|
| 135 |
+
"alias": " - medical_genetics",
|
| 136 |
+
"acc,none": 0.27,
|
| 137 |
+
"acc_stderr,none": 0.044619604333847394
|
| 138 |
+
},
|
| 139 |
+
"mmlu_miscellaneous": {
|
| 140 |
+
"alias": " - miscellaneous",
|
| 141 |
+
"acc,none": 0.2707535121328225,
|
| 142 |
+
"acc_stderr,none": 0.01588988836256049
|
| 143 |
+
},
|
| 144 |
+
"mmlu_nutrition": {
|
| 145 |
+
"alias": " - nutrition",
|
| 146 |
+
"acc,none": 0.2973856209150327,
|
| 147 |
+
"acc_stderr,none": 0.02617390850671858
|
| 148 |
+
},
|
| 149 |
+
"mmlu_professional_accounting": {
|
| 150 |
+
"alias": " - professional_accounting",
|
| 151 |
+
"acc,none": 0.2801418439716312,
|
| 152 |
+
"acc_stderr,none": 0.02678917235114025
|
| 153 |
+
},
|
| 154 |
+
"mmlu_professional_medicine": {
|
| 155 |
+
"alias": " - professional_medicine",
|
| 156 |
+
"acc,none": 0.2867647058823529,
|
| 157 |
+
"acc_stderr,none": 0.02747227447323382
|
| 158 |
+
},
|
| 159 |
+
"mmlu_virology": {
|
| 160 |
+
"alias": " - virology",
|
| 161 |
+
"acc,none": 0.2289156626506024,
|
| 162 |
+
"acc_stderr,none": 0.03270745277352477
|
| 163 |
+
},
|
| 164 |
+
"mmlu_social_sciences": {
|
| 165 |
+
"alias": " - social_sciences",
|
| 166 |
+
"acc,none": 0.24861878453038674,
|
| 167 |
+
"acc_stderr,none": 0.007797463983867868
|
| 168 |
+
},
|
| 169 |
+
"mmlu_econometrics": {
|
| 170 |
+
"alias": " - econometrics",
|
| 171 |
+
"acc,none": 0.3157894736842105,
|
| 172 |
+
"acc_stderr,none": 0.043727482902780064
|
| 173 |
+
},
|
| 174 |
+
"mmlu_high_school_geography": {
|
| 175 |
+
"alias": " - high_school_geography",
|
| 176 |
+
"acc,none": 0.2474747474747475,
|
| 177 |
+
"acc_stderr,none": 0.03074630074212451
|
| 178 |
+
},
|
| 179 |
+
"mmlu_high_school_government_and_politics": {
|
| 180 |
+
"alias": " - high_school_government_and_politics",
|
| 181 |
+
"acc,none": 0.24870466321243523,
|
| 182 |
+
"acc_stderr,none": 0.0311958408777003
|
| 183 |
+
},
|
| 184 |
+
"mmlu_high_school_macroeconomics": {
|
| 185 |
+
"alias": " - high_school_macroeconomics",
|
| 186 |
+
"acc,none": 0.26153846153846155,
|
| 187 |
+
"acc_stderr,none": 0.022282141204204423
|
| 188 |
+
},
|
| 189 |
+
"mmlu_high_school_microeconomics": {
|
| 190 |
+
"alias": " - high_school_microeconomics",
|
| 191 |
+
"acc,none": 0.23529411764705882,
|
| 192 |
+
"acc_stderr,none": 0.02755361446786381
|
| 193 |
+
},
|
| 194 |
+
"mmlu_high_school_psychology": {
|
| 195 |
+
"alias": " - high_school_psychology",
|
| 196 |
+
"acc,none": 0.22935779816513763,
|
| 197 |
+
"acc_stderr,none": 0.018025349724618684
|
| 198 |
+
},
|
| 199 |
+
"mmlu_human_sexuality": {
|
| 200 |
+
"alias": " - human_sexuality",
|
| 201 |
+
"acc,none": 0.22137404580152673,
|
| 202 |
+
"acc_stderr,none": 0.03641297081313729
|
| 203 |
+
},
|
| 204 |
+
"mmlu_professional_psychology": {
|
| 205 |
+
"alias": " - professional_psychology",
|
| 206 |
+
"acc,none": 0.26143790849673204,
|
| 207 |
+
"acc_stderr,none": 0.01777694715752805
|
| 208 |
+
},
|
| 209 |
+
"mmlu_public_relations": {
|
| 210 |
+
"alias": " - public_relations",
|
| 211 |
+
"acc,none": 0.2818181818181818,
|
| 212 |
+
"acc_stderr,none": 0.043091187099464585
|
| 213 |
+
},
|
| 214 |
+
"mmlu_security_studies": {
|
| 215 |
+
"alias": " - security_studies",
|
| 216 |
+
"acc,none": 0.2530612244897959,
|
| 217 |
+
"acc_stderr,none": 0.027833023871399677
|
| 218 |
+
},
|
| 219 |
+
"mmlu_sociology": {
|
| 220 |
+
"alias": " - sociology",
|
| 221 |
+
"acc,none": 0.21393034825870647,
|
| 222 |
+
"acc_stderr,none": 0.02899690969332891
|
| 223 |
+
},
|
| 224 |
+
"mmlu_us_foreign_policy": {
|
| 225 |
+
"alias": " - us_foreign_policy",
|
| 226 |
+
"acc,none": 0.24,
|
| 227 |
+
"acc_stderr,none": 0.042923469599092816
|
| 228 |
+
},
|
| 229 |
+
"mmlu_stem": {
|
| 230 |
+
"alias": " - stem",
|
| 231 |
+
"acc,none": 0.24484617824294322,
|
| 232 |
+
"acc_stderr,none": 0.007634541481754456
|
| 233 |
+
},
|
| 234 |
+
"mmlu_abstract_algebra": {
|
| 235 |
+
"alias": " - abstract_algebra",
|
| 236 |
+
"acc,none": 0.22,
|
| 237 |
+
"acc_stderr,none": 0.0416333199893227
|
| 238 |
+
},
|
| 239 |
+
"mmlu_anatomy": {
|
| 240 |
+
"alias": " - anatomy",
|
| 241 |
+
"acc,none": 0.2962962962962963,
|
| 242 |
+
"acc_stderr,none": 0.03944624162501117
|
| 243 |
+
},
|
| 244 |
+
"mmlu_astronomy": {
|
| 245 |
+
"alias": " - astronomy",
|
| 246 |
+
"acc,none": 0.15789473684210525,
|
| 247 |
+
"acc_stderr,none": 0.02967416752010144
|
| 248 |
+
},
|
| 249 |
+
"mmlu_college_biology": {
|
| 250 |
+
"alias": " - college_biology",
|
| 251 |
+
"acc,none": 0.2708333333333333,
|
| 252 |
+
"acc_stderr,none": 0.037161774375660164
|
| 253 |
+
},
|
| 254 |
+
"mmlu_college_chemistry": {
|
| 255 |
+
"alias": " - college_chemistry",
|
| 256 |
+
"acc,none": 0.22,
|
| 257 |
+
"acc_stderr,none": 0.041633319989322695
|
| 258 |
+
},
|
| 259 |
+
"mmlu_college_computer_science": {
|
| 260 |
+
"alias": " - college_computer_science",
|
| 261 |
+
"acc,none": 0.18,
|
| 262 |
+
"acc_stderr,none": 0.038612291966536955
|
| 263 |
+
},
|
| 264 |
+
"mmlu_college_mathematics": {
|
| 265 |
+
"alias": " - college_mathematics",
|
| 266 |
+
"acc,none": 0.26,
|
| 267 |
+
"acc_stderr,none": 0.04408440022768079
|
| 268 |
+
},
|
| 269 |
+
"mmlu_college_physics": {
|
| 270 |
+
"alias": " - college_physics",
|
| 271 |
+
"acc,none": 0.19607843137254902,
|
| 272 |
+
"acc_stderr,none": 0.03950581861179964
|
| 273 |
+
},
|
| 274 |
+
"mmlu_computer_security": {
|
| 275 |
+
"alias": " - computer_security",
|
| 276 |
+
"acc,none": 0.3,
|
| 277 |
+
"acc_stderr,none": 0.046056618647183814
|
| 278 |
+
},
|
| 279 |
+
"mmlu_conceptual_physics": {
|
| 280 |
+
"alias": " - conceptual_physics",
|
| 281 |
+
"acc,none": 0.28936170212765955,
|
| 282 |
+
"acc_stderr,none": 0.029644006577009618
|
| 283 |
+
},
|
| 284 |
+
"mmlu_electrical_engineering": {
|
| 285 |
+
"alias": " - electrical_engineering",
|
| 286 |
+
"acc,none": 0.2,
|
| 287 |
+
"acc_stderr,none": 0.0333333333333333
|
| 288 |
+
},
|
| 289 |
+
"mmlu_elementary_mathematics": {
|
| 290 |
+
"alias": " - elementary_mathematics",
|
| 291 |
+
"acc,none": 0.2777777777777778,
|
| 292 |
+
"acc_stderr,none": 0.023068188848261124
|
| 293 |
+
},
|
| 294 |
+
"mmlu_high_school_biology": {
|
| 295 |
+
"alias": " - high_school_biology",
|
| 296 |
+
"acc,none": 0.22903225806451613,
|
| 297 |
+
"acc_stderr,none": 0.02390491431178266
|
| 298 |
+
},
|
| 299 |
+
"mmlu_high_school_chemistry": {
|
| 300 |
+
"alias": " - high_school_chemistry",
|
| 301 |
+
"acc,none": 0.19704433497536947,
|
| 302 |
+
"acc_stderr,none": 0.027986724666736212
|
| 303 |
+
},
|
| 304 |
+
"mmlu_high_school_computer_science": {
|
| 305 |
+
"alias": " - high_school_computer_science",
|
| 306 |
+
"acc,none": 0.33,
|
| 307 |
+
"acc_stderr,none": 0.04725815626252605
|
| 308 |
+
},
|
| 309 |
+
"mmlu_high_school_mathematics": {
|
| 310 |
+
"alias": " - high_school_mathematics",
|
| 311 |
+
"acc,none": 0.2074074074074074,
|
| 312 |
+
"acc_stderr,none": 0.024720713193952155
|
| 313 |
+
},
|
| 314 |
+
"mmlu_high_school_physics": {
|
| 315 |
+
"alias": " - high_school_physics",
|
| 316 |
+
"acc,none": 0.1986754966887417,
|
| 317 |
+
"acc_stderr,none": 0.03257847384436776
|
| 318 |
+
},
|
| 319 |
+
"mmlu_high_school_statistics": {
|
| 320 |
+
"alias": " - high_school_statistics",
|
| 321 |
+
"acc,none": 0.3148148148148148,
|
| 322 |
+
"acc_stderr,none": 0.0316746870682898
|
| 323 |
+
},
|
| 324 |
+
"mmlu_machine_learning": {
|
| 325 |
+
"alias": " - machine_learning",
|
| 326 |
+
"acc,none": 0.2767857142857143,
|
| 327 |
+
"acc_stderr,none": 0.04246624336697624
|
| 328 |
+
}
|
| 329 |
+
}
|
| 330 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/pawsx_2025-03-10T18-52-23.966216.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"pawsx"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"pad_token_ids": [
|
| 11 |
+
0
|
| 12 |
+
],
|
| 13 |
+
"stop_token_ids": [
|
| 14 |
+
0,
|
| 15 |
+
261
|
| 16 |
+
],
|
| 17 |
+
"custom_prefix_token_id": 0,
|
| 18 |
+
"results": {
|
| 19 |
+
"pawsx": {
|
| 20 |
+
"acc,none": 0.5330714285714285,
|
| 21 |
+
"acc_stderr,none": 0.004214214249376192,
|
| 22 |
+
"alias": "pawsx"
|
| 23 |
+
},
|
| 24 |
+
"paws_de": {
|
| 25 |
+
"acc,none": 0.5185,
|
| 26 |
+
"acc_stderr,none": 0.011175478542788575,
|
| 27 |
+
"alias": " - paws_de"
|
| 28 |
+
},
|
| 29 |
+
"paws_en": {
|
| 30 |
+
"acc,none": 0.5315,
|
| 31 |
+
"acc_stderr,none": 0.011160921022883286,
|
| 32 |
+
"alias": " - paws_en"
|
| 33 |
+
},
|
| 34 |
+
"paws_es": {
|
| 35 |
+
"acc,none": 0.493,
|
| 36 |
+
"acc_stderr,none": 0.011182040020027774,
|
| 37 |
+
"alias": " - paws_es"
|
| 38 |
+
},
|
| 39 |
+
"paws_fr": {
|
| 40 |
+
"acc,none": 0.5445,
|
| 41 |
+
"acc_stderr,none": 0.011138757154883975,
|
| 42 |
+
"alias": " - paws_fr"
|
| 43 |
+
},
|
| 44 |
+
"paws_ja": {
|
| 45 |
+
"acc,none": 0.5565,
|
| 46 |
+
"acc_stderr,none": 0.011111507899646485,
|
| 47 |
+
"alias": " - paws_ja"
|
| 48 |
+
},
|
| 49 |
+
"paws_ko": {
|
| 50 |
+
"acc,none": 0.539,
|
| 51 |
+
"acc_stderr,none": 0.01114906502023434,
|
| 52 |
+
"alias": " - paws_ko"
|
| 53 |
+
},
|
| 54 |
+
"paws_zh": {
|
| 55 |
+
"acc,none": 0.5485,
|
| 56 |
+
"acc_stderr,none": 0.011130400617630761,
|
| 57 |
+
"alias": " - paws_zh"
|
| 58 |
+
}
|
| 59 |
+
}
|
| 60 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/pawsx_2025-03-12T12-00-24.813841.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"pawsx"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"pawsx": {
|
| 20 |
+
"acc,none": 0.5385714285714286,
|
| 21 |
+
"acc_stderr,none": 0.004212723202834257,
|
| 22 |
+
"alias": "pawsx"
|
| 23 |
+
},
|
| 24 |
+
"paws_de": {
|
| 25 |
+
"acc,none": 0.535,
|
| 26 |
+
"acc_stderr,none": 0.011155703691943108,
|
| 27 |
+
"alias": " - paws_de"
|
| 28 |
+
},
|
| 29 |
+
"paws_en": {
|
| 30 |
+
"acc,none": 0.5275,
|
| 31 |
+
"acc_stderr,none": 0.01116620871686354,
|
| 32 |
+
"alias": " - paws_en"
|
| 33 |
+
},
|
| 34 |
+
"paws_es": {
|
| 35 |
+
"acc,none": 0.5125,
|
| 36 |
+
"acc_stderr,none": 0.011179640744835734,
|
| 37 |
+
"alias": " - paws_es"
|
| 38 |
+
},
|
| 39 |
+
"paws_fr": {
|
| 40 |
+
"acc,none": 0.548,
|
| 41 |
+
"acc_stderr,none": 0.011131484850525782,
|
| 42 |
+
"alias": " - paws_fr"
|
| 43 |
+
},
|
| 44 |
+
"paws_ja": {
|
| 45 |
+
"acc,none": 0.5515,
|
| 46 |
+
"acc_stderr,none": 0.011123656901911277,
|
| 47 |
+
"alias": " - paws_ja"
|
| 48 |
+
},
|
| 49 |
+
"paws_ko": {
|
| 50 |
+
"acc,none": 0.5495,
|
| 51 |
+
"acc_stderr,none": 0.011128198119942874,
|
| 52 |
+
"alias": " - paws_ko"
|
| 53 |
+
},
|
| 54 |
+
"paws_zh": {
|
| 55 |
+
"acc,none": 0.546,
|
| 56 |
+
"acc_stderr,none": 0.011135708419359798,
|
| 57 |
+
"alias": " - paws_zh"
|
| 58 |
+
}
|
| 59 |
+
}
|
| 60 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/piqa_2025-03-10T16-57-25.214478.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"piqa"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"pad_token_ids": [
|
| 11 |
+
0
|
| 12 |
+
],
|
| 13 |
+
"stop_token_ids": [
|
| 14 |
+
0,
|
| 15 |
+
261
|
| 16 |
+
],
|
| 17 |
+
"custom_prefix_token_id": 0,
|
| 18 |
+
"results": {
|
| 19 |
+
"piqa": {
|
| 20 |
+
"acc,none": 0.6800870511425462,
|
| 21 |
+
"acc_stderr,none": 0.01088287358209206,
|
| 22 |
+
"acc_norm,none": 0.6828073993471164,
|
| 23 |
+
"acc_norm_stderr,none": 0.01085815545438087,
|
| 24 |
+
"alias": "piqa"
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/piqa_2025-03-11T22-02-02.648211.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"piqa"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"piqa": {
|
| 20 |
+
"acc,none": 0.6871599564744287,
|
| 21 |
+
"acc_stderr,none": 0.010817714425701086,
|
| 22 |
+
"acc_norm,none": 0.6866158868335147,
|
| 23 |
+
"acc_norm_stderr,none": 0.010822829929195487,
|
| 24 |
+
"alias": "piqa"
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/rwkv7-g1-0.1b-20250307-ctx4096_blimp.json
ADDED
|
@@ -0,0 +1,360 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"blimp"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"pad_token_ids": [
|
| 11 |
+
0
|
| 12 |
+
],
|
| 13 |
+
"stop_token_ids": [
|
| 14 |
+
0,
|
| 15 |
+
261
|
| 16 |
+
],
|
| 17 |
+
"custom_prefix_token_id": 0,
|
| 18 |
+
"results": {
|
| 19 |
+
"blimp": {
|
| 20 |
+
"acc,none": 0.817820895522388,
|
| 21 |
+
"acc_stderr,none": 0.0013328875580462674,
|
| 22 |
+
"alias": "blimp"
|
| 23 |
+
},
|
| 24 |
+
"blimp_adjunct_island": {
|
| 25 |
+
"acc,none": 0.897,
|
| 26 |
+
"acc_stderr,none": 0.00961683333969581,
|
| 27 |
+
"alias": " - blimp_adjunct_island"
|
| 28 |
+
},
|
| 29 |
+
"blimp_anaphor_gender_agreement": {
|
| 30 |
+
"acc,none": 0.98,
|
| 31 |
+
"acc_stderr,none": 0.004429403980178343,
|
| 32 |
+
"alias": " - blimp_anaphor_gender_agreement"
|
| 33 |
+
},
|
| 34 |
+
"blimp_anaphor_number_agreement": {
|
| 35 |
+
"acc,none": 0.994,
|
| 36 |
+
"acc_stderr,none": 0.0024433521993298276,
|
| 37 |
+
"alias": " - blimp_anaphor_number_agreement"
|
| 38 |
+
},
|
| 39 |
+
"blimp_animate_subject_passive": {
|
| 40 |
+
"acc,none": 0.792,
|
| 41 |
+
"acc_stderr,none": 0.01284137457209692,
|
| 42 |
+
"alias": " - blimp_animate_subject_passive"
|
| 43 |
+
},
|
| 44 |
+
"blimp_animate_subject_trans": {
|
| 45 |
+
"acc,none": 0.895,
|
| 46 |
+
"acc_stderr,none": 0.00969892102602495,
|
| 47 |
+
"alias": " - blimp_animate_subject_trans"
|
| 48 |
+
},
|
| 49 |
+
"blimp_causative": {
|
| 50 |
+
"acc,none": 0.766,
|
| 51 |
+
"acc_stderr,none": 0.01339490288966001,
|
| 52 |
+
"alias": " - blimp_causative"
|
| 53 |
+
},
|
| 54 |
+
"blimp_complex_NP_island": {
|
| 55 |
+
"acc,none": 0.604,
|
| 56 |
+
"acc_stderr,none": 0.015473313265859401,
|
| 57 |
+
"alias": " - blimp_complex_NP_island"
|
| 58 |
+
},
|
| 59 |
+
"blimp_coordinate_structure_constraint_complex_left_branch": {
|
| 60 |
+
"acc,none": 0.759,
|
| 61 |
+
"acc_stderr,none": 0.013531522534515455,
|
| 62 |
+
"alias": " - blimp_coordinate_structure_constraint_complex_left_branch"
|
| 63 |
+
},
|
| 64 |
+
"blimp_coordinate_structure_constraint_object_extraction": {
|
| 65 |
+
"acc,none": 0.833,
|
| 66 |
+
"acc_stderr,none": 0.011800434324644579,
|
| 67 |
+
"alias": " - blimp_coordinate_structure_constraint_object_extraction"
|
| 68 |
+
},
|
| 69 |
+
"blimp_determiner_noun_agreement_1": {
|
| 70 |
+
"acc,none": 0.989,
|
| 71 |
+
"acc_stderr,none": 0.003299983316607816,
|
| 72 |
+
"alias": " - blimp_determiner_noun_agreement_1"
|
| 73 |
+
},
|
| 74 |
+
"blimp_determiner_noun_agreement_2": {
|
| 75 |
+
"acc,none": 0.973,
|
| 76 |
+
"acc_stderr,none": 0.005128089049275289,
|
| 77 |
+
"alias": " - blimp_determiner_noun_agreement_2"
|
| 78 |
+
},
|
| 79 |
+
"blimp_determiner_noun_agreement_irregular_1": {
|
| 80 |
+
"acc,none": 0.937,
|
| 81 |
+
"acc_stderr,none": 0.00768700787628643,
|
| 82 |
+
"alias": " - blimp_determiner_noun_agreement_irregular_1"
|
| 83 |
+
},
|
| 84 |
+
"blimp_determiner_noun_agreement_irregular_2": {
|
| 85 |
+
"acc,none": 0.954,
|
| 86 |
+
"acc_stderr,none": 0.006627814717380713,
|
| 87 |
+
"alias": " - blimp_determiner_noun_agreement_irregular_2"
|
| 88 |
+
},
|
| 89 |
+
"blimp_determiner_noun_agreement_with_adj_2": {
|
| 90 |
+
"acc,none": 0.946,
|
| 91 |
+
"acc_stderr,none": 0.007150883521295433,
|
| 92 |
+
"alias": " - blimp_determiner_noun_agreement_with_adj_2"
|
| 93 |
+
},
|
| 94 |
+
"blimp_determiner_noun_agreement_with_adj_irregular_1": {
|
| 95 |
+
"acc,none": 0.916,
|
| 96 |
+
"acc_stderr,none": 0.008776162089491115,
|
| 97 |
+
"alias": " - blimp_determiner_noun_agreement_with_adj_irregular_1"
|
| 98 |
+
},
|
| 99 |
+
"blimp_determiner_noun_agreement_with_adj_irregular_2": {
|
| 100 |
+
"acc,none": 0.937,
|
| 101 |
+
"acc_stderr,none": 0.00768700787628643,
|
| 102 |
+
"alias": " - blimp_determiner_noun_agreement_with_adj_irregular_2"
|
| 103 |
+
},
|
| 104 |
+
"blimp_determiner_noun_agreement_with_adjective_1": {
|
| 105 |
+
"acc,none": 0.964,
|
| 106 |
+
"acc_stderr,none": 0.0058939578161655475,
|
| 107 |
+
"alias": " - blimp_determiner_noun_agreement_with_adjective_1"
|
| 108 |
+
},
|
| 109 |
+
"blimp_distractor_agreement_relational_noun": {
|
| 110 |
+
"acc,none": 0.864,
|
| 111 |
+
"acc_stderr,none": 0.010845350230472992,
|
| 112 |
+
"alias": " - blimp_distractor_agreement_relational_noun"
|
| 113 |
+
},
|
| 114 |
+
"blimp_distractor_agreement_relative_clause": {
|
| 115 |
+
"acc,none": 0.716,
|
| 116 |
+
"acc_stderr,none": 0.014267009061031313,
|
| 117 |
+
"alias": " - blimp_distractor_agreement_relative_clause"
|
| 118 |
+
},
|
| 119 |
+
"blimp_drop_argument": {
|
| 120 |
+
"acc,none": 0.761,
|
| 121 |
+
"acc_stderr,none": 0.01349300044693759,
|
| 122 |
+
"alias": " - blimp_drop_argument"
|
| 123 |
+
},
|
| 124 |
+
"blimp_ellipsis_n_bar_1": {
|
| 125 |
+
"acc,none": 0.83,
|
| 126 |
+
"acc_stderr,none": 0.01188449583454167,
|
| 127 |
+
"alias": " - blimp_ellipsis_n_bar_1"
|
| 128 |
+
},
|
| 129 |
+
"blimp_ellipsis_n_bar_2": {
|
| 130 |
+
"acc,none": 0.89,
|
| 131 |
+
"acc_stderr,none": 0.009899393819724439,
|
| 132 |
+
"alias": " - blimp_ellipsis_n_bar_2"
|
| 133 |
+
},
|
| 134 |
+
"blimp_existential_there_object_raising": {
|
| 135 |
+
"acc,none": 0.878,
|
| 136 |
+
"acc_stderr,none": 0.010354864712936687,
|
| 137 |
+
"alias": " - blimp_existential_there_object_raising"
|
| 138 |
+
},
|
| 139 |
+
"blimp_existential_there_quantifiers_1": {
|
| 140 |
+
"acc,none": 0.989,
|
| 141 |
+
"acc_stderr,none": 0.0032999833166078153,
|
| 142 |
+
"alias": " - blimp_existential_there_quantifiers_1"
|
| 143 |
+
},
|
| 144 |
+
"blimp_existential_there_quantifiers_2": {
|
| 145 |
+
"acc,none": 0.3,
|
| 146 |
+
"acc_stderr,none": 0.014498627873361427,
|
| 147 |
+
"alias": " - blimp_existential_there_quantifiers_2"
|
| 148 |
+
},
|
| 149 |
+
"blimp_existential_there_subject_raising": {
|
| 150 |
+
"acc,none": 0.904,
|
| 151 |
+
"acc_stderr,none": 0.009320454434783226,
|
| 152 |
+
"alias": " - blimp_existential_there_subject_raising"
|
| 153 |
+
},
|
| 154 |
+
"blimp_expletive_it_object_raising": {
|
| 155 |
+
"acc,none": 0.799,
|
| 156 |
+
"acc_stderr,none": 0.012679107214617322,
|
| 157 |
+
"alias": " - blimp_expletive_it_object_raising"
|
| 158 |
+
},
|
| 159 |
+
"blimp_inchoative": {
|
| 160 |
+
"acc,none": 0.666,
|
| 161 |
+
"acc_stderr,none": 0.014922019523732961,
|
| 162 |
+
"alias": " - blimp_inchoative"
|
| 163 |
+
},
|
| 164 |
+
"blimp_intransitive": {
|
| 165 |
+
"acc,none": 0.817,
|
| 166 |
+
"acc_stderr,none": 0.012233587399477825,
|
| 167 |
+
"alias": " - blimp_intransitive"
|
| 168 |
+
},
|
| 169 |
+
"blimp_irregular_past_participle_adjectives": {
|
| 170 |
+
"acc,none": 0.995,
|
| 171 |
+
"acc_stderr,none": 0.0022315868748448812,
|
| 172 |
+
"alias": " - blimp_irregular_past_participle_adjectives"
|
| 173 |
+
},
|
| 174 |
+
"blimp_irregular_past_participle_verbs": {
|
| 175 |
+
"acc,none": 0.876,
|
| 176 |
+
"acc_stderr,none": 0.010427498872343961,
|
| 177 |
+
"alias": " - blimp_irregular_past_participle_verbs"
|
| 178 |
+
},
|
| 179 |
+
"blimp_irregular_plural_subject_verb_agreement_1": {
|
| 180 |
+
"acc,none": 0.931,
|
| 181 |
+
"acc_stderr,none": 0.008018934050315155,
|
| 182 |
+
"alias": " - blimp_irregular_plural_subject_verb_agreement_1"
|
| 183 |
+
},
|
| 184 |
+
"blimp_irregular_plural_subject_verb_agreement_2": {
|
| 185 |
+
"acc,none": 0.921,
|
| 186 |
+
"acc_stderr,none": 0.00853415677333344,
|
| 187 |
+
"alias": " - blimp_irregular_plural_subject_verb_agreement_2"
|
| 188 |
+
},
|
| 189 |
+
"blimp_left_branch_island_echo_question": {
|
| 190 |
+
"acc,none": 0.425,
|
| 191 |
+
"acc_stderr,none": 0.01564032031704011,
|
| 192 |
+
"alias": " - blimp_left_branch_island_echo_question"
|
| 193 |
+
},
|
| 194 |
+
"blimp_left_branch_island_simple_question": {
|
| 195 |
+
"acc,none": 0.793,
|
| 196 |
+
"acc_stderr,none": 0.012818553557843984,
|
| 197 |
+
"alias": " - blimp_left_branch_island_simple_question"
|
| 198 |
+
},
|
| 199 |
+
"blimp_matrix_question_npi_licensor_present": {
|
| 200 |
+
"acc,none": 0.697,
|
| 201 |
+
"acc_stderr,none": 0.014539683710535246,
|
| 202 |
+
"alias": " - blimp_matrix_question_npi_licensor_present"
|
| 203 |
+
},
|
| 204 |
+
"blimp_npi_present_1": {
|
| 205 |
+
"acc,none": 0.745,
|
| 206 |
+
"acc_stderr,none": 0.013790038620872833,
|
| 207 |
+
"alias": " - blimp_npi_present_1"
|
| 208 |
+
},
|
| 209 |
+
"blimp_npi_present_2": {
|
| 210 |
+
"acc,none": 0.763,
|
| 211 |
+
"acc_stderr,none": 0.013454070462577964,
|
| 212 |
+
"alias": " - blimp_npi_present_2"
|
| 213 |
+
},
|
| 214 |
+
"blimp_only_npi_licensor_present": {
|
| 215 |
+
"acc,none": 0.979,
|
| 216 |
+
"acc_stderr,none": 0.00453647215130651,
|
| 217 |
+
"alias": " - blimp_only_npi_licensor_present"
|
| 218 |
+
},
|
| 219 |
+
"blimp_only_npi_scope": {
|
| 220 |
+
"acc,none": 0.721,
|
| 221 |
+
"acc_stderr,none": 0.014190150117612035,
|
| 222 |
+
"alias": " - blimp_only_npi_scope"
|
| 223 |
+
},
|
| 224 |
+
"blimp_passive_1": {
|
| 225 |
+
"acc,none": 0.904,
|
| 226 |
+
"acc_stderr,none": 0.009320454434783219,
|
| 227 |
+
"alias": " - blimp_passive_1"
|
| 228 |
+
},
|
| 229 |
+
"blimp_passive_2": {
|
| 230 |
+
"acc,none": 0.899,
|
| 231 |
+
"acc_stderr,none": 0.009533618929340992,
|
| 232 |
+
"alias": " - blimp_passive_2"
|
| 233 |
+
},
|
| 234 |
+
"blimp_principle_A_c_command": {
|
| 235 |
+
"acc,none": 0.682,
|
| 236 |
+
"acc_stderr,none": 0.014734079309311901,
|
| 237 |
+
"alias": " - blimp_principle_A_c_command"
|
| 238 |
+
},
|
| 239 |
+
"blimp_principle_A_case_1": {
|
| 240 |
+
"acc,none": 1.0,
|
| 241 |
+
"acc_stderr,none": 0.0,
|
| 242 |
+
"alias": " - blimp_principle_A_case_1"
|
| 243 |
+
},
|
| 244 |
+
"blimp_principle_A_case_2": {
|
| 245 |
+
"acc,none": 0.97,
|
| 246 |
+
"acc_stderr,none": 0.00539714082909918,
|
| 247 |
+
"alias": " - blimp_principle_A_case_2"
|
| 248 |
+
},
|
| 249 |
+
"blimp_principle_A_domain_1": {
|
| 250 |
+
"acc,none": 0.984,
|
| 251 |
+
"acc_stderr,none": 0.003969856390319422,
|
| 252 |
+
"alias": " - blimp_principle_A_domain_1"
|
| 253 |
+
},
|
| 254 |
+
"blimp_principle_A_domain_2": {
|
| 255 |
+
"acc,none": 0.708,
|
| 256 |
+
"acc_stderr,none": 0.014385511563477338,
|
| 257 |
+
"alias": " - blimp_principle_A_domain_2"
|
| 258 |
+
},
|
| 259 |
+
"blimp_principle_A_domain_3": {
|
| 260 |
+
"acc,none": 0.588,
|
| 261 |
+
"acc_stderr,none": 0.015572363292015086,
|
| 262 |
+
"alias": " - blimp_principle_A_domain_3"
|
| 263 |
+
},
|
| 264 |
+
"blimp_principle_A_reconstruction": {
|
| 265 |
+
"acc,none": 0.274,
|
| 266 |
+
"acc_stderr,none": 0.014111099288259583,
|
| 267 |
+
"alias": " - blimp_principle_A_reconstruction"
|
| 268 |
+
},
|
| 269 |
+
"blimp_regular_plural_subject_verb_agreement_1": {
|
| 270 |
+
"acc,none": 0.962,
|
| 271 |
+
"acc_stderr,none": 0.006049181150584933,
|
| 272 |
+
"alias": " - blimp_regular_plural_subject_verb_agreement_1"
|
| 273 |
+
},
|
| 274 |
+
"blimp_regular_plural_subject_verb_agreement_2": {
|
| 275 |
+
"acc,none": 0.913,
|
| 276 |
+
"acc_stderr,none": 0.008916866630745902,
|
| 277 |
+
"alias": " - blimp_regular_plural_subject_verb_agreement_2"
|
| 278 |
+
},
|
| 279 |
+
"blimp_sentential_negation_npi_licensor_present": {
|
| 280 |
+
"acc,none": 0.993,
|
| 281 |
+
"acc_stderr,none": 0.0026377941462437772,
|
| 282 |
+
"alias": " - blimp_sentential_negation_npi_licensor_present"
|
| 283 |
+
},
|
| 284 |
+
"blimp_sentential_negation_npi_scope": {
|
| 285 |
+
"acc,none": 0.828,
|
| 286 |
+
"acc_stderr,none": 0.011939788882495321,
|
| 287 |
+
"alias": " - blimp_sentential_negation_npi_scope"
|
| 288 |
+
},
|
| 289 |
+
"blimp_sentential_subject_island": {
|
| 290 |
+
"acc,none": 0.267,
|
| 291 |
+
"acc_stderr,none": 0.013996674851796264,
|
| 292 |
+
"alias": " - blimp_sentential_subject_island"
|
| 293 |
+
},
|
| 294 |
+
"blimp_superlative_quantifiers_1": {
|
| 295 |
+
"acc,none": 0.882,
|
| 296 |
+
"acc_stderr,none": 0.010206869264381795,
|
| 297 |
+
"alias": " - blimp_superlative_quantifiers_1"
|
| 298 |
+
},
|
| 299 |
+
"blimp_superlative_quantifiers_2": {
|
| 300 |
+
"acc,none": 0.801,
|
| 301 |
+
"acc_stderr,none": 0.01263164908309918,
|
| 302 |
+
"alias": " - blimp_superlative_quantifiers_2"
|
| 303 |
+
},
|
| 304 |
+
"blimp_tough_vs_raising_1": {
|
| 305 |
+
"acc,none": 0.728,
|
| 306 |
+
"acc_stderr,none": 0.014078856992462616,
|
| 307 |
+
"alias": " - blimp_tough_vs_raising_1"
|
| 308 |
+
},
|
| 309 |
+
"blimp_tough_vs_raising_2": {
|
| 310 |
+
"acc,none": 0.847,
|
| 311 |
+
"acc_stderr,none": 0.011389500459665525,
|
| 312 |
+
"alias": " - blimp_tough_vs_raising_2"
|
| 313 |
+
},
|
| 314 |
+
"blimp_transitive": {
|
| 315 |
+
"acc,none": 0.894,
|
| 316 |
+
"acc_stderr,none": 0.009739551265785115,
|
| 317 |
+
"alias": " - blimp_transitive"
|
| 318 |
+
},
|
| 319 |
+
"blimp_wh_island": {
|
| 320 |
+
"acc,none": 0.768,
|
| 321 |
+
"acc_stderr,none": 0.013354937452281569,
|
| 322 |
+
"alias": " - blimp_wh_island"
|
| 323 |
+
},
|
| 324 |
+
"blimp_wh_questions_object_gap": {
|
| 325 |
+
"acc,none": 0.802,
|
| 326 |
+
"acc_stderr,none": 0.012607733934175303,
|
| 327 |
+
"alias": " - blimp_wh_questions_object_gap"
|
| 328 |
+
},
|
| 329 |
+
"blimp_wh_questions_subject_gap": {
|
| 330 |
+
"acc,none": 0.934,
|
| 331 |
+
"acc_stderr,none": 0.007855297938697594,
|
| 332 |
+
"alias": " - blimp_wh_questions_subject_gap"
|
| 333 |
+
},
|
| 334 |
+
"blimp_wh_questions_subject_gap_long_distance": {
|
| 335 |
+
"acc,none": 0.885,
|
| 336 |
+
"acc_stderr,none": 0.010093407594904614,
|
| 337 |
+
"alias": " - blimp_wh_questions_subject_gap_long_distance"
|
| 338 |
+
},
|
| 339 |
+
"blimp_wh_vs_that_no_gap": {
|
| 340 |
+
"acc,none": 0.954,
|
| 341 |
+
"acc_stderr,none": 0.006627814717380708,
|
| 342 |
+
"alias": " - blimp_wh_vs_that_no_gap"
|
| 343 |
+
},
|
| 344 |
+
"blimp_wh_vs_that_no_gap_long_distance": {
|
| 345 |
+
"acc,none": 0.935,
|
| 346 |
+
"acc_stderr,none": 0.007799733061832023,
|
| 347 |
+
"alias": " - blimp_wh_vs_that_no_gap_long_distance"
|
| 348 |
+
},
|
| 349 |
+
"blimp_wh_vs_that_with_gap": {
|
| 350 |
+
"acc,none": 0.536,
|
| 351 |
+
"acc_stderr,none": 0.015778243024904586,
|
| 352 |
+
"alias": " - blimp_wh_vs_that_with_gap"
|
| 353 |
+
},
|
| 354 |
+
"blimp_wh_vs_that_with_gap_long_distance": {
|
| 355 |
+
"acc,none": 0.46,
|
| 356 |
+
"acc_stderr,none": 0.01576859691439438,
|
| 357 |
+
"alias": " - blimp_wh_vs_that_with_gap_long_distance"
|
| 358 |
+
}
|
| 359 |
+
}
|
| 360 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/sciq_2025-03-10T18-07-00.999754.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"sciq"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"pad_token_ids": [
|
| 11 |
+
0
|
| 12 |
+
],
|
| 13 |
+
"stop_token_ids": [
|
| 14 |
+
0,
|
| 15 |
+
261
|
| 16 |
+
],
|
| 17 |
+
"custom_prefix_token_id": 0,
|
| 18 |
+
"results": {
|
| 19 |
+
"sciq": {
|
| 20 |
+
"acc,none": 0.863,
|
| 21 |
+
"acc_stderr,none": 0.010878848714333308,
|
| 22 |
+
"acc_norm,none": 0.797,
|
| 23 |
+
"acc_norm_stderr,none": 0.012726073744598288,
|
| 24 |
+
"alias": "sciq"
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/sciq_2025-03-12T11-12-57.165000.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"sciq"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"sciq": {
|
| 20 |
+
"acc,none": 0.867,
|
| 21 |
+
"acc_stderr,none": 0.01074366913239733,
|
| 22 |
+
"acc_norm,none": 0.805,
|
| 23 |
+
"acc_norm_stderr,none": 0.012535235623319325,
|
| 24 |
+
"alias": "sciq"
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/winogrande_2025-03-10T18-05-31.048143.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"winogrande"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"pad_token_ids": [
|
| 11 |
+
0
|
| 12 |
+
],
|
| 13 |
+
"stop_token_ids": [
|
| 14 |
+
0,
|
| 15 |
+
261
|
| 16 |
+
],
|
| 17 |
+
"custom_prefix_token_id": 0,
|
| 18 |
+
"results": {
|
| 19 |
+
"winogrande": {
|
| 20 |
+
"acc,none": 0.5374901341752171,
|
| 21 |
+
"acc_stderr,none": 0.014012928183336573,
|
| 22 |
+
"alias": "winogrande"
|
| 23 |
+
}
|
| 24 |
+
}
|
| 25 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/winogrande_2025-03-12T11-11-17.816730.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"winogrande"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"winogrande": {
|
| 20 |
+
"acc,none": 0.5335438042620363,
|
| 21 |
+
"acc_stderr,none": 0.014020826677598098,
|
| 22 |
+
"alias": "winogrande"
|
| 23 |
+
}
|
| 24 |
+
}
|
| 25 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xcopa_2025-03-10T18-56-23.087378.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xcopa"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"pad_token_ids": [
|
| 11 |
+
0
|
| 12 |
+
],
|
| 13 |
+
"stop_token_ids": [
|
| 14 |
+
0,
|
| 15 |
+
261
|
| 16 |
+
],
|
| 17 |
+
"custom_prefix_token_id": 0,
|
| 18 |
+
"results": {
|
| 19 |
+
"xcopa": {
|
| 20 |
+
"acc,none": 0.5330909090909091,
|
| 21 |
+
"acc_stderr,none": 0.006722874288986094,
|
| 22 |
+
"alias": "xcopa"
|
| 23 |
+
},
|
| 24 |
+
"xcopa_et": {
|
| 25 |
+
"acc,none": 0.51,
|
| 26 |
+
"acc_stderr,none": 0.02237859698923078,
|
| 27 |
+
"alias": " - xcopa_et"
|
| 28 |
+
},
|
| 29 |
+
"xcopa_ht": {
|
| 30 |
+
"acc,none": 0.502,
|
| 31 |
+
"acc_stderr,none": 0.022382894986483524,
|
| 32 |
+
"alias": " - xcopa_ht"
|
| 33 |
+
},
|
| 34 |
+
"xcopa_id": {
|
| 35 |
+
"acc,none": 0.54,
|
| 36 |
+
"acc_stderr,none": 0.02231133324528967,
|
| 37 |
+
"alias": " - xcopa_id"
|
| 38 |
+
},
|
| 39 |
+
"xcopa_it": {
|
| 40 |
+
"acc,none": 0.556,
|
| 41 |
+
"acc_stderr,none": 0.02224224437573102,
|
| 42 |
+
"alias": " - xcopa_it"
|
| 43 |
+
},
|
| 44 |
+
"xcopa_qu": {
|
| 45 |
+
"acc,none": 0.482,
|
| 46 |
+
"acc_stderr,none": 0.02236856511738799,
|
| 47 |
+
"alias": " - xcopa_qu"
|
| 48 |
+
},
|
| 49 |
+
"xcopa_sw": {
|
| 50 |
+
"acc,none": 0.54,
|
| 51 |
+
"acc_stderr,none": 0.022311333245289666,
|
| 52 |
+
"alias": " - xcopa_sw"
|
| 53 |
+
},
|
| 54 |
+
"xcopa_ta": {
|
| 55 |
+
"acc,none": 0.55,
|
| 56 |
+
"acc_stderr,none": 0.022270877485360444,
|
| 57 |
+
"alias": " - xcopa_ta"
|
| 58 |
+
},
|
| 59 |
+
"xcopa_th": {
|
| 60 |
+
"acc,none": 0.552,
|
| 61 |
+
"acc_stderr,none": 0.02226169729227013,
|
| 62 |
+
"alias": " - xcopa_th"
|
| 63 |
+
},
|
| 64 |
+
"xcopa_tr": {
|
| 65 |
+
"acc,none": 0.514,
|
| 66 |
+
"acc_stderr,none": 0.022374298166353185,
|
| 67 |
+
"alias": " - xcopa_tr"
|
| 68 |
+
},
|
| 69 |
+
"xcopa_vi": {
|
| 70 |
+
"acc,none": 0.528,
|
| 71 |
+
"acc_stderr,none": 0.022347949832668093,
|
| 72 |
+
"alias": " - xcopa_vi"
|
| 73 |
+
},
|
| 74 |
+
"xcopa_zh": {
|
| 75 |
+
"acc,none": 0.59,
|
| 76 |
+
"acc_stderr,none": 0.022017482578127676,
|
| 77 |
+
"alias": " - xcopa_zh"
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xcopa_2025-03-12T12-05-27.136379.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xcopa"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xcopa": {
|
| 20 |
+
"acc,none": 0.5358181818181819,
|
| 21 |
+
"acc_stderr,none": 0.006720464517893033,
|
| 22 |
+
"alias": "xcopa"
|
| 23 |
+
},
|
| 24 |
+
"xcopa_et": {
|
| 25 |
+
"acc,none": 0.508,
|
| 26 |
+
"acc_stderr,none": 0.022380208834928035,
|
| 27 |
+
"alias": " - xcopa_et"
|
| 28 |
+
},
|
| 29 |
+
"xcopa_ht": {
|
| 30 |
+
"acc,none": 0.504,
|
| 31 |
+
"acc_stderr,none": 0.02238235778196214,
|
| 32 |
+
"alias": " - xcopa_ht"
|
| 33 |
+
},
|
| 34 |
+
"xcopa_id": {
|
| 35 |
+
"acc,none": 0.546,
|
| 36 |
+
"acc_stderr,none": 0.02228814759117695,
|
| 37 |
+
"alias": " - xcopa_id"
|
| 38 |
+
},
|
| 39 |
+
"xcopa_it": {
|
| 40 |
+
"acc,none": 0.558,
|
| 41 |
+
"acc_stderr,none": 0.02223197069632112,
|
| 42 |
+
"alias": " - xcopa_it"
|
| 43 |
+
},
|
| 44 |
+
"xcopa_qu": {
|
| 45 |
+
"acc,none": 0.482,
|
| 46 |
+
"acc_stderr,none": 0.02236856511738799,
|
| 47 |
+
"alias": " - xcopa_qu"
|
| 48 |
+
},
|
| 49 |
+
"xcopa_sw": {
|
| 50 |
+
"acc,none": 0.534,
|
| 51 |
+
"acc_stderr,none": 0.02233126442325838,
|
| 52 |
+
"alias": " - xcopa_sw"
|
| 53 |
+
},
|
| 54 |
+
"xcopa_ta": {
|
| 55 |
+
"acc,none": 0.546,
|
| 56 |
+
"acc_stderr,none": 0.02228814759117695,
|
| 57 |
+
"alias": " - xcopa_ta"
|
| 58 |
+
},
|
| 59 |
+
"xcopa_th": {
|
| 60 |
+
"acc,none": 0.556,
|
| 61 |
+
"acc_stderr,none": 0.022242244375731017,
|
| 62 |
+
"alias": " - xcopa_th"
|
| 63 |
+
},
|
| 64 |
+
"xcopa_tr": {
|
| 65 |
+
"acc,none": 0.526,
|
| 66 |
+
"acc_stderr,none": 0.02235279165091416,
|
| 67 |
+
"alias": " - xcopa_tr"
|
| 68 |
+
},
|
| 69 |
+
"xcopa_vi": {
|
| 70 |
+
"acc,none": 0.544,
|
| 71 |
+
"acc_stderr,none": 0.022296238348407063,
|
| 72 |
+
"alias": " - xcopa_vi"
|
| 73 |
+
},
|
| 74 |
+
"xcopa_zh": {
|
| 75 |
+
"acc,none": 0.59,
|
| 76 |
+
"acc_stderr,none": 0.022017482578127676,
|
| 77 |
+
"alias": " - xcopa_zh"
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xnli_2025-03-10T22-52-22.630412.json
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xnli"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"pad_token_ids": [
|
| 11 |
+
0
|
| 12 |
+
],
|
| 13 |
+
"stop_token_ids": [
|
| 14 |
+
0,
|
| 15 |
+
261
|
| 16 |
+
],
|
| 17 |
+
"custom_prefix_token_id": 0,
|
| 18 |
+
"results": {
|
| 19 |
+
"xnli": {
|
| 20 |
+
"acc,none": 0.3740557129367328,
|
| 21 |
+
"acc_stderr,none": 0.0023440725323408814,
|
| 22 |
+
"alias": "xnli"
|
| 23 |
+
},
|
| 24 |
+
"xnli_ar": {
|
| 25 |
+
"acc,none": 0.3345381526104418,
|
| 26 |
+
"acc_stderr,none": 0.009457404390939166,
|
| 27 |
+
"alias": " - xnli_ar"
|
| 28 |
+
},
|
| 29 |
+
"xnli_bg": {
|
| 30 |
+
"acc,none": 0.3718875502008032,
|
| 31 |
+
"acc_stderr,none": 0.00968750795863181,
|
| 32 |
+
"alias": " - xnli_bg"
|
| 33 |
+
},
|
| 34 |
+
"xnli_de": {
|
| 35 |
+
"acc,none": 0.42369477911646586,
|
| 36 |
+
"acc_stderr,none": 0.0099046785408289,
|
| 37 |
+
"alias": " - xnli_de"
|
| 38 |
+
},
|
| 39 |
+
"xnli_el": {
|
| 40 |
+
"acc,none": 0.3654618473895582,
|
| 41 |
+
"acc_stderr,none": 0.009652447412833482,
|
| 42 |
+
"alias": " - xnli_el"
|
| 43 |
+
},
|
| 44 |
+
"xnli_en": {
|
| 45 |
+
"acc,none": 0.4855421686746988,
|
| 46 |
+
"acc_stderr,none": 0.010017882185606005,
|
| 47 |
+
"alias": " - xnli_en"
|
| 48 |
+
},
|
| 49 |
+
"xnli_es": {
|
| 50 |
+
"acc,none": 0.3550200803212851,
|
| 51 |
+
"acc_stderr,none": 0.00959151273097429,
|
| 52 |
+
"alias": " - xnli_es"
|
| 53 |
+
},
|
| 54 |
+
"xnli_eu": {
|
| 55 |
+
"acc,none": 0.36167664670658684,
|
| 56 |
+
"acc_stderr,none": 0.006788988682090094,
|
| 57 |
+
"alias": " - xnli_eu"
|
| 58 |
+
},
|
| 59 |
+
"xnli_fr": {
|
| 60 |
+
"acc,none": 0.41445783132530123,
|
| 61 |
+
"acc_stderr,none": 0.009874311310483537,
|
| 62 |
+
"alias": " - xnli_fr"
|
| 63 |
+
},
|
| 64 |
+
"xnli_hi": {
|
| 65 |
+
"acc,none": 0.3526104417670683,
|
| 66 |
+
"acc_stderr,none": 0.009576746271768752,
|
| 67 |
+
"alias": " - xnli_hi"
|
| 68 |
+
},
|
| 69 |
+
"xnli_ru": {
|
| 70 |
+
"acc,none": 0.41726907630522087,
|
| 71 |
+
"acc_stderr,none": 0.009883930537517769,
|
| 72 |
+
"alias": " - xnli_ru"
|
| 73 |
+
},
|
| 74 |
+
"xnli_sw": {
|
| 75 |
+
"acc,none": 0.348995983935743,
|
| 76 |
+
"acc_stderr,none": 0.009554095988300685,
|
| 77 |
+
"alias": " - xnli_sw"
|
| 78 |
+
},
|
| 79 |
+
"xnli_th": {
|
| 80 |
+
"acc,none": 0.3530120481927711,
|
| 81 |
+
"acc_stderr,none": 0.009579225840709712,
|
| 82 |
+
"alias": " - xnli_th"
|
| 83 |
+
},
|
| 84 |
+
"xnli_tr": {
|
| 85 |
+
"acc,none": 0.3690763052208835,
|
| 86 |
+
"acc_stderr,none": 0.009672395644470427,
|
| 87 |
+
"alias": " - xnli_tr"
|
| 88 |
+
},
|
| 89 |
+
"xnli_ur": {
|
| 90 |
+
"acc,none": 0.3353413654618474,
|
| 91 |
+
"acc_stderr,none": 0.009463034891512704,
|
| 92 |
+
"alias": " - xnli_ur"
|
| 93 |
+
},
|
| 94 |
+
"xnli_vi": {
|
| 95 |
+
"acc,none": 0.37028112449799194,
|
| 96 |
+
"acc_stderr,none": 0.009678915409840292,
|
| 97 |
+
"alias": " - xnli_vi"
|
| 98 |
+
},
|
| 99 |
+
"xnli_zh": {
|
| 100 |
+
"acc,none": 0.3385542168674699,
|
| 101 |
+
"acc_stderr,none": 0.00948525020851688,
|
| 102 |
+
"alias": " - xnli_zh"
|
| 103 |
+
}
|
| 104 |
+
}
|
| 105 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xnli_2025-03-12T16-39-24.982478.json
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xnli"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xnli": {
|
| 20 |
+
"acc,none": 0.3767233238904627,
|
| 21 |
+
"acc_stderr,none": 0.002345702445283327,
|
| 22 |
+
"alias": "xnli"
|
| 23 |
+
},
|
| 24 |
+
"xnli_ar": {
|
| 25 |
+
"acc,none": 0.3353413654618474,
|
| 26 |
+
"acc_stderr,none": 0.009463034891512703,
|
| 27 |
+
"alias": " - xnli_ar"
|
| 28 |
+
},
|
| 29 |
+
"xnli_bg": {
|
| 30 |
+
"acc,none": 0.40522088353413654,
|
| 31 |
+
"acc_stderr,none": 0.009840367477589285,
|
| 32 |
+
"alias": " - xnli_bg"
|
| 33 |
+
},
|
| 34 |
+
"xnli_de": {
|
| 35 |
+
"acc,none": 0.4285140562248996,
|
| 36 |
+
"acc_stderr,none": 0.009919113605650934,
|
| 37 |
+
"alias": " - xnli_de"
|
| 38 |
+
},
|
| 39 |
+
"xnli_el": {
|
| 40 |
+
"acc,none": 0.3562248995983936,
|
| 41 |
+
"acc_stderr,none": 0.009598796305792166,
|
| 42 |
+
"alias": " - xnli_el"
|
| 43 |
+
},
|
| 44 |
+
"xnli_en": {
|
| 45 |
+
"acc,none": 0.4903614457831325,
|
| 46 |
+
"acc_stderr,none": 0.010020210558438297,
|
| 47 |
+
"alias": " - xnli_en"
|
| 48 |
+
},
|
| 49 |
+
"xnli_es": {
|
| 50 |
+
"acc,none": 0.36224899598393573,
|
| 51 |
+
"acc_stderr,none": 0.009634223618009006,
|
| 52 |
+
"alias": " - xnli_es"
|
| 53 |
+
},
|
| 54 |
+
"xnli_eu": {
|
| 55 |
+
"acc,none": 0.36047904191616764,
|
| 56 |
+
"acc_stderr,none": 0.006784094439482414,
|
| 57 |
+
"alias": " - xnli_eu"
|
| 58 |
+
},
|
| 59 |
+
"xnli_fr": {
|
| 60 |
+
"acc,none": 0.43253012048192774,
|
| 61 |
+
"acc_stderr,none": 0.00993040902713945,
|
| 62 |
+
"alias": " - xnli_fr"
|
| 63 |
+
},
|
| 64 |
+
"xnli_hi": {
|
| 65 |
+
"acc,none": 0.35542168674698793,
|
| 66 |
+
"acc_stderr,none": 0.009593947957927137,
|
| 67 |
+
"alias": " - xnli_hi"
|
| 68 |
+
},
|
| 69 |
+
"xnli_ru": {
|
| 70 |
+
"acc,none": 0.42048192771084336,
|
| 71 |
+
"acc_stderr,none": 0.009894519551105777,
|
| 72 |
+
"alias": " - xnli_ru"
|
| 73 |
+
},
|
| 74 |
+
"xnli_sw": {
|
| 75 |
+
"acc,none": 0.3417670682730924,
|
| 76 |
+
"acc_stderr,none": 0.009506977398287621,
|
| 77 |
+
"alias": " - xnli_sw"
|
| 78 |
+
},
|
| 79 |
+
"xnli_th": {
|
| 80 |
+
"acc,none": 0.3397590361445783,
|
| 81 |
+
"acc_stderr,none": 0.009493454925438252,
|
| 82 |
+
"alias": " - xnli_th"
|
| 83 |
+
},
|
| 84 |
+
"xnli_tr": {
|
| 85 |
+
"acc,none": 0.3755020080321285,
|
| 86 |
+
"acc_stderr,none": 0.00970642284437982,
|
| 87 |
+
"alias": " - xnli_tr"
|
| 88 |
+
},
|
| 89 |
+
"xnli_ur": {
|
| 90 |
+
"acc,none": 0.3337349397590361,
|
| 91 |
+
"acc_stderr,none": 0.009451743112667055,
|
| 92 |
+
"alias": " - xnli_ur"
|
| 93 |
+
},
|
| 94 |
+
"xnli_vi": {
|
| 95 |
+
"acc,none": 0.36947791164658633,
|
| 96 |
+
"acc_stderr,none": 0.00967457608577645,
|
| 97 |
+
"alias": " - xnli_vi"
|
| 98 |
+
},
|
| 99 |
+
"xnli_zh": {
|
| 100 |
+
"acc,none": 0.3369477911646586,
|
| 101 |
+
"acc_stderr,none": 0.009474203778757713,
|
| 102 |
+
"alias": " - xnli_zh"
|
| 103 |
+
}
|
| 104 |
+
}
|
| 105 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xstorycloze_2025-03-10T23-08-53.855464.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xstorycloze"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"pad_token_ids": [
|
| 11 |
+
0
|
| 12 |
+
],
|
| 13 |
+
"stop_token_ids": [
|
| 14 |
+
0,
|
| 15 |
+
261
|
| 16 |
+
],
|
| 17 |
+
"custom_prefix_token_id": 0,
|
| 18 |
+
"results": {
|
| 19 |
+
"xstorycloze": {
|
| 20 |
+
"acc,none": 0.5288490463871006,
|
| 21 |
+
"acc_stderr,none": 0.003862144546165837,
|
| 22 |
+
"alias": "xstorycloze"
|
| 23 |
+
},
|
| 24 |
+
"xstorycloze_ar": {
|
| 25 |
+
"acc,none": 0.4811383189940437,
|
| 26 |
+
"acc_stderr,none": 0.012857966762464992,
|
| 27 |
+
"alias": " - xstorycloze_ar"
|
| 28 |
+
},
|
| 29 |
+
"xstorycloze_en": {
|
| 30 |
+
"acc,none": 0.6307081403044341,
|
| 31 |
+
"acc_stderr,none": 0.012419685881273582,
|
| 32 |
+
"alias": " - xstorycloze_en"
|
| 33 |
+
},
|
| 34 |
+
"xstorycloze_es": {
|
| 35 |
+
"acc,none": 0.5373924553275976,
|
| 36 |
+
"acc_stderr,none": 0.012831093347016553,
|
| 37 |
+
"alias": " - xstorycloze_es"
|
| 38 |
+
},
|
| 39 |
+
"xstorycloze_eu": {
|
| 40 |
+
"acc,none": 0.5268034414295168,
|
| 41 |
+
"acc_stderr,none": 0.012848623899505767,
|
| 42 |
+
"alias": " - xstorycloze_eu"
|
| 43 |
+
},
|
| 44 |
+
"xstorycloze_hi": {
|
| 45 |
+
"acc,none": 0.5109199205823958,
|
| 46 |
+
"acc_stderr,none": 0.012864056278255038,
|
| 47 |
+
"alias": " - xstorycloze_hi"
|
| 48 |
+
},
|
| 49 |
+
"xstorycloze_id": {
|
| 50 |
+
"acc,none": 0.5168762409000662,
|
| 51 |
+
"acc_stderr,none": 0.012859793919977604,
|
| 52 |
+
"alias": " - xstorycloze_id"
|
| 53 |
+
},
|
| 54 |
+
"xstorycloze_my": {
|
| 55 |
+
"acc,none": 0.48974189278623426,
|
| 56 |
+
"acc_stderr,none": 0.012864417047980475,
|
| 57 |
+
"alias": " - xstorycloze_my"
|
| 58 |
+
},
|
| 59 |
+
"xstorycloze_ru": {
|
| 60 |
+
"acc,none": 0.5195234943745863,
|
| 61 |
+
"acc_stderr,none": 0.01285731253183686,
|
| 62 |
+
"alias": " - xstorycloze_ru"
|
| 63 |
+
},
|
| 64 |
+
"xstorycloze_sw": {
|
| 65 |
+
"acc,none": 0.514890800794176,
|
| 66 |
+
"acc_stderr,none": 0.012861417842074004,
|
| 67 |
+
"alias": " - xstorycloze_sw"
|
| 68 |
+
},
|
| 69 |
+
"xstorycloze_te": {
|
| 70 |
+
"acc,none": 0.5466578424884183,
|
| 71 |
+
"acc_stderr,none": 0.012810980537828157,
|
| 72 |
+
"alias": " - xstorycloze_te"
|
| 73 |
+
},
|
| 74 |
+
"xstorycloze_zh": {
|
| 75 |
+
"acc,none": 0.542686962276638,
|
| 76 |
+
"acc_stderr,none": 0.012820147204256244,
|
| 77 |
+
"alias": " - xstorycloze_zh"
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xstorycloze_2025-03-12T17-01-39.345687.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xstorycloze"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xstorycloze": {
|
| 20 |
+
"acc,none": 0.5288490463871006,
|
| 21 |
+
"acc_stderr,none": 0.003861656646850473,
|
| 22 |
+
"alias": "xstorycloze"
|
| 23 |
+
},
|
| 24 |
+
"xstorycloze_ar": {
|
| 25 |
+
"acc,none": 0.4798146922567836,
|
| 26 |
+
"acc_stderr,none": 0.012856635706498292,
|
| 27 |
+
"alias": " - xstorycloze_ar"
|
| 28 |
+
},
|
| 29 |
+
"xstorycloze_en": {
|
| 30 |
+
"acc,none": 0.6333553937789543,
|
| 31 |
+
"acc_stderr,none": 0.012401034429990701,
|
| 32 |
+
"alias": " - xstorycloze_en"
|
| 33 |
+
},
|
| 34 |
+
"xstorycloze_es": {
|
| 35 |
+
"acc,none": 0.5314361350099271,
|
| 36 |
+
"acc_stderr,none": 0.012841668760976905,
|
| 37 |
+
"alias": " - xstorycloze_es"
|
| 38 |
+
},
|
| 39 |
+
"xstorycloze_eu": {
|
| 40 |
+
"acc,none": 0.5268034414295168,
|
| 41 |
+
"acc_stderr,none": 0.012848623899505767,
|
| 42 |
+
"alias": " - xstorycloze_eu"
|
| 43 |
+
},
|
| 44 |
+
"xstorycloze_hi": {
|
| 45 |
+
"acc,none": 0.513567174056916,
|
| 46 |
+
"acc_stderr,none": 0.01286238758665008,
|
| 47 |
+
"alias": " - xstorycloze_hi"
|
| 48 |
+
},
|
| 49 |
+
"xstorycloze_id": {
|
| 50 |
+
"acc,none": 0.5162144275314361,
|
| 51 |
+
"acc_stderr,none": 0.01286035780505586,
|
| 52 |
+
"alias": " - xstorycloze_id"
|
| 53 |
+
},
|
| 54 |
+
"xstorycloze_my": {
|
| 55 |
+
"acc,none": 0.48974189278623426,
|
| 56 |
+
"acc_stderr,none": 0.012864417047980475,
|
| 57 |
+
"alias": " - xstorycloze_my"
|
| 58 |
+
},
|
| 59 |
+
"xstorycloze_ru": {
|
| 60 |
+
"acc,none": 0.5215089344804765,
|
| 61 |
+
"acc_stderr,none": 0.012855214257296608,
|
| 62 |
+
"alias": " - xstorycloze_ru"
|
| 63 |
+
},
|
| 64 |
+
"xstorycloze_sw": {
|
| 65 |
+
"acc,none": 0.5122435473196558,
|
| 66 |
+
"acc_stderr,none": 0.012863267059205548,
|
| 67 |
+
"alias": " - xstorycloze_sw"
|
| 68 |
+
},
|
| 69 |
+
"xstorycloze_te": {
|
| 70 |
+
"acc,none": 0.5453342157511581,
|
| 71 |
+
"acc_stderr,none": 0.012814127367359414,
|
| 72 |
+
"alias": " - xstorycloze_te"
|
| 73 |
+
},
|
| 74 |
+
"xstorycloze_zh": {
|
| 75 |
+
"acc,none": 0.5473196558570483,
|
| 76 |
+
"acc_stderr,none": 0.012809372866181955,
|
| 77 |
+
"alias": " - xstorycloze_zh"
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xwinograd_2025-03-10T23-11-45.464852.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xwinograd"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"pad_token_ids": [
|
| 11 |
+
0
|
| 12 |
+
],
|
| 13 |
+
"stop_token_ids": [
|
| 14 |
+
0,
|
| 15 |
+
261
|
| 16 |
+
],
|
| 17 |
+
"custom_prefix_token_id": 0,
|
| 18 |
+
"results": {
|
| 19 |
+
"xwinograd": {
|
| 20 |
+
"acc,none": 0.6432906271072151,
|
| 21 |
+
"acc_stderr,none": 0.007109338831236582,
|
| 22 |
+
"alias": "xwinograd"
|
| 23 |
+
},
|
| 24 |
+
"xwinograd_en": {
|
| 25 |
+
"acc,none": 0.6993548387096774,
|
| 26 |
+
"acc_stderr,none": 0.009511693326241081,
|
| 27 |
+
"alias": " - xwinograd_en"
|
| 28 |
+
},
|
| 29 |
+
"xwinograd_fr": {
|
| 30 |
+
"acc,none": 0.5903614457831325,
|
| 31 |
+
"acc_stderr,none": 0.05430658329539148,
|
| 32 |
+
"alias": " - xwinograd_fr"
|
| 33 |
+
},
|
| 34 |
+
"xwinograd_jp": {
|
| 35 |
+
"acc,none": 0.5526590198123045,
|
| 36 |
+
"acc_stderr,none": 0.016064426253309637,
|
| 37 |
+
"alias": " - xwinograd_jp"
|
| 38 |
+
},
|
| 39 |
+
"xwinograd_pt": {
|
| 40 |
+
"acc,none": 0.5247148288973384,
|
| 41 |
+
"acc_stderr,none": 0.030852343325490784,
|
| 42 |
+
"alias": " - xwinograd_pt"
|
| 43 |
+
},
|
| 44 |
+
"xwinograd_ru": {
|
| 45 |
+
"acc,none": 0.5619047619047619,
|
| 46 |
+
"acc_stderr,none": 0.02799953368887838,
|
| 47 |
+
"alias": " - xwinograd_ru"
|
| 48 |
+
},
|
| 49 |
+
"xwinograd_zh": {
|
| 50 |
+
"acc,none": 0.6785714285714286,
|
| 51 |
+
"acc_stderr,none": 0.02082361047648225,
|
| 52 |
+
"alias": " - xwinograd_zh"
|
| 53 |
+
}
|
| 54 |
+
}
|
| 55 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xwinograd_2025-03-11T21-17-31.358555.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xwinograd"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"pad_token_ids": [
|
| 11 |
+
0
|
| 12 |
+
],
|
| 13 |
+
"stop_token_ids": [
|
| 14 |
+
0,
|
| 15 |
+
261
|
| 16 |
+
],
|
| 17 |
+
"custom_prefix_token_id": 0,
|
| 18 |
+
"results": {
|
| 19 |
+
"xwinograd": {
|
| 20 |
+
"acc,none": 0.6432906271072151,
|
| 21 |
+
"acc_stderr,none": 0.007109338831236581,
|
| 22 |
+
"alias": "xwinograd"
|
| 23 |
+
},
|
| 24 |
+
"xwinograd_en": {
|
| 25 |
+
"acc,none": 0.6993548387096774,
|
| 26 |
+
"acc_stderr,none": 0.009511693326241081,
|
| 27 |
+
"alias": " - xwinograd_en"
|
| 28 |
+
},
|
| 29 |
+
"xwinograd_fr": {
|
| 30 |
+
"acc,none": 0.5903614457831325,
|
| 31 |
+
"acc_stderr,none": 0.05430658329539148,
|
| 32 |
+
"alias": " - xwinograd_fr"
|
| 33 |
+
},
|
| 34 |
+
"xwinograd_jp": {
|
| 35 |
+
"acc,none": 0.5526590198123045,
|
| 36 |
+
"acc_stderr,none": 0.016064426253309637,
|
| 37 |
+
"alias": " - xwinograd_jp"
|
| 38 |
+
},
|
| 39 |
+
"xwinograd_pt": {
|
| 40 |
+
"acc,none": 0.5247148288973384,
|
| 41 |
+
"acc_stderr,none": 0.030852343325490784,
|
| 42 |
+
"alias": " - xwinograd_pt"
|
| 43 |
+
},
|
| 44 |
+
"xwinograd_ru": {
|
| 45 |
+
"acc,none": 0.5619047619047619,
|
| 46 |
+
"acc_stderr,none": 0.02799953368887838,
|
| 47 |
+
"alias": " - xwinograd_ru"
|
| 48 |
+
},
|
| 49 |
+
"xwinograd_zh": {
|
| 50 |
+
"acc,none": 0.6785714285714286,
|
| 51 |
+
"acc_stderr,none": 0.02082361047648225,
|
| 52 |
+
"alias": " - xwinograd_zh"
|
| 53 |
+
}
|
| 54 |
+
}
|
| 55 |
+
}
|
lm_eval/rwkv7-g1-0.1b-20250307-ctx4096/xwinograd_2025-03-12T17-04-51.338405.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/models/rwkv7-g1-0.1b-20250307-ctx4096",
|
| 3 |
+
"tasks": [
|
| 4 |
+
"xwinograd"
|
| 5 |
+
],
|
| 6 |
+
"num_fewshot": 0,
|
| 7 |
+
"lm_eval_version": "0.4.3",
|
| 8 |
+
"bos_token_id": 0,
|
| 9 |
+
"eos_token_id": 0,
|
| 10 |
+
"custom_prefix_token_id": 11,
|
| 11 |
+
"pad_token_ids": [
|
| 12 |
+
11
|
| 13 |
+
],
|
| 14 |
+
"stop_token_ids": [
|
| 15 |
+
11,
|
| 16 |
+
261
|
| 17 |
+
],
|
| 18 |
+
"results": {
|
| 19 |
+
"xwinograd": {
|
| 20 |
+
"acc,none": 0.6419420094403236,
|
| 21 |
+
"acc_stderr,none": 0.0071176833742256414,
|
| 22 |
+
"alias": "xwinograd"
|
| 23 |
+
},
|
| 24 |
+
"xwinograd_en": {
|
| 25 |
+
"acc,none": 0.6980645161290323,
|
| 26 |
+
"acc_stderr,none": 0.009523285337477353,
|
| 27 |
+
"alias": " - xwinograd_en"
|
| 28 |
+
},
|
| 29 |
+
"xwinograd_fr": {
|
| 30 |
+
"acc,none": 0.5783132530120482,
|
| 31 |
+
"acc_stderr,none": 0.0545342848529511,
|
| 32 |
+
"alias": " - xwinograd_fr"
|
| 33 |
+
},
|
| 34 |
+
"xwinograd_jp": {
|
| 35 |
+
"acc,none": 0.5495307612095933,
|
| 36 |
+
"acc_stderr,none": 0.01607480892375643,
|
| 37 |
+
"alias": " - xwinograd_jp"
|
| 38 |
+
},
|
| 39 |
+
"xwinograd_pt": {
|
| 40 |
+
"acc,none": 0.532319391634981,
|
| 41 |
+
"acc_stderr,none": 0.030825503526303786,
|
| 42 |
+
"alias": " - xwinograd_pt"
|
| 43 |
+
},
|
| 44 |
+
"xwinograd_ru": {
|
| 45 |
+
"acc,none": 0.5682539682539682,
|
| 46 |
+
"acc_stderr,none": 0.02795249586167163,
|
| 47 |
+
"alias": " - xwinograd_ru"
|
| 48 |
+
},
|
| 49 |
+
"xwinograd_zh": {
|
| 50 |
+
"acc,none": 0.6726190476190477,
|
| 51 |
+
"acc_stderr,none": 0.02092316077596883,
|
| 52 |
+
"alias": " - xwinograd_zh"
|
| 53 |
+
}
|
| 54 |
+
}
|
| 55 |
+
}
|