| [ |
| { |
| "task": "hellaswag", |
| "benchmark": "HellaSwag", |
| "metric": "acc_norm", |
| "score": 0.26847241585341564, |
| "shots": 0, |
| "runtime_sec": 157.33, |
| "status": "success" |
| }, |
| { |
| "task": "piqa", |
| "benchmark": "PIQA", |
| "metric": "acc_norm", |
| "score": 0.4923830250272035, |
| "shots": 0, |
| "runtime_sec": 34.0, |
| "status": "success" |
| }, |
| { |
| "task": "winogrande", |
| "benchmark": "WinoGrande", |
| "metric": "acc", |
| "score": 0.4996053670086819, |
| "shots": 0, |
| "runtime_sec": 26.59, |
| "status": "success" |
| }, |
| { |
| "task": "boolq", |
| "benchmark": "BoolQ", |
| "metric": "acc", |
| "score": 0.6085626911314985, |
| "shots": 0, |
| "runtime_sec": 54.76, |
| "status": "success" |
| }, |
| { |
| "task": "arc_easy", |
| "benchmark": "ARC-Easy", |
| "metric": "acc_norm", |
| "score": 0.27104377104377103, |
| "shots": 0, |
| "runtime_sec": 41.15, |
| "status": "success" |
| }, |
| { |
| "task": "arc_challenge", |
| "benchmark": "ARC-Challenge", |
| "metric": "acc_norm", |
| "score": 0.27559726962457337, |
| "shots": 0, |
| "runtime_sec": 31.75, |
| "status": "success" |
| }, |
| { |
| "task": "openbookqa", |
| "benchmark": "OpenBookQA", |
| "metric": "acc_norm", |
| "score": 0.298, |
| "shots": 0, |
| "runtime_sec": 26.69, |
| "status": "success" |
| }, |
| { |
| "task": "commonsense_qa", |
| "benchmark": "CommonsenseQA", |
| "metric": "acc", |
| "score": 0.20802620802620803, |
| "shots": 0, |
| "runtime_sec": 30.66, |
| "status": "success" |
| }, |
| { |
| "task": "lambada_openai", |
| "benchmark": "LAMBADA", |
| "metric": "acc", |
| "score": 0.0, |
| "shots": 0, |
| "runtime_sec": 68.38, |
| "status": "success" |
| }, |
| { |
| "task": "blimp", |
| "benchmark": "BLiMP", |
| "metric": "acc", |
| "score": 0.536134328358209, |
| "shots": 0, |
| "runtime_sec": 471.77, |
| "status": "success" |
| }, |
| { |
| "task": "mmlu", |
| "benchmark": "MMLU", |
| "metric": "acc", |
| "score": 0.2416322461187865, |
| "shots": 0, |
| "runtime_sec": 532.07, |
| "status": "success" |
| }, |
| { |
| "task": "wikitext", |
| "benchmark": "WikiText-2", |
| "metric": "word_perplexity", |
| "score": 39301278.79233013, |
| "shots": 0, |
| "runtime_sec": 46.38, |
| "status": "success" |
| }, |
| { |
| "task": "wikitext", |
| "benchmark": "WikiText-2", |
| "metric": "byte_perplexity", |
| "score": 26.31431152931224, |
| "shots": 0, |
| "runtime_sec": 39.52, |
| "status": "success" |
| }, |
| { |
| "task": "sciq", |
| "benchmark": "SciQ", |
| "metric": "acc_norm", |
| "score": 0.2, |
| "shots": 0, |
| "runtime_sec": 48.87, |
| "status": "success" |
| }, |
| { |
| "task": "copa", |
| "benchmark": "COPA", |
| "metric": "acc", |
| "score": 0.63, |
| "shots": 0, |
| "runtime_sec": 22.29, |
| "status": "success" |
| }, |
| { |
| "task": "race", |
| "benchmark": "RACE", |
| "metric": "acc", |
| "score": 0.22392344497607655, |
| "shots": 0, |
| "runtime_sec": 123.26, |
| "status": "success" |
| }, |
| { |
| "task": "swag", |
| "benchmark": "SWAG", |
| "metric": "acc_norm", |
| "score": 0.2589723083075077, |
| "shots": 0, |
| "runtime_sec": 141.34, |
| "status": "success" |
| }, |
| { |
| "task": "truthfulqa_mc2", |
| "benchmark": "TruthfulQA MC2", |
| "metric": "acc", |
| "score": 0.4981403502526444, |
| "shots": 0, |
| "runtime_sec": 52.22, |
| "status": "success" |
| } |
| ] |