GODELEV
/

Ant-10M

+[
+  {
+    "task": "hellaswag",
+    "benchmark": "HellaSwag",
+    "metric": "acc_norm",
+    "score": 0.2672774347739494,
+    "shots": 0,
+    "runtime_sec": 152.49,
+    "status": "success"
+  },
+  {
+    "task": "piqa",
+    "benchmark": "PIQA",
+    "metric": "acc_norm",
+    "score": 0.5032644178454843,
+    "shots": 0,
+    "runtime_sec": 29.83,
+    "status": "success"
+  },
+  {
+    "task": "winogrande",
+    "benchmark": "WinoGrande",
+    "metric": "acc",
+    "score": 0.4964483030781373,
+    "shots": 0,
+    "runtime_sec": 22.5,
+    "status": "success"
+  },
+  {
+    "task": "boolq",
+    "benchmark": "BoolQ",
+    "metric": "acc",
+    "score": 0.3782874617737003,
+    "shots": 0,
+    "runtime_sec": 54.69,
+    "status": "success"
+  },
+  {
+    "task": "arc_easy",
+    "benchmark": "ARC-Easy",
+    "metric": "acc_norm",
+    "score": 0.2769360269360269,
+    "shots": 0,
+    "runtime_sec": 38.91,
+    "status": "success"
+  },
+  {
+    "task": "arc_challenge",
+    "benchmark": "ARC-Challenge",
+    "metric": "acc_norm",
+    "score": 0.27474402730375425,
+    "shots": 0,
+    "runtime_sec": 29.25,
+    "status": "success"
+  },
+  {
+    "task": "openbookqa",
+    "benchmark": "OpenBookQA",
+    "metric": "acc_norm",
+    "score": 0.308,
+    "shots": 0,
+    "runtime_sec": 23.18,
+    "status": "success"
+  },
+  {
+    "task": "commonsense_qa",
+    "benchmark": "CommonsenseQA",
+    "metric": "acc",
+    "score": 0.18591318591318592,
+    "shots": 0,
+    "runtime_sec": 27.0,
+    "status": "success"
+  },
+  {
+    "task": "lambada_openai",
+    "benchmark": "LAMBADA",
+    "metric": "acc",
+    "score": 0.0,
+    "shots": 0,
+    "runtime_sec": 71.7,
+    "status": "success"
+  },
+  {
+    "task": "blimp",
+    "benchmark": "BLiMP",
+    "metric": "acc",
+    "score": 0.5428358208955224,
+    "shots": 0,
+    "runtime_sec": 367.52,
+    "status": "success"
+  },
+  {
+    "task": "mmlu",
+    "benchmark": "MMLU",
+    "metric": "acc",
+    "score": 0.2543797179888905,
+    "shots": 0,
+    "runtime_sec": 295.22,
+    "status": "success"
+  },
+  {
+    "task": "wikitext",
+    "benchmark": "WikiText-2",
+    "metric": "word_perplexity",
+    "score": 88520100.69650024,
+    "shots": 0,
+    "runtime_sec": 34.96,
+    "status": "success"
+  },
+  {
+    "task": "wikitext",
+    "benchmark": "WikiText-2",
+    "metric": "byte_perplexity",
+    "score": 30.629263941602346,
+    "shots": 0,
+    "runtime_sec": 31.49,
+    "status": "success"
+  },
+  {
+    "task": "sciq",
+    "benchmark": "SciQ",
+    "metric": "acc_norm",
+    "score": 0.215,
+    "shots": 0,
+    "runtime_sec": 41.48,
+    "status": "success"
+  },
+  {
+    "task": "copa",
+    "benchmark": "COPA",
+    "metric": "acc",
+    "score": 0.57,
+    "shots": 0,
+    "runtime_sec": 18.99,
+    "status": "success"
+  },
+  {
+    "task": "race",
+    "benchmark": "RACE",
+    "metric": "acc",
+    "score": 0.22775119617224882,
+    "shots": 0,
+    "runtime_sec": 100.93,
+    "status": "success"
+  },
+  {
+    "task": "swag",
+    "benchmark": "SWAG",
+    "metric": "acc_norm",
+    "score": 0.2575227431770469,
+    "shots": 0,
+    "runtime_sec": 153.63,
+    "status": "success"
+  },
+  {
+    "task": "truthfulqa_mc2",
+    "benchmark": "TruthfulQA MC2",
+    "metric": "acc",
+    "score": 0.4874513485881811,
+    "shots": 0,
+    "runtime_sec": 49.19,
+    "status": "success"
+  }
+]