Update README.md
Browse files
README.md
CHANGED
|
@@ -36,6 +36,28 @@ model-index:
|
|
| 36 |
type: pass@1
|
| 37 |
value: 0.249
|
| 38 |
verified: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
---
|
| 40 |
|
| 41 |
|
|
|
|
| 36 |
type: pass@1
|
| 37 |
value: 0.249
|
| 38 |
verified: false
|
| 39 |
+
- task:
|
| 40 |
+
name: Math Reasoning
|
| 41 |
+
type: reasoning
|
| 42 |
+
dataset:
|
| 43 |
+
name: "GSM8K"
|
| 44 |
+
type: gsm8k
|
| 45 |
+
metrics:
|
| 46 |
+
- name: pass@1
|
| 47 |
+
type: pass@1
|
| 48 |
+
value: 0.174
|
| 49 |
+
verified: false
|
| 50 |
+
- task:
|
| 51 |
+
name: Knowledge
|
| 52 |
+
type: knowledge
|
| 53 |
+
dataset:
|
| 54 |
+
name: "MMLU"
|
| 55 |
+
type: mmlu
|
| 56 |
+
metrics:
|
| 57 |
+
- name: accuracy
|
| 58 |
+
type: accuracy
|
| 59 |
+
value: 0.399
|
| 60 |
+
verified: false
|
| 61 |
---
|
| 62 |
|
| 63 |
|