Commit: Update README.md (file changed: `README.md`, hunk `@@ -19,29 +19,29 @@`)
Before (removed lines 19-47 — table rows were truncated/missing values):

```markdown
---

| Benchmark | Ling-mini-2.0 | LLaDA-MoE-7B-A1B-Instruct | LLaDA2.0-mini-preview |
| **Average** |
| **Knowledge** | | | |
| MMLU |
| MMLU-PRO |
| CMMLU |
| C-EVAL |
| **Reasoning** | | | |
| squad2.0 |
| drop |
| korbench |
| **Coding** | | | |
| CruxEval-O |
| mbpp |
| MultiPL-E |
| humaneval |
| Bigcodebench-Full | 35.
| **Math** | | | |
| GSM8K |
| math |
| **Agent & Alignment** | | | |
| BFCL_Live |
| IFEval-strict -prompt |
```
After (added lines 19-47 — alignment row added and all benchmark values filled in):

```markdown
---

| Benchmark | Ling-mini-2.0 | LLaDA-MoE-7B-A1B-Instruct | LLaDA2.0-mini-preview |
| :---: | :---: | :---: | :---: |
| **Average** | 74.60 | 59.72 | 66.89 |
| **Knowledge** | | | |
| MMLU | 82.15 | 67.18 | 72.49 |
| MMLU-PRO | 63.72 | 44.64 | 49.22 |
| CMMLU | 80.84 | 64.30 | 67.53 |
| C-EVAL | 82.10 | 63.93 | 66.54 |
| **Reasoning** | | | |
| squad2.0 | 75.56 | 86.81 | 85.61 |
| drop | 78.80 | 79.77 | 79.49 |
| korbench | 62.72 | 38.40 | 37.26 |
| **Coding** | | | |
| CruxEval-O | 76.12 | 42.38 | 61.88 |
| mbpp | 84.07 | 70.02 | 77.75 |
| MultiPL-E | 67.09 | 52.53 | 62.43 |
| humaneval | 85.98 | 61.59 | 80.49 |
| Bigcodebench-Full | 35.00 | 20.44 | 30.44 |
| **Math** | | | |
| GSM8K | 94.62 | 82.41 | 89.01 |
| math | 94.66 | 58.68 | 73.50 |
| **Agent & Alignment** | | | |
| BFCL_Live | 53.98 | 63.09 | 74.11 |
| IFEval-strict -prompt | 76.16 | 59.33 | 62.50 |
```