Coder-AN committed on
Commit
63c25cf
·
1 Parent(s): dccf4a7
Files changed (2) hide show
  1. README.md +4 -4
  2. README_en.md +4 -4
README.md CHANGED
@@ -80,8 +80,8 @@ tasks:
80
  |模型名称|HumanEval|MBPP|LiveCodeBench|均分|
81
  |:-:|:-:|:-:|:-:|:-:|
82
  |Qwen3-8B(think)|84.76|78.60|63.10|75.49|
83
- |Qwen2.5-7B-Instruct|63.41|68.48|8.15|46.68|
84
- |Llama3.1-8B-Instruct|84.15|70.82|34.55|63.17|
85
  |AI-Flow-Ruyi-7B-E7B-0725<b>(ours)</b>|76.83|77.04|28.44|60.77|
86
 
87
  </details>
@@ -92,8 +92,8 @@ tasks:
92
  |模型名称|GPQA|Math|GSM-8K|均分|
93
  |:-:|:-:|:-:|:-:|:-:|
94
  |Qwen3-8B(think)|38.38|83.84|93.03|71.75|
95
- |Qwen2.5-7B-Instruct|25.25|49.22|85.82|53.43|
96
- |Llama3.1-8B-Instruct|35.35|73.66|88.48|65.83|
97
  |AI-Flow-Ruyi-7B-E7B-0725<b>(ours)</b>|30.30|72.18|91.36|64.61|
98
 
99
  </details>
 
80
  |模型名称|HumanEval|MBPP|LiveCodeBench|均分|
81
  |:-:|:-:|:-:|:-:|:-:|
82
  |Qwen3-8B(think)|84.76|78.60|63.10|75.49|
83
+ |Llama3.1-8B-Instruct|63.41|68.48|8.15|46.68|
84
+ |Qwen2.5-7B-Instruct|84.15|70.82|34.55|63.17|
85
  |AI-Flow-Ruyi-7B-E7B-0725<b>(ours)</b>|76.83|77.04|28.44|60.77|
86
 
87
  </details>
 
92
  |模型名称|GPQA|Math|GSM-8K|均分|
93
  |:-:|:-:|:-:|:-:|:-:|
94
  |Qwen3-8B(think)|38.38|83.84|93.03|71.75|
95
+ |Llama3.1-8B-Instruct|25.25|49.22|85.82|53.43|
96
+ |Qwen2.5-7B-Instruct|35.35|73.66|88.48|65.83|
97
  |AI-Flow-Ruyi-7B-E7B-0725<b>(ours)</b>|30.30|72.18|91.36|64.61|
98
 
99
  </details>
README_en.md CHANGED
@@ -78,8 +78,8 @@ We conduct a review based on [OpenCompass](https://github.com/open-compass/openc
78
  |Model|HumanEval|MBPP|LiveCodeBench|Mean|
79
  |:-:|:-:|:-:|:-:|:-:|
80
  |Qwen3-8B(think)|84.76|78.60|63.10|75.49|
81
- |Qwen2.5-7B-Instruct|63.41|68.48|8.15|46.68|
82
- |Llama3.1-8B-Instruct|84.15|70.82|34.55|63.17|
83
  |AI-Flow-Ruyi-7B-E7B-0725<b>(ours)</b>|76.83|77.04|28.44|60.77|
84
 
85
  </details>
@@ -90,8 +90,8 @@ We conduct a review based on [OpenCompass](https://github.com/open-compass/openc
90
  |Model|GPQA|Math|GSM-8K|Mean|
91
  |:-:|:-:|:-:|:-:|:-:|
92
  |Qwen3-8B(think)|38.38|83.84|93.03|71.75|
93
- |Qwen2.5-7B-Instruct|25.25|49.22|85.82|53.43|
94
- |Llama3.1-8B-Instruct|35.35|73.66|88.48|65.83|
95
  |AI-Flow-Ruyi-7B-E7B-0725<b>(ours)</b>|30.30|72.18|91.36|64.61|
96
 
97
  </details>
 
78
  |Model|HumanEval|MBPP|LiveCodeBench|Mean|
79
  |:-:|:-:|:-:|:-:|:-:|
80
  |Qwen3-8B(think)|84.76|78.60|63.10|75.49|
81
+ |Llama3.1-8B-Instruct|63.41|68.48|8.15|46.68|
82
+ |Qwen2.5-7B-Instruct|84.15|70.82|34.55|63.17|
83
  |AI-Flow-Ruyi-7B-E7B-0725<b>(ours)</b>|76.83|77.04|28.44|60.77|
84
 
85
  </details>
 
90
  |Model|GPQA|Math|GSM-8K|Mean|
91
  |:-:|:-:|:-:|:-:|:-:|
92
  |Qwen3-8B(think)|38.38|83.84|93.03|71.75|
93
+ |Llama3.1-8B-Instruct|25.25|49.22|85.82|53.43|
94
+ |Qwen2.5-7B-Instruct|35.35|73.66|88.48|65.83|
95
  |AI-Flow-Ruyi-7B-E7B-0725<b>(ours)</b>|30.30|72.18|91.36|64.61|
96
 
97
  </details>