Update README.md
Browse files
README.md
CHANGED
|
@@ -31,14 +31,110 @@ language:
|
|
| 31 |
|
| 32 |
SP3F-7B is a multilingual model trained with Self-Play with Privileged Pairwise Feedback; we use Qwen2.5-7B as our base.
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
### Citation
|
| 44 |
|
|
|
|
| 31 |
|
| 32 |
SP3F-7B is a multilingual model trained with Self-Play with Privileged Pairwise Feedback; we use Qwen2.5-7B as our base.
|
| 33 |
|
| 34 |
+
<table>
|
| 35 |
+
<thead>
|
| 36 |
+
<tr>
|
| 37 |
+
<th rowspan="2">Model</th>
|
| 38 |
+
<th colspan="2">Overall</th>
|
| 39 |
+
<th colspan="2">MGSM</th>
|
| 40 |
+
<th colspan="2">MT Math100</th>
|
| 41 |
+
<th colspan="2">Belebele</th>
|
| 42 |
+
<th colspan="2">Global MMLU Lite</th>
|
| 43 |
+
</tr>
|
| 44 |
+
<tr>
|
| 45 |
+
<th>Acc</th>
|
| 46 |
+
<th>Lang</th>
|
| 47 |
+
<th>Acc</th>
|
| 48 |
+
<th>Lang</th>
|
| 49 |
+
<th>Acc</th>
|
| 50 |
+
<th>Lang</th>
|
| 51 |
+
<th>Acc</th>
|
| 52 |
+
<th>Lang</th>
|
| 53 |
+
<th>Acc</th>
|
| 54 |
+
<th>Lang</th>
|
| 55 |
+
</tr>
|
| 56 |
+
</thead>
|
| 57 |
+
<tbody>
|
| 58 |
+
<tr>
|
| 59 |
+
<td>Qwen2.5-7B</td>
|
| 60 |
+
<td>14.79</td>
|
| 61 |
+
<td>78.78</td>
|
| 62 |
+
<td>22.15</td>
|
| 63 |
+
<td>90.67</td>
|
| 64 |
+
<td>21.16</td>
|
| 65 |
+
<td>58.22</td>
|
| 66 |
+
<td>7.52</td>
|
| 67 |
+
<td>80.39</td>
|
| 68 |
+
<td>8.34</td>
|
| 69 |
+
<td>85.85</td>
|
| 70 |
+
</tr>
|
| 71 |
+
<tr>
|
| 72 |
+
<td> + SFT</td>
|
| 73 |
+
<td>21.70</td>
|
| 74 |
+
<td>82.11</td>
|
| 75 |
+
<td>33.66</td>
|
| 76 |
+
<td>91.37</td>
|
| 77 |
+
<td>26.72</td>
|
| 78 |
+
<td>58.26</td>
|
| 79 |
+
<td>12.94</td>
|
| 80 |
+
<td>89.18</td>
|
| 81 |
+
<td>13.48</td>
|
| 82 |
+
<td>89.62</td>
|
| 83 |
+
</tr>
|
| 84 |
+
<tr>
|
| 85 |
+
<td> + RLVR</td>
|
| 86 |
+
<td><u>57.79</u></td>
|
| 87 |
+
<td><b>96.09</b></td>
|
| 88 |
+
<td>65.34</td>
|
| 89 |
+
<td><b>99.75</b></td>
|
| 90 |
+
<td>44.50</td>
|
| 91 |
+
<td><b>86.10</b></td>
|
| 92 |
+
<td><b>68.18</b></td>
|
| 93 |
+
<td><u>98.73</u></td>
|
| 94 |
+
<td><u>53.15</u></td>
|
| 95 |
+
<td><b>99.78</b></td>
|
| 96 |
+
</tr>
|
| 97 |
+
<tr>
|
| 98 |
+
<td><b>SP3F-7B</b></td>
|
| 99 |
+
<td><b>61.91</b></td>
|
| 100 |
+
<td><u>95.35</u></td>
|
| 101 |
+
<td><b>72.50</b></td>
|
| 102 |
+
<td><u>99.38</u></td>
|
| 103 |
+
<td><u>56.84</u></td>
|
| 104 |
+
<td><u>82.93</u></td>
|
| 105 |
+
<td><u>67.54</u></td>
|
| 106 |
+
<td><b>99.65</b></td>
|
| 107 |
+
<td>50.76</td>
|
| 108 |
+
<td><u>99.45</u></td>
|
| 109 |
+
</tr>
|
| 110 |
+
<tr>
|
| 111 |
+
<td>Qwen2.5-7B-Instruct</td>
|
| 112 |
+
<td>55.87</td>
|
| 113 |
+
<td>89.21</td>
|
| 114 |
+
<td><u>66.36</u></td>
|
| 115 |
+
<td>98.38</td>
|
| 116 |
+
<td>52.12</td>
|
| 117 |
+
<td>65.66</td>
|
| 118 |
+
<td>56.79</td>
|
| 119 |
+
<td>96.59</td>
|
| 120 |
+
<td>48.20</td>
|
| 121 |
+
<td>96.21</td>
|
| 122 |
+
</tr>
|
| 123 |
+
<tr>
|
| 124 |
+
<td> + Translate Test</td>
|
| 125 |
+
<td>57.01</td>
|
| 126 |
+
<td>85.98</td>
|
| 127 |
+
<td>66.15</td>
|
| 128 |
+
<td>95.81</td>
|
| 129 |
+
<td><b>60.08</b></td>
|
| 130 |
+
<td>59.34</td>
|
| 131 |
+
<td>48.09</td>
|
| 132 |
+
<td>92.27</td>
|
| 133 |
+
<td><b>53.73</b></td>
|
| 134 |
+
<td>96.49</td>
|
| 135 |
+
</tr>
|
| 136 |
+
</tbody>
|
| 137 |
+
</table>
|
| 138 |
|
| 139 |
### Citation
|
| 140 |
|