|
|
--- |
|
|
license: mit |
|
|
datasets: |
|
|
- neulab/SP3F-Training-Data |
|
|
base_model: |
|
|
- Qwen/Qwen2.5-7B |
|
|
pipeline_tag: text-generation |
|
|
language: |
|
|
- ar |
|
|
- bn |
|
|
- de |
|
|
- en |
|
|
- es |
|
|
- fr |
|
|
- hi |
|
|
- id |
|
|
- it |
|
|
- ja |
|
|
- ko |
|
|
- pt |
|
|
- ru |
|
|
- sw |
|
|
- te |
|
|
- th |
|
|
- yo |
|
|
- zh |
|
|
--- |
|
|
|
|
|
|
|
|
# SP3F-7B |
|
|
|
|
|
SP3F-7B is a multilingual model trained with Self-Play with Privileged Pairwise Feedback (SP3F), using Qwen2.5-7B as the base model. |
|
|
|
|
|
<table> |
|
|
<thead> |
|
|
<tr> |
|
|
<th rowspan="2">Model</th> |
|
|
<th colspan="2">Overall</th> |
|
|
<th colspan="2">MGSM</th> |
|
|
<th colspan="2">MT Math100</th> |
|
|
<th colspan="2">Belebele</th> |
|
|
<th colspan="2">Global MMLU Lite</th> |
|
|
</tr> |
|
|
<tr> |
|
|
<th>Acc</th> |
|
|
<th>Lang</th> |
|
|
<th>Acc</th> |
|
|
<th>Lang</th> |
|
|
<th>Acc</th> |
|
|
<th>Lang</th> |
|
|
<th>Acc</th> |
|
|
<th>Lang</th> |
|
|
<th>Acc</th> |
|
|
<th>Lang</th> |
|
|
</tr> |
|
|
</thead> |
|
|
<tbody> |
|
|
<tr> |
|
|
<td>Qwen2.5-7B</td> |
|
|
<td>14.79</td> |
|
|
<td>78.78</td> |
|
|
<td>22.15</td> |
|
|
<td>90.67</td> |
|
|
<td>21.16</td> |
|
|
<td>58.22</td> |
|
|
<td>7.52</td> |
|
|
<td>80.39</td> |
|
|
<td>8.34</td> |
|
|
<td>85.85</td> |
|
|
</tr> |
|
|
<tr> |
|
|
<td> + SFT</td> |
|
|
<td>21.70</td> |
|
|
<td>82.11</td> |
|
|
<td>33.66</td> |
|
|
<td>91.37</td> |
|
|
<td>26.72</td> |
|
|
<td>58.26</td> |
|
|
<td>12.94</td> |
|
|
<td>89.18</td> |
|
|
<td>13.48</td> |
|
|
<td>89.62</td> |
|
|
</tr> |
|
|
<tr> |
|
|
<td> + RLVR</td> |
|
|
<td><u>57.79</u></td> |
|
|
<td><b>96.09</b></td> |
|
|
<td>65.34</td> |
|
|
<td><b>99.75</b></td> |
|
|
<td>44.50</td> |
|
|
<td><b>86.10</b></td> |
|
|
<td><b>68.18</b></td> |
|
|
<td><u>98.73</u></td> |
|
|
<td><u>53.15</u></td> |
|
|
<td><b>99.78</b></td> |
|
|
</tr> |
|
|
<tr> |
|
|
<td><b>SP3F-7B</b></td> |
|
|
<td><b>61.91</b></td> |
|
|
<td><u>95.35</u></td> |
|
|
<td><b>72.50</b></td> |
|
|
<td><u>99.38</u></td> |
|
|
<td><u>56.84</u></td> |
|
|
<td><u>82.93</u></td> |
|
|
<td><u>67.54</u></td> |
|
|
<td><b>99.65</b></td> |
|
|
<td>50.76</td> |
|
|
<td><u>99.45</u></td> |
|
|
</tr> |
|
|
<tr> |
|
|
<td>Qwen2.5-7B-Instruct</td> |
|
|
<td>55.87</td> |
|
|
<td>89.21</td> |
|
|
<td><u>66.36</u></td> |
|
|
<td>98.38</td> |
|
|
<td>52.12</td> |
|
|
<td>65.66</td> |
|
|
<td>56.79</td> |
|
|
<td>96.59</td> |
|
|
<td>48.20</td> |
|
|
<td>96.21</td> |
|
|
</tr> |
|
|
<tr> |
|
|
<td> + Translate Test</td> |
|
|
<td>57.01</td> |
|
|
<td>85.98</td> |
|
|
<td>66.15</td> |
|
|
<td>95.81</td> |
|
|
<td><b>60.08</b></td> |
|
|
<td>59.34</td> |
|
|
<td>48.09</td> |
|
|
<td>92.27</td> |
|
|
<td><b>53.73</b></td> |
|
|
<td>96.49</td> |
|
|
</tr> |
|
|
</tbody> |
|
|
</table> |
|
|
|
|
|
### Citation |
|
|
|
|
|
If you find this work helpful, please cite it using the following entry. |
|
|
|
|
|
``` |
|
|
|
|
|
``` |