| license: mit | |
| tags: | |
| - unsloth | |
| - trl | |
| - grpo | |
| datasets: | |
| - openai/gsm8k | |
| language: | |
| - en | |
| - zh | |
| base_model: | |
| - Qwen/Qwen2.5-3B-Instruct | |
| Reference notebook (Unsloth Qwen2.5 3B GRPO, Colab): <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen2.5_(3B)-GRPO.ipynb> | |
| license: mit | |
| tags: | |
| - unsloth | |
| - trl | |
| - grpo | |
| datasets: | |
| - openai/gsm8k | |
| language: | |
| - en | |
| - zh | |
| base_model: | |
| - Qwen/Qwen2.5-3B-Instruct | |
| Reference notebook (Unsloth Qwen2.5 3B GRPO, Colab): <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen2.5_(3B)-GRPO.ipynb> | |