Model save
Browse files- README.md +2 -2
- all_results.json +5 -5
- model.safetensors +1 -1
- train_results.json +5 -5
- trainer_state.json +362 -462
- training_args.bin +2 -2
README.md
CHANGED
|
@@ -27,14 +27,14 @@ print(output["generated_text"])
|
|
| 27 |
|
| 28 |
## Training procedure
|
| 29 |
|
| 30 |
-
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/2495479412-/huggingface/runs/
|
| 31 |
|
| 32 |
|
| 33 |
This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
|
| 34 |
|
| 35 |
### Framework versions
|
| 36 |
|
| 37 |
-
- TRL: 0.
|
| 38 |
- Transformers: 4.49.0.dev0
|
| 39 |
- Pytorch: 2.5.1
|
| 40 |
- Datasets: 3.3.0
|
|
|
|
| 27 |
|
| 28 |
## Training procedure
|
| 29 |
|
| 30 |
+
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/2495479412-/huggingface/runs/jq7at7mf)
|
| 31 |
|
| 32 |
|
| 33 |
This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
|
| 34 |
|
| 35 |
### Framework versions
|
| 36 |
|
| 37 |
+
- TRL: 0.15.1
|
| 38 |
- Transformers: 4.49.0.dev0
|
| 39 |
- Pytorch: 2.5.1
|
| 40 |
- Datasets: 3.3.0
|
all_results.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"total_flos": 0.0,
|
| 3 |
-
"train_loss": 0.
|
| 4 |
-
"train_runtime":
|
| 5 |
-
"train_samples":
|
| 6 |
-
"train_samples_per_second":
|
| 7 |
-
"train_steps_per_second": 0.
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"total_flos": 0.0,
|
| 3 |
+
"train_loss": 0.011576007895314433,
|
| 4 |
+
"train_runtime": 33226.465,
|
| 5 |
+
"train_samples": 1000,
|
| 6 |
+
"train_samples_per_second": 0.602,
|
| 7 |
+
"train_steps_per_second": 0.005
|
| 8 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3554214752
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5202ee4d11bf12fa59e49685898173c59ed7b46ea6aea39ad2e5e1bd0277c10
|
| 3 |
size 3554214752
|
train_results.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"total_flos": 0.0,
|
| 3 |
-
"train_loss": 0.
|
| 4 |
-
"train_runtime":
|
| 5 |
-
"train_samples":
|
| 6 |
-
"train_samples_per_second":
|
| 7 |
-
"train_steps_per_second": 0.
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"total_flos": 0.0,
|
| 3 |
+
"train_loss": 0.011576007895314433,
|
| 4 |
+
"train_runtime": 33226.465,
|
| 5 |
+
"train_samples": 1000,
|
| 6 |
+
"train_samples_per_second": 0.602,
|
| 7 |
+
"train_steps_per_second": 0.005
|
| 8 |
}
|
trainer_state.json
CHANGED
|
@@ -1,575 +1,475 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"eval_steps":
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
-
"completion_length":
|
| 13 |
-
"epoch": 0.
|
| 14 |
-
"grad_norm": 0.
|
| 15 |
-
"kl": 0.
|
| 16 |
-
"learning_rate":
|
| 17 |
"loss": 0.0,
|
| 18 |
-
"reward":
|
| 19 |
-
"reward_std": 0.
|
| 20 |
-
"rewards/accuracy_reward_word": 0.
|
| 21 |
-
"rewards/format_reward": 0.
|
|
|
|
| 22 |
"step": 5
|
| 23 |
},
|
| 24 |
{
|
| 25 |
-
"completion_length":
|
| 26 |
-
"epoch": 1.
|
| 27 |
-
"grad_norm": 0.
|
| 28 |
-
"kl": 0.
|
| 29 |
-
"learning_rate":
|
| 30 |
-
"loss": 0.
|
| 31 |
-
"reward":
|
| 32 |
-
"reward_std": 0.
|
| 33 |
-
"rewards/accuracy_reward_word": 0.
|
| 34 |
-
"rewards/format_reward": 0.
|
|
|
|
| 35 |
"step": 10
|
| 36 |
},
|
| 37 |
{
|
| 38 |
-
"completion_length":
|
| 39 |
-
"epoch":
|
| 40 |
-
"grad_norm":
|
| 41 |
-
"kl": 0.
|
| 42 |
-
"learning_rate": 1.
|
| 43 |
-
"loss": 0.
|
| 44 |
-
"reward":
|
| 45 |
-
"reward_std": 0.
|
| 46 |
-
"rewards/accuracy_reward_word": 0.
|
| 47 |
-
"rewards/format_reward": 0.
|
|
|
|
| 48 |
"step": 15
|
| 49 |
},
|
| 50 |
{
|
| 51 |
-
"completion_length":
|
| 52 |
-
"epoch":
|
| 53 |
-
"grad_norm": 0.
|
| 54 |
-
"kl": 0.
|
| 55 |
-
"learning_rate":
|
| 56 |
-
"loss": 0.
|
| 57 |
-
"reward":
|
| 58 |
-
"reward_std": 0.
|
| 59 |
-
"rewards/accuracy_reward_word": 0.
|
| 60 |
-
"rewards/format_reward": 0.
|
|
|
|
| 61 |
"step": 20
|
| 62 |
},
|
| 63 |
{
|
| 64 |
-
"completion_length":
|
| 65 |
-
"epoch":
|
| 66 |
-
"grad_norm": 0.
|
| 67 |
-
"kl": 0.
|
| 68 |
-
"learning_rate": 1.
|
| 69 |
-
"loss": 0.
|
| 70 |
-
"reward":
|
| 71 |
-
"reward_std": 0.
|
| 72 |
-
"rewards/accuracy_reward_word": 0.
|
| 73 |
-
"rewards/format_reward": 0.
|
|
|
|
| 74 |
"step": 25
|
| 75 |
},
|
| 76 |
{
|
| 77 |
-
"completion_length":
|
| 78 |
-
"epoch":
|
| 79 |
-
"grad_norm":
|
| 80 |
-
"kl":
|
| 81 |
-
"learning_rate": 1.
|
| 82 |
-
"loss": 0.
|
| 83 |
-
"reward":
|
| 84 |
-
"reward_std": 0.
|
| 85 |
-
"rewards/accuracy_reward_word": 0.
|
| 86 |
-
"rewards/format_reward": 0.
|
|
|
|
| 87 |
"step": 30
|
| 88 |
},
|
| 89 |
{
|
| 90 |
-
"completion_length":
|
| 91 |
-
"epoch":
|
| 92 |
-
"grad_norm":
|
| 93 |
-
"kl": 0.
|
| 94 |
-
"learning_rate": 1.
|
| 95 |
-
"loss": 0.
|
| 96 |
-
"reward":
|
| 97 |
-
"reward_std": 0.
|
| 98 |
-
"rewards/accuracy_reward_word": 0.
|
| 99 |
-
"rewards/format_reward": 0.
|
|
|
|
| 100 |
"step": 35
|
| 101 |
},
|
| 102 |
{
|
| 103 |
-
"completion_length":
|
| 104 |
-
"epoch":
|
| 105 |
-
"grad_norm":
|
| 106 |
-
"kl": 0.
|
| 107 |
-
"learning_rate": 1.
|
| 108 |
-
"loss": 0.
|
| 109 |
-
"reward":
|
| 110 |
-
"reward_std": 0.
|
| 111 |
-
"rewards/accuracy_reward_word": 0.
|
| 112 |
-
"rewards/format_reward": 0.
|
|
|
|
| 113 |
"step": 40
|
| 114 |
},
|
| 115 |
{
|
| 116 |
-
"completion_length":
|
| 117 |
-
"epoch":
|
| 118 |
-
"grad_norm": 0.
|
| 119 |
-
"kl": 0.
|
| 120 |
-
"learning_rate": 1.
|
| 121 |
-
"loss": 0.
|
| 122 |
-
"reward":
|
| 123 |
-
"reward_std": 0.
|
| 124 |
-
"rewards/accuracy_reward_word": 0.
|
| 125 |
-
"rewards/format_reward": 0.
|
|
|
|
| 126 |
"step": 45
|
| 127 |
},
|
| 128 |
{
|
| 129 |
-
"completion_length":
|
| 130 |
-
"epoch":
|
| 131 |
-
"grad_norm": 0.
|
| 132 |
-
"kl": 0.
|
| 133 |
-
"learning_rate": 1.
|
| 134 |
-
"loss": 0.
|
| 135 |
-
"reward":
|
| 136 |
-
"reward_std": 0.
|
| 137 |
-
"rewards/accuracy_reward_word": 0.
|
| 138 |
-
"rewards/format_reward": 0.
|
|
|
|
| 139 |
"step": 50
|
| 140 |
},
|
| 141 |
{
|
| 142 |
-
"completion_length":
|
| 143 |
-
"epoch":
|
| 144 |
-
"grad_norm": 0.
|
| 145 |
-
"kl": 0.
|
| 146 |
-
"learning_rate": 1.
|
| 147 |
-
"loss": 0.
|
| 148 |
-
"reward":
|
| 149 |
-
"reward_std": 0.
|
| 150 |
-
"rewards/accuracy_reward_word": 0.
|
| 151 |
-
"rewards/format_reward": 0.
|
|
|
|
| 152 |
"step": 55
|
| 153 |
},
|
| 154 |
{
|
| 155 |
-
"completion_length":
|
| 156 |
-
"epoch":
|
| 157 |
-
"grad_norm": 0.
|
| 158 |
-
"kl": 0.
|
| 159 |
-
"learning_rate": 1.
|
| 160 |
-
"loss": 0.
|
| 161 |
-
"reward":
|
| 162 |
-
"reward_std": 0.
|
| 163 |
-
"rewards/accuracy_reward_word": 0.
|
| 164 |
-
"rewards/format_reward": 0.
|
|
|
|
| 165 |
"step": 60
|
| 166 |
},
|
| 167 |
{
|
| 168 |
-
"completion_length":
|
| 169 |
-
"epoch":
|
| 170 |
-
"grad_norm": 0.
|
| 171 |
-
"kl": 0.
|
| 172 |
-
"learning_rate": 1.
|
| 173 |
-
"loss": 0.
|
| 174 |
-
"reward":
|
| 175 |
-
"reward_std": 0.
|
| 176 |
-
"rewards/accuracy_reward_word": 0.
|
| 177 |
-
"rewards/format_reward": 0.
|
|
|
|
| 178 |
"step": 65
|
| 179 |
},
|
| 180 |
{
|
| 181 |
-
"completion_length":
|
| 182 |
-
"epoch":
|
| 183 |
-
"grad_norm": 0.
|
| 184 |
-
"kl": 0.
|
| 185 |
-
"learning_rate": 1.
|
| 186 |
-
"loss": 0.
|
| 187 |
-
"reward":
|
| 188 |
-
"reward_std": 0.
|
| 189 |
-
"rewards/accuracy_reward_word": 0.
|
| 190 |
-
"rewards/format_reward": 0.
|
|
|
|
| 191 |
"step": 70
|
| 192 |
},
|
| 193 |
{
|
| 194 |
-
"completion_length":
|
| 195 |
-
"epoch":
|
| 196 |
-
"grad_norm": 0.
|
| 197 |
-
"kl": 0.
|
| 198 |
-
"learning_rate": 1.
|
| 199 |
-
"loss": 0.
|
| 200 |
-
"reward":
|
| 201 |
-
"reward_std": 0.
|
| 202 |
-
"rewards/accuracy_reward_word": 0.
|
| 203 |
-
"rewards/format_reward": 0.
|
|
|
|
| 204 |
"step": 75
|
| 205 |
},
|
| 206 |
{
|
| 207 |
-
"completion_length":
|
| 208 |
-
"epoch":
|
| 209 |
-
"grad_norm": 0.
|
| 210 |
-
"kl": 0.
|
| 211 |
-
"learning_rate": 1.
|
| 212 |
-
"loss": 0.
|
| 213 |
-
"reward":
|
| 214 |
-
"reward_std": 0.
|
| 215 |
-
"rewards/accuracy_reward_word": 0.
|
| 216 |
-
"rewards/format_reward": 0.
|
|
|
|
| 217 |
"step": 80
|
| 218 |
},
|
| 219 |
{
|
| 220 |
-
"completion_length":
|
| 221 |
-
"epoch":
|
| 222 |
-
"grad_norm": 0.
|
| 223 |
-
"kl": 0.
|
| 224 |
-
"learning_rate": 1.
|
| 225 |
-
"loss": 0.
|
| 226 |
-
"reward":
|
| 227 |
-
"reward_std": 0.
|
| 228 |
-
"rewards/accuracy_reward_word": 0.
|
| 229 |
-
"rewards/format_reward": 0.
|
|
|
|
| 230 |
"step": 85
|
| 231 |
},
|
| 232 |
{
|
| 233 |
-
"completion_length":
|
| 234 |
-
"epoch":
|
| 235 |
-
"grad_norm": 0.
|
| 236 |
-
"kl": 0.
|
| 237 |
-
"learning_rate":
|
| 238 |
-
"loss": 0.
|
| 239 |
-
"reward":
|
| 240 |
-
"reward_std": 0.
|
| 241 |
-
"rewards/accuracy_reward_word": 0.
|
| 242 |
-
"rewards/format_reward": 0.
|
|
|
|
| 243 |
"step": 90
|
| 244 |
},
|
| 245 |
{
|
| 246 |
-
"completion_length":
|
| 247 |
-
"epoch":
|
| 248 |
-
"grad_norm":
|
| 249 |
-
"kl":
|
| 250 |
-
"learning_rate":
|
| 251 |
-
"loss": 0.
|
| 252 |
-
"reward":
|
| 253 |
-
"reward_std": 0.
|
| 254 |
-
"rewards/accuracy_reward_word": 0.
|
| 255 |
-
"rewards/format_reward": 0.
|
|
|
|
| 256 |
"step": 95
|
| 257 |
},
|
| 258 |
{
|
| 259 |
-
"completion_length":
|
| 260 |
-
"epoch":
|
| 261 |
-
"grad_norm": 0.
|
| 262 |
-
"kl": 0.
|
| 263 |
-
"learning_rate":
|
| 264 |
-
"loss": 0.
|
| 265 |
-
"reward":
|
| 266 |
-
"reward_std": 0.
|
| 267 |
-
"rewards/accuracy_reward_word": 0.
|
| 268 |
-
"rewards/format_reward": 0.
|
| 269 |
-
"
|
| 270 |
-
},
|
| 271 |
-
{
|
| 272 |
-
"epoch": 16.78048780487805,
|
| 273 |
-
"eval_completion_length": 190.2759769984654,
|
| 274 |
-
"eval_kl": 0.32310267857142855,
|
| 275 |
-
"eval_loss": 0.014612293802201748,
|
| 276 |
-
"eval_reward": 0.5988541117736271,
|
| 277 |
-
"eval_reward_std": 0.07925501785108022,
|
| 278 |
-
"eval_rewards/accuracy_reward_word": 0.5988540819713047,
|
| 279 |
-
"eval_rewards/format_reward": 0.0,
|
| 280 |
-
"eval_runtime": 96.8362,
|
| 281 |
-
"eval_samples_per_second": 2.065,
|
| 282 |
-
"eval_steps_per_second": 0.01,
|
| 283 |
"step": 100
|
| 284 |
},
|
| 285 |
{
|
| 286 |
-
"completion_length":
|
| 287 |
-
"epoch":
|
| 288 |
-
"grad_norm":
|
| 289 |
-
"kl":
|
| 290 |
-
"learning_rate":
|
| 291 |
-
"loss": 0.
|
| 292 |
-
"reward":
|
| 293 |
-
"reward_std": 0.
|
| 294 |
-
"rewards/accuracy_reward_word": 0.
|
| 295 |
-
"rewards/format_reward": 0.
|
|
|
|
| 296 |
"step": 105
|
| 297 |
},
|
| 298 |
{
|
| 299 |
-
"completion_length":
|
| 300 |
-
"epoch":
|
| 301 |
-
"grad_norm":
|
| 302 |
-
"kl":
|
| 303 |
-
"learning_rate":
|
| 304 |
-
"loss": 0.
|
| 305 |
-
"reward":
|
| 306 |
-
"reward_std": 0.
|
| 307 |
-
"rewards/accuracy_reward_word": 0.
|
| 308 |
-
"rewards/format_reward": 0.
|
|
|
|
| 309 |
"step": 110
|
| 310 |
},
|
| 311 |
{
|
| 312 |
-
"completion_length":
|
| 313 |
-
"epoch":
|
| 314 |
-
"grad_norm":
|
| 315 |
-
"kl": 0.
|
| 316 |
-
"learning_rate":
|
| 317 |
-
"loss": 0.
|
| 318 |
-
"reward":
|
| 319 |
-
"reward_std": 0.
|
| 320 |
-
"rewards/accuracy_reward_word": 0.
|
| 321 |
-
"rewards/format_reward": 0.
|
|
|
|
| 322 |
"step": 115
|
| 323 |
},
|
| 324 |
{
|
| 325 |
-
"completion_length":
|
| 326 |
-
"epoch":
|
| 327 |
-
"grad_norm": 0.
|
| 328 |
-
"kl":
|
| 329 |
-
"learning_rate":
|
| 330 |
-
"loss": 0.
|
| 331 |
-
"reward":
|
| 332 |
-
"reward_std": 0.
|
| 333 |
-
"rewards/accuracy_reward_word": 0.
|
| 334 |
-
"rewards/format_reward": 0.
|
|
|
|
| 335 |
"step": 120
|
| 336 |
},
|
| 337 |
{
|
| 338 |
-
"completion_length":
|
| 339 |
-
"epoch":
|
| 340 |
-
"grad_norm": 0.
|
| 341 |
-
"kl": 0.
|
| 342 |
-
"learning_rate":
|
| 343 |
-
"loss": 0.
|
| 344 |
-
"reward":
|
| 345 |
-
"reward_std": 0.
|
| 346 |
-
"rewards/accuracy_reward_word": 0.
|
| 347 |
-
"rewards/format_reward": 0.
|
|
|
|
| 348 |
"step": 125
|
| 349 |
},
|
| 350 |
{
|
| 351 |
-
"completion_length":
|
| 352 |
-
"epoch":
|
| 353 |
-
"grad_norm": 0.
|
| 354 |
-
"kl": 0.
|
| 355 |
-
"learning_rate":
|
| 356 |
-
"loss": 0.
|
| 357 |
-
"reward":
|
| 358 |
-
"reward_std": 0.
|
| 359 |
-
"rewards/accuracy_reward_word": 0.
|
| 360 |
-
"rewards/format_reward": 0.
|
|
|
|
| 361 |
"step": 130
|
| 362 |
},
|
| 363 |
{
|
| 364 |
-
"completion_length":
|
| 365 |
-
"epoch":
|
| 366 |
-
"grad_norm": 0.
|
| 367 |
-
"kl": 0.
|
| 368 |
-
"learning_rate":
|
| 369 |
-
"loss": 0.
|
| 370 |
-
"reward":
|
| 371 |
-
"reward_std": 0.
|
| 372 |
-
"rewards/accuracy_reward_word": 0.
|
| 373 |
-
"rewards/format_reward": 0.
|
|
|
|
| 374 |
"step": 135
|
| 375 |
},
|
| 376 |
{
|
| 377 |
-
"completion_length":
|
| 378 |
-
"epoch":
|
| 379 |
-
"grad_norm":
|
| 380 |
-
"kl": 0.
|
| 381 |
-
"learning_rate":
|
| 382 |
-
"loss": 0.
|
| 383 |
-
"reward":
|
| 384 |
-
"reward_std": 0.
|
| 385 |
-
"rewards/accuracy_reward_word": 0.
|
| 386 |
-
"rewards/format_reward": 0.
|
|
|
|
| 387 |
"step": 140
|
| 388 |
},
|
| 389 |
{
|
| 390 |
-
"completion_length":
|
| 391 |
-
"epoch":
|
| 392 |
-
"grad_norm": 0.
|
| 393 |
-
"kl": 0.
|
| 394 |
-
"learning_rate":
|
| 395 |
-
"loss": 0.
|
| 396 |
-
"reward":
|
| 397 |
-
"reward_std": 0.
|
| 398 |
-
"rewards/accuracy_reward_word": 0.
|
| 399 |
-
"rewards/format_reward": 0.
|
|
|
|
| 400 |
"step": 145
|
| 401 |
},
|
| 402 |
{
|
| 403 |
-
"completion_length":
|
| 404 |
-
"epoch":
|
| 405 |
-
"grad_norm": 0.
|
| 406 |
-
"kl": 0.
|
| 407 |
-
"learning_rate":
|
| 408 |
-
"loss": 0.
|
| 409 |
-
"reward":
|
| 410 |
-
"reward_std": 0.
|
| 411 |
-
"rewards/accuracy_reward_word": 0.
|
| 412 |
-
"rewards/format_reward": 0.
|
|
|
|
| 413 |
"step": 150
|
| 414 |
},
|
| 415 |
{
|
| 416 |
-
"completion_length":
|
| 417 |
-
"epoch":
|
| 418 |
-
"grad_norm": 0.
|
| 419 |
-
"kl": 0.
|
| 420 |
-
"learning_rate":
|
| 421 |
-
"loss": 0.
|
| 422 |
-
"reward":
|
| 423 |
-
"reward_std": 0.
|
| 424 |
-
"rewards/accuracy_reward_word": 0.
|
| 425 |
-
"rewards/format_reward": 0.
|
|
|
|
| 426 |
"step": 155
|
| 427 |
},
|
| 428 |
{
|
| 429 |
-
"completion_length":
|
| 430 |
-
"epoch":
|
| 431 |
-
"grad_norm": 0.
|
| 432 |
-
"kl": 0.
|
| 433 |
-
"learning_rate":
|
| 434 |
-
"loss": 0.
|
| 435 |
-
"reward":
|
| 436 |
-
"reward_std": 0.
|
| 437 |
-
"rewards/accuracy_reward_word": 0.
|
| 438 |
-
"rewards/format_reward": 0.
|
|
|
|
| 439 |
"step": 160
|
| 440 |
},
|
| 441 |
{
|
| 442 |
-
"
|
| 443 |
-
"
|
| 444 |
-
"grad_norm": 0.13969573378562927,
|
| 445 |
-
"kl": 0.3170572916666667,
|
| 446 |
-
"learning_rate": 1.808479557110081e-06,
|
| 447 |
-
"loss": 0.0105,
|
| 448 |
-
"reward": 0.6073169595364368,
|
| 449 |
-
"reward_std": 0.12986301354160815,
|
| 450 |
-
"rewards/accuracy_reward_word": 0.607316970373645,
|
| 451 |
-
"rewards/format_reward": 0.0,
|
| 452 |
-
"step": 165
|
| 453 |
-
},
|
| 454 |
-
{
|
| 455 |
-
"completion_length": 181.3133195819277,
|
| 456 |
-
"epoch": 28.390243902439025,
|
| 457 |
-
"grad_norm": 0.21579568088054657,
|
| 458 |
-
"kl": 0.3319720643939394,
|
| 459 |
-
"learning_rate": 1.339745962155613e-06,
|
| 460 |
-
"loss": 0.011,
|
| 461 |
-
"reward": 0.6149095554243434,
|
| 462 |
-
"reward_std": 0.13368092341856522,
|
| 463 |
-
"rewards/accuracy_reward_word": 0.6149095631006992,
|
| 464 |
-
"rewards/format_reward": 0.0,
|
| 465 |
-
"step": 170
|
| 466 |
-
},
|
| 467 |
-
{
|
| 468 |
-
"completion_length": 179.86851408987334,
|
| 469 |
-
"epoch": 29.195121951219512,
|
| 470 |
-
"grad_norm": 0.1424599587917328,
|
| 471 |
-
"kl": 0.32353811553030304,
|
| 472 |
-
"learning_rate": 9.369221296335007e-07,
|
| 473 |
-
"loss": 0.0107,
|
| 474 |
-
"reward": 0.6067391426274271,
|
| 475 |
-
"reward_std": 0.14061830226670613,
|
| 476 |
-
"rewards/accuracy_reward_word": 0.6067391435305277,
|
| 477 |
-
"rewards/format_reward": 0.0,
|
| 478 |
-
"step": 175
|
| 479 |
-
},
|
| 480 |
-
{
|
| 481 |
-
"completion_length": 177.73133919455788,
|
| 482 |
-
"epoch": 30.0,
|
| 483 |
-
"grad_norm": 0.12455170601606369,
|
| 484 |
-
"kl": 0.32202888257575757,
|
| 485 |
-
"learning_rate": 6.030737921409169e-07,
|
| 486 |
-
"loss": 0.0107,
|
| 487 |
-
"reward": 0.6030513856447104,
|
| 488 |
-
"reward_std": 0.13355868055739187,
|
| 489 |
-
"rewards/accuracy_reward_word": 0.6030513928695158,
|
| 490 |
-
"rewards/format_reward": 0.0,
|
| 491 |
-
"step": 180
|
| 492 |
-
},
|
| 493 |
-
{
|
| 494 |
-
"completion_length": 178.40078887939453,
|
| 495 |
-
"epoch": 30.975609756097562,
|
| 496 |
-
"grad_norm": 0.14259420335292816,
|
| 497 |
-
"kl": 0.315283203125,
|
| 498 |
-
"learning_rate": 3.4074173710931804e-07,
|
| 499 |
-
"loss": 0.0127,
|
| 500 |
-
"reward": 0.6099405620247126,
|
| 501 |
-
"reward_std": 0.13809194271452724,
|
| 502 |
-
"rewards/accuracy_reward_word": 0.6099405620247126,
|
| 503 |
-
"rewards/format_reward": 0.0,
|
| 504 |
-
"step": 185
|
| 505 |
-
},
|
| 506 |
-
{
|
| 507 |
-
"completion_length": 174.6302841648911,
|
| 508 |
-
"epoch": 31.78048780487805,
|
| 509 |
-
"grad_norm": 0.1332584172487259,
|
| 510 |
-
"kl": 0.3048354640151515,
|
| 511 |
-
"learning_rate": 1.519224698779198e-07,
|
| 512 |
-
"loss": 0.0101,
|
| 513 |
-
"reward": 0.5993030166084116,
|
| 514 |
-
"reward_std": 0.1328924118795178,
|
| 515 |
-
"rewards/accuracy_reward_word": 0.5993030138991096,
|
| 516 |
-
"rewards/format_reward": 0.0,
|
| 517 |
-
"step": 190
|
| 518 |
-
},
|
| 519 |
-
{
|
| 520 |
-
"completion_length": 184.39110634543678,
|
| 521 |
-
"epoch": 32.58536585365854,
|
| 522 |
-
"grad_norm": 0.13871651887893677,
|
| 523 |
-
"kl": 0.31865530303030304,
|
| 524 |
-
"learning_rate": 3.805301908254455e-08,
|
| 525 |
-
"loss": 0.0106,
|
| 526 |
-
"reward": 0.6105609360066327,
|
| 527 |
-
"reward_std": 0.14098199039246095,
|
| 528 |
-
"rewards/accuracy_reward_word": 0.610560937812834,
|
| 529 |
-
"rewards/format_reward": 0.0,
|
| 530 |
-
"step": 195
|
| 531 |
-
},
|
| 532 |
-
{
|
| 533 |
-
"completion_length": 184.56710722952178,
|
| 534 |
-
"epoch": 33.390243902439025,
|
| 535 |
-
"grad_norm": 0.2092735767364502,
|
| 536 |
-
"kl": 0.3228574810606061,
|
| 537 |
-
"learning_rate": 0.0,
|
| 538 |
-
"loss": 0.0107,
|
| 539 |
-
"reward": 0.6148976295283346,
|
| 540 |
-
"reward_std": 0.1421807540975737,
|
| 541 |
-
"rewards/accuracy_reward_word": 0.614897631334536,
|
| 542 |
-
"rewards/format_reward": 0.0,
|
| 543 |
-
"step": 200
|
| 544 |
-
},
|
| 545 |
-
{
|
| 546 |
-
"epoch": 33.390243902439025,
|
| 547 |
-
"eval_completion_length": 183.40406145368303,
|
| 548 |
-
"eval_kl": 0.3189174107142857,
|
| 549 |
-
"eval_loss": 0.014444979839026928,
|
| 550 |
-
"eval_reward": 0.6251552956444877,
|
| 551 |
-
"eval_reward_std": 0.1085951988186155,
|
| 552 |
-
"eval_rewards/accuracy_reward_word": 0.6251552700996399,
|
| 553 |
-
"eval_rewards/format_reward": 0.0,
|
| 554 |
-
"eval_runtime": 95.3621,
|
| 555 |
-
"eval_samples_per_second": 2.097,
|
| 556 |
-
"eval_steps_per_second": 0.01,
|
| 557 |
-
"step": 200
|
| 558 |
-
},
|
| 559 |
-
{
|
| 560 |
-
"epoch": 33.390243902439025,
|
| 561 |
-
"step": 200,
|
| 562 |
"total_flos": 0.0,
|
| 563 |
-
"train_loss": 0.
|
| 564 |
-
"train_runtime":
|
| 565 |
-
"train_samples_per_second":
|
| 566 |
-
"train_steps_per_second": 0.
|
| 567 |
}
|
| 568 |
],
|
| 569 |
"logging_steps": 5,
|
| 570 |
-
"max_steps":
|
| 571 |
"num_input_tokens_seen": 0,
|
| 572 |
-
"num_train_epochs":
|
| 573 |
"save_steps": 500,
|
| 574 |
"stateful_callbacks": {
|
| 575 |
"TrainerControl": {
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 19.895104895104897,
|
| 5 |
+
"eval_steps": 1000,
|
| 6 |
+
"global_step": 160,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
+
"completion_length": 364.37423477172854,
|
| 13 |
+
"epoch": 0.5594405594405595,
|
| 14 |
+
"grad_norm": 0.27259644865989685,
|
| 15 |
+
"kl": 0.00020606517791748046,
|
| 16 |
+
"learning_rate": 6.25e-06,
|
| 17 |
"loss": 0.0,
|
| 18 |
+
"reward": 1.2981241554021836,
|
| 19 |
+
"reward_std": 0.34615046456456183,
|
| 20 |
+
"rewards/accuracy_reward_word": 0.32314828839153054,
|
| 21 |
+
"rewards/format_reward": 0.9845982514321804,
|
| 22 |
+
"rewards/repetition_penalty_reward": -0.009622399129875702,
|
| 23 |
"step": 5
|
| 24 |
},
|
| 25 |
{
|
| 26 |
+
"completion_length": 300.59944620999426,
|
| 27 |
+
"epoch": 1.2237762237762237,
|
| 28 |
+
"grad_norm": 0.6720463037490845,
|
| 29 |
+
"kl": 0.054093274203213776,
|
| 30 |
+
"learning_rate": 1.25e-05,
|
| 31 |
+
"loss": 0.0024,
|
| 32 |
+
"reward": 1.4319009455767544,
|
| 33 |
+
"reward_std": 0.3311943354254419,
|
| 34 |
+
"rewards/accuracy_reward_word": 0.44601600854234263,
|
| 35 |
+
"rewards/format_reward": 0.992999211631038,
|
| 36 |
+
"rewards/repetition_penalty_reward": -0.007114298457467653,
|
| 37 |
"step": 10
|
| 38 |
},
|
| 39 |
{
|
| 40 |
+
"completion_length": 217.79476432800294,
|
| 41 |
+
"epoch": 1.7832167832167833,
|
| 42 |
+
"grad_norm": 0.29593613743782043,
|
| 43 |
+
"kl": 0.110693359375,
|
| 44 |
+
"learning_rate": 1.8750000000000002e-05,
|
| 45 |
+
"loss": 0.0044,
|
| 46 |
+
"reward": 1.5967296287417412,
|
| 47 |
+
"reward_std": 0.3098897160962224,
|
| 48 |
+
"rewards/accuracy_reward_word": 0.6012545950710774,
|
| 49 |
+
"rewards/format_reward": 0.9985491126775742,
|
| 50 |
+
"rewards/repetition_penalty_reward": -0.003074125228158664,
|
| 51 |
"step": 15
|
| 52 |
},
|
| 53 |
{
|
| 54 |
+
"completion_length": 200.27791127291593,
|
| 55 |
+
"epoch": 2.4475524475524475,
|
| 56 |
+
"grad_norm": 0.26520445942878723,
|
| 57 |
+
"kl": 0.1642123135653409,
|
| 58 |
+
"learning_rate": 1.9961946980917457e-05,
|
| 59 |
+
"loss": 0.0072,
|
| 60 |
+
"reward": 1.6224368851293216,
|
| 61 |
+
"reward_std": 0.27555874311788514,
|
| 62 |
+
"rewards/accuracy_reward_word": 0.6239803387698802,
|
| 63 |
+
"rewards/format_reward": 0.998985393480821,
|
| 64 |
+
"rewards/repetition_penalty_reward": -0.000528900803601573,
|
| 65 |
"step": 20
|
| 66 |
},
|
| 67 |
{
|
| 68 |
+
"completion_length": 270.1645825125954,
|
| 69 |
+
"epoch": 3.111888111888112,
|
| 70 |
+
"grad_norm": 0.34845927357673645,
|
| 71 |
+
"kl": 0.24596058238636365,
|
| 72 |
+
"learning_rate": 1.9807852804032306e-05,
|
| 73 |
+
"loss": 0.0108,
|
| 74 |
+
"reward": 1.5945834151723168,
|
| 75 |
+
"reward_std": 0.3396115639162334,
|
| 76 |
+
"rewards/accuracy_reward_word": 0.6209050552411512,
|
| 77 |
+
"rewards/format_reward": 0.9739245474338531,
|
| 78 |
+
"rewards/repetition_penalty_reward": -0.0002461845716342478,
|
| 79 |
"step": 25
|
| 80 |
},
|
| 81 |
{
|
| 82 |
+
"completion_length": 235.43092441558838,
|
| 83 |
+
"epoch": 3.6713286713286712,
|
| 84 |
+
"grad_norm": 3.628526449203491,
|
| 85 |
+
"kl": 1.1632080078125,
|
| 86 |
+
"learning_rate": 1.953716950748227e-05,
|
| 87 |
+
"loss": 0.0466,
|
| 88 |
+
"reward": 1.3556241348385811,
|
| 89 |
+
"reward_std": 0.18557674251496792,
|
| 90 |
+
"rewards/accuracy_reward_word": 0.36838247990235684,
|
| 91 |
+
"rewards/format_reward": 0.9909598484635354,
|
| 92 |
+
"rewards/repetition_penalty_reward": -0.003718209845078491,
|
| 93 |
"step": 30
|
| 94 |
},
|
| 95 |
{
|
| 96 |
+
"completion_length": 246.95547086542302,
|
| 97 |
+
"epoch": 4.335664335664336,
|
| 98 |
+
"grad_norm": 1.1314058303833008,
|
| 99 |
+
"kl": 0.3246515447443182,
|
| 100 |
+
"learning_rate": 1.9153114791194475e-05,
|
| 101 |
+
"loss": 0.0143,
|
| 102 |
+
"reward": 1.3994574384255842,
|
| 103 |
+
"reward_std": 0.20727728540077806,
|
| 104 |
+
"rewards/accuracy_reward_word": 0.41514577995985746,
|
| 105 |
+
"rewards/format_reward": 0.9877232407981699,
|
| 106 |
+
"rewards/repetition_penalty_reward": -0.003411588459336847,
|
| 107 |
"step": 35
|
| 108 |
},
|
| 109 |
{
|
| 110 |
+
"completion_length": 295.24443359375,
|
| 111 |
+
"epoch": 4.895104895104895,
|
| 112 |
+
"grad_norm": 0.7106680274009705,
|
| 113 |
+
"kl": 0.45169677734375,
|
| 114 |
+
"learning_rate": 1.866025403784439e-05,
|
| 115 |
+
"loss": 0.0181,
|
| 116 |
+
"reward": 1.5822205603122712,
|
| 117 |
+
"reward_std": 0.3901990856975317,
|
| 118 |
+
"rewards/accuracy_reward_word": 0.6376858472824096,
|
| 119 |
+
"rewards/format_reward": 0.9446428999304771,
|
| 120 |
+
"rewards/repetition_penalty_reward": -0.0001081747655689469,
|
| 121 |
"step": 40
|
| 122 |
},
|
| 123 |
{
|
| 124 |
+
"completion_length": 227.82925120267,
|
| 125 |
+
"epoch": 5.559440559440559,
|
| 126 |
+
"grad_norm": 0.8820157051086426,
|
| 127 |
+
"kl": 0.4667746803977273,
|
| 128 |
+
"learning_rate": 1.806444604267483e-05,
|
| 129 |
+
"loss": 0.0205,
|
| 130 |
+
"reward": 1.4798817797140642,
|
| 131 |
+
"reward_std": 0.2851217135533013,
|
| 132 |
+
"rewards/accuracy_reward_word": 0.5120031014931473,
|
| 133 |
+
"rewards/format_reward": 0.9679383540695364,
|
| 134 |
+
"rewards/repetition_penalty_reward": -5.967220460495975e-05,
|
| 135 |
"step": 45
|
| 136 |
},
|
| 137 |
{
|
| 138 |
+
"completion_length": 215.02862150018865,
|
| 139 |
+
"epoch": 6.223776223776224,
|
| 140 |
+
"grad_norm": 0.19495409727096558,
|
| 141 |
+
"kl": 0.41028941761363635,
|
| 142 |
+
"learning_rate": 1.737277336810124e-05,
|
| 143 |
+
"loss": 0.0181,
|
| 144 |
+
"reward": 1.5078922672705217,
|
| 145 |
+
"reward_std": 0.27444807008247485,
|
| 146 |
+
"rewards/accuracy_reward_word": 0.5324522018093955,
|
| 147 |
+
"rewards/format_reward": 0.9754464673725042,
|
| 148 |
+
"rewards/repetition_penalty_reward": -6.40446375076532e-06,
|
| 149 |
"step": 50
|
| 150 |
},
|
| 151 |
{
|
| 152 |
+
"completion_length": 253.25693054199218,
|
| 153 |
+
"epoch": 6.783216783216783,
|
| 154 |
+
"grad_norm": 0.161370187997818,
|
| 155 |
+
"kl": 0.3687744140625,
|
| 156 |
+
"learning_rate": 1.659345815100069e-05,
|
| 157 |
+
"loss": 0.0148,
|
| 158 |
+
"reward": 1.617374736070633,
|
| 159 |
+
"reward_std": 0.3065785804763436,
|
| 160 |
+
"rewards/accuracy_reward_word": 0.6409355964511633,
|
| 161 |
+
"rewards/format_reward": 0.9764509290456772,
|
| 162 |
+
"rewards/repetition_penalty_reward": -1.1780754721257836e-05,
|
| 163 |
"step": 55
|
| 164 |
},
|
| 165 |
{
|
| 166 |
+
"completion_length": 275.0440474423495,
|
| 167 |
+
"epoch": 7.4475524475524475,
|
| 168 |
+
"grad_norm": 0.2272227257490158,
|
| 169 |
+
"kl": 0.35566850142045453,
|
| 170 |
+
"learning_rate": 1.573576436351046e-05,
|
| 171 |
+
"loss": 0.0156,
|
| 172 |
+
"reward": 1.5927915437655016,
|
| 173 |
+
"reward_std": 0.30714009304276924,
|
| 174 |
+
"rewards/accuracy_reward_word": 0.6195999278940938,
|
| 175 |
+
"rewards/format_reward": 0.97321432557973,
|
| 176 |
+
"rewards/repetition_penalty_reward": -2.271822495458764e-05,
|
| 177 |
"step": 60
|
| 178 |
},
|
| 179 |
{
|
| 180 |
+
"completion_length": 262.15382610667837,
|
| 181 |
+
"epoch": 8.111888111888112,
|
| 182 |
+
"grad_norm": 0.19355681538581848,
|
| 183 |
+
"kl": 0.3338068181818182,
|
| 184 |
+
"learning_rate": 1.4809887689193878e-05,
|
| 185 |
+
"loss": 0.0147,
|
| 186 |
+
"reward": 1.4948042712428353,
|
| 187 |
+
"reward_std": 0.2743698521940546,
|
| 188 |
+
"rewards/accuracy_reward_word": 0.5210835107348182,
|
| 189 |
+
"rewards/format_reward": 0.9737216295166449,
|
| 190 |
+
"rewards/repetition_penalty_reward": -8.746641965858131e-07,
|
| 191 |
"step": 65
|
| 192 |
},
|
| 193 |
{
|
| 194 |
+
"completion_length": 250.4502347946167,
|
| 195 |
+
"epoch": 8.671328671328672,
|
| 196 |
+
"grad_norm": 0.1308055967092514,
|
| 197 |
+
"kl": 0.31865234375,
|
| 198 |
+
"learning_rate": 1.3826834323650899e-05,
|
| 199 |
+
"loss": 0.0127,
|
| 200 |
+
"reward": 1.4026295721530915,
|
| 201 |
+
"reward_std": 0.23001151392236352,
|
| 202 |
+
"rewards/accuracy_reward_word": 0.4245518417446874,
|
| 203 |
+
"rewards/format_reward": 0.9781250409781933,
|
| 204 |
+
"rewards/repetition_penalty_reward": -4.732433080789633e-05,
|
| 205 |
"step": 70
|
| 206 |
},
|
| 207 |
{
|
| 208 |
+
"completion_length": 257.2775072617964,
|
| 209 |
+
"epoch": 9.335664335664335,
|
| 210 |
+
"grad_norm": 0.1448088437318802,
|
| 211 |
+
"kl": 0.2882302024147727,
|
| 212 |
+
"learning_rate": 1.2798290140309924e-05,
|
| 213 |
+
"loss": 0.0127,
|
| 214 |
+
"reward": 1.4172937978397717,
|
| 215 |
+
"reward_std": 0.23249881604517048,
|
| 216 |
+
"rewards/accuracy_reward_word": 0.4396151855418628,
|
| 217 |
+
"rewards/format_reward": 0.9776786098426039,
|
| 218 |
+
"rewards/repetition_penalty_reward": 0.0,
|
| 219 |
"step": 75
|
| 220 |
},
|
| 221 |
{
|
| 222 |
+
"completion_length": 259.0001234054565,
|
| 223 |
+
"epoch": 9.895104895104895,
|
| 224 |
+
"grad_norm": 0.1194038838148117,
|
| 225 |
+
"kl": 0.26318359375,
|
| 226 |
+
"learning_rate": 1.1736481776669307e-05,
|
| 227 |
+
"loss": 0.0105,
|
| 228 |
+
"reward": 1.4497434869408607,
|
| 229 |
+
"reward_std": 0.25042697712779044,
|
| 230 |
+
"rewards/accuracy_reward_word": 0.47421625480055807,
|
| 231 |
+
"rewards/format_reward": 0.9755580753087998,
|
| 232 |
+
"rewards/repetition_penalty_reward": -3.085259668296203e-05,
|
| 233 |
"step": 80
|
| 234 |
},
|
| 235 |
{
|
| 236 |
+
"completion_length": 251.73012508045542,
|
| 237 |
+
"epoch": 10.55944055944056,
|
| 238 |
+
"grad_norm": 0.12483782321214676,
|
| 239 |
+
"kl": 0.25578169389204547,
|
| 240 |
+
"learning_rate": 1.0654031292301432e-05,
|
| 241 |
+
"loss": 0.0113,
|
| 242 |
+
"reward": 1.477828394282948,
|
| 243 |
+
"reward_std": 0.24647284172136674,
|
| 244 |
+
"rewards/accuracy_reward_word": 0.49977607000619173,
|
| 245 |
+
"rewards/format_reward": 0.978084458546205,
|
| 246 |
+
"rewards/repetition_penalty_reward": -3.212933171942661e-05,
|
| 247 |
"step": 85
|
| 248 |
},
|
| 249 |
{
|
| 250 |
+
"completion_length": 243.65271100130948,
|
| 251 |
+
"epoch": 11.223776223776223,
|
| 252 |
+
"grad_norm": 0.4139878749847412,
|
| 253 |
+
"kl": 0.2525967684659091,
|
| 254 |
+
"learning_rate": 9.563806126346643e-06,
|
| 255 |
+
"loss": 0.0111,
|
| 256 |
+
"reward": 1.4612440195950596,
|
| 257 |
+
"reward_std": 0.23823331762105227,
|
| 258 |
+
"rewards/accuracy_reward_word": 0.4819643903862346,
|
| 259 |
+
"rewards/format_reward": 0.9793019870465453,
|
| 260 |
+
"rewards/repetition_penalty_reward": -2.236067350416058e-05,
|
| 261 |
"step": 90
|
| 262 |
},
|
| 263 |
{
|
| 264 |
+
"completion_length": 246.18003330230712,
|
| 265 |
+
"epoch": 11.783216783216783,
|
| 266 |
+
"grad_norm": 0.11199598014354706,
|
| 267 |
+
"kl": 0.23426513671875,
|
| 268 |
+
"learning_rate": 8.478766138100834e-06,
|
| 269 |
+
"loss": 0.0094,
|
| 270 |
+
"reward": 1.4039153650403022,
|
| 271 |
+
"reward_std": 0.20266931243240832,
|
| 272 |
+
"rewards/accuracy_reward_word": 0.4193565859692171,
|
| 273 |
+
"rewards/format_reward": 0.9845982529222965,
|
| 274 |
+
"rewards/repetition_penalty_reward": -3.9484114859078544e-05,
|
| 275 |
"step": 95
|
| 276 |
},
|
| 277 |
{
|
| 278 |
+
"completion_length": 256.24219929088247,
|
| 279 |
+
"epoch": 12.447552447552448,
|
| 280 |
+
"grad_norm": 0.23437026143074036,
|
| 281 |
+
"kl": 0.24050071022727273,
|
| 282 |
+
"learning_rate": 7.411809548974792e-06,
|
| 283 |
+
"loss": 0.0106,
|
| 284 |
+
"reward": 1.3849081302231008,
|
| 285 |
+
"reward_std": 0.20749073509465565,
|
| 286 |
+
"rewards/accuracy_reward_word": 0.4026952323249795,
|
| 287 |
+
"rewards/format_reward": 0.9822443513707682,
|
| 288 |
+
"rewards/repetition_penalty_reward": -3.145250816900939e-05,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
"step": 100
|
| 290 |
},
|
| 291 |
{
|
| 292 |
+
"completion_length": 257.46277410333806,
|
| 293 |
+
"epoch": 13.111888111888112,
|
| 294 |
+
"grad_norm": 0.11270800977945328,
|
| 295 |
+
"kl": 0.21811745383522727,
|
| 296 |
+
"learning_rate": 6.375619617162985e-06,
|
| 297 |
+
"loss": 0.0096,
|
| 298 |
+
"reward": 1.4131186618046327,
|
| 299 |
+
"reward_std": 0.2085759434002367,
|
| 300 |
+
"rewards/accuracy_reward_word": 0.4291051478040489,
|
| 301 |
+
"rewards/format_reward": 0.9840706532651727,
|
| 302 |
+
"rewards/repetition_penalty_reward": -5.714496695542369e-05,
|
| 303 |
"step": 105
|
| 304 |
},
|
| 305 |
{
|
| 306 |
+
"completion_length": 260.4148557662964,
|
| 307 |
+
"epoch": 13.671328671328672,
|
| 308 |
+
"grad_norm": 0.1118827760219574,
|
| 309 |
+
"kl": 0.21689453125,
|
| 310 |
+
"learning_rate": 5.382513867649663e-06,
|
| 311 |
+
"loss": 0.0087,
|
| 312 |
+
"reward": 1.413018099963665,
|
| 313 |
+
"reward_std": 0.20751294190995395,
|
| 314 |
+
"rewards/accuracy_reward_word": 0.4284829759038985,
|
| 315 |
+
"rewards/format_reward": 0.9845982521772385,
|
| 316 |
+
"rewards/repetition_penalty_reward": -6.31413837254513e-05,
|
| 317 |
"step": 110
|
| 318 |
},
|
| 319 |
{
|
| 320 |
+
"completion_length": 255.0886882435192,
|
| 321 |
+
"epoch": 14.335664335664335,
|
| 322 |
+
"grad_norm": 0.1052035540342331,
|
| 323 |
+
"kl": 0.21266867897727273,
|
| 324 |
+
"learning_rate": 4.444297669803981e-06,
|
| 325 |
+
"loss": 0.0094,
|
| 326 |
+
"reward": 1.3777457651766865,
|
| 327 |
+
"reward_std": 0.19213764581151985,
|
| 328 |
+
"rewards/accuracy_reward_word": 0.3917047695934095,
|
| 329 |
+
"rewards/format_reward": 0.9860998696901582,
|
| 330 |
+
"rewards/repetition_penalty_reward": -5.887894752215123e-05,
|
| 331 |
"step": 115
|
| 332 |
},
|
| 333 |
{
|
| 334 |
+
"completion_length": 250.88360347747803,
|
| 335 |
+
"epoch": 14.895104895104895,
|
| 336 |
+
"grad_norm": 0.1148873046040535,
|
| 337 |
+
"kl": 0.20421142578125,
|
| 338 |
+
"learning_rate": 3.5721239031346067e-06,
|
| 339 |
+
"loss": 0.0082,
|
| 340 |
+
"reward": 1.4046544060111046,
|
| 341 |
+
"reward_std": 0.194459034409374,
|
| 342 |
+
"rewards/accuracy_reward_word": 0.41711079380474986,
|
| 343 |
+
"rewards/format_reward": 0.9876116372644901,
|
| 344 |
+
"rewards/repetition_penalty_reward": -6.803718715673313e-05,
|
| 345 |
"step": 120
|
| 346 |
},
|
| 347 |
{
|
| 348 |
+
"completion_length": 249.46257175098765,
|
| 349 |
+
"epoch": 15.55944055944056,
|
| 350 |
+
"grad_norm": 0.11614679545164108,
|
| 351 |
+
"kl": 0.20160466974431818,
|
| 352 |
+
"learning_rate": 2.776360379402445e-06,
|
| 353 |
+
"loss": 0.0089,
|
| 354 |
+
"reward": 1.4239109510725194,
|
| 355 |
+
"reward_std": 0.20904017239809036,
|
| 356 |
+
"rewards/accuracy_reward_word": 0.4381290450692177,
|
| 357 |
+
"rewards/format_reward": 0.985795489766381,
|
| 358 |
+
"rewards/repetition_penalty_reward": -1.3588532800151205e-05,
|
| 359 |
"step": 125
|
| 360 |
},
|
| 361 |
{
|
| 362 |
+
"completion_length": 252.77699990706012,
|
| 363 |
+
"epoch": 16.223776223776223,
|
| 364 |
+
"grad_norm": 0.11460437625646591,
|
| 365 |
+
"kl": 0.20647638494318182,
|
| 366 |
+
"learning_rate": 2.0664665970876496e-06,
|
| 367 |
+
"loss": 0.0091,
|
| 368 |
+
"reward": 1.4404897730458865,
|
| 369 |
+
"reward_std": 0.22268841487609528,
|
| 370 |
+
"rewards/accuracy_reward_word": 0.45830493445762177,
|
| 371 |
+
"rewards/format_reward": 0.9822443588213488,
|
| 372 |
+
"rewards/repetition_penalty_reward": -5.952174582158808e-05,
|
| 373 |
"step": 130
|
| 374 |
},
|
| 375 |
{
|
| 376 |
+
"completion_length": 256.87255535125735,
|
| 377 |
+
"epoch": 16.783216783216783,
|
| 378 |
+
"grad_norm": 0.10793906450271606,
|
| 379 |
+
"kl": 0.20732421875,
|
| 380 |
+
"learning_rate": 1.4508812932705364e-06,
|
| 381 |
+
"loss": 0.0083,
|
| 382 |
+
"reward": 1.478071430325508,
|
| 383 |
+
"reward_std": 0.22782372254878283,
|
| 384 |
+
"rewards/accuracy_reward_word": 0.49404887384735047,
|
| 385 |
+
"rewards/format_reward": 0.9840402141213417,
|
| 386 |
+
"rewards/repetition_penalty_reward": -1.7666907024249667e-05,
|
| 387 |
"step": 135
|
| 388 |
},
|
| 389 |
{
|
| 390 |
+
"completion_length": 254.8349351015958,
|
| 391 |
+
"epoch": 17.447552447552447,
|
| 392 |
+
"grad_norm": 0.13680703938007355,
|
| 393 |
+
"kl": 0.19947398792613635,
|
| 394 |
+
"learning_rate": 9.369221296335007e-07,
|
| 395 |
+
"loss": 0.0088,
|
| 396 |
+
"reward": 1.4661599763415076,
|
| 397 |
+
"reward_std": 0.22828074849464677,
|
| 398 |
+
"rewards/accuracy_reward_word": 0.4821980211206458,
|
| 399 |
+
"rewards/format_reward": 0.9839691946452315,
|
| 400 |
+
"rewards/repetition_penalty_reward": -7.247217581607401e-06,
|
| 401 |
"step": 140
|
| 402 |
},
|
| 403 |
{
|
| 404 |
+
"completion_length": 254.28024777499112,
|
| 405 |
+
"epoch": 18.111888111888113,
|
| 406 |
+
"grad_norm": 0.12113064527511597,
|
| 407 |
+
"kl": 0.2043124112215909,
|
| 408 |
+
"learning_rate": 5.306987050489442e-07,
|
| 409 |
+
"loss": 0.009,
|
| 410 |
+
"reward": 1.4483650543472983,
|
| 411 |
+
"reward_std": 0.222187442493371,
|
| 412 |
+
"rewards/accuracy_reward_word": 0.4643406889147379,
|
| 413 |
+
"rewards/format_reward": 0.9840706539424983,
|
| 414 |
+
"rewards/repetition_penalty_reward": -4.6301358824249206e-05,
|
| 415 |
"step": 145
|
| 416 |
},
|
| 417 |
{
|
| 418 |
+
"completion_length": 254.92579154968263,
|
| 419 |
+
"epoch": 18.67132867132867,
|
| 420 |
+
"grad_norm": 0.12059218436479568,
|
| 421 |
+
"kl": 0.2029052734375,
|
| 422 |
+
"learning_rate": 2.370399288006664e-07,
|
| 423 |
+
"loss": 0.0081,
|
| 424 |
+
"reward": 1.4487672820687294,
|
| 425 |
+
"reward_std": 0.228369791386649,
|
| 426 |
+
"rewards/accuracy_reward_word": 0.46667207330465316,
|
| 427 |
+
"rewards/format_reward": 0.9821428969502449,
|
| 428 |
+
"rewards/repetition_penalty_reward": -4.7683547745691615e-05,
|
| 429 |
"step": 150
|
| 430 |
},
|
| 431 |
{
|
| 432 |
+
"completion_length": 255.3859702023593,
|
| 433 |
+
"epoch": 19.335664335664337,
|
| 434 |
+
"grad_norm": 0.12672924995422363,
|
| 435 |
+
"kl": 0.20241477272727273,
|
| 436 |
+
"learning_rate": 5.943661777680354e-08,
|
| 437 |
+
"loss": 0.0089,
|
| 438 |
+
"reward": 1.4418584392829374,
|
| 439 |
+
"reward_std": 0.22563406520269133,
|
| 440 |
+
"rewards/accuracy_reward_word": 0.45739410241896455,
|
| 441 |
+
"rewards/format_reward": 0.9844764979048208,
|
| 442 |
+
"rewards/repetition_penalty_reward": -1.2175325537100434e-05,
|
| 443 |
"step": 155
|
| 444 |
},
|
| 445 |
{
|
| 446 |
+
"completion_length": 250.57400856018066,
|
| 447 |
+
"epoch": 19.895104895104897,
|
| 448 |
+
"grad_norm": 0.12355446815490723,
|
| 449 |
+
"kl": 0.19569091796875,
|
| 450 |
+
"learning_rate": 0.0,
|
| 451 |
+
"loss": 0.0078,
|
| 452 |
+
"reward": 1.4596911922097207,
|
| 453 |
+
"reward_std": 0.21876802388578653,
|
| 454 |
+
"rewards/accuracy_reward_word": 0.4751062370836735,
|
| 455 |
+
"rewards/format_reward": 0.9845982559025288,
|
| 456 |
+
"rewards/repetition_penalty_reward": -1.3301288072398166e-05,
|
| 457 |
"step": 160
|
| 458 |
},
|
| 459 |
{
|
| 460 |
+
"epoch": 19.895104895104897,
|
| 461 |
+
"step": 160,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
"total_flos": 0.0,
|
| 463 |
+
"train_loss": 0.011576007895314433,
|
| 464 |
+
"train_runtime": 33226.465,
|
| 465 |
+
"train_samples_per_second": 0.602,
|
| 466 |
+
"train_steps_per_second": 0.005
|
| 467 |
}
|
| 468 |
],
|
| 469 |
"logging_steps": 5,
|
| 470 |
+
"max_steps": 160,
|
| 471 |
"num_input_tokens_seen": 0,
|
| 472 |
+
"num_train_epochs": 20,
|
| 473 |
"save_steps": 500,
|
| 474 |
"stateful_callbacks": {
|
| 475 |
"TrainerControl": {
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0b151708dbfb039fab3363622568c9f229a742f720cf88aa4b879c5da30f5f8
|
| 3 |
+
size 7608
|