Training in progress, step 300, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 479005064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc3cf965cb3e46bce1a07e647d9e64ded38cef1306287e6c24c592e3bafdafa6
|
| 3 |
size 479005064
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 958299770
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99f35c97132fdb9c45702a3a626946b943e4f0e9d6e04d821cf969f555bfc36f
|
| 3 |
size 958299770
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9dc64f06f07f11b5133abb444d47a3661de90bef33673b0253120e1e16093534
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e96b3a3c408ef67987cac348d29150759cd1e3152271b07b879ff530abc69a0f
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -307,6 +307,66 @@
|
|
| 307 |
"reward_std": 0.11504097878932953,
|
| 308 |
"rewards/custom_reward_logic_v2": 0.17062499970197678,
|
| 309 |
"step": 250
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
}
|
| 311 |
],
|
| 312 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.0023892769251598824,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 300,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 307 |
"reward_std": 0.11504097878932953,
|
| 308 |
"rewards/custom_reward_logic_v2": 0.17062499970197678,
|
| 309 |
"step": 250
|
| 310 |
+
},
|
| 311 |
+
{
|
| 312 |
+
"completion_length": 20.475,
|
| 313 |
+
"epoch": 0.002070706668471898,
|
| 314 |
+
"grad_norm": 0.03645075112581253,
|
| 315 |
+
"kl": 0.3291649468243122,
|
| 316 |
+
"learning_rate": 4.267766952966369e-06,
|
| 317 |
+
"loss": 0.0132,
|
| 318 |
+
"reward": 0.16500000059604644,
|
| 319 |
+
"reward_std": 0.1858065977692604,
|
| 320 |
+
"rewards/custom_reward_logic_v2": 0.16500000059604644,
|
| 321 |
+
"step": 260
|
| 322 |
+
},
|
| 323 |
+
{
|
| 324 |
+
"completion_length": 24.5625,
|
| 325 |
+
"epoch": 0.0021503492326438944,
|
| 326 |
+
"grad_norm": 1.1167131662368774,
|
| 327 |
+
"kl": 0.33756194859743116,
|
| 328 |
+
"learning_rate": 4.188975519039151e-06,
|
| 329 |
+
"loss": 0.0135,
|
| 330 |
+
"reward": 0.10505000110715627,
|
| 331 |
+
"reward_std": 0.0828484557569027,
|
| 332 |
+
"rewards/custom_reward_logic_v2": 0.10505000110715627,
|
| 333 |
+
"step": 270
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"completion_length": 19.925,
|
| 337 |
+
"epoch": 0.0022299917968158904,
|
| 338 |
+
"grad_norm": 0.8635123372077942,
|
| 339 |
+
"kl": 0.32979664355516436,
|
| 340 |
+
"learning_rate": 4.106969024216348e-06,
|
| 341 |
+
"loss": 0.0132,
|
| 342 |
+
"reward": 0.20062500163912772,
|
| 343 |
+
"reward_std": 0.1258012667298317,
|
| 344 |
+
"rewards/custom_reward_logic_v2": 0.20062500163912772,
|
| 345 |
+
"step": 280
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"completion_length": 41.35625,
|
| 349 |
+
"epoch": 0.0023096343609878864,
|
| 350 |
+
"grad_norm": 0.7731335163116455,
|
| 351 |
+
"kl": 0.29900490418076514,
|
| 352 |
+
"learning_rate": 4.021903572521802e-06,
|
| 353 |
+
"loss": 0.012,
|
| 354 |
+
"reward": 0.13356250263750552,
|
| 355 |
+
"reward_std": 0.11855373680591583,
|
| 356 |
+
"rewards/custom_reward_logic_v2": 0.13356250263750552,
|
| 357 |
+
"step": 290
|
| 358 |
+
},
|
| 359 |
+
{
|
| 360 |
+
"completion_length": 21.425,
|
| 361 |
+
"epoch": 0.0023892769251598824,
|
| 362 |
+
"grad_norm": 0.050558220595121384,
|
| 363 |
+
"kl": 0.30905950888991357,
|
| 364 |
+
"learning_rate": 3.933941090877615e-06,
|
| 365 |
+
"loss": 0.0124,
|
| 366 |
+
"reward": 0.10625000111758709,
|
| 367 |
+
"reward_std": 0.07851103022694587,
|
| 368 |
+
"rewards/custom_reward_logic_v2": 0.10625000111758709,
|
| 369 |
+
"step": 300
|
| 370 |
}
|
| 371 |
],
|
| 372 |
"logging_steps": 10,
|