Training in progress, step 350, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 479005064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab9a1c3e4f7eeb26f1787aab2bede14a97d3e21b68a19b5777a98b5c4dc9c594
|
| 3 |
size 479005064
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 958299770
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ecb6746c7a22945701e7525b6e96db148acb5c526d8ec4de574c6cd87337c3b9
|
| 3 |
size 958299770
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eed43572ea1df9b6a964b9089ff0a48779e2a1cc8929f8355fea19e7f7ab5c5f
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:905cad78b215386b1078b951a642067b64baccf738f77304c45fea8d964d3906
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -367,6 +367,66 @@
|
|
| 367 |
"reward_std": 0.07851103022694587,
|
| 368 |
"rewards/custom_reward_logic_v2": 0.10625000111758709,
|
| 369 |
"step": 300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
}
|
| 371 |
],
|
| 372 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.002787489746019863,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 350,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 367 |
"reward_std": 0.07851103022694587,
|
| 368 |
"rewards/custom_reward_logic_v2": 0.10625000111758709,
|
| 369 |
"step": 300
|
| 370 |
+
},
|
| 371 |
+
{
|
| 372 |
+
"completion_length": 28.39375,
|
| 373 |
+
"epoch": 0.0024689194893318784,
|
| 374 |
+
"grad_norm": 1.2737127542495728,
|
| 375 |
+
"kl": 0.3259002223610878,
|
| 376 |
+
"learning_rate": 3.8432490208670605e-06,
|
| 377 |
+
"loss": 0.013,
|
| 378 |
+
"reward": 0.07012500055134296,
|
| 379 |
+
"reward_std": 0.21550666987895967,
|
| 380 |
+
"rewards/custom_reward_logic_v2": 0.07012500055134296,
|
| 381 |
+
"step": 310
|
| 382 |
+
},
|
| 383 |
+
{
|
| 384 |
+
"completion_length": 20.49375,
|
| 385 |
+
"epoch": 0.002548562053503875,
|
| 386 |
+
"grad_norm": 1.3667010068893433,
|
| 387 |
+
"kl": 0.32961594611406325,
|
| 388 |
+
"learning_rate": 3.7500000000000005e-06,
|
| 389 |
+
"loss": 0.0132,
|
| 390 |
+
"reward": 0.15562500059604645,
|
| 391 |
+
"reward_std": 0.14379026368260384,
|
| 392 |
+
"rewards/custom_reward_logic_v2": 0.15562500059604645,
|
| 393 |
+
"step": 320
|
| 394 |
+
},
|
| 395 |
+
{
|
| 396 |
+
"completion_length": 23.7625,
|
| 397 |
+
"epoch": 0.002628204617675871,
|
| 398 |
+
"grad_norm": 0.9662195444107056,
|
| 399 |
+
"kl": 0.3291011206805706,
|
| 400 |
+
"learning_rate": 3.654371533087586e-06,
|
| 401 |
+
"loss": 0.0132,
|
| 402 |
+
"reward": 0.20617500003427267,
|
| 403 |
+
"reward_std": 0.12530190348625184,
|
| 404 |
+
"rewards/custom_reward_logic_v2": 0.20617500003427267,
|
| 405 |
+
"step": 330
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"completion_length": 19.15,
|
| 409 |
+
"epoch": 0.002707847181847867,
|
| 410 |
+
"grad_norm": 2.964785099029541,
|
| 411 |
+
"kl": 0.3629206448793411,
|
| 412 |
+
"learning_rate": 3.556545654351749e-06,
|
| 413 |
+
"loss": 0.0145,
|
| 414 |
+
"reward": 0.10437500067055225,
|
| 415 |
+
"reward_std": 0.12071752324700355,
|
| 416 |
+
"rewards/custom_reward_logic_v2": 0.10437500067055225,
|
| 417 |
+
"step": 340
|
| 418 |
+
},
|
| 419 |
+
{
|
| 420 |
+
"completion_length": 20.4875,
|
| 421 |
+
"epoch": 0.002787489746019863,
|
| 422 |
+
"grad_norm": 1.0044533014297485,
|
| 423 |
+
"kl": 0.3254102662205696,
|
| 424 |
+
"learning_rate": 3.4567085809127247e-06,
|
| 425 |
+
"loss": 0.013,
|
| 426 |
+
"reward": 0.15562499798834323,
|
| 427 |
+
"reward_std": 0.15355074554681777,
|
| 428 |
+
"rewards/custom_reward_logic_v2": 0.15562499798834323,
|
| 429 |
+
"step": 350
|
| 430 |
}
|
| 431 |
],
|
| 432 |
"logging_steps": 10,
|