Training in progress, step 1225, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 479005064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca8baaba22487cdce42630abaf28904ed482a9cc8827520bc824bb81cd486cd4
|
| 3 |
size 479005064
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 958299770
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c50c6e305b6a0d4073f6216b5ff8f9f102df9985bb7e7ec1efec53cfd87ac3c
|
| 3 |
size 958299770
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2abe6090a2975a0c5e7fd37addef971ec2653163e71ac7336dc65e74c34c04f
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9debf92aca27ce6ace0aef79d370cac248bb98d16412d955b96820394066292d
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1447,6 +1447,30 @@
|
|
| 1447 |
"reward_std": 0.33152099549770353,
|
| 1448 |
"rewards/custom_reward_simplified_v7_dblog": 0.728125,
|
| 1449 |
"step": 1200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1450 |
}
|
| 1451 |
],
|
| 1452 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.00975621411106952,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 1225,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1447 |
"reward_std": 0.33152099549770353,
|
| 1448 |
"rewards/custom_reward_simplified_v7_dblog": 0.728125,
|
| 1449 |
"step": 1200
|
| 1450 |
+
},
|
| 1451 |
+
{
|
| 1452 |
+
"completion_length": 660.7375,
|
| 1453 |
+
"epoch": 0.009636750264811526,
|
| 1454 |
+
"grad_norm": 0.20679971575737,
|
| 1455 |
+
"kl": 0.00826664932537824,
|
| 1456 |
+
"learning_rate": 2.898267170168807e-06,
|
| 1457 |
+
"loss": 0.0003,
|
| 1458 |
+
"reward": 0.665625,
|
| 1459 |
+
"reward_std": 0.25403511226177217,
|
| 1460 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.665625,
|
| 1461 |
+
"step": 1210
|
| 1462 |
+
},
|
| 1463 |
+
{
|
| 1464 |
+
"completion_length": 653.59375,
|
| 1465 |
+
"epoch": 0.009716392828983521,
|
| 1466 |
+
"grad_norm": 0.14609546959400177,
|
| 1467 |
+
"kl": 0.007603704649955034,
|
| 1468 |
+
"learning_rate": 2.862329648268117e-06,
|
| 1469 |
+
"loss": 0.0003,
|
| 1470 |
+
"reward": 0.94375,
|
| 1471 |
+
"reward_std": 0.26154626756906507,
|
| 1472 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.94375,
|
| 1473 |
+
"step": 1220
|
| 1474 |
}
|
| 1475 |
],
|
| 1476 |
"logging_steps": 10,
|