Training in progress, step 1150, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 479005064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b87a7124fc823146a12cbeaefbe5ff8d91326cefab763555aa61dbbb9e056fab
|
| 3 |
size 479005064
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 958299770
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f14effc9368161c55b1d77fbb7e90bb1cd9af0086afde89e1a683d869fb3248
|
| 3 |
size 958299770
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b6ca00cea62eabd929fd77d62694b267d47c0603af124f659274f264d0bf70b
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15a1625baaf061099e4aa140afa72e1a2a32a1a3e25d7baf8a019a63d52d6e43
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1351,6 +1351,42 @@
|
|
| 1351 |
"reward_std": 0.2547163799405098,
|
| 1352 |
"rewards/custom_reward_simplified_v7_dblog": 0.728125,
|
| 1353 |
"step": 1120
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1354 |
}
|
| 1355 |
],
|
| 1356 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.009158894879779549,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 1150,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1351 |
"reward_std": 0.2547163799405098,
|
| 1352 |
"rewards/custom_reward_simplified_v7_dblog": 0.728125,
|
| 1353 |
"step": 1120
|
| 1354 |
+
},
|
| 1355 |
+
{
|
| 1356 |
+
"completion_length": 733.2125,
|
| 1357 |
+
"epoch": 0.008999609751435557,
|
| 1358 |
+
"grad_norm": 0.2320898026227951,
|
| 1359 |
+
"kl": 0.007608366897329688,
|
| 1360 |
+
"learning_rate": 3.182099991668653e-06,
|
| 1361 |
+
"loss": 0.0003,
|
| 1362 |
+
"reward": 0.60625,
|
| 1363 |
+
"reward_std": 0.2975068032741547,
|
| 1364 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.60625,
|
| 1365 |
+
"step": 1130
|
| 1366 |
+
},
|
| 1367 |
+
{
|
| 1368 |
+
"completion_length": 603.5,
|
| 1369 |
+
"epoch": 0.009079252315607553,
|
| 1370 |
+
"grad_norm": 0.23401154577732086,
|
| 1371 |
+
"kl": 0.007222792156971991,
|
| 1372 |
+
"learning_rate": 3.147047612756302e-06,
|
| 1373 |
+
"loss": 0.0003,
|
| 1374 |
+
"reward": 0.875,
|
| 1375 |
+
"reward_std": 0.2553515017032623,
|
| 1376 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.875,
|
| 1377 |
+
"step": 1140
|
| 1378 |
+
},
|
| 1379 |
+
{
|
| 1380 |
+
"completion_length": 704.44375,
|
| 1381 |
+
"epoch": 0.009158894879779549,
|
| 1382 |
+
"grad_norm": 0.2538968324661255,
|
| 1383 |
+
"kl": 0.007968966104090213,
|
| 1384 |
+
"learning_rate": 3.1118583598858097e-06,
|
| 1385 |
+
"loss": 0.0003,
|
| 1386 |
+
"reward": 0.6875,
|
| 1387 |
+
"reward_std": 0.29204289317131044,
|
| 1388 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.6875,
|
| 1389 |
+
"step": 1150
|
| 1390 |
}
|
| 1391 |
],
|
| 1392 |
"logging_steps": 10,
|