Training in progress, step 1100, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 479005064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49b80ac9e71a99d4c4c6a019f92f74ac748ae8a3db5b19d23b29551127562576
|
| 3 |
size 479005064
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 958299770
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b954913df4d8f33fa5530a046cefed963703da18792e9717aec1ad27ac9b9ac
|
| 3 |
size 958299770
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29c2d81a2ee2dbcbac40eefdb89ca90568c2c93f1ddee20eba510e848b8988fb
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa66b03381b0dfe975a2c18907018f054a16534d5b1412711280057eb4458970
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1291,6 +1291,42 @@
|
|
| 1291 |
"reward_std": 0.37070034593343737,
|
| 1292 |
"rewards/custom_reward_simplified_v7_dblog": 0.865625,
|
| 1293 |
"step": 1070
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1294 |
}
|
| 1295 |
],
|
| 1296 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.00876068205891957,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 1100,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1291 |
"reward_std": 0.37070034593343737,
|
| 1292 |
"rewards/custom_reward_simplified_v7_dblog": 0.865625,
|
| 1293 |
"step": 1070
|
| 1294 |
+
},
|
| 1295 |
+
{
|
| 1296 |
+
"completion_length": 638.23125,
|
| 1297 |
+
"epoch": 0.008601396930575577,
|
| 1298 |
+
"grad_norm": 0.23796696960926056,
|
| 1299 |
+
"kl": 0.0075248789740726355,
|
| 1300 |
+
"learning_rate": 3.3550503583141726e-06,
|
| 1301 |
+
"loss": 0.0003,
|
| 1302 |
+
"reward": 0.746875,
|
| 1303 |
+
"reward_std": 0.25020881071686746,
|
| 1304 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.746875,
|
| 1305 |
+
"step": 1080
|
| 1306 |
+
},
|
| 1307 |
+
{
|
| 1308 |
+
"completion_length": 634.9375,
|
| 1309 |
+
"epoch": 0.008681039494747573,
|
| 1310 |
+
"grad_norm": 0.2958204448223114,
|
| 1311 |
+
"kl": 0.006533738202415406,
|
| 1312 |
+
"learning_rate": 3.3207929380339034e-06,
|
| 1313 |
+
"loss": 0.0003,
|
| 1314 |
+
"reward": 0.896875,
|
| 1315 |
+
"reward_std": 0.38549663573503495,
|
| 1316 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.896875,
|
| 1317 |
+
"step": 1090
|
| 1318 |
+
},
|
| 1319 |
+
{
|
| 1320 |
+
"completion_length": 661.2625,
|
| 1321 |
+
"epoch": 0.00876068205891957,
|
| 1322 |
+
"grad_norm": 0.007367302197962999,
|
| 1323 |
+
"kl": 0.007355101336725056,
|
| 1324 |
+
"learning_rate": 3.2863618903790346e-06,
|
| 1325 |
+
"loss": 0.0003,
|
| 1326 |
+
"reward": 0.71875,
|
| 1327 |
+
"reward_std": 0.25932966247200967,
|
| 1328 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.71875,
|
| 1329 |
+
"step": 1100
|
| 1330 |
}
|
| 1331 |
],
|
| 1332 |
"logging_steps": 10,
|