Training in progress, step 1925, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 479005064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:400bf9743fcfff3f47e3f0b9a1cede38e8d6e96374ac2cd587f2d2edfd906572
|
| 3 |
size 479005064
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 958299770
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3309a30e849ace9608e1957368fc06f650c97cc91eaa6df6d4bf2f6b649868f3
|
| 3 |
size 958299770
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c65db11f2bbb866945208742b8cd4b8865acadf113d5d9fbfe55b269b5ff1059
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:deeb8215b1ed4392892b832a6e768b3c4ae9ca65d4af274686a16e7d74532396
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2287,6 +2287,30 @@
|
|
| 2287 |
"reward_std": 0.2382744610309601,
|
| 2288 |
"rewards/custom_reward_simplified_v7_dblog": 0.725,
|
| 2289 |
"step": 1900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2290 |
}
|
| 2291 |
],
|
| 2292 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.015331193603109246,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 1925,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2287 |
"reward_std": 0.2382744610309601,
|
| 2288 |
"rewards/custom_reward_simplified_v7_dblog": 0.725,
|
| 2289 |
"step": 1900
|
| 2290 |
+
},
|
| 2291 |
+
{
|
| 2292 |
+
"completion_length": 767.7375,
|
| 2293 |
+
"epoch": 0.015211729756851252,
|
| 2294 |
+
"grad_norm": 0.1330222189426422,
|
| 2295 |
+
"kl": 0.02190765142440796,
|
| 2296 |
+
"learning_rate": 6.084630428312679e-07,
|
| 2297 |
+
"loss": 0.0009,
|
| 2298 |
+
"reward": 0.66875,
|
| 2299 |
+
"reward_std": 0.27546602860093117,
|
| 2300 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.66875,
|
| 2301 |
+
"step": 1910
|
| 2302 |
+
},
|
| 2303 |
+
{
|
| 2304 |
+
"completion_length": 726.63125,
|
| 2305 |
+
"epoch": 0.015291372321023247,
|
| 2306 |
+
"grad_norm": 0.21655875444412231,
|
| 2307 |
+
"kl": 0.02581467442214489,
|
| 2308 |
+
"learning_rate": 5.848888922025553e-07,
|
| 2309 |
+
"loss": 0.001,
|
| 2310 |
+
"reward": 0.834375,
|
| 2311 |
+
"reward_std": 0.38373097851872445,
|
| 2312 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.834375,
|
| 2313 |
+
"step": 1920
|
| 2314 |
}
|
| 2315 |
],
|
| 2316 |
"logging_steps": 10,
|