Training in progress, step 1825, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 479005064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce0227de8dffd60e7bcbc361e28f5f14d86f6b8aa6b9faaa25078af2c1664371
|
| 3 |
size 479005064
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 958299770
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b50241b912450499aa67b6f47d8ef5d57cc918130f305986edc730a6c70d0be
|
| 3 |
size 958299770
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bdc279ccf06d94b21f0f1142b3ba0467a4b037c890e7d4c8b4d0d9959c7a643b
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e18d4bd19d02103826c6ccfe1e046ad882c768a3c57be1799d9b12107011c97
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2167,6 +2167,30 @@
|
|
| 2167 |
"reward_std": 0.28967257887125014,
|
| 2168 |
"rewards/custom_reward_simplified_v7_dblog": 0.75,
|
| 2169 |
"step": 1800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2170 |
}
|
| 2171 |
],
|
| 2172 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.014534767961389285,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 1825,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2167 |
"reward_std": 0.28967257887125014,
|
| 2168 |
"rewards/custom_reward_simplified_v7_dblog": 0.75,
|
| 2169 |
"step": 1800
|
| 2170 |
+
},
|
| 2171 |
+
{
|
| 2172 |
+
"completion_length": 689.26875,
|
| 2173 |
+
"epoch": 0.01441530411513129,
|
| 2174 |
+
"grad_norm": 0.17589329183101654,
|
| 2175 |
+
"kl": 0.016255489736795425,
|
| 2176 |
+
"learning_rate": 8.653477618573261e-07,
|
| 2177 |
+
"loss": 0.0007,
|
| 2178 |
+
"reward": 0.765625,
|
| 2179 |
+
"reward_std": 0.3363394603133202,
|
| 2180 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.765625,
|
| 2181 |
+
"step": 1810
|
| 2182 |
+
},
|
| 2183 |
+
{
|
| 2184 |
+
"completion_length": 640.91875,
|
| 2185 |
+
"epoch": 0.014494946679303287,
|
| 2186 |
+
"grad_norm": 0.21075929701328278,
|
| 2187 |
+
"kl": 0.015922663966193795,
|
| 2188 |
+
"learning_rate": 8.380103359651554e-07,
|
| 2189 |
+
"loss": 0.0006,
|
| 2190 |
+
"reward": 0.925,
|
| 2191 |
+
"reward_std": 0.3459245666861534,
|
| 2192 |
+
"rewards/custom_reward_simplified_v7_dblog": 0.925,
|
| 2193 |
+
"step": 1820
|
| 2194 |
}
|
| 2195 |
],
|
| 2196 |
"logging_steps": 10,
|