Training in progress, step 200, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 479005064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d0b10032621567c53e0e12f4051e3fa7f9880f7b87ae153f1600c2eed98d364
|
| 3 |
size 479005064
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 958299770
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4c7399a7124ebfc8f2602ee4160ffecfe9c2c5345d69e9697c401f6fd3bd73d
|
| 3 |
size 958299770
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d71d011e87b512f28e794476e44bdcb409ab9a4721e9b4147120eeb12f1053d
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47394e7e6639b2da14254e3e88e50a05cb1b1d15a05d1aa46398ae3b93c7909f
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -187,6 +187,66 @@
|
|
| 187 |
"reward_std": 0.430637900531292,
|
| 188 |
"rewards/custom_reward_logic_v2": -0.04024999849498272,
|
| 189 |
"step": 150
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
}
|
| 191 |
],
|
| 192 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.0015928512834399217,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 200,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 187 |
"reward_std": 0.430637900531292,
|
| 188 |
"rewards/custom_reward_logic_v2": -0.04024999849498272,
|
| 189 |
"step": 150
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"completion_length": 18.93125,
|
| 193 |
+
"epoch": 0.0012742810267519374,
|
| 194 |
+
"grad_norm": 0.032512303441762924,
|
| 195 |
+
"kl": 0.33459745422005654,
|
| 196 |
+
"learning_rate": 4.849231551964771e-06,
|
| 197 |
+
"loss": 0.0134,
|
| 198 |
+
"reward": 0.1650000035762787,
|
| 199 |
+
"reward_std": 0.07605109438300132,
|
| 200 |
+
"rewards/custom_reward_logic_v2": 0.1650000035762787,
|
| 201 |
+
"step": 160
|
| 202 |
+
},
|
| 203 |
+
{
|
| 204 |
+
"completion_length": 17.4375,
|
| 205 |
+
"epoch": 0.0013539235909239334,
|
| 206 |
+
"grad_norm": 0.02004638873040676,
|
| 207 |
+
"kl": 0.35064528286457064,
|
| 208 |
+
"learning_rate": 4.809698831278217e-06,
|
| 209 |
+
"loss": 0.014,
|
| 210 |
+
"reward": 0.08999999985098839,
|
| 211 |
+
"reward_std": 0.125558003783226,
|
| 212 |
+
"rewards/custom_reward_logic_v2": 0.08999999985098839,
|
| 213 |
+
"step": 170
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"completion_length": 23.975,
|
| 217 |
+
"epoch": 0.0014335661550959294,
|
| 218 |
+
"grad_norm": 0.2281995564699173,
|
| 219 |
+
"kl": 0.3118164837360382,
|
| 220 |
+
"learning_rate": 4.765769467591626e-06,
|
| 221 |
+
"loss": 0.0125,
|
| 222 |
+
"reward": 0.08099999986588954,
|
| 223 |
+
"reward_std": 0.18301311507821083,
|
| 224 |
+
"rewards/custom_reward_logic_v2": 0.08099999986588954,
|
| 225 |
+
"step": 180
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"completion_length": 17.7,
|
| 229 |
+
"epoch": 0.0015132087192679256,
|
| 230 |
+
"grad_norm": 0.20832708477973938,
|
| 231 |
+
"kl": 0.34881954491138456,
|
| 232 |
+
"learning_rate": 4.717527082945555e-06,
|
| 233 |
+
"loss": 0.014,
|
| 234 |
+
"reward": 0.14687500111758708,
|
| 235 |
+
"reward_std": 0.13193419948220253,
|
| 236 |
+
"rewards/custom_reward_logic_v2": 0.14687500111758708,
|
| 237 |
+
"step": 190
|
| 238 |
+
},
|
| 239 |
+
{
|
| 240 |
+
"completion_length": 28.76875,
|
| 241 |
+
"epoch": 0.0015928512834399217,
|
| 242 |
+
"grad_norm": 0.2148224264383316,
|
| 243 |
+
"kl": 0.4086977861821651,
|
| 244 |
+
"learning_rate": 4.665063509461098e-06,
|
| 245 |
+
"loss": 0.0163,
|
| 246 |
+
"reward": 0.06411250084638595,
|
| 247 |
+
"reward_std": 0.09681975245475768,
|
| 248 |
+
"rewards/custom_reward_logic_v2": 0.06411250084638595,
|
| 249 |
+
"step": 200
|
| 250 |
}
|
| 251 |
],
|
| 252 |
"logging_steps": 10,
|