Training in progress, step 4500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 527048968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a5a6f82a282e6ee513038360740bfa6163feb4b1d4b1bb3319d6b0ef1f4751f
|
| 3 |
size 527048968
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1054135994
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a71d3e09cfa9bcbe2ed92701fe51af492bac444d4e688ae56a471982c181c9e9
|
| 3 |
size 1054135994
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:802e223144d189244ce5a768642009b3c15e29f14e41b4808f514470d4c7be6e
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e092ded0b8103aeaa278f39556d48ebee944cc0f4cd6e8f95b6ba39b7752813
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7166205048561096,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -4212,6 +4212,151 @@
|
|
| 4212 |
"EMA_steps_per_second": 25.542,
|
| 4213 |
"epoch": 189.1304347826087,
|
| 4214 |
"step": 4350
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4215 |
}
|
| 4216 |
],
|
| 4217 |
"logging_steps": 10,
|
|
@@ -4231,7 +4376,7 @@
|
|
| 4231 |
"attributes": {}
|
| 4232 |
}
|
| 4233 |
},
|
| 4234 |
-
"total_flos": 1.
|
| 4235 |
"train_batch_size": 4,
|
| 4236 |
"trial_name": null,
|
| 4237 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7166205048561096,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
+
"epoch": 195.65217391304347,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 4500,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 4212 |
"EMA_steps_per_second": 25.542,
|
| 4213 |
"epoch": 189.1304347826087,
|
| 4214 |
"step": 4350
|
| 4215 |
+
},
|
| 4216 |
+
{
|
| 4217 |
+
"epoch": 189.56521739130434,
|
| 4218 |
+
"grad_norm": 1.9401793479919434,
|
| 4219 |
+
"learning_rate": 1.5299694155534387e-06,
|
| 4220 |
+
"loss": 0.2163,
|
| 4221 |
+
"step": 4360
|
| 4222 |
+
},
|
| 4223 |
+
{
|
| 4224 |
+
"epoch": 190.0,
|
| 4225 |
+
"grad_norm": 4.096744060516357,
|
| 4226 |
+
"learning_rate": 1.529967444651148e-06,
|
| 4227 |
+
"loss": 0.2344,
|
| 4228 |
+
"step": 4370
|
| 4229 |
+
},
|
| 4230 |
+
{
|
| 4231 |
+
"epoch": 190.43478260869566,
|
| 4232 |
+
"grad_norm": 2.7062318325042725,
|
| 4233 |
+
"learning_rate": 1.529965170535223e-06,
|
| 4234 |
+
"loss": 0.251,
|
| 4235 |
+
"step": 4380
|
| 4236 |
+
},
|
| 4237 |
+
{
|
| 4238 |
+
"epoch": 190.8695652173913,
|
| 4239 |
+
"grad_norm": 1.7941333055496216,
|
| 4240 |
+
"learning_rate": 1.5299625932065658e-06,
|
| 4241 |
+
"loss": 0.2192,
|
| 4242 |
+
"step": 4390
|
| 4243 |
+
},
|
| 4244 |
+
{
|
| 4245 |
+
"epoch": 191.30434782608697,
|
| 4246 |
+
"grad_norm": 2.2132506370544434,
|
| 4247 |
+
"learning_rate": 1.5299597126661977e-06,
|
| 4248 |
+
"loss": 0.2179,
|
| 4249 |
+
"step": 4400
|
| 4250 |
+
},
|
| 4251 |
+
{
|
| 4252 |
+
"epoch": 191.7391304347826,
|
| 4253 |
+
"grad_norm": 2.125366687774658,
|
| 4254 |
+
"learning_rate": 1.5299565289152606e-06,
|
| 4255 |
+
"loss": 0.2031,
|
| 4256 |
+
"step": 4410
|
| 4257 |
+
},
|
| 4258 |
+
{
|
| 4259 |
+
"epoch": 192.17391304347825,
|
| 4260 |
+
"grad_norm": 2.0995376110076904,
|
| 4261 |
+
"learning_rate": 1.5299530419550163e-06,
|
| 4262 |
+
"loss": 0.2472,
|
| 4263 |
+
"step": 4420
|
| 4264 |
+
},
|
| 4265 |
+
{
|
| 4266 |
+
"epoch": 192.6086956521739,
|
| 4267 |
+
"grad_norm": 2.151653289794922,
|
| 4268 |
+
"learning_rate": 1.529949251786847e-06,
|
| 4269 |
+
"loss": 0.2326,
|
| 4270 |
+
"step": 4430
|
| 4271 |
+
},
|
| 4272 |
+
{
|
| 4273 |
+
"epoch": 193.04347826086956,
|
| 4274 |
+
"grad_norm": 2.5126099586486816,
|
| 4275 |
+
"learning_rate": 1.5299451584122548e-06,
|
| 4276 |
+
"loss": 0.234,
|
| 4277 |
+
"step": 4440
|
| 4278 |
+
},
|
| 4279 |
+
{
|
| 4280 |
+
"epoch": 193.47826086956522,
|
| 4281 |
+
"grad_norm": 1.9897412061691284,
|
| 4282 |
+
"learning_rate": 1.5299407618328622e-06,
|
| 4283 |
+
"loss": 0.2401,
|
| 4284 |
+
"step": 4450
|
| 4285 |
+
},
|
| 4286 |
+
{
|
| 4287 |
+
"epoch": 193.91304347826087,
|
| 4288 |
+
"grad_norm": 2.143177032470703,
|
| 4289 |
+
"learning_rate": 1.5299360620504121e-06,
|
| 4290 |
+
"loss": 0.2344,
|
| 4291 |
+
"step": 4460
|
| 4292 |
+
},
|
| 4293 |
+
{
|
| 4294 |
+
"epoch": 194.34782608695653,
|
| 4295 |
+
"grad_norm": 2.5046348571777344,
|
| 4296 |
+
"learning_rate": 1.5299310590667677e-06,
|
| 4297 |
+
"loss": 0.2091,
|
| 4298 |
+
"step": 4470
|
| 4299 |
+
},
|
| 4300 |
+
{
|
| 4301 |
+
"epoch": 194.7826086956522,
|
| 4302 |
+
"grad_norm": 2.4033350944519043,
|
| 4303 |
+
"learning_rate": 1.529925752883911e-06,
|
| 4304 |
+
"loss": 0.2265,
|
| 4305 |
+
"step": 4480
|
| 4306 |
+
},
|
| 4307 |
+
{
|
| 4308 |
+
"epoch": 195.2173913043478,
|
| 4309 |
+
"grad_norm": 1.9208111763000488,
|
| 4310 |
+
"learning_rate": 1.529920143503946e-06,
|
| 4311 |
+
"loss": 0.2074,
|
| 4312 |
+
"step": 4490
|
| 4313 |
+
},
|
| 4314 |
+
{
|
| 4315 |
+
"epoch": 195.65217391304347,
|
| 4316 |
+
"grad_norm": 2.4695804119110107,
|
| 4317 |
+
"learning_rate": 1.5299142309290955e-06,
|
| 4318 |
+
"loss": 0.2067,
|
| 4319 |
+
"step": 4500
|
| 4320 |
+
},
|
| 4321 |
+
{
|
| 4322 |
+
"epoch": 195.65217391304347,
|
| 4323 |
+
"eval_loss": 0.9752073287963867,
|
| 4324 |
+
"eval_runtime": 0.4001,
|
| 4325 |
+
"eval_samples_per_second": 24.997,
|
| 4326 |
+
"eval_steps_per_second": 24.997,
|
| 4327 |
+
"step": 4500
|
| 4328 |
+
},
|
| 4329 |
+
{
|
| 4330 |
+
"Start_State_loss": 0.8609819412231445,
|
| 4331 |
+
"Start_State_runtime": 0.3943,
|
| 4332 |
+
"Start_State_samples_per_second": 25.364,
|
| 4333 |
+
"Start_State_steps_per_second": 25.364,
|
| 4334 |
+
"epoch": 195.65217391304347,
|
| 4335 |
+
"step": 4500
|
| 4336 |
+
},
|
| 4337 |
+
{
|
| 4338 |
+
"Raw_Model_loss": 0.9752073287963867,
|
| 4339 |
+
"Raw_Model_runtime": 0.3942,
|
| 4340 |
+
"Raw_Model_samples_per_second": 25.367,
|
| 4341 |
+
"Raw_Model_steps_per_second": 25.367,
|
| 4342 |
+
"epoch": 195.65217391304347,
|
| 4343 |
+
"step": 4500
|
| 4344 |
+
},
|
| 4345 |
+
{
|
| 4346 |
+
"SWA_loss": 0.8120683431625366,
|
| 4347 |
+
"SWA_runtime": 0.3958,
|
| 4348 |
+
"SWA_samples_per_second": 25.266,
|
| 4349 |
+
"SWA_steps_per_second": 25.266,
|
| 4350 |
+
"epoch": 195.65217391304347,
|
| 4351 |
+
"step": 4500
|
| 4352 |
+
},
|
| 4353 |
+
{
|
| 4354 |
+
"EMA_loss": 0.8598009943962097,
|
| 4355 |
+
"EMA_runtime": 0.3937,
|
| 4356 |
+
"EMA_samples_per_second": 25.401,
|
| 4357 |
+
"EMA_steps_per_second": 25.401,
|
| 4358 |
+
"epoch": 195.65217391304347,
|
| 4359 |
+
"step": 4500
|
| 4360 |
}
|
| 4361 |
],
|
| 4362 |
"logging_steps": 10,
|
|
|
|
| 4376 |
"attributes": {}
|
| 4377 |
}
|
| 4378 |
},
|
| 4379 |
+
"total_flos": 1.1587388919393485e+17,
|
| 4380 |
"train_batch_size": 4,
|
| 4381 |
"trial_name": null,
|
| 4382 |
"trial_params": null
|