Training in progress, step 4650, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 527048968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c2fa6d7c57abe80b81e686c5bb261331348a26463db489c0057496c21099267
|
| 3 |
size 527048968
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1054135994
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:798f7293795a2261c83bf45e42597af80061bea9a6acf124906fd2023d47a1d7
|
| 3 |
size 1054135994
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b03ef6d89f6c10452a8bc84393b1dc225e370d174364da48043ff472b287411
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f96e2618e1c452343a3740c32bea2247d1da5a3f0e229791fe5ed8f1e4e8eb3a
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7166205048561096,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -4357,6 +4357,151 @@
|
|
| 4357 |
"EMA_steps_per_second": 25.401,
|
| 4358 |
"epoch": 195.65217391304347,
|
| 4359 |
"step": 4500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4360 |
}
|
| 4361 |
],
|
| 4362 |
"logging_steps": 10,
|
|
@@ -4376,7 +4521,7 @@
|
|
| 4376 |
"attributes": {}
|
| 4377 |
}
|
| 4378 |
},
|
| 4379 |
-
"total_flos": 1.
|
| 4380 |
"train_batch_size": 4,
|
| 4381 |
"trial_name": null,
|
| 4382 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7166205048561096,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
+
"epoch": 202.17391304347825,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 4650,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 4357 |
"EMA_steps_per_second": 25.401,
|
| 4358 |
"epoch": 195.65217391304347,
|
| 4359 |
"step": 4500
|
| 4360 |
+
},
|
| 4361 |
+
{
|
| 4362 |
+
"epoch": 196.08695652173913,
|
| 4363 |
+
"grad_norm": 2.102900981903076,
|
| 4364 |
+
"learning_rate": 1.5299080151617038e-06,
|
| 4365 |
+
"loss": 0.2645,
|
| 4366 |
+
"step": 4510
|
| 4367 |
+
},
|
| 4368 |
+
{
|
| 4369 |
+
"epoch": 196.52173913043478,
|
| 4370 |
+
"grad_norm": 2.1095998287200928,
|
| 4371 |
+
"learning_rate": 1.5299014962042341e-06,
|
| 4372 |
+
"loss": 0.2165,
|
| 4373 |
+
"step": 4520
|
| 4374 |
+
},
|
| 4375 |
+
{
|
| 4376 |
+
"epoch": 196.95652173913044,
|
| 4377 |
+
"grad_norm": 2.1897032260894775,
|
| 4378 |
+
"learning_rate": 1.5298946740592704e-06,
|
| 4379 |
+
"loss": 0.2351,
|
| 4380 |
+
"step": 4530
|
| 4381 |
+
},
|
| 4382 |
+
{
|
| 4383 |
+
"epoch": 197.3913043478261,
|
| 4384 |
+
"grad_norm": 1.8536592721939087,
|
| 4385 |
+
"learning_rate": 1.5298875487295168e-06,
|
| 4386 |
+
"loss": 0.2502,
|
| 4387 |
+
"step": 4540
|
| 4388 |
+
},
|
| 4389 |
+
{
|
| 4390 |
+
"epoch": 197.82608695652175,
|
| 4391 |
+
"grad_norm": 1.948095679283142,
|
| 4392 |
+
"learning_rate": 1.5298801202177976e-06,
|
| 4393 |
+
"loss": 0.2185,
|
| 4394 |
+
"step": 4550
|
| 4395 |
+
},
|
| 4396 |
+
{
|
| 4397 |
+
"epoch": 198.2608695652174,
|
| 4398 |
+
"grad_norm": 2.084287643432617,
|
| 4399 |
+
"learning_rate": 1.529872388527057e-06,
|
| 4400 |
+
"loss": 0.2297,
|
| 4401 |
+
"step": 4560
|
| 4402 |
+
},
|
| 4403 |
+
{
|
| 4404 |
+
"epoch": 198.69565217391303,
|
| 4405 |
+
"grad_norm": 1.5842596292495728,
|
| 4406 |
+
"learning_rate": 1.5298643536603602e-06,
|
| 4407 |
+
"loss": 0.2311,
|
| 4408 |
+
"step": 4570
|
| 4409 |
+
},
|
| 4410 |
+
{
|
| 4411 |
+
"epoch": 199.1304347826087,
|
| 4412 |
+
"grad_norm": 2.214616537094116,
|
| 4413 |
+
"learning_rate": 1.5298560156208912e-06,
|
| 4414 |
+
"loss": 0.1927,
|
| 4415 |
+
"step": 4580
|
| 4416 |
+
},
|
| 4417 |
+
{
|
| 4418 |
+
"epoch": 199.56521739130434,
|
| 4419 |
+
"grad_norm": 1.5991625785827637,
|
| 4420 |
+
"learning_rate": 1.5298473744119554e-06,
|
| 4421 |
+
"loss": 0.2538,
|
| 4422 |
+
"step": 4590
|
| 4423 |
+
},
|
| 4424 |
+
{
|
| 4425 |
+
"epoch": 200.0,
|
| 4426 |
+
"grad_norm": 3.232147455215454,
|
| 4427 |
+
"learning_rate": 1.5298384300369777e-06,
|
| 4428 |
+
"loss": 0.1878,
|
| 4429 |
+
"step": 4600
|
| 4430 |
+
},
|
| 4431 |
+
{
|
| 4432 |
+
"epoch": 200.43478260869566,
|
| 4433 |
+
"grad_norm": 1.8239914178848267,
|
| 4434 |
+
"learning_rate": 1.5298291824995035e-06,
|
| 4435 |
+
"loss": 0.2398,
|
| 4436 |
+
"step": 4610
|
| 4437 |
+
},
|
| 4438 |
+
{
|
| 4439 |
+
"epoch": 200.8695652173913,
|
| 4440 |
+
"grad_norm": 1.616271734237671,
|
| 4441 |
+
"learning_rate": 1.5298196318031983e-06,
|
| 4442 |
+
"loss": 0.2276,
|
| 4443 |
+
"step": 4620
|
| 4444 |
+
},
|
| 4445 |
+
{
|
| 4446 |
+
"epoch": 201.30434782608697,
|
| 4447 |
+
"grad_norm": 1.999419093132019,
|
| 4448 |
+
"learning_rate": 1.5298097779518473e-06,
|
| 4449 |
+
"loss": 0.2271,
|
| 4450 |
+
"step": 4630
|
| 4451 |
+
},
|
| 4452 |
+
{
|
| 4453 |
+
"epoch": 201.7391304347826,
|
| 4454 |
+
"grad_norm": 2.4747536182403564,
|
| 4455 |
+
"learning_rate": 1.5297996209493567e-06,
|
| 4456 |
+
"loss": 0.1894,
|
| 4457 |
+
"step": 4640
|
| 4458 |
+
},
|
| 4459 |
+
{
|
| 4460 |
+
"epoch": 202.17391304347825,
|
| 4461 |
+
"grad_norm": 1.546221137046814,
|
| 4462 |
+
"learning_rate": 1.5297891607997524e-06,
|
| 4463 |
+
"loss": 0.2233,
|
| 4464 |
+
"step": 4650
|
| 4465 |
+
},
|
| 4466 |
+
{
|
| 4467 |
+
"epoch": 202.17391304347825,
|
| 4468 |
+
"eval_loss": 0.9768635034561157,
|
| 4469 |
+
"eval_runtime": 0.3903,
|
| 4470 |
+
"eval_samples_per_second": 25.62,
|
| 4471 |
+
"eval_steps_per_second": 25.62,
|
| 4472 |
+
"step": 4650
|
| 4473 |
+
},
|
| 4474 |
+
{
|
| 4475 |
+
"Start_State_loss": 0.8609819412231445,
|
| 4476 |
+
"Start_State_runtime": 0.3895,
|
| 4477 |
+
"Start_State_samples_per_second": 25.672,
|
| 4478 |
+
"Start_State_steps_per_second": 25.672,
|
| 4479 |
+
"epoch": 202.17391304347825,
|
| 4480 |
+
"step": 4650
|
| 4481 |
+
},
|
| 4482 |
+
{
|
| 4483 |
+
"Raw_Model_loss": 0.9768635034561157,
|
| 4484 |
+
"Raw_Model_runtime": 0.3902,
|
| 4485 |
+
"Raw_Model_samples_per_second": 25.625,
|
| 4486 |
+
"Raw_Model_steps_per_second": 25.625,
|
| 4487 |
+
"epoch": 202.17391304347825,
|
| 4488 |
+
"step": 4650
|
| 4489 |
+
},
|
| 4490 |
+
{
|
| 4491 |
+
"SWA_loss": 0.8143197894096375,
|
| 4492 |
+
"SWA_runtime": 0.3924,
|
| 4493 |
+
"SWA_samples_per_second": 25.484,
|
| 4494 |
+
"SWA_steps_per_second": 25.484,
|
| 4495 |
+
"epoch": 202.17391304347825,
|
| 4496 |
+
"step": 4650
|
| 4497 |
+
},
|
| 4498 |
+
{
|
| 4499 |
+
"EMA_loss": 0.8591374158859253,
|
| 4500 |
+
"EMA_runtime": 0.3893,
|
| 4501 |
+
"EMA_samples_per_second": 25.688,
|
| 4502 |
+
"EMA_steps_per_second": 25.688,
|
| 4503 |
+
"epoch": 202.17391304347825,
|
| 4504 |
+
"step": 4650
|
| 4505 |
}
|
| 4506 |
],
|
| 4507 |
"logging_steps": 10,
|
|
|
|
| 4521 |
"attributes": {}
|
| 4522 |
}
|
| 4523 |
},
|
| 4524 |
+
"total_flos": 1.1967994202384794e+17,
|
| 4525 |
"train_batch_size": 4,
|
| 4526 |
"trial_name": null,
|
| 4527 |
"trial_params": null
|