Training in progress, step 4800, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 527048968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73320e76344a133855f71d58f5599b5bab5bec21149e32ab22ea8639c81b6efa
|
| 3 |
size 527048968
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1054135994
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df0f0d4523a1748c8d67b29c00bcdc95ba64d1d0d15e03f3aaf492af944d8a42
|
| 3 |
size 1054135994
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:790c395b0a5037d7be1e3d6aec472475bfb03f273b97c2103b3eaeeee641cbe6
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0301af88bd95ce6b3924705dcc39f92acccc19dd6a0525d5021e46ffe9ebde47
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7166205048561096,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -4502,6 +4502,151 @@
|
|
| 4502 |
"EMA_steps_per_second": 25.688,
|
| 4503 |
"epoch": 202.17391304347825,
|
| 4504 |
"step": 4650
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4505 |
}
|
| 4506 |
],
|
| 4507 |
"logging_steps": 10,
|
|
@@ -4521,7 +4666,7 @@
|
|
| 4521 |
"attributes": {}
|
| 4522 |
}
|
| 4523 |
},
|
| 4524 |
-
"total_flos": 1.
|
| 4525 |
"train_batch_size": 4,
|
| 4526 |
"trial_name": null,
|
| 4527 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7166205048561096,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
+
"epoch": 208.69565217391303,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 4800,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 4502 |
"EMA_steps_per_second": 25.688,
|
| 4503 |
"epoch": 202.17391304347825,
|
| 4504 |
"step": 4650
|
| 4505 |
+
},
|
| 4506 |
+
{
|
| 4507 |
+
"epoch": 202.6086956521739,
|
| 4508 |
+
"grad_norm": 2.3335537910461426,
|
| 4509 |
+
"learning_rate": 1.5297783975071799e-06,
|
| 4510 |
+
"loss": 0.2124,
|
| 4511 |
+
"step": 4660
|
| 4512 |
+
},
|
| 4513 |
+
{
|
| 4514 |
+
"epoch": 203.04347826086956,
|
| 4515 |
+
"grad_norm": 2.041670560836792,
|
| 4516 |
+
"learning_rate": 1.529767331075906e-06,
|
| 4517 |
+
"loss": 0.282,
|
| 4518 |
+
"step": 4670
|
| 4519 |
+
},
|
| 4520 |
+
{
|
| 4521 |
+
"epoch": 203.47826086956522,
|
| 4522 |
+
"grad_norm": 2.462006092071533,
|
| 4523 |
+
"learning_rate": 1.529755961510317e-06,
|
| 4524 |
+
"loss": 0.2352,
|
| 4525 |
+
"step": 4680
|
| 4526 |
+
},
|
| 4527 |
+
{
|
| 4528 |
+
"epoch": 203.91304347826087,
|
| 4529 |
+
"grad_norm": 1.7050849199295044,
|
| 4530 |
+
"learning_rate": 1.5297442888149193e-06,
|
| 4531 |
+
"loss": 0.2125,
|
| 4532 |
+
"step": 4690
|
| 4533 |
+
},
|
| 4534 |
+
{
|
| 4535 |
+
"epoch": 204.34782608695653,
|
| 4536 |
+
"grad_norm": 2.558677911758423,
|
| 4537 |
+
"learning_rate": 1.5297323129943396e-06,
|
| 4538 |
+
"loss": 0.2971,
|
| 4539 |
+
"step": 4700
|
| 4540 |
+
},
|
| 4541 |
+
{
|
| 4542 |
+
"epoch": 204.7826086956522,
|
| 4543 |
+
"grad_norm": 1.8304595947265625,
|
| 4544 |
+
"learning_rate": 1.5297200340533247e-06,
|
| 4545 |
+
"loss": 0.1943,
|
| 4546 |
+
"step": 4710
|
| 4547 |
+
},
|
| 4548 |
+
{
|
| 4549 |
+
"epoch": 205.2173913043478,
|
| 4550 |
+
"grad_norm": 1.568945050239563,
|
| 4551 |
+
"learning_rate": 1.5297074519967415e-06,
|
| 4552 |
+
"loss": 0.1988,
|
| 4553 |
+
"step": 4720
|
| 4554 |
+
},
|
| 4555 |
+
{
|
| 4556 |
+
"epoch": 205.65217391304347,
|
| 4557 |
+
"grad_norm": 2.6844093799591064,
|
| 4558 |
+
"learning_rate": 1.5296945668295776e-06,
|
| 4559 |
+
"loss": 0.2073,
|
| 4560 |
+
"step": 4730
|
| 4561 |
+
},
|
| 4562 |
+
{
|
| 4563 |
+
"epoch": 206.08695652173913,
|
| 4564 |
+
"grad_norm": 2.8607003688812256,
|
| 4565 |
+
"learning_rate": 1.5296813785569398e-06,
|
| 4566 |
+
"loss": 0.2542,
|
| 4567 |
+
"step": 4740
|
| 4568 |
+
},
|
| 4569 |
+
{
|
| 4570 |
+
"epoch": 206.52173913043478,
|
| 4571 |
+
"grad_norm": 2.7412221431732178,
|
| 4572 |
+
"learning_rate": 1.5296678871840554e-06,
|
| 4573 |
+
"loss": 0.2103,
|
| 4574 |
+
"step": 4750
|
| 4575 |
+
},
|
| 4576 |
+
{
|
| 4577 |
+
"epoch": 206.95652173913044,
|
| 4578 |
+
"grad_norm": 2.278228759765625,
|
| 4579 |
+
"learning_rate": 1.5296540927162723e-06,
|
| 4580 |
+
"loss": 0.2414,
|
| 4581 |
+
"step": 4760
|
| 4582 |
+
},
|
| 4583 |
+
{
|
| 4584 |
+
"epoch": 207.3913043478261,
|
| 4585 |
+
"grad_norm": 2.021712064743042,
|
| 4586 |
+
"learning_rate": 1.5296399951590582e-06,
|
| 4587 |
+
"loss": 0.2272,
|
| 4588 |
+
"step": 4770
|
| 4589 |
+
},
|
| 4590 |
+
{
|
| 4591 |
+
"epoch": 207.82608695652175,
|
| 4592 |
+
"grad_norm": 2.287015438079834,
|
| 4593 |
+
"learning_rate": 1.5296255945180007e-06,
|
| 4594 |
+
"loss": 0.2046,
|
| 4595 |
+
"step": 4780
|
| 4596 |
+
},
|
| 4597 |
+
{
|
| 4598 |
+
"epoch": 208.2608695652174,
|
| 4599 |
+
"grad_norm": 2.0657951831817627,
|
| 4600 |
+
"learning_rate": 1.5296108907988078e-06,
|
| 4601 |
+
"loss": 0.2052,
|
| 4602 |
+
"step": 4790
|
| 4603 |
+
},
|
| 4604 |
+
{
|
| 4605 |
+
"epoch": 208.69565217391303,
|
| 4606 |
+
"grad_norm": 2.087261915206909,
|
| 4607 |
+
"learning_rate": 1.529595884007308e-06,
|
| 4608 |
+
"loss": 0.2538,
|
| 4609 |
+
"step": 4800
|
| 4610 |
+
},
|
| 4611 |
+
{
|
| 4612 |
+
"epoch": 208.69565217391303,
|
| 4613 |
+
"eval_loss": 0.9772452116012573,
|
| 4614 |
+
"eval_runtime": 0.4827,
|
| 4615 |
+
"eval_samples_per_second": 20.715,
|
| 4616 |
+
"eval_steps_per_second": 20.715,
|
| 4617 |
+
"step": 4800
|
| 4618 |
+
},
|
| 4619 |
+
{
|
| 4620 |
+
"Start_State_loss": 0.8609819412231445,
|
| 4621 |
+
"Start_State_runtime": 0.4411,
|
| 4622 |
+
"Start_State_samples_per_second": 22.671,
|
| 4623 |
+
"Start_State_steps_per_second": 22.671,
|
| 4624 |
+
"epoch": 208.69565217391303,
|
| 4625 |
+
"step": 4800
|
| 4626 |
+
},
|
| 4627 |
+
{
|
| 4628 |
+
"Raw_Model_loss": 0.9772452116012573,
|
| 4629 |
+
"Raw_Model_runtime": 0.4433,
|
| 4630 |
+
"Raw_Model_samples_per_second": 22.56,
|
| 4631 |
+
"Raw_Model_steps_per_second": 22.56,
|
| 4632 |
+
"epoch": 208.69565217391303,
|
| 4633 |
+
"step": 4800
|
| 4634 |
+
},
|
| 4635 |
+
{
|
| 4636 |
+
"SWA_loss": 0.8191676139831543,
|
| 4637 |
+
"SWA_runtime": 0.4674,
|
| 4638 |
+
"SWA_samples_per_second": 21.394,
|
| 4639 |
+
"SWA_steps_per_second": 21.394,
|
| 4640 |
+
"epoch": 208.69565217391303,
|
| 4641 |
+
"step": 4800
|
| 4642 |
+
},
|
| 4643 |
+
{
|
| 4644 |
+
"EMA_loss": 0.8595923185348511,
|
| 4645 |
+
"EMA_runtime": 0.4649,
|
| 4646 |
+
"EMA_samples_per_second": 21.511,
|
| 4647 |
+
"EMA_steps_per_second": 21.511,
|
| 4648 |
+
"epoch": 208.69565217391303,
|
| 4649 |
+
"step": 4800
|
| 4650 |
}
|
| 4651 |
],
|
| 4652 |
"logging_steps": 10,
|
|
|
|
| 4666 |
"attributes": {}
|
| 4667 |
}
|
| 4668 |
},
|
| 4669 |
+
"total_flos": 1.2352177659543552e+17,
|
| 4670 |
"train_batch_size": 4,
|
| 4671 |
"trial_name": null,
|
| 4672 |
"trial_params": null
|