Training in progress, epoch 78, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559424792
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21c9f69a173a892667d6dee7aaad371f40e7baf8cb3e9d969f1b6dcde4a515f1
|
| 3 |
size 559424792
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1118926970
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b013c178fe9fdbc20f357d276fbc376ffeb5ae200d4656a25c0840d60905025
|
| 3 |
size 1118926970
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:325b7db62095a95388ccc4086fa74938caa1cf51efa7e26a65d55b88f9c20149
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43652567e6245845fba84f7f23bb5bb43e6dfe5f4e79e8b80a87c4194eca0dd2
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 12.716951370239258,
|
| 3 |
"best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -7651,6 +7651,105 @@
|
|
| 7651 |
"eval_samples_per_second": 29.542,
|
| 7652 |
"eval_steps_per_second": 3.712,
|
| 7653 |
"step": 100485
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7654 |
}
|
| 7655 |
],
|
| 7656 |
"logging_steps": 100,
|
|
@@ -7665,7 +7764,7 @@
|
|
| 7665 |
"early_stopping_threshold": 0.0
|
| 7666 |
},
|
| 7667 |
"attributes": {
|
| 7668 |
-
"early_stopping_patience_counter":
|
| 7669 |
}
|
| 7670 |
},
|
| 7671 |
"TrainerControl": {
|
|
@@ -7674,12 +7773,12 @@
|
|
| 7674 |
"should_evaluate": false,
|
| 7675 |
"should_log": false,
|
| 7676 |
"should_save": true,
|
| 7677 |
-
"should_training_stop":
|
| 7678 |
},
|
| 7679 |
"attributes": {}
|
| 7680 |
}
|
| 7681 |
},
|
| 7682 |
-
"total_flos": 4.
|
| 7683 |
"train_batch_size": 8,
|
| 7684 |
"trial_name": null,
|
| 7685 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 12.716951370239258,
|
| 3 |
"best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
|
| 4 |
+
"epoch": 78.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 101790,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 7651 |
"eval_samples_per_second": 29.542,
|
| 7652 |
"eval_steps_per_second": 3.712,
|
| 7653 |
"step": 100485
|
| 7654 |
+
},
|
| 7655 |
+
{
|
| 7656 |
+
"epoch": 77.01149425287356,
|
| 7657 |
+
"grad_norm": 1.5276216268539429,
|
| 7658 |
+
"learning_rate": 1.8869731800766285e-06,
|
| 7659 |
+
"loss": 12.0855,
|
| 7660 |
+
"step": 100500
|
| 7661 |
+
},
|
| 7662 |
+
{
|
| 7663 |
+
"epoch": 77.08812260536398,
|
| 7664 |
+
"grad_norm": 1.6150214672088623,
|
| 7665 |
+
"learning_rate": 1.839080459770115e-06,
|
| 7666 |
+
"loss": 12.0257,
|
| 7667 |
+
"step": 100600
|
| 7668 |
+
},
|
| 7669 |
+
{
|
| 7670 |
+
"epoch": 77.16475095785441,
|
| 7671 |
+
"grad_norm": 1.33854341506958,
|
| 7672 |
+
"learning_rate": 1.7911877394636015e-06,
|
| 7673 |
+
"loss": 11.8612,
|
| 7674 |
+
"step": 100700
|
| 7675 |
+
},
|
| 7676 |
+
{
|
| 7677 |
+
"epoch": 77.24137931034483,
|
| 7678 |
+
"grad_norm": 3.1214921474456787,
|
| 7679 |
+
"learning_rate": 1.7432950191570881e-06,
|
| 7680 |
+
"loss": 11.8497,
|
| 7681 |
+
"step": 100800
|
| 7682 |
+
},
|
| 7683 |
+
{
|
| 7684 |
+
"epoch": 77.31800766283524,
|
| 7685 |
+
"grad_norm": 1.6240577697753906,
|
| 7686 |
+
"learning_rate": 1.695402298850575e-06,
|
| 7687 |
+
"loss": 11.7641,
|
| 7688 |
+
"step": 100900
|
| 7689 |
+
},
|
| 7690 |
+
{
|
| 7691 |
+
"epoch": 77.39463601532567,
|
| 7692 |
+
"grad_norm": 1.1317625045776367,
|
| 7693 |
+
"learning_rate": 1.6475095785440615e-06,
|
| 7694 |
+
"loss": 11.846,
|
| 7695 |
+
"step": 101000
|
| 7696 |
+
},
|
| 7697 |
+
{
|
| 7698 |
+
"epoch": 77.47126436781609,
|
| 7699 |
+
"grad_norm": 1.294534683227539,
|
| 7700 |
+
"learning_rate": 1.599616858237548e-06,
|
| 7701 |
+
"loss": 11.7364,
|
| 7702 |
+
"step": 101100
|
| 7703 |
+
},
|
| 7704 |
+
{
|
| 7705 |
+
"epoch": 77.5478927203065,
|
| 7706 |
+
"grad_norm": 1.099564790725708,
|
| 7707 |
+
"learning_rate": 1.5517241379310346e-06,
|
| 7708 |
+
"loss": 11.8554,
|
| 7709 |
+
"step": 101200
|
| 7710 |
+
},
|
| 7711 |
+
{
|
| 7712 |
+
"epoch": 77.62452107279694,
|
| 7713 |
+
"grad_norm": 1.1482292413711548,
|
| 7714 |
+
"learning_rate": 1.503831417624521e-06,
|
| 7715 |
+
"loss": 11.9346,
|
| 7716 |
+
"step": 101300
|
| 7717 |
+
},
|
| 7718 |
+
{
|
| 7719 |
+
"epoch": 77.70114942528735,
|
| 7720 |
+
"grad_norm": 1.198670506477356,
|
| 7721 |
+
"learning_rate": 1.4559386973180077e-06,
|
| 7722 |
+
"loss": 11.8685,
|
| 7723 |
+
"step": 101400
|
| 7724 |
+
},
|
| 7725 |
+
{
|
| 7726 |
+
"epoch": 77.77777777777777,
|
| 7727 |
+
"grad_norm": 2.162407159805298,
|
| 7728 |
+
"learning_rate": 1.4080459770114944e-06,
|
| 7729 |
+
"loss": 11.8622,
|
| 7730 |
+
"step": 101500
|
| 7731 |
+
},
|
| 7732 |
+
{
|
| 7733 |
+
"epoch": 77.8544061302682,
|
| 7734 |
+
"grad_norm": 1.3662519454956055,
|
| 7735 |
+
"learning_rate": 1.3601532567049808e-06,
|
| 7736 |
+
"loss": 11.9364,
|
| 7737 |
+
"step": 101600
|
| 7738 |
+
},
|
| 7739 |
+
{
|
| 7740 |
+
"epoch": 77.93103448275862,
|
| 7741 |
+
"grad_norm": 1.1566288471221924,
|
| 7742 |
+
"learning_rate": 1.3122605363984675e-06,
|
| 7743 |
+
"loss": 11.7345,
|
| 7744 |
+
"step": 101700
|
| 7745 |
+
},
|
| 7746 |
+
{
|
| 7747 |
+
"epoch": 78.0,
|
| 7748 |
+
"eval_loss": 12.726273536682129,
|
| 7749 |
+
"eval_runtime": 44.1782,
|
| 7750 |
+
"eval_samples_per_second": 29.539,
|
| 7751 |
+
"eval_steps_per_second": 3.712,
|
| 7752 |
+
"step": 101790
|
| 7753 |
}
|
| 7754 |
],
|
| 7755 |
"logging_steps": 100,
|
|
|
|
| 7764 |
"early_stopping_threshold": 0.0
|
| 7765 |
},
|
| 7766 |
"attributes": {
|
| 7767 |
+
"early_stopping_patience_counter": 10
|
| 7768 |
}
|
| 7769 |
},
|
| 7770 |
"TrainerControl": {
|
|
|
|
| 7773 |
"should_evaluate": false,
|
| 7774 |
"should_log": false,
|
| 7775 |
"should_save": true,
|
| 7776 |
+
"should_training_stop": true
|
| 7777 |
},
|
| 7778 |
"attributes": {}
|
| 7779 |
}
|
| 7780 |
},
|
| 7781 |
+
"total_flos": 4.742440753614336e+16,
|
| 7782 |
"train_batch_size": 8,
|
| 7783 |
"trial_name": null,
|
| 7784 |
"trial_params": null
|