Training in progress, step 24500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 891558696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a24784d7d4d1fc3fbb8fc67b836bb1c712f3f7b0c64c99ff1439b5fe2a051a5b
|
| 3 |
size 891558696
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1783272762
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:970bf634ac4ebe2edc929d89a76708d0db104aba94f7c07086ba7355e47fc214
|
| 3 |
size 1783272762
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21c4e82da06fbd2a474e3defc7564dd624a46c9abb731c268ce51609d77b1972
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81a72d6c77b06746b972f475e80b7808db62786b79e89637fe4fa97684c444ab
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
-
"best_model_checkpoint": "./fine-tuned/checkpoint-
|
| 4 |
-
"epoch": 1.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -3751,6 +3751,84 @@
|
|
| 3751 |
"eval_samples_per_second": 22.717,
|
| 3752 |
"eval_steps_per_second": 5.679,
|
| 3753 |
"step": 24000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3754 |
}
|
| 3755 |
],
|
| 3756 |
"logging_steps": 50,
|
|
@@ -3770,7 +3848,7 @@
|
|
| 3770 |
"attributes": {}
|
| 3771 |
}
|
| 3772 |
},
|
| 3773 |
-
"total_flos": 5.
|
| 3774 |
"train_batch_size": 4,
|
| 3775 |
"trial_name": null,
|
| 3776 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.08147666603326797,
|
| 3 |
+
"best_model_checkpoint": "./fine-tuned/checkpoint-24500",
|
| 4 |
+
"epoch": 1.96,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 24500,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 3751 |
"eval_samples_per_second": 22.717,
|
| 3752 |
"eval_steps_per_second": 5.679,
|
| 3753 |
"step": 24000
|
| 3754 |
+
},
|
| 3755 |
+
{
|
| 3756 |
+
"epoch": 1.924,
|
| 3757 |
+
"grad_norm": 0.18958355486392975,
|
| 3758 |
+
"learning_rate": 1.1496e-06,
|
| 3759 |
+
"loss": 0.054,
|
| 3760 |
+
"step": 24050
|
| 3761 |
+
},
|
| 3762 |
+
{
|
| 3763 |
+
"epoch": 1.928,
|
| 3764 |
+
"grad_norm": 0.12351219356060028,
|
| 3765 |
+
"learning_rate": 1.0896e-06,
|
| 3766 |
+
"loss": 0.0614,
|
| 3767 |
+
"step": 24100
|
| 3768 |
+
},
|
| 3769 |
+
{
|
| 3770 |
+
"epoch": 1.932,
|
| 3771 |
+
"grad_norm": 0.175857812166214,
|
| 3772 |
+
"learning_rate": 1.0296e-06,
|
| 3773 |
+
"loss": 0.0541,
|
| 3774 |
+
"step": 24150
|
| 3775 |
+
},
|
| 3776 |
+
{
|
| 3777 |
+
"epoch": 1.936,
|
| 3778 |
+
"grad_norm": 0.15968987345695496,
|
| 3779 |
+
"learning_rate": 9.696e-07,
|
| 3780 |
+
"loss": 0.0568,
|
| 3781 |
+
"step": 24200
|
| 3782 |
+
},
|
| 3783 |
+
{
|
| 3784 |
+
"epoch": 1.94,
|
| 3785 |
+
"grad_norm": 0.15781116485595703,
|
| 3786 |
+
"learning_rate": 9.096e-07,
|
| 3787 |
+
"loss": 0.055,
|
| 3788 |
+
"step": 24250
|
| 3789 |
+
},
|
| 3790 |
+
{
|
| 3791 |
+
"epoch": 1.944,
|
| 3792 |
+
"grad_norm": 0.17276370525360107,
|
| 3793 |
+
"learning_rate": 8.496000000000001e-07,
|
| 3794 |
+
"loss": 0.0475,
|
| 3795 |
+
"step": 24300
|
| 3796 |
+
},
|
| 3797 |
+
{
|
| 3798 |
+
"epoch": 1.948,
|
| 3799 |
+
"grad_norm": 0.09511862695217133,
|
| 3800 |
+
"learning_rate": 7.896e-07,
|
| 3801 |
+
"loss": 0.0509,
|
| 3802 |
+
"step": 24350
|
| 3803 |
+
},
|
| 3804 |
+
{
|
| 3805 |
+
"epoch": 1.952,
|
| 3806 |
+
"grad_norm": 0.1636885106563568,
|
| 3807 |
+
"learning_rate": 7.296000000000001e-07,
|
| 3808 |
+
"loss": 0.0508,
|
| 3809 |
+
"step": 24400
|
| 3810 |
+
},
|
| 3811 |
+
{
|
| 3812 |
+
"epoch": 1.956,
|
| 3813 |
+
"grad_norm": 0.12255977094173431,
|
| 3814 |
+
"learning_rate": 6.696e-07,
|
| 3815 |
+
"loss": 0.0467,
|
| 3816 |
+
"step": 24450
|
| 3817 |
+
},
|
| 3818 |
+
{
|
| 3819 |
+
"epoch": 1.96,
|
| 3820 |
+
"grad_norm": 0.18492284417152405,
|
| 3821 |
+
"learning_rate": 6.096000000000001e-07,
|
| 3822 |
+
"loss": 0.0559,
|
| 3823 |
+
"step": 24500
|
| 3824 |
+
},
|
| 3825 |
+
{
|
| 3826 |
+
"epoch": 1.96,
|
| 3827 |
+
"eval_loss": 0.08147666603326797,
|
| 3828 |
+
"eval_runtime": 88.0584,
|
| 3829 |
+
"eval_samples_per_second": 22.712,
|
| 3830 |
+
"eval_steps_per_second": 5.678,
|
| 3831 |
+
"step": 24500
|
| 3832 |
}
|
| 3833 |
],
|
| 3834 |
"logging_steps": 50,
|
|
|
|
| 3848 |
"attributes": {}
|
| 3849 |
}
|
| 3850 |
},
|
| 3851 |
+
"total_flos": 5.967787327488e+16,
|
| 3852 |
"train_batch_size": 4,
|
| 3853 |
"trial_name": null,
|
| 3854 |
"trial_params": null
|