Training in progress, epoch 8, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step3750/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3750/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3750/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3750/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3750/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 515926240
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:047ccf1116a27aef838b2cf5df06c84e4bb47355dadf04bbc7c769c648c695a1
|
| 3 |
size 515926240
|
last-checkpoint/global_step3750/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b00fcb4ad728e6db2e89317543a69ff85e21e32f4dee2b06bd7b6a430f5d14f5
|
| 3 |
+
size 388949733
|
last-checkpoint/global_step3750/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5169d2293afafcbaf979e1dd49480bfd6c4e0d6f20b335853e598bca09cd960c
|
| 3 |
+
size 388949797
|
last-checkpoint/global_step3750/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ecafa13a717e6ff9e3063143aa1ed4e5d14ace6573e8c9bb5789db122bcd5d64
|
| 3 |
+
size 388949797
|
last-checkpoint/global_step3750/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:442c8069b48d7cc14e8386947976f9cff945d8ea06ed46ad502dc307465626d3
|
| 3 |
+
size 388949797
|
last-checkpoint/global_step3750/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:edc16391daa13a5806f041f469a67b9568f43248f9b6a0589ed7b8453b261f48
|
| 3 |
+
size 982830693
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step3750
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:299c5961b9e338d8364f29ced7a7fb3e75f50e59821688a9e8a25b3194dbb538
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f719324819b1361b02477a54a0118529d0dd0ce09bbe595cfcd2887f984dce1b
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dcbfd45a4fefb05cc24a437f3cd43b41e4d635d02a5f6cceac023d5107bddf30
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d63c70c685f3c487dd218b7cdb050acfe8f45710a539363f79ab3d74af9dbe6
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d61bfc2f268866ccd39b04a4ca0145d716056cc446969bdbb0838734542c317
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 8.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -5780,6 +5780,84 @@
|
|
| 5780 |
"eval_samples_per_second": 102.703,
|
| 5781 |
"eval_steps_per_second": 12.883,
|
| 5782 |
"step": 3700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5783 |
}
|
| 5784 |
],
|
| 5785 |
"logging_steps": 5,
|
|
@@ -5808,7 +5886,7 @@
|
|
| 5808 |
"attributes": {}
|
| 5809 |
}
|
| 5810 |
},
|
| 5811 |
-
"total_flos": 1.
|
| 5812 |
"train_batch_size": 2,
|
| 5813 |
"trial_name": null,
|
| 5814 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.7094199061393738,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 8.351893095768375,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 3750,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 5780 |
"eval_samples_per_second": 102.703,
|
| 5781 |
"eval_steps_per_second": 12.883,
|
| 5782 |
"step": 3700
|
| 5783 |
+
},
|
| 5784 |
+
{
|
| 5785 |
+
"epoch": 8.251670378619155,
|
| 5786 |
+
"grad_norm": 1.1108819246292114,
|
| 5787 |
+
"learning_rate": 1.218548828243257e-05,
|
| 5788 |
+
"loss": 0.6861,
|
| 5789 |
+
"step": 3705
|
| 5790 |
+
},
|
| 5791 |
+
{
|
| 5792 |
+
"epoch": 8.262806236080179,
|
| 5793 |
+
"grad_norm": 1.2709901332855225,
|
| 5794 |
+
"learning_rate": 1.2173041072641407e-05,
|
| 5795 |
+
"loss": 0.6582,
|
| 5796 |
+
"step": 3710
|
| 5797 |
+
},
|
| 5798 |
+
{
|
| 5799 |
+
"epoch": 8.273942093541203,
|
| 5800 |
+
"grad_norm": 1.2328144311904907,
|
| 5801 |
+
"learning_rate": 1.2160584908401693e-05,
|
| 5802 |
+
"loss": 0.7973,
|
| 5803 |
+
"step": 3715
|
| 5804 |
+
},
|
| 5805 |
+
{
|
| 5806 |
+
"epoch": 8.285077951002227,
|
| 5807 |
+
"grad_norm": 1.4351321458816528,
|
| 5808 |
+
"learning_rate": 1.2148119821071803e-05,
|
| 5809 |
+
"loss": 0.7226,
|
| 5810 |
+
"step": 3720
|
| 5811 |
+
},
|
| 5812 |
+
{
|
| 5813 |
+
"epoch": 8.296213808463252,
|
| 5814 |
+
"grad_norm": 1.3934727907180786,
|
| 5815 |
+
"learning_rate": 1.2135645842032582e-05,
|
| 5816 |
+
"loss": 0.6999,
|
| 5817 |
+
"step": 3725
|
| 5818 |
+
},
|
| 5819 |
+
{
|
| 5820 |
+
"epoch": 8.307349665924276,
|
| 5821 |
+
"grad_norm": 1.5040556192398071,
|
| 5822 |
+
"learning_rate": 1.2123163002687258e-05,
|
| 5823 |
+
"loss": 0.7128,
|
| 5824 |
+
"step": 3730
|
| 5825 |
+
},
|
| 5826 |
+
{
|
| 5827 |
+
"epoch": 8.3184855233853,
|
| 5828 |
+
"grad_norm": 1.6388317346572876,
|
| 5829 |
+
"learning_rate": 1.2110671334461362e-05,
|
| 5830 |
+
"loss": 0.7352,
|
| 5831 |
+
"step": 3735
|
| 5832 |
+
},
|
| 5833 |
+
{
|
| 5834 |
+
"epoch": 8.329621380846325,
|
| 5835 |
+
"grad_norm": 1.1943713426589966,
|
| 5836 |
+
"learning_rate": 1.2098170868802653e-05,
|
| 5837 |
+
"loss": 0.7112,
|
| 5838 |
+
"step": 3740
|
| 5839 |
+
},
|
| 5840 |
+
{
|
| 5841 |
+
"epoch": 8.340757238307349,
|
| 5842 |
+
"grad_norm": 1.1306072473526,
|
| 5843 |
+
"learning_rate": 1.208566163718104e-05,
|
| 5844 |
+
"loss": 0.6745,
|
| 5845 |
+
"step": 3745
|
| 5846 |
+
},
|
| 5847 |
+
{
|
| 5848 |
+
"epoch": 8.351893095768375,
|
| 5849 |
+
"grad_norm": 1.227961540222168,
|
| 5850 |
+
"learning_rate": 1.2073143671088499e-05,
|
| 5851 |
+
"loss": 0.7776,
|
| 5852 |
+
"step": 3750
|
| 5853 |
+
},
|
| 5854 |
+
{
|
| 5855 |
+
"epoch": 8.351893095768375,
|
| 5856 |
+
"eval_loss": 0.7094199061393738,
|
| 5857 |
+
"eval_runtime": 8.4253,
|
| 5858 |
+
"eval_samples_per_second": 102.193,
|
| 5859 |
+
"eval_steps_per_second": 12.819,
|
| 5860 |
+
"step": 3750
|
| 5861 |
}
|
| 5862 |
],
|
| 5863 |
"logging_steps": 5,
|
|
|
|
| 5886 |
"attributes": {}
|
| 5887 |
}
|
| 5888 |
},
|
| 5889 |
+
"total_flos": 1.688518153497215e+18,
|
| 5890 |
"train_batch_size": 2,
|
| 5891 |
"trial_name": null,
|
| 5892 |
"trial_params": null
|