Training in progress, epoch 3, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step3151/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3151/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3151/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3151/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3151/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 98088784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a31cb8fd392f2131e4bf934e5cfc6c725cef9d5daaa800fec946c0c6bb095c87
|
| 3 |
size 98088784
|
last-checkpoint/global_step3151/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19fed4d1e68e884c33108eb4a6d195216062c7261df3be3f1284ee1dfa7f661b
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step3151/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe7c11b224504d4b7ce92dcd86eeac367b83fed77a084e0ca74be6e3f15c087c
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step3151/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70f041e6e4c2fcd5847cb26f89b7e393ed9654a02c6c3e1e64c356157c74568f
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step3151/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:864c9d0a4a6a6df9e106685e32c07abf56966577d2f2ac54cfcdbefffe092ac4
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step3151/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01fe20a677165e10008e664a981608bcd879f3dd7e16388edee6794e047fb6c8
|
| 3 |
+
size 564993061
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step3151
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff6d22752bf6918604c195c68c726c5f986752b1c7745bc126a895fa5378f563
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b418cf94798b0605942c9f00109a5edef50c6b4d7c3fa15f7ef19ea7a3218590
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ea4486e4c7872a12462ac87ce32aa65944ed660174e541883f84fbec5d08986
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f803476ab2d8569ffcfe385fec2e225f971e23d78b9f9bb3450d35d825e9ea35
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49b27670a16b804d5788140685f115d1185aa03128bfff2551a4cdbedd7bc74b
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 3.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4844,6 +4844,84 @@
|
|
| 4844 |
"eval_samples_per_second": 127.13,
|
| 4845 |
"eval_steps_per_second": 15.899,
|
| 4846 |
"step": 3100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4847 |
}
|
| 4848 |
],
|
| 4849 |
"logging_steps": 5,
|
|
@@ -4872,7 +4950,7 @@
|
|
| 4872 |
"attributes": {}
|
| 4873 |
}
|
| 4874 |
},
|
| 4875 |
-
"total_flos": 1.
|
| 4876 |
"train_batch_size": 2,
|
| 4877 |
"trial_name": null,
|
| 4878 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.6195825934410095,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 3.0881812614819353,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 3150,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4844 |
"eval_samples_per_second": 127.13,
|
| 4845 |
"eval_steps_per_second": 15.899,
|
| 4846 |
"step": 3100
|
| 4847 |
+
},
|
| 4848 |
+
{
|
| 4849 |
+
"epoch": 3.0440906307409676,
|
| 4850 |
+
"grad_norm": 0.20413470268249512,
|
| 4851 |
+
"learning_rate": 3.6132616490418216e-05,
|
| 4852 |
+
"loss": 0.652,
|
| 4853 |
+
"step": 3105
|
| 4854 |
+
},
|
| 4855 |
+
{
|
| 4856 |
+
"epoch": 3.0489895897121864,
|
| 4857 |
+
"grad_norm": 0.23027652502059937,
|
| 4858 |
+
"learning_rate": 3.5966209323531355e-05,
|
| 4859 |
+
"loss": 0.6431,
|
| 4860 |
+
"step": 3110
|
| 4861 |
+
},
|
| 4862 |
+
{
|
| 4863 |
+
"epoch": 3.0538885486834046,
|
| 4864 |
+
"grad_norm": 0.24030309915542603,
|
| 4865 |
+
"learning_rate": 3.5800001854923e-05,
|
| 4866 |
+
"loss": 0.6301,
|
| 4867 |
+
"step": 3115
|
| 4868 |
+
},
|
| 4869 |
+
{
|
| 4870 |
+
"epoch": 3.0587875076546234,
|
| 4871 |
+
"grad_norm": 0.24044173955917358,
|
| 4872 |
+
"learning_rate": 3.563399579263203e-05,
|
| 4873 |
+
"loss": 0.6547,
|
| 4874 |
+
"step": 3120
|
| 4875 |
+
},
|
| 4876 |
+
{
|
| 4877 |
+
"epoch": 3.063686466625842,
|
| 4878 |
+
"grad_norm": 0.2295493483543396,
|
| 4879 |
+
"learning_rate": 3.546819284262758e-05,
|
| 4880 |
+
"loss": 0.6483,
|
| 4881 |
+
"step": 3125
|
| 4882 |
+
},
|
| 4883 |
+
{
|
| 4884 |
+
"epoch": 3.068585425597061,
|
| 4885 |
+
"grad_norm": 0.2551316022872925,
|
| 4886 |
+
"learning_rate": 3.53025947087915e-05,
|
| 4887 |
+
"loss": 0.6436,
|
| 4888 |
+
"step": 3130
|
| 4889 |
+
},
|
| 4890 |
+
{
|
| 4891 |
+
"epoch": 3.073484384568279,
|
| 4892 |
+
"grad_norm": 0.22237151861190796,
|
| 4893 |
+
"learning_rate": 3.513720309290082e-05,
|
| 4894 |
+
"loss": 0.638,
|
| 4895 |
+
"step": 3135
|
| 4896 |
+
},
|
| 4897 |
+
{
|
| 4898 |
+
"epoch": 3.078383343539498,
|
| 4899 |
+
"grad_norm": 0.20863863825798035,
|
| 4900 |
+
"learning_rate": 3.497201969461032e-05,
|
| 4901 |
+
"loss": 0.6661,
|
| 4902 |
+
"step": 3140
|
| 4903 |
+
},
|
| 4904 |
+
{
|
| 4905 |
+
"epoch": 3.0832823025107166,
|
| 4906 |
+
"grad_norm": 0.25886034965515137,
|
| 4907 |
+
"learning_rate": 3.480704621143496e-05,
|
| 4908 |
+
"loss": 0.6538,
|
| 4909 |
+
"step": 3145
|
| 4910 |
+
},
|
| 4911 |
+
{
|
| 4912 |
+
"epoch": 3.0881812614819353,
|
| 4913 |
+
"grad_norm": 0.24331150949001312,
|
| 4914 |
+
"learning_rate": 3.464228433873255e-05,
|
| 4915 |
+
"loss": 0.6569,
|
| 4916 |
+
"step": 3150
|
| 4917 |
+
},
|
| 4918 |
+
{
|
| 4919 |
+
"epoch": 3.0881812614819353,
|
| 4920 |
+
"eval_loss": 0.6195825934410095,
|
| 4921 |
+
"eval_runtime": 15.4247,
|
| 4922 |
+
"eval_samples_per_second": 127.004,
|
| 4923 |
+
"eval_steps_per_second": 15.884,
|
| 4924 |
+
"step": 3150
|
| 4925 |
}
|
| 4926 |
],
|
| 4927 |
"logging_steps": 5,
|
|
|
|
| 4950 |
"attributes": {}
|
| 4951 |
}
|
| 4952 |
},
|
| 4953 |
+
"total_flos": 1.6246141644313723e+18,
|
| 4954 |
"train_batch_size": 2,
|
| 4955 |
"trial_name": null,
|
| 4956 |
"trial_params": null
|