Training in progress, epoch 2, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step2500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2500/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 98088784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2adcc2abd2157fb60ae9fb647b506419ffcf749d96d577540797eb95581326fa
|
| 3 |
size 98088784
|
last-checkpoint/global_step2500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:442f4c30ec475090cd43f553a9f68a16138c3c60ff13adcc8dfb1a367ebc6600
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step2500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:951d8778340da268ef236cb8a9a23d9d6623d4fe1f188a61fbe08f9098254ed3
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step2500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70a4143fcbe528553de8b626a9a55ab08a186726536821f8d0397e74143dcc38
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step2500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cba209f5c515db450266bb3a7413d4377e882192a8de72815283904b18e5edf8
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step2500/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72f249fa57c54d60decf5afeb96a3baa12044d9c4ac4799cfb6858f33b1839eb
|
| 3 |
+
size 564993061
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step2500
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03622e737b0b8bc31e0961f16fb24ed1f36ca068fbf92172955016d36744ba8f
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8631d6a3a14a1d8ffe0ecd6bcdf565bac306a775dbc9ee116e9cb0ae92390769
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a6ff2489fd57ce8b4c3e4b6a15142a4393a7355a3116f159e4e45618f906165
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:929048071bdf5eb3856c325b4b779be5c26afd2cebae2821e0d54a31b74a2ad1
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b9eaed5707858cfa6cbc4cad23f1229a605fc99dd72f9963e5671dd9966384a
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3830,6 +3830,84 @@
|
|
| 3830 |
"eval_samples_per_second": 126.802,
|
| 3831 |
"eval_steps_per_second": 15.858,
|
| 3832 |
"step": 2450
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3833 |
}
|
| 3834 |
],
|
| 3835 |
"logging_steps": 5,
|
|
@@ -3858,7 +3936,7 @@
|
|
| 3858 |
"attributes": {}
|
| 3859 |
}
|
| 3860 |
},
|
| 3861 |
-
"total_flos": 1.
|
| 3862 |
"train_batch_size": 2,
|
| 3863 |
"trial_name": null,
|
| 3864 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.6351883411407471,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.4507042253521125,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 2500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3830 |
"eval_samples_per_second": 126.802,
|
| 3831 |
"eval_steps_per_second": 15.858,
|
| 3832 |
"step": 2450
|
| 3833 |
+
},
|
| 3834 |
+
{
|
| 3835 |
+
"epoch": 2.4066135946111453,
|
| 3836 |
+
"grad_norm": 0.21273045241832733,
|
| 3837 |
+
"learning_rate": 5.8806248666594436e-05,
|
| 3838 |
+
"loss": 0.6671,
|
| 3839 |
+
"step": 2455
|
| 3840 |
+
},
|
| 3841 |
+
{
|
| 3842 |
+
"epoch": 2.4115125535823636,
|
| 3843 |
+
"grad_norm": 0.21046888828277588,
|
| 3844 |
+
"learning_rate": 5.862897595233799e-05,
|
| 3845 |
+
"loss": 0.6632,
|
| 3846 |
+
"step": 2460
|
| 3847 |
+
},
|
| 3848 |
+
{
|
| 3849 |
+
"epoch": 2.4164115125535823,
|
| 3850 |
+
"grad_norm": 0.2251223474740982,
|
| 3851 |
+
"learning_rate": 5.845167004136867e-05,
|
| 3852 |
+
"loss": 0.6548,
|
| 3853 |
+
"step": 2465
|
| 3854 |
+
},
|
| 3855 |
+
{
|
| 3856 |
+
"epoch": 2.421310471524801,
|
| 3857 |
+
"grad_norm": 0.2118569016456604,
|
| 3858 |
+
"learning_rate": 5.827433275577903e-05,
|
| 3859 |
+
"loss": 0.6709,
|
| 3860 |
+
"step": 2470
|
| 3861 |
+
},
|
| 3862 |
+
{
|
| 3863 |
+
"epoch": 2.42620943049602,
|
| 3864 |
+
"grad_norm": 0.20193223655223846,
|
| 3865 |
+
"learning_rate": 5.809696591798407e-05,
|
| 3866 |
+
"loss": 0.6616,
|
| 3867 |
+
"step": 2475
|
| 3868 |
+
},
|
| 3869 |
+
{
|
| 3870 |
+
"epoch": 2.431108389467238,
|
| 3871 |
+
"grad_norm": 0.24873106181621552,
|
| 3872 |
+
"learning_rate": 5.7919571350702466e-05,
|
| 3873 |
+
"loss": 0.6734,
|
| 3874 |
+
"step": 2480
|
| 3875 |
+
},
|
| 3876 |
+
{
|
| 3877 |
+
"epoch": 2.436007348438457,
|
| 3878 |
+
"grad_norm": 0.19868969917297363,
|
| 3879 |
+
"learning_rate": 5.774215087693786e-05,
|
| 3880 |
+
"loss": 0.6802,
|
| 3881 |
+
"step": 2485
|
| 3882 |
+
},
|
| 3883 |
+
{
|
| 3884 |
+
"epoch": 2.4409063074096755,
|
| 3885 |
+
"grad_norm": 0.22367221117019653,
|
| 3886 |
+
"learning_rate": 5.7564706319960134e-05,
|
| 3887 |
+
"loss": 0.664,
|
| 3888 |
+
"step": 2490
|
| 3889 |
+
},
|
| 3890 |
+
{
|
| 3891 |
+
"epoch": 2.4458052663808942,
|
| 3892 |
+
"grad_norm": 0.2064606100320816,
|
| 3893 |
+
"learning_rate": 5.7387239503286674e-05,
|
| 3894 |
+
"loss": 0.6595,
|
| 3895 |
+
"step": 2495
|
| 3896 |
+
},
|
| 3897 |
+
{
|
| 3898 |
+
"epoch": 2.4507042253521125,
|
| 3899 |
+
"grad_norm": 0.23037855327129364,
|
| 3900 |
+
"learning_rate": 5.7209752250663576e-05,
|
| 3901 |
+
"loss": 0.6669,
|
| 3902 |
+
"step": 2500
|
| 3903 |
+
},
|
| 3904 |
+
{
|
| 3905 |
+
"epoch": 2.4507042253521125,
|
| 3906 |
+
"eval_loss": 0.6351883411407471,
|
| 3907 |
+
"eval_runtime": 15.5444,
|
| 3908 |
+
"eval_samples_per_second": 126.026,
|
| 3909 |
+
"eval_steps_per_second": 15.761,
|
| 3910 |
+
"step": 2500
|
| 3911 |
}
|
| 3912 |
],
|
| 3913 |
"logging_steps": 5,
|
|
|
|
| 3936 |
"attributes": {}
|
| 3937 |
}
|
| 3938 |
},
|
| 3939 |
+
"total_flos": 1.288170288764158e+18,
|
| 3940 |
"train_batch_size": 2,
|
| 3941 |
"trial_name": null,
|
| 3942 |
"trial_params": null
|