Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step4500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4500/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +160 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1037269336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0a21dc5f4f9acf3af3f8980785056b6b5ada5cb15eef4540db5bca39c790390
|
| 3 |
size 1037269336
|
last-checkpoint/global_step4500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d31ac560812e943c8c288747e5d2039a96a47afb0abcb9be2e38f7662b5aa10
|
| 3 |
+
size 781993445
|
last-checkpoint/global_step4500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5da41387f3470e28461291b4767a43d23bd0fe5626bc769fdd43568a69ed5c8d
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step4500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da0c7d889d827a14f364818784bf4f3e8f06fc8163b92def019bbd3d708ff6ee
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step4500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b7da489d965d3264064e452ec2b0f1161d9926dfe1c6aab8ef3eb9c5e3f77e8c
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step4500/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5c5f490e1ca842d837adac5ce91ab10507f4511448a022fde5f577639115024
|
| 3 |
+
size 2610290277
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step4500
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3630f5c7f559df2743db6022b9a9e3a578f1caa3a824d427deb7eb53b5753113
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a975041d5ccbda078ebb49cae6863f266b7176846aea763c1f5991e324beb6a
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e16d82cf7cd32b948d7f53723214355031cb0c2f352b62b817e45196b5c3bed
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2faeda1cf20088c59a4c59ca63cd8875d237d2179a7055592aef1e315f61c7ea
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e70fa6096403ae68870c39096182c6dd70befee0d4111f312991f4b6364fbfa
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6872,6 +6872,162 @@
|
|
| 6872 |
"eval_samples_per_second": 174.461,
|
| 6873 |
"eval_steps_per_second": 10.94,
|
| 6874 |
"step": 4400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6875 |
}
|
| 6876 |
],
|
| 6877 |
"logging_steps": 5,
|
|
@@ -6900,7 +7056,7 @@
|
|
| 6900 |
"attributes": {}
|
| 6901 |
}
|
| 6902 |
},
|
| 6903 |
-
"total_flos": 1.
|
| 6904 |
"train_batch_size": 4,
|
| 6905 |
"trial_name": null,
|
| 6906 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 1.89194917678833,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.6541648495420846,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 4500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6872 |
"eval_samples_per_second": 174.461,
|
| 6873 |
"eval_steps_per_second": 10.94,
|
| 6874 |
"step": 4400
|
| 6875 |
+
},
|
| 6876 |
+
{
|
| 6877 |
+
"epoch": 0.6403547027184183,
|
| 6878 |
+
"grad_norm": 2.5073940753936768,
|
| 6879 |
+
"learning_rate": 6.029659568988577e-05,
|
| 6880 |
+
"loss": 2.1659,
|
| 6881 |
+
"step": 4405
|
| 6882 |
+
},
|
| 6883 |
+
{
|
| 6884 |
+
"epoch": 0.6410815525512429,
|
| 6885 |
+
"grad_norm": 2.4761996269226074,
|
| 6886 |
+
"learning_rate": 6.021875803667634e-05,
|
| 6887 |
+
"loss": 1.9834,
|
| 6888 |
+
"step": 4410
|
| 6889 |
+
},
|
| 6890 |
+
{
|
| 6891 |
+
"epoch": 0.6418084023840674,
|
| 6892 |
+
"grad_norm": 2.326512336730957,
|
| 6893 |
+
"learning_rate": 6.014089490539603e-05,
|
| 6894 |
+
"loss": 2.0396,
|
| 6895 |
+
"step": 4415
|
| 6896 |
+
},
|
| 6897 |
+
{
|
| 6898 |
+
"epoch": 0.642535252216892,
|
| 6899 |
+
"grad_norm": 2.1320672035217285,
|
| 6900 |
+
"learning_rate": 6.0063006492065156e-05,
|
| 6901 |
+
"loss": 2.1046,
|
| 6902 |
+
"step": 4420
|
| 6903 |
+
},
|
| 6904 |
+
{
|
| 6905 |
+
"epoch": 0.6432621020497166,
|
| 6906 |
+
"grad_norm": 2.4910004138946533,
|
| 6907 |
+
"learning_rate": 5.998509299276773e-05,
|
| 6908 |
+
"loss": 2.0383,
|
| 6909 |
+
"step": 4425
|
| 6910 |
+
},
|
| 6911 |
+
{
|
| 6912 |
+
"epoch": 0.643988951882541,
|
| 6913 |
+
"grad_norm": 2.5594482421875,
|
| 6914 |
+
"learning_rate": 5.990715460365091e-05,
|
| 6915 |
+
"loss": 1.9068,
|
| 6916 |
+
"step": 4430
|
| 6917 |
+
},
|
| 6918 |
+
{
|
| 6919 |
+
"epoch": 0.6447158017153656,
|
| 6920 |
+
"grad_norm": 2.314884662628174,
|
| 6921 |
+
"learning_rate": 5.9829191520924444e-05,
|
| 6922 |
+
"loss": 1.9658,
|
| 6923 |
+
"step": 4435
|
| 6924 |
+
},
|
| 6925 |
+
{
|
| 6926 |
+
"epoch": 0.6454426515481901,
|
| 6927 |
+
"grad_norm": 2.5968551635742188,
|
| 6928 |
+
"learning_rate": 5.975120394086035e-05,
|
| 6929 |
+
"loss": 2.0312,
|
| 6930 |
+
"step": 4440
|
| 6931 |
+
},
|
| 6932 |
+
{
|
| 6933 |
+
"epoch": 0.6461695013810147,
|
| 6934 |
+
"grad_norm": 2.324385166168213,
|
| 6935 |
+
"learning_rate": 5.967319205979226e-05,
|
| 6936 |
+
"loss": 1.9554,
|
| 6937 |
+
"step": 4445
|
| 6938 |
+
},
|
| 6939 |
+
{
|
| 6940 |
+
"epoch": 0.6468963512138393,
|
| 6941 |
+
"grad_norm": 2.4644150733947754,
|
| 6942 |
+
"learning_rate": 5.9595156074114964e-05,
|
| 6943 |
+
"loss": 2.1902,
|
| 6944 |
+
"step": 4450
|
| 6945 |
+
},
|
| 6946 |
+
{
|
| 6947 |
+
"epoch": 0.6468963512138393,
|
| 6948 |
+
"eval_loss": 1.904667854309082,
|
| 6949 |
+
"eval_runtime": 21.5453,
|
| 6950 |
+
"eval_samples_per_second": 153.212,
|
| 6951 |
+
"eval_steps_per_second": 9.608,
|
| 6952 |
+
"step": 4450
|
| 6953 |
+
},
|
| 6954 |
+
{
|
| 6955 |
+
"epoch": 0.6476232010466637,
|
| 6956 |
+
"grad_norm": 2.2570154666900635,
|
| 6957 |
+
"learning_rate": 5.9517096180283985e-05,
|
| 6958 |
+
"loss": 2.1495,
|
| 6959 |
+
"step": 4455
|
| 6960 |
+
},
|
| 6961 |
+
{
|
| 6962 |
+
"epoch": 0.6483500508794883,
|
| 6963 |
+
"grad_norm": 2.676832914352417,
|
| 6964 |
+
"learning_rate": 5.9439012574815014e-05,
|
| 6965 |
+
"loss": 2.1315,
|
| 6966 |
+
"step": 4460
|
| 6967 |
+
},
|
| 6968 |
+
{
|
| 6969 |
+
"epoch": 0.6490769007123128,
|
| 6970 |
+
"grad_norm": 2.706221103668213,
|
| 6971 |
+
"learning_rate": 5.9360905454283424e-05,
|
| 6972 |
+
"loss": 2.0839,
|
| 6973 |
+
"step": 4465
|
| 6974 |
+
},
|
| 6975 |
+
{
|
| 6976 |
+
"epoch": 0.6498037505451374,
|
| 6977 |
+
"grad_norm": 2.5857553482055664,
|
| 6978 |
+
"learning_rate": 5.92827750153238e-05,
|
| 6979 |
+
"loss": 2.006,
|
| 6980 |
+
"step": 4470
|
| 6981 |
+
},
|
| 6982 |
+
{
|
| 6983 |
+
"epoch": 0.650530600377962,
|
| 6984 |
+
"grad_norm": 2.223796844482422,
|
| 6985 |
+
"learning_rate": 5.9204621454629433e-05,
|
| 6986 |
+
"loss": 2.0653,
|
| 6987 |
+
"step": 4475
|
| 6988 |
+
},
|
| 6989 |
+
{
|
| 6990 |
+
"epoch": 0.6512574502107864,
|
| 6991 |
+
"grad_norm": 2.7300755977630615,
|
| 6992 |
+
"learning_rate": 5.9126444968951824e-05,
|
| 6993 |
+
"loss": 2.0131,
|
| 6994 |
+
"step": 4480
|
| 6995 |
+
},
|
| 6996 |
+
{
|
| 6997 |
+
"epoch": 0.651984300043611,
|
| 6998 |
+
"grad_norm": 2.2716171741485596,
|
| 6999 |
+
"learning_rate": 5.904824575510018e-05,
|
| 7000 |
+
"loss": 2.0303,
|
| 7001 |
+
"step": 4485
|
| 7002 |
+
},
|
| 7003 |
+
{
|
| 7004 |
+
"epoch": 0.6527111498764355,
|
| 7005 |
+
"grad_norm": 2.462047815322876,
|
| 7006 |
+
"learning_rate": 5.8970024009940926e-05,
|
| 7007 |
+
"loss": 1.9954,
|
| 7008 |
+
"step": 4490
|
| 7009 |
+
},
|
| 7010 |
+
{
|
| 7011 |
+
"epoch": 0.6534379997092601,
|
| 7012 |
+
"grad_norm": 2.3554959297180176,
|
| 7013 |
+
"learning_rate": 5.88917799303972e-05,
|
| 7014 |
+
"loss": 1.9951,
|
| 7015 |
+
"step": 4495
|
| 7016 |
+
},
|
| 7017 |
+
{
|
| 7018 |
+
"epoch": 0.6541648495420846,
|
| 7019 |
+
"grad_norm": 2.245417833328247,
|
| 7020 |
+
"learning_rate": 5.881351371344841e-05,
|
| 7021 |
+
"loss": 1.9328,
|
| 7022 |
+
"step": 4500
|
| 7023 |
+
},
|
| 7024 |
+
{
|
| 7025 |
+
"epoch": 0.6541648495420846,
|
| 7026 |
+
"eval_loss": 1.89194917678833,
|
| 7027 |
+
"eval_runtime": 19.3578,
|
| 7028 |
+
"eval_samples_per_second": 170.525,
|
| 7029 |
+
"eval_steps_per_second": 10.693,
|
| 7030 |
+
"step": 4500
|
| 7031 |
}
|
| 7032 |
],
|
| 7033 |
"logging_steps": 5,
|
|
|
|
| 7056 |
"attributes": {}
|
| 7057 |
}
|
| 7058 |
},
|
| 7059 |
+
"total_flos": 1.1728234225273405e+18,
|
| 7060 |
"train_batch_size": 4,
|
| 7061 |
"trial_name": null,
|
| 7062 |
"trial_params": null
|