Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step5100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5100/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +160 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1037269336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5b523eb134094d0fe3ed4dfef81ffed7224784825c6f64b8661fe04d195a546
|
| 3 |
size 1037269336
|
last-checkpoint/global_step5100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab4d9e4164ee16fa83dbfd1f018e2622dd032b0baa3a0dabc64f7e73cfab6fe8
|
| 3 |
+
size 781993445
|
last-checkpoint/global_step5100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3d52195365c651c3e5364df3d7926ee4819beb46d64296fcbebb5ce8b0e7502
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step5100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79943dd9bf44806e010c4fd6c1ac0a48f716e1b8fb74075f38c8a90fde71eea1
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step5100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10f3ec07625e370675911a8ad7ada8a9b7c40c5d92eb84e352baa294434990fe
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step5100/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba5de234ff5bea5001e9e180ee326f5b49b985809bee0bce1e2e5aeab57e319a
|
| 3 |
+
size 2610290277
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step5100
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56865be07eff6f66c69791fc2b9b609f0e20d2a4499e1c484d2daf5499c42b5c
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dcc331c1dd0e2fd6a26f5faf857be1fe7603138c25d38c533d290076fd5c63d2
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68160d4ba6654984de0d46bc96a7fe87a66866d7126298837a820322efc5e287
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66e51d3128d1b9d77da6840ea0cc45f49e7d431d13998e4e4edcf5f6460d262d
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bfd32ddbd680624dcd914b61c50d077bc8f0cb703973d6bb57f048563ab5de57
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -7808,6 +7808,162 @@
|
|
| 7808 |
"eval_samples_per_second": 173.245,
|
| 7809 |
"eval_steps_per_second": 10.864,
|
| 7810 |
"step": 5000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7811 |
}
|
| 7812 |
],
|
| 7813 |
"logging_steps": 5,
|
|
@@ -7836,7 +7992,7 @@
|
|
| 7836 |
"attributes": {}
|
| 7837 |
}
|
| 7838 |
},
|
| 7839 |
-
"total_flos": 1.
|
| 7840 |
"train_batch_size": 4,
|
| 7841 |
"trial_name": null,
|
| 7842 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 1.8430671691894531,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.7413868294810292,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 5100,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 7808 |
"eval_samples_per_second": 173.245,
|
| 7809 |
"eval_steps_per_second": 10.864,
|
| 7810 |
"step": 5000
|
| 7811 |
+
},
|
| 7812 |
+
{
|
| 7813 |
+
"epoch": 0.727576682657363,
|
| 7814 |
+
"grad_norm": 2.492719888687134,
|
| 7815 |
+
"learning_rate": 5.082964248443748e-05,
|
| 7816 |
+
"loss": 1.9047,
|
| 7817 |
+
"step": 5005
|
| 7818 |
+
},
|
| 7819 |
+
{
|
| 7820 |
+
"epoch": 0.7283035324901875,
|
| 7821 |
+
"grad_norm": 2.1797542572021484,
|
| 7822 |
+
"learning_rate": 5.0750160841935874e-05,
|
| 7823 |
+
"loss": 1.9196,
|
| 7824 |
+
"step": 5010
|
| 7825 |
+
},
|
| 7826 |
+
{
|
| 7827 |
+
"epoch": 0.7290303823230121,
|
| 7828 |
+
"grad_norm": 2.329383134841919,
|
| 7829 |
+
"learning_rate": 5.067067755854552e-05,
|
| 7830 |
+
"loss": 1.9559,
|
| 7831 |
+
"step": 5015
|
| 7832 |
+
},
|
| 7833 |
+
{
|
| 7834 |
+
"epoch": 0.7297572321558367,
|
| 7835 |
+
"grad_norm": 2.5835447311401367,
|
| 7836 |
+
"learning_rate": 5.059119283436551e-05,
|
| 7837 |
+
"loss": 2.0918,
|
| 7838 |
+
"step": 5020
|
| 7839 |
+
},
|
| 7840 |
+
{
|
| 7841 |
+
"epoch": 0.7304840819886611,
|
| 7842 |
+
"grad_norm": 2.5015668869018555,
|
| 7843 |
+
"learning_rate": 5.0511706869498554e-05,
|
| 7844 |
+
"loss": 1.837,
|
| 7845 |
+
"step": 5025
|
| 7846 |
+
},
|
| 7847 |
+
{
|
| 7848 |
+
"epoch": 0.7312109318214857,
|
| 7849 |
+
"grad_norm": 2.416215419769287,
|
| 7850 |
+
"learning_rate": 5.043221986405045e-05,
|
| 7851 |
+
"loss": 2.039,
|
| 7852 |
+
"step": 5030
|
| 7853 |
+
},
|
| 7854 |
+
{
|
| 7855 |
+
"epoch": 0.7319377816543102,
|
| 7856 |
+
"grad_norm": 2.211178779602051,
|
| 7857 |
+
"learning_rate": 5.035273201812967e-05,
|
| 7858 |
+
"loss": 2.0104,
|
| 7859 |
+
"step": 5035
|
| 7860 |
+
},
|
| 7861 |
+
{
|
| 7862 |
+
"epoch": 0.7326646314871348,
|
| 7863 |
+
"grad_norm": 1.9860055446624756,
|
| 7864 |
+
"learning_rate": 5.0273243531846745e-05,
|
| 7865 |
+
"loss": 1.8645,
|
| 7866 |
+
"step": 5040
|
| 7867 |
+
},
|
| 7868 |
+
{
|
| 7869 |
+
"epoch": 0.7333914813199593,
|
| 7870 |
+
"grad_norm": 2.3574624061584473,
|
| 7871 |
+
"learning_rate": 5.0193754605313855e-05,
|
| 7872 |
+
"loss": 2.1474,
|
| 7873 |
+
"step": 5045
|
| 7874 |
+
},
|
| 7875 |
+
{
|
| 7876 |
+
"epoch": 0.7341183311527838,
|
| 7877 |
+
"grad_norm": 2.370668888092041,
|
| 7878 |
+
"learning_rate": 5.01142654386443e-05,
|
| 7879 |
+
"loss": 1.97,
|
| 7880 |
+
"step": 5050
|
| 7881 |
+
},
|
| 7882 |
+
{
|
| 7883 |
+
"epoch": 0.7341183311527838,
|
| 7884 |
+
"eval_loss": 1.8463149070739746,
|
| 7885 |
+
"eval_runtime": 20.4628,
|
| 7886 |
+
"eval_samples_per_second": 161.317,
|
| 7887 |
+
"eval_steps_per_second": 10.116,
|
| 7888 |
+
"step": 5050
|
| 7889 |
+
},
|
| 7890 |
+
{
|
| 7891 |
+
"epoch": 0.7348451809856084,
|
| 7892 |
+
"grad_norm": 2.351590394973755,
|
| 7893 |
+
"learning_rate": 5.0034776231951914e-05,
|
| 7894 |
+
"loss": 1.9559,
|
| 7895 |
+
"step": 5055
|
| 7896 |
+
},
|
| 7897 |
+
{
|
| 7898 |
+
"epoch": 0.7355720308184329,
|
| 7899 |
+
"grad_norm": 2.62426495552063,
|
| 7900 |
+
"learning_rate": 4.995528718535072e-05,
|
| 7901 |
+
"loss": 2.0377,
|
| 7902 |
+
"step": 5060
|
| 7903 |
+
},
|
| 7904 |
+
{
|
| 7905 |
+
"epoch": 0.7362988806512575,
|
| 7906 |
+
"grad_norm": 2.2182393074035645,
|
| 7907 |
+
"learning_rate": 4.9875798498954274e-05,
|
| 7908 |
+
"loss": 2.0352,
|
| 7909 |
+
"step": 5065
|
| 7910 |
+
},
|
| 7911 |
+
{
|
| 7912 |
+
"epoch": 0.737025730484082,
|
| 7913 |
+
"grad_norm": 2.1070525646209717,
|
| 7914 |
+
"learning_rate": 4.979631037287528e-05,
|
| 7915 |
+
"loss": 1.9733,
|
| 7916 |
+
"step": 5070
|
| 7917 |
+
},
|
| 7918 |
+
{
|
| 7919 |
+
"epoch": 0.7377525803169065,
|
| 7920 |
+
"grad_norm": 2.4891433715820312,
|
| 7921 |
+
"learning_rate": 4.971682300722495e-05,
|
| 7922 |
+
"loss": 2.1022,
|
| 7923 |
+
"step": 5075
|
| 7924 |
+
},
|
| 7925 |
+
{
|
| 7926 |
+
"epoch": 0.738479430149731,
|
| 7927 |
+
"grad_norm": 2.638141632080078,
|
| 7928 |
+
"learning_rate": 4.9637336602112685e-05,
|
| 7929 |
+
"loss": 2.1233,
|
| 7930 |
+
"step": 5080
|
| 7931 |
+
},
|
| 7932 |
+
{
|
| 7933 |
+
"epoch": 0.7392062799825556,
|
| 7934 |
+
"grad_norm": 2.174260377883911,
|
| 7935 |
+
"learning_rate": 4.9557851357645386e-05,
|
| 7936 |
+
"loss": 2.0232,
|
| 7937 |
+
"step": 5085
|
| 7938 |
+
},
|
| 7939 |
+
{
|
| 7940 |
+
"epoch": 0.7399331298153802,
|
| 7941 |
+
"grad_norm": 2.112396717071533,
|
| 7942 |
+
"learning_rate": 4.947836747392708e-05,
|
| 7943 |
+
"loss": 1.8431,
|
| 7944 |
+
"step": 5090
|
| 7945 |
+
},
|
| 7946 |
+
{
|
| 7947 |
+
"epoch": 0.7406599796482047,
|
| 7948 |
+
"grad_norm": 2.135979413986206,
|
| 7949 |
+
"learning_rate": 4.939888515105832e-05,
|
| 7950 |
+
"loss": 1.8346,
|
| 7951 |
+
"step": 5095
|
| 7952 |
+
},
|
| 7953 |
+
{
|
| 7954 |
+
"epoch": 0.7413868294810292,
|
| 7955 |
+
"grad_norm": 2.140866756439209,
|
| 7956 |
+
"learning_rate": 4.931940458913579e-05,
|
| 7957 |
+
"loss": 1.9499,
|
| 7958 |
+
"step": 5100
|
| 7959 |
+
},
|
| 7960 |
+
{
|
| 7961 |
+
"epoch": 0.7413868294810292,
|
| 7962 |
+
"eval_loss": 1.8430671691894531,
|
| 7963 |
+
"eval_runtime": 19.1198,
|
| 7964 |
+
"eval_samples_per_second": 172.648,
|
| 7965 |
+
"eval_steps_per_second": 10.826,
|
| 7966 |
+
"step": 5100
|
| 7967 |
}
|
| 7968 |
],
|
| 7969 |
"logging_steps": 5,
|
|
|
|
| 7992 |
"attributes": {}
|
| 7993 |
}
|
| 7994 |
},
|
| 7995 |
+
"total_flos": 1.3296709309594337e+18,
|
| 7996 |
"train_batch_size": 4,
|
| 7997 |
"trial_name": null,
|
| 7998 |
"trial_params": null
|