Training in progress, epoch 1, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step1850/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1850/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1850/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1850/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1850/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 98088784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4daa9130e6c255b2708c8eee5cdb609ab30d4959bd189609fe9f12b19d05f404
|
| 3 |
size 98088784
|
last-checkpoint/global_step1850/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab640d4c517058d3bff2089f16221d5ac8ea2ee6327558a4587682a167e1ba74
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step1850/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ab913468e5c7b3f94a7d54a3d79dedb2828f5e51fb51d00340ec966df8e3a10
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step1850/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d6c6f4f3f2baa35c260070f5e4382945067550e6b15387b10d7031e28ec1a4f
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step1850/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6b17923443f7c2450294b22e6453eaa42bd4f594a10f85faca7ca0a52008bba
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step1850/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccc69f9a9c13d36cd4e387253365067ad4c94d05c2f1ab3d46f9d518a945ea9f
|
| 3 |
+
size 564993061
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step1850
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:455fcae85aeb76faf352e118d0cf253d10c3d29e2f06cfabbb29fbc8f2f7a554
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b9ba995c6c6ed132c3f4cb1aeaa57d7a5ba11efd8d0082b03fc43d3bd0a2608
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2d4bee624a68686a64406afb400311befecc0c655d3991ddead24890d1c4bf2
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:914a4da0db40a07c1e19adf6a7a7212df61a63b6794b176a431ab9c0064c159b
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85498594364033313bc32ee2cb2af811174fdf184366c917daf31ca0273bcade
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2816,6 +2816,84 @@
|
|
| 2816 |
"eval_samples_per_second": 126.748,
|
| 2817 |
"eval_steps_per_second": 15.852,
|
| 2818 |
"step": 1800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2819 |
}
|
| 2820 |
],
|
| 2821 |
"logging_steps": 5,
|
|
@@ -2844,7 +2922,7 @@
|
|
| 2844 |
"attributes": {}
|
| 2845 |
}
|
| 2846 |
},
|
| 2847 |
-
"total_flos": 9.
|
| 2848 |
"train_batch_size": 2,
|
| 2849 |
"trial_name": null,
|
| 2850 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.6555055379867554,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.8132271892222902,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 1850,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2816 |
"eval_samples_per_second": 126.748,
|
| 2817 |
"eval_steps_per_second": 15.852,
|
| 2818 |
"step": 1800
|
| 2819 |
+
},
|
| 2820 |
+
{
|
| 2821 |
+
"epoch": 1.7691365584813226,
|
| 2822 |
+
"grad_norm": 0.19532938301563263,
|
| 2823 |
+
"learning_rate": 8.089658405219626e-05,
|
| 2824 |
+
"loss": 0.6867,
|
| 2825 |
+
"step": 1805
|
| 2826 |
+
},
|
| 2827 |
+
{
|
| 2828 |
+
"epoch": 1.7740355174525413,
|
| 2829 |
+
"grad_norm": 0.25039657950401306,
|
| 2830 |
+
"learning_rate": 8.073879040381839e-05,
|
| 2831 |
+
"loss": 0.7027,
|
| 2832 |
+
"step": 1810
|
| 2833 |
+
},
|
| 2834 |
+
{
|
| 2835 |
+
"epoch": 1.77893447642376,
|
| 2836 |
+
"grad_norm": 0.22185583412647247,
|
| 2837 |
+
"learning_rate": 8.058073634617543e-05,
|
| 2838 |
+
"loss": 0.6872,
|
| 2839 |
+
"step": 1815
|
| 2840 |
+
},
|
| 2841 |
+
{
|
| 2842 |
+
"epoch": 1.7838334353949785,
|
| 2843 |
+
"grad_norm": 0.20697511732578278,
|
| 2844 |
+
"learning_rate": 8.042242350351735e-05,
|
| 2845 |
+
"loss": 0.669,
|
| 2846 |
+
"step": 1820
|
| 2847 |
+
},
|
| 2848 |
+
{
|
| 2849 |
+
"epoch": 1.788732394366197,
|
| 2850 |
+
"grad_norm": 0.2226262092590332,
|
| 2851 |
+
"learning_rate": 8.026385350275358e-05,
|
| 2852 |
+
"loss": 0.6903,
|
| 2853 |
+
"step": 1825
|
| 2854 |
+
},
|
| 2855 |
+
{
|
| 2856 |
+
"epoch": 1.7936313533374157,
|
| 2857 |
+
"grad_norm": 0.20571239292621613,
|
| 2858 |
+
"learning_rate": 8.01050279734362e-05,
|
| 2859 |
+
"loss": 0.6821,
|
| 2860 |
+
"step": 1830
|
| 2861 |
+
},
|
| 2862 |
+
{
|
| 2863 |
+
"epoch": 1.7985303123086345,
|
| 2864 |
+
"grad_norm": 0.22728270292282104,
|
| 2865 |
+
"learning_rate": 7.994594854774328e-05,
|
| 2866 |
+
"loss": 0.6874,
|
| 2867 |
+
"step": 1835
|
| 2868 |
+
},
|
| 2869 |
+
{
|
| 2870 |
+
"epoch": 1.8034292712798532,
|
| 2871 |
+
"grad_norm": 0.24331030249595642,
|
| 2872 |
+
"learning_rate": 7.978661686046204e-05,
|
| 2873 |
+
"loss": 0.6927,
|
| 2874 |
+
"step": 1840
|
| 2875 |
+
},
|
| 2876 |
+
{
|
| 2877 |
+
"epoch": 1.8083282302510717,
|
| 2878 |
+
"grad_norm": 0.19782117009162903,
|
| 2879 |
+
"learning_rate": 7.962703454897207e-05,
|
| 2880 |
+
"loss": 0.6672,
|
| 2881 |
+
"step": 1845
|
| 2882 |
+
},
|
| 2883 |
+
{
|
| 2884 |
+
"epoch": 1.8132271892222902,
|
| 2885 |
+
"grad_norm": 0.20845647156238556,
|
| 2886 |
+
"learning_rate": 7.946720325322857e-05,
|
| 2887 |
+
"loss": 0.6874,
|
| 2888 |
+
"step": 1850
|
| 2889 |
+
},
|
| 2890 |
+
{
|
| 2891 |
+
"epoch": 1.8132271892222902,
|
| 2892 |
+
"eval_loss": 0.6555055379867554,
|
| 2893 |
+
"eval_runtime": 15.6124,
|
| 2894 |
+
"eval_samples_per_second": 125.477,
|
| 2895 |
+
"eval_steps_per_second": 15.693,
|
| 2896 |
+
"step": 1850
|
| 2897 |
}
|
| 2898 |
],
|
| 2899 |
"logging_steps": 5,
|
|
|
|
| 2922 |
"attributes": {}
|
| 2923 |
}
|
| 2924 |
},
|
| 2925 |
+
"total_flos": 9.54956406476243e+17,
|
| 2926 |
"train_batch_size": 2,
|
| 2927 |
"trial_name": null,
|
| 2928 |
"trial_params": null
|