Training in progress, step 370, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step370/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step370/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step370/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 838922248
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50f547452116a91ced741817d0158e20a2639a40c36dbdb9597bab5096bfeede
|
| 3 |
size 838922248
|
last-checkpoint/global_step370/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c45dc08a197f7c0281aa8ebc4fe6f95eb165aae4591c45db4addb9a8beeb7e02
|
| 3 |
+
size 2516610128
|
last-checkpoint/global_step370/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f26506e2982686927c242f0ecd57a2754730d3774a4fd1d990c895969fc1ac3a
|
| 3 |
+
size 2516610256
|
last-checkpoint/global_step370/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d53b6de8062ec711428eeb0959e27bec1400d3240c9b90c61b6f742a6c8f971
|
| 3 |
+
size 839203436
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step370
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14512
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfcfee17e6b489c56e369f8424fd3148d93d6475de75e98a3419a0b00e9010a1
|
| 3 |
size 14512
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14512
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eda4d898981379b275d5069d167e4e5e3bb8d0d2583814bf72968446d9b1be71
|
| 3 |
size 14512
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f2e1dfa07caf61548c4783222b53e3b067d5dd48ff04c0299d1b38fd0e8701c
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 350,
|
| 3 |
"best_metric": 1.5130423307418823,
|
| 4 |
"best_model_checkpoint": "miner_id_24/checkpoint-350",
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 10,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2824,6 +2824,84 @@
|
|
| 2824 |
"eval_samples_per_second": 9.292,
|
| 2825 |
"eval_steps_per_second": 1.162,
|
| 2826 |
"step": 360
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2827 |
}
|
| 2828 |
],
|
| 2829 |
"logging_steps": 1,
|
|
@@ -2838,7 +2916,7 @@
|
|
| 2838 |
"early_stopping_threshold": 0.0
|
| 2839 |
},
|
| 2840 |
"attributes": {
|
| 2841 |
-
"early_stopping_patience_counter":
|
| 2842 |
}
|
| 2843 |
},
|
| 2844 |
"TrainerControl": {
|
|
@@ -2852,7 +2930,7 @@
|
|
| 2852 |
"attributes": {}
|
| 2853 |
}
|
| 2854 |
},
|
| 2855 |
-
"total_flos": 2.
|
| 2856 |
"train_batch_size": 4,
|
| 2857 |
"trial_name": null,
|
| 2858 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": 350,
|
| 3 |
"best_metric": 1.5130423307418823,
|
| 4 |
"best_model_checkpoint": "miner_id_24/checkpoint-350",
|
| 5 |
+
"epoch": 0.062292184014478724,
|
| 6 |
"eval_steps": 10,
|
| 7 |
+
"global_step": 370,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2824 |
"eval_samples_per_second": 9.292,
|
| 2825 |
"eval_steps_per_second": 1.162,
|
| 2826 |
"step": 360
|
| 2827 |
+
},
|
| 2828 |
+
{
|
| 2829 |
+
"epoch": 0.06077696872764005,
|
| 2830 |
+
"grad_norm": 1.6546894311904907,
|
| 2831 |
+
"learning_rate": 7.312691451204178e-06,
|
| 2832 |
+
"loss": 1.4844,
|
| 2833 |
+
"step": 361
|
| 2834 |
+
},
|
| 2835 |
+
{
|
| 2836 |
+
"epoch": 0.06094532598173324,
|
| 2837 |
+
"grad_norm": 0.9022347331047058,
|
| 2838 |
+
"learning_rate": 7.2984687052230585e-06,
|
| 2839 |
+
"loss": 1.5351,
|
| 2840 |
+
"step": 362
|
| 2841 |
+
},
|
| 2842 |
+
{
|
| 2843 |
+
"epoch": 0.06111368323582642,
|
| 2844 |
+
"grad_norm": 1.407510757446289,
|
| 2845 |
+
"learning_rate": 7.284222338919758e-06,
|
| 2846 |
+
"loss": 1.4135,
|
| 2847 |
+
"step": 363
|
| 2848 |
+
},
|
| 2849 |
+
{
|
| 2850 |
+
"epoch": 0.06128204048991961,
|
| 2851 |
+
"grad_norm": 1.2970539331436157,
|
| 2852 |
+
"learning_rate": 7.269952498697734e-06,
|
| 2853 |
+
"loss": 1.4892,
|
| 2854 |
+
"step": 364
|
| 2855 |
+
},
|
| 2856 |
+
{
|
| 2857 |
+
"epoch": 0.06145039774401279,
|
| 2858 |
+
"grad_norm": 1.5928813219070435,
|
| 2859 |
+
"learning_rate": 7.255659331201673e-06,
|
| 2860 |
+
"loss": 1.6161,
|
| 2861 |
+
"step": 365
|
| 2862 |
+
},
|
| 2863 |
+
{
|
| 2864 |
+
"epoch": 0.06161875499810598,
|
| 2865 |
+
"grad_norm": 1.8556829690933228,
|
| 2866 |
+
"learning_rate": 7.241342983315985e-06,
|
| 2867 |
+
"loss": 1.6214,
|
| 2868 |
+
"step": 366
|
| 2869 |
+
},
|
| 2870 |
+
{
|
| 2871 |
+
"epoch": 0.06178711225219917,
|
| 2872 |
+
"grad_norm": 1.4815845489501953,
|
| 2873 |
+
"learning_rate": 7.227003602163296e-06,
|
| 2874 |
+
"loss": 1.6008,
|
| 2875 |
+
"step": 367
|
| 2876 |
+
},
|
| 2877 |
+
{
|
| 2878 |
+
"epoch": 0.061955469506292354,
|
| 2879 |
+
"grad_norm": 1.992004632949829,
|
| 2880 |
+
"learning_rate": 7.212641335102932e-06,
|
| 2881 |
+
"loss": 1.5366,
|
| 2882 |
+
"step": 368
|
| 2883 |
+
},
|
| 2884 |
+
{
|
| 2885 |
+
"epoch": 0.06212382676038554,
|
| 2886 |
+
"grad_norm": 1.532273292541504,
|
| 2887 |
+
"learning_rate": 7.198256329729412e-06,
|
| 2888 |
+
"loss": 1.4833,
|
| 2889 |
+
"step": 369
|
| 2890 |
+
},
|
| 2891 |
+
{
|
| 2892 |
+
"epoch": 0.062292184014478724,
|
| 2893 |
+
"grad_norm": 2.722139358520508,
|
| 2894 |
+
"learning_rate": 7.183848733870917e-06,
|
| 2895 |
+
"loss": 1.9184,
|
| 2896 |
+
"step": 370
|
| 2897 |
+
},
|
| 2898 |
+
{
|
| 2899 |
+
"epoch": 0.062292184014478724,
|
| 2900 |
+
"eval_loss": 1.5153603553771973,
|
| 2901 |
+
"eval_runtime": 1075.4788,
|
| 2902 |
+
"eval_samples_per_second": 9.302,
|
| 2903 |
+
"eval_steps_per_second": 1.163,
|
| 2904 |
+
"step": 370
|
| 2905 |
}
|
| 2906 |
],
|
| 2907 |
"logging_steps": 1,
|
|
|
|
| 2916 |
"early_stopping_threshold": 0.0
|
| 2917 |
},
|
| 2918 |
"attributes": {
|
| 2919 |
+
"early_stopping_patience_counter": 2
|
| 2920 |
}
|
| 2921 |
},
|
| 2922 |
"TrainerControl": {
|
|
|
|
| 2930 |
"attributes": {}
|
| 2931 |
}
|
| 2932 |
},
|
| 2933 |
+
"total_flos": 2.3058275709537485e+18,
|
| 2934 |
"train_batch_size": 4,
|
| 2935 |
"trial_name": null,
|
| 2936 |
"trial_params": null
|