Training in progress, epoch 2, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step2050/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2050/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2050/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2050/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2050/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 98088784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:686af49d9716db5c977676982b73eead050c55447dd141d1a5c60a378798cf92
|
| 3 |
size 98088784
|
last-checkpoint/global_step2050/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45616c41df00fa1bfbd9ac14ff79e146c79f9632defbfe6dfc06ff18bd55abf8
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step2050/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8b0c02e60bc84e4c5f99dab0099e748a296a98b4797f47b06eb7a9d8d0e1d95
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step2050/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9179f8475663925d8ca4053d3b76b16cb0678735977baa2c289802bd3353cb81
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step2050/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:812d057a57265c21da7802dcc76e16c7773408a25f3ff1c2fc4c2ce8cdab9440
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step2050/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f61c444e501f1817872bca2e24f714b3ce7cf6bf9e837542c155318cadca656
|
| 3 |
+
size 564993061
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step2050
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2136a743114bf3ac6a2be5bd25d57e50f3b32784a92a3ed2d3a6f4e8dfe65997
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:725ed86be2ad34a12a575d123d89423e1ccfc36d42388d609eed78168e20aa8e
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa0c254473a5362fb78028095a7ded74230ab3341cfdf45850d3246e013416c5
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a8d80b4bb14ece5fb16e97007c2bfac9158f816b33de1c332f8d229aa7c6235
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76c0ef42f2619bcfd07a1df041b3a7fb00343474a95d72bd4b490c04b2b99687
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3128,6 +3128,84 @@
|
|
| 3128 |
"eval_samples_per_second": 127.041,
|
| 3129 |
"eval_steps_per_second": 15.888,
|
| 3130 |
"step": 2000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3131 |
}
|
| 3132 |
],
|
| 3133 |
"logging_steps": 5,
|
|
@@ -3156,7 +3234,7 @@
|
|
| 3156 |
"attributes": {}
|
| 3157 |
}
|
| 3158 |
},
|
| 3159 |
-
"total_flos": 1.
|
| 3160 |
"train_batch_size": 2,
|
| 3161 |
"trial_name": null,
|
| 3162 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.6483769416809082,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.0097979179424375,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 2050,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3128 |
"eval_samples_per_second": 127.041,
|
| 3129 |
"eval_steps_per_second": 15.888,
|
| 3130 |
"step": 2000
|
| 3131 |
+
},
|
| 3132 |
+
{
|
| 3133 |
+
"epoch": 1.9650949173300674,
|
| 3134 |
+
"grad_norm": 0.223761647939682,
|
| 3135 |
+
"learning_rate": 7.439799842198776e-05,
|
| 3136 |
+
"loss": 0.674,
|
| 3137 |
+
"step": 2005
|
| 3138 |
+
},
|
| 3139 |
+
{
|
| 3140 |
+
"epoch": 1.9699938763012859,
|
| 3141 |
+
"grad_norm": 0.19198189675807953,
|
| 3142 |
+
"learning_rate": 7.423108004787508e-05,
|
| 3143 |
+
"loss": 0.6874,
|
| 3144 |
+
"step": 2010
|
| 3145 |
+
},
|
| 3146 |
+
{
|
| 3147 |
+
"epoch": 1.9748928352725046,
|
| 3148 |
+
"grad_norm": 0.20100003480911255,
|
| 3149 |
+
"learning_rate": 7.406396814129006e-05,
|
| 3150 |
+
"loss": 0.6881,
|
| 3151 |
+
"step": 2015
|
| 3152 |
+
},
|
| 3153 |
+
{
|
| 3154 |
+
"epoch": 1.9797917942437233,
|
| 3155 |
+
"grad_norm": 0.22284255921840668,
|
| 3156 |
+
"learning_rate": 7.389666441956613e-05,
|
| 3157 |
+
"loss": 0.6904,
|
| 3158 |
+
"step": 2020
|
| 3159 |
+
},
|
| 3160 |
+
{
|
| 3161 |
+
"epoch": 1.9846907532149418,
|
| 3162 |
+
"grad_norm": 0.2002260684967041,
|
| 3163 |
+
"learning_rate": 7.372917060200785e-05,
|
| 3164 |
+
"loss": 0.6763,
|
| 3165 |
+
"step": 2025
|
| 3166 |
+
},
|
| 3167 |
+
{
|
| 3168 |
+
"epoch": 1.9895897121861603,
|
| 3169 |
+
"grad_norm": 0.21440419554710388,
|
| 3170 |
+
"learning_rate": 7.356148840987336e-05,
|
| 3171 |
+
"loss": 0.6819,
|
| 3172 |
+
"step": 2030
|
| 3173 |
+
},
|
| 3174 |
+
{
|
| 3175 |
+
"epoch": 1.994488671157379,
|
| 3176 |
+
"grad_norm": 0.22840850055217743,
|
| 3177 |
+
"learning_rate": 7.339361956635661e-05,
|
| 3178 |
+
"loss": 0.6935,
|
| 3179 |
+
"step": 2035
|
| 3180 |
+
},
|
| 3181 |
+
{
|
| 3182 |
+
"epoch": 1.9993876301285978,
|
| 3183 |
+
"grad_norm": 0.2073492854833603,
|
| 3184 |
+
"learning_rate": 7.322556579656973e-05,
|
| 3185 |
+
"loss": 0.6927,
|
| 3186 |
+
"step": 2040
|
| 3187 |
+
},
|
| 3188 |
+
{
|
| 3189 |
+
"epoch": 2.0048989589712187,
|
| 3190 |
+
"grad_norm": 0.19991783797740936,
|
| 3191 |
+
"learning_rate": 7.305732882752519e-05,
|
| 3192 |
+
"loss": 0.7925,
|
| 3193 |
+
"step": 2045
|
| 3194 |
+
},
|
| 3195 |
+
{
|
| 3196 |
+
"epoch": 2.0097979179424375,
|
| 3197 |
+
"grad_norm": 0.22880135476589203,
|
| 3198 |
+
"learning_rate": 7.288891038811815e-05,
|
| 3199 |
+
"loss": 0.6637,
|
| 3200 |
+
"step": 2050
|
| 3201 |
+
},
|
| 3202 |
+
{
|
| 3203 |
+
"epoch": 2.0097979179424375,
|
| 3204 |
+
"eval_loss": 0.6483769416809082,
|
| 3205 |
+
"eval_runtime": 15.4537,
|
| 3206 |
+
"eval_samples_per_second": 126.766,
|
| 3207 |
+
"eval_steps_per_second": 15.854,
|
| 3208 |
+
"step": 2050
|
| 3209 |
}
|
| 3210 |
],
|
| 3211 |
"logging_steps": 5,
|
|
|
|
| 3234 |
"attributes": {}
|
| 3235 |
}
|
| 3236 |
},
|
| 3237 |
+
"total_flos": 1.0570918347292017e+18,
|
| 3238 |
"train_batch_size": 2,
|
| 3239 |
"trial_name": null,
|
| 3240 |
"trial_params": null
|