Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step4000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +160 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1037269336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:771f7595253335d0f7b3e5d9548620ff920977b25d1013493890387e97d73a3d
|
| 3 |
size 1037269336
|
last-checkpoint/global_step4000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9db33c2934109344e9790a6a24923827069a5cbff5be4dfeed5abed210416129
|
| 3 |
+
size 781993445
|
last-checkpoint/global_step4000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:364e2e4fe8bf27c48c8f22149e5bd025ff98fd03141a0e70f24ca3970e7aaa3c
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step4000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55a2042c1c983f42df9ed5fb3f3fafa7a0335dbe50cfa52e7369122eaabfc304
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step4000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28737b93e49b1723caea50a45e6cadf6ea49a3cc984e46af27919d5adfe9bb18
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step4000/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04e0eca13b582c26cfb2623ba0e865f3d272dfc90cc1e2db92804e4596e915c3
|
| 3 |
+
size 2610290277
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step4000
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7738b79cde91732aa1ae36546c20e2adfb138db06ede459f3546964f4c72f003
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c87bb0bbd4a5d934e9e0ee64426668f65a3c0671e53f80788bd09202aaa80ce
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3438bbb08774094f199cd5833a18b6fec0ce5cda0f318f97029e7d59620cafc6
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4893134b5c11d042dab70821374bd20a7f7800fefcc8fad1ea78520c80bfcce6
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7278ee28e675006b1a18eabb528c5e753ec5c79a4c5c843c134b5fc72246eac3
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6092,6 +6092,162 @@
|
|
| 6092 |
"eval_samples_per_second": 173.341,
|
| 6093 |
"eval_steps_per_second": 10.87,
|
| 6094 |
"step": 3900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6095 |
}
|
| 6096 |
],
|
| 6097 |
"logging_steps": 5,
|
|
@@ -6120,7 +6276,7 @@
|
|
| 6120 |
"attributes": {}
|
| 6121 |
}
|
| 6122 |
},
|
| 6123 |
-
"total_flos": 1.
|
| 6124 |
"train_batch_size": 4,
|
| 6125 |
"trial_name": null,
|
| 6126 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 1.9395991563796997,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.5814798662596308,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 4000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6092 |
"eval_samples_per_second": 173.341,
|
| 6093 |
"eval_steps_per_second": 10.87,
|
| 6094 |
"step": 3900
|
| 6095 |
+
},
|
| 6096 |
+
{
|
| 6097 |
+
"epoch": 0.5676697194359646,
|
| 6098 |
+
"grad_norm": 2.6036624908447266,
|
| 6099 |
+
"learning_rate": 6.791836657414602e-05,
|
| 6100 |
+
"loss": 2.1123,
|
| 6101 |
+
"step": 3905
|
| 6102 |
+
},
|
| 6103 |
+
{
|
| 6104 |
+
"epoch": 0.5683965692687891,
|
| 6105 |
+
"grad_norm": 2.3896546363830566,
|
| 6106 |
+
"learning_rate": 6.784405353654967e-05,
|
| 6107 |
+
"loss": 1.9911,
|
| 6108 |
+
"step": 3910
|
| 6109 |
+
},
|
| 6110 |
+
{
|
| 6111 |
+
"epoch": 0.5691234191016136,
|
| 6112 |
+
"grad_norm": 2.328312635421753,
|
| 6113 |
+
"learning_rate": 6.776969582421008e-05,
|
| 6114 |
+
"loss": 2.15,
|
| 6115 |
+
"step": 3915
|
| 6116 |
+
},
|
| 6117 |
+
{
|
| 6118 |
+
"epoch": 0.5698502689344381,
|
| 6119 |
+
"grad_norm": 2.710876941680908,
|
| 6120 |
+
"learning_rate": 6.769529362432273e-05,
|
| 6121 |
+
"loss": 1.9971,
|
| 6122 |
+
"step": 3920
|
| 6123 |
+
},
|
| 6124 |
+
{
|
| 6125 |
+
"epoch": 0.5705771187672627,
|
| 6126 |
+
"grad_norm": 2.569784164428711,
|
| 6127 |
+
"learning_rate": 6.762084712419506e-05,
|
| 6128 |
+
"loss": 2.0124,
|
| 6129 |
+
"step": 3925
|
| 6130 |
+
},
|
| 6131 |
+
{
|
| 6132 |
+
"epoch": 0.5713039686000873,
|
| 6133 |
+
"grad_norm": 2.488879919052124,
|
| 6134 |
+
"learning_rate": 6.754635651124603e-05,
|
| 6135 |
+
"loss": 2.0063,
|
| 6136 |
+
"step": 3930
|
| 6137 |
+
},
|
| 6138 |
+
{
|
| 6139 |
+
"epoch": 0.5720308184329118,
|
| 6140 |
+
"grad_norm": 2.3385536670684814,
|
| 6141 |
+
"learning_rate": 6.747182197300568e-05,
|
| 6142 |
+
"loss": 1.9629,
|
| 6143 |
+
"step": 3935
|
| 6144 |
+
},
|
| 6145 |
+
{
|
| 6146 |
+
"epoch": 0.5727576682657363,
|
| 6147 |
+
"grad_norm": 2.078852415084839,
|
| 6148 |
+
"learning_rate": 6.739724369711464e-05,
|
| 6149 |
+
"loss": 1.8292,
|
| 6150 |
+
"step": 3940
|
| 6151 |
+
},
|
| 6152 |
+
{
|
| 6153 |
+
"epoch": 0.5734845180985608,
|
| 6154 |
+
"grad_norm": 2.723219156265259,
|
| 6155 |
+
"learning_rate": 6.732262187132362e-05,
|
| 6156 |
+
"loss": 1.9587,
|
| 6157 |
+
"step": 3945
|
| 6158 |
+
},
|
| 6159 |
+
{
|
| 6160 |
+
"epoch": 0.5742113679313854,
|
| 6161 |
+
"grad_norm": 2.4456677436828613,
|
| 6162 |
+
"learning_rate": 6.724795668349295e-05,
|
| 6163 |
+
"loss": 2.1195,
|
| 6164 |
+
"step": 3950
|
| 6165 |
+
},
|
| 6166 |
+
{
|
| 6167 |
+
"epoch": 0.5742113679313854,
|
| 6168 |
+
"eval_loss": 1.9503754377365112,
|
| 6169 |
+
"eval_runtime": 21.006,
|
| 6170 |
+
"eval_samples_per_second": 157.145,
|
| 6171 |
+
"eval_steps_per_second": 9.854,
|
| 6172 |
+
"step": 3950
|
| 6173 |
+
},
|
| 6174 |
+
{
|
| 6175 |
+
"epoch": 0.57493821776421,
|
| 6176 |
+
"grad_norm": 2.2807230949401855,
|
| 6177 |
+
"learning_rate": 6.71732483215922e-05,
|
| 6178 |
+
"loss": 2.0122,
|
| 6179 |
+
"step": 3955
|
| 6180 |
+
},
|
| 6181 |
+
{
|
| 6182 |
+
"epoch": 0.5756650675970345,
|
| 6183 |
+
"grad_norm": 2.6762518882751465,
|
| 6184 |
+
"learning_rate": 6.709849697369953e-05,
|
| 6185 |
+
"loss": 2.1176,
|
| 6186 |
+
"step": 3960
|
| 6187 |
+
},
|
| 6188 |
+
{
|
| 6189 |
+
"epoch": 0.576391917429859,
|
| 6190 |
+
"grad_norm": 2.549398899078369,
|
| 6191 |
+
"learning_rate": 6.70237028280014e-05,
|
| 6192 |
+
"loss": 2.1504,
|
| 6193 |
+
"step": 3965
|
| 6194 |
+
},
|
| 6195 |
+
{
|
| 6196 |
+
"epoch": 0.5771187672626835,
|
| 6197 |
+
"grad_norm": 2.400339365005493,
|
| 6198 |
+
"learning_rate": 6.6948866072792e-05,
|
| 6199 |
+
"loss": 2.1282,
|
| 6200 |
+
"step": 3970
|
| 6201 |
+
},
|
| 6202 |
+
{
|
| 6203 |
+
"epoch": 0.5778456170955081,
|
| 6204 |
+
"grad_norm": 2.5607948303222656,
|
| 6205 |
+
"learning_rate": 6.687398689647273e-05,
|
| 6206 |
+
"loss": 2.0596,
|
| 6207 |
+
"step": 3975
|
| 6208 |
+
},
|
| 6209 |
+
{
|
| 6210 |
+
"epoch": 0.5785724669283326,
|
| 6211 |
+
"grad_norm": 2.790510892868042,
|
| 6212 |
+
"learning_rate": 6.679906548755185e-05,
|
| 6213 |
+
"loss": 2.0354,
|
| 6214 |
+
"step": 3980
|
| 6215 |
+
},
|
| 6216 |
+
{
|
| 6217 |
+
"epoch": 0.5792993167611571,
|
| 6218 |
+
"grad_norm": 2.543358325958252,
|
| 6219 |
+
"learning_rate": 6.672410203464392e-05,
|
| 6220 |
+
"loss": 2.2136,
|
| 6221 |
+
"step": 3985
|
| 6222 |
+
},
|
| 6223 |
+
{
|
| 6224 |
+
"epoch": 0.5800261665939817,
|
| 6225 |
+
"grad_norm": 2.59621524810791,
|
| 6226 |
+
"learning_rate": 6.664909672646934e-05,
|
| 6227 |
+
"loss": 2.1201,
|
| 6228 |
+
"step": 3990
|
| 6229 |
+
},
|
| 6230 |
+
{
|
| 6231 |
+
"epoch": 0.5807530164268062,
|
| 6232 |
+
"grad_norm": 2.42059063911438,
|
| 6233 |
+
"learning_rate": 6.657404975185387e-05,
|
| 6234 |
+
"loss": 2.0,
|
| 6235 |
+
"step": 3995
|
| 6236 |
+
},
|
| 6237 |
+
{
|
| 6238 |
+
"epoch": 0.5814798662596308,
|
| 6239 |
+
"grad_norm": 2.4144132137298584,
|
| 6240 |
+
"learning_rate": 6.64989612997282e-05,
|
| 6241 |
+
"loss": 2.146,
|
| 6242 |
+
"step": 4000
|
| 6243 |
+
},
|
| 6244 |
+
{
|
| 6245 |
+
"epoch": 0.5814798662596308,
|
| 6246 |
+
"eval_loss": 1.9395991563796997,
|
| 6247 |
+
"eval_runtime": 19.1182,
|
| 6248 |
+
"eval_samples_per_second": 172.663,
|
| 6249 |
+
"eval_steps_per_second": 10.827,
|
| 6250 |
+
"step": 4000
|
| 6251 |
}
|
| 6252 |
],
|
| 6253 |
"logging_steps": 5,
|
|
|
|
| 6276 |
"attributes": {}
|
| 6277 |
}
|
| 6278 |
},
|
| 6279 |
+
"total_flos": 1.0434609863437844e+18,
|
| 6280 |
"train_batch_size": 4,
|
| 6281 |
"trial_name": null,
|
| 6282 |
"trial_params": null
|