Training in progress, step 850000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8f5555a3264f90296bf2491c36524039e7c271dde5eda23b82bfd2628385f7d8
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b915770d637a85b511b8ce95890fcc501fb257fa086404572aeee5022ceaa62
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8772dddd6d938192c990459978e84fe841726f65afecb856e653e8f92d71ae84
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:746d31cdff1a7ba11b9440e11b64572184b75f7cc296f7a28b273c21a8883fc1
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f51720bf9dead85db06417d336f89d75a1f13549e41b0ebc50dae00a0a00b614
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce8c9d7e6207f2bddc524a75c48baf98682e25fb31bb85ed872f40875802e4db
|
| 3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6fbbdae9c95471a40e6c6d019353b081fa6055bc839ed4f2163c0c1b80837934
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 12.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -6222,11 +6222,85 @@
|
|
| 6222 |
"eval_samples_per_second": 1334.897,
|
| 6223 |
"eval_steps_per_second": 21.358,
|
| 6224 |
"step": 840000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6225 |
}
|
| 6226 |
],
|
| 6227 |
"max_steps": 1000000,
|
| 6228 |
"num_train_epochs": 16,
|
| 6229 |
-
"total_flos": 5.
|
| 6230 |
"trial_name": null,
|
| 6231 |
"trial_params": null
|
| 6232 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 12.979675355414052,
|
| 5 |
+
"global_step": 850000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 6222 |
"eval_samples_per_second": 1334.897,
|
| 6223 |
"eval_steps_per_second": 21.358,
|
| 6224 |
"step": 840000
|
| 6225 |
+
},
|
| 6226 |
+
{
|
| 6227 |
+
"epoch": 12.84,
|
| 6228 |
+
"learning_rate": 1.945553091971727e-05,
|
| 6229 |
+
"loss": 0.2334,
|
| 6230 |
+
"step": 841000
|
| 6231 |
+
},
|
| 6232 |
+
{
|
| 6233 |
+
"epoch": 12.86,
|
| 6234 |
+
"learning_rate": 1.933967761107847e-05,
|
| 6235 |
+
"loss": 0.234,
|
| 6236 |
+
"step": 842000
|
| 6237 |
+
},
|
| 6238 |
+
{
|
| 6239 |
+
"epoch": 12.87,
|
| 6240 |
+
"learning_rate": 1.9224487674006694e-05,
|
| 6241 |
+
"loss": 0.234,
|
| 6242 |
+
"step": 843000
|
| 6243 |
+
},
|
| 6244 |
+
{
|
| 6245 |
+
"epoch": 12.89,
|
| 6246 |
+
"learning_rate": 1.9109962368200602e-05,
|
| 6247 |
+
"loss": 0.2379,
|
| 6248 |
+
"step": 844000
|
| 6249 |
+
},
|
| 6250 |
+
{
|
| 6251 |
+
"epoch": 12.9,
|
| 6252 |
+
"learning_rate": 1.8996102946090586e-05,
|
| 6253 |
+
"loss": 0.2335,
|
| 6254 |
+
"step": 845000
|
| 6255 |
+
},
|
| 6256 |
+
{
|
| 6257 |
+
"epoch": 12.9,
|
| 6258 |
+
"eval_runtime": 0.7039,
|
| 6259 |
+
"eval_samples_per_second": 1420.612,
|
| 6260 |
+
"eval_steps_per_second": 22.73,
|
| 6261 |
+
"step": 845000
|
| 6262 |
+
},
|
| 6263 |
+
{
|
| 6264 |
+
"epoch": 12.92,
|
| 6265 |
+
"learning_rate": 1.888291065282509e-05,
|
| 6266 |
+
"loss": 0.2338,
|
| 6267 |
+
"step": 846000
|
| 6268 |
+
},
|
| 6269 |
+
{
|
| 6270 |
+
"epoch": 12.93,
|
| 6271 |
+
"learning_rate": 1.8770386726256865e-05,
|
| 6272 |
+
"loss": 0.2329,
|
| 6273 |
+
"step": 847000
|
| 6274 |
+
},
|
| 6275 |
+
{
|
| 6276 |
+
"epoch": 12.95,
|
| 6277 |
+
"learning_rate": 1.8658532396929565e-05,
|
| 6278 |
+
"loss": 0.2334,
|
| 6279 |
+
"step": 848000
|
| 6280 |
+
},
|
| 6281 |
+
{
|
| 6282 |
+
"epoch": 12.96,
|
| 6283 |
+
"learning_rate": 1.8547348888064178e-05,
|
| 6284 |
+
"loss": 0.2341,
|
| 6285 |
+
"step": 849000
|
| 6286 |
+
},
|
| 6287 |
+
{
|
| 6288 |
+
"epoch": 12.98,
|
| 6289 |
+
"learning_rate": 1.8436837415545772e-05,
|
| 6290 |
+
"loss": 0.2356,
|
| 6291 |
+
"step": 850000
|
| 6292 |
+
},
|
| 6293 |
+
{
|
| 6294 |
+
"epoch": 12.98,
|
| 6295 |
+
"eval_runtime": 0.8308,
|
| 6296 |
+
"eval_samples_per_second": 1203.685,
|
| 6297 |
+
"eval_steps_per_second": 19.259,
|
| 6298 |
+
"step": 850000
|
| 6299 |
}
|
| 6300 |
],
|
| 6301 |
"max_steps": 1000000,
|
| 6302 |
"num_train_epochs": 16,
|
| 6303 |
+
"total_flos": 5.958514302761722e+22,
|
| 6304 |
"trial_name": null,
|
| 6305 |
"trial_params": null
|
| 6306 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b915770d637a85b511b8ce95890fcc501fb257fa086404572aeee5022ceaa62
|
| 3 |
size 449471589
|