Training in progress, step 870000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e93b07dd856f0dcb5f8d337f717d57dfe290919f17cc810a0ab94d8971b7b8e
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10f95eaff5ba49e944cce01defd24612a1c006bd8b68e7d8b6c201dec46570c0
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be9a5b19b5e6086c1b46537730410e6434224c5e3b8a150ae1f6ffa0b0403da5
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d406f3a150dc327386b1fe7c40ac833ace65767a7a63b870db21104ccf5eb518
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e79e63475cece03ae5d2720177141fff64fcde5033ba647f27c46dc1cce1e92
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:64496346cbbe377de7eb74096cb4a928e4b77370483c82ca56a5a6927676dd2a
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d368f4f79d735aeb82977d11fd8d84913a3919ff8ecbae0982e3d606c331447e
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 13.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -6370,11 +6370,85 @@
|
|
| 6370 |
"eval_samples_per_second": 1321.327,
|
| 6371 |
"eval_steps_per_second": 21.141,
|
| 6372 |
"step": 860000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6373 |
}
|
| 6374 |
],
|
| 6375 |
"max_steps": 1000000,
|
| 6376 |
"num_train_epochs": 16,
|
| 6377 |
-
"total_flos": 6.
|
| 6378 |
"trial_name": null,
|
| 6379 |
"trial_params": null
|
| 6380 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 13.285079481423795,
|
| 5 |
+
"global_step": 870000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 6370 |
"eval_samples_per_second": 1321.327,
|
| 6371 |
"eval_steps_per_second": 21.141,
|
| 6372 |
"step": 860000
|
| 6373 |
+
},
|
| 6374 |
+
{
|
| 6375 |
+
"epoch": 13.15,
|
| 6376 |
+
"learning_rate": 1.726590597701708e-05,
|
| 6377 |
+
"loss": 0.2322,
|
| 6378 |
+
"step": 861000
|
| 6379 |
+
},
|
| 6380 |
+
{
|
| 6381 |
+
"epoch": 13.16,
|
| 6382 |
+
"learning_rate": 1.7163551091488952e-05,
|
| 6383 |
+
"loss": 0.2375,
|
| 6384 |
+
"step": 862000
|
| 6385 |
+
},
|
| 6386 |
+
{
|
| 6387 |
+
"epoch": 13.18,
|
| 6388 |
+
"learning_rate": 1.7061883375298788e-05,
|
| 6389 |
+
"loss": 0.2328,
|
| 6390 |
+
"step": 863000
|
| 6391 |
+
},
|
| 6392 |
+
{
|
| 6393 |
+
"epoch": 13.19,
|
| 6394 |
+
"learning_rate": 1.6960903940268456e-05,
|
| 6395 |
+
"loss": 0.2323,
|
| 6396 |
+
"step": 864000
|
| 6397 |
+
},
|
| 6398 |
+
{
|
| 6399 |
+
"epoch": 13.21,
|
| 6400 |
+
"learning_rate": 1.6860613890692876e-05,
|
| 6401 |
+
"loss": 0.2334,
|
| 6402 |
+
"step": 865000
|
| 6403 |
+
},
|
| 6404 |
+
{
|
| 6405 |
+
"epoch": 13.21,
|
| 6406 |
+
"eval_runtime": 0.7389,
|
| 6407 |
+
"eval_samples_per_second": 1353.416,
|
| 6408 |
+
"eval_steps_per_second": 21.655,
|
| 6409 |
+
"step": 865000
|
| 6410 |
+
},
|
| 6411 |
+
{
|
| 6412 |
+
"epoch": 13.22,
|
| 6413 |
+
"learning_rate": 1.6761014323327962e-05,
|
| 6414 |
+
"loss": 0.233,
|
| 6415 |
+
"step": 866000
|
| 6416 |
+
},
|
| 6417 |
+
{
|
| 6418 |
+
"epoch": 13.24,
|
| 6419 |
+
"learning_rate": 1.6662106327378645e-05,
|
| 6420 |
+
"loss": 0.2334,
|
| 6421 |
+
"step": 867000
|
| 6422 |
+
},
|
| 6423 |
+
{
|
| 6424 |
+
"epoch": 13.25,
|
| 6425 |
+
"learning_rate": 1.6563890984486884e-05,
|
| 6426 |
+
"loss": 0.2333,
|
| 6427 |
+
"step": 868000
|
| 6428 |
+
},
|
| 6429 |
+
{
|
| 6430 |
+
"epoch": 13.27,
|
| 6431 |
+
"learning_rate": 1.6466369368719955e-05,
|
| 6432 |
+
"loss": 0.2324,
|
| 6433 |
+
"step": 869000
|
| 6434 |
+
},
|
| 6435 |
+
{
|
| 6436 |
+
"epoch": 13.29,
|
| 6437 |
+
"learning_rate": 1.6369542546558626e-05,
|
| 6438 |
+
"loss": 0.2324,
|
| 6439 |
+
"step": 870000
|
| 6440 |
+
},
|
| 6441 |
+
{
|
| 6442 |
+
"epoch": 13.29,
|
| 6443 |
+
"eval_runtime": 0.8823,
|
| 6444 |
+
"eval_samples_per_second": 1133.455,
|
| 6445 |
+
"eval_steps_per_second": 18.135,
|
| 6446 |
+
"step": 870000
|
| 6447 |
}
|
| 6448 |
],
|
| 6449 |
"max_steps": 1000000,
|
| 6450 |
"num_train_epochs": 16,
|
| 6451 |
+
"total_flos": 6.098714167754268e+22,
|
| 6452 |
"trial_name": null,
|
| 6453 |
"trial_params": null
|
| 6454 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10f95eaff5ba49e944cce01defd24612a1c006bd8b68e7d8b6c201dec46570c0
|
| 3 |
size 449471589
|