Training in progress, step 480000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:125a953eae139b71c861d769dd5fcb3f2876cb3ba5332474ace67ff7903ad282
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8cb09afc7f60da26cfbf13286b33ee5d8eaf949d0691655a730175e631e257c3
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8310a18059ec0119b7d5189ac12986b598d8b8b1ef1bbfc8c8957369e8337ad7
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4a54b152e9b8fc53442ea8e45557e4a5adac5097977f729107da5bb580c1c20
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c3be6e2beda8fd5ff4d10e6a6d31003cc62098e71db295fd431821efbbfb1a9
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14567
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1acdc1aa6e4187c8d0aa9e0711043619de98803eb54f6ce34ce5eccae47291d
|
| 3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d89c33267da2eca03288d19643b70286b13de68f683e137d6b6c77f428e64db6
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 7.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -3484,11 +3484,85 @@
|
|
| 3484 |
"eval_samples_per_second": 1116.774,
|
| 3485 |
"eval_steps_per_second": 17.868,
|
| 3486 |
"step": 470000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3487 |
}
|
| 3488 |
],
|
| 3489 |
"max_steps": 1000000,
|
| 3490 |
"num_train_epochs": 16,
|
| 3491 |
-
"total_flos": 3.
|
| 3492 |
"trial_name": null,
|
| 3493 |
"trial_params": null
|
| 3494 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 7.329699024233817,
|
| 5 |
+
"global_step": 480000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 3484 |
"eval_samples_per_second": 1116.774,
|
| 3485 |
"eval_steps_per_second": 17.868,
|
| 3486 |
"step": 470000
|
| 3487 |
+
},
|
| 3488 |
+
{
|
| 3489 |
+
"epoch": 7.19,
|
| 3490 |
+
"learning_rate": 9.243390115142761e-05,
|
| 3491 |
+
"loss": 0.2678,
|
| 3492 |
+
"step": 471000
|
| 3493 |
+
},
|
| 3494 |
+
{
|
| 3495 |
+
"epoch": 7.21,
|
| 3496 |
+
"learning_rate": 9.220602892445661e-05,
|
| 3497 |
+
"loss": 0.2678,
|
| 3498 |
+
"step": 472000
|
| 3499 |
+
},
|
| 3500 |
+
{
|
| 3501 |
+
"epoch": 7.22,
|
| 3502 |
+
"learning_rate": 9.197802321430889e-05,
|
| 3503 |
+
"loss": 0.2679,
|
| 3504 |
+
"step": 473000
|
| 3505 |
+
},
|
| 3506 |
+
{
|
| 3507 |
+
"epoch": 7.24,
|
| 3508 |
+
"learning_rate": 9.174988651441833e-05,
|
| 3509 |
+
"loss": 0.2673,
|
| 3510 |
+
"step": 474000
|
| 3511 |
+
},
|
| 3512 |
+
{
|
| 3513 |
+
"epoch": 7.25,
|
| 3514 |
+
"learning_rate": 9.152162131965137e-05,
|
| 3515 |
+
"loss": 0.2675,
|
| 3516 |
+
"step": 475000
|
| 3517 |
+
},
|
| 3518 |
+
{
|
| 3519 |
+
"epoch": 7.25,
|
| 3520 |
+
"eval_runtime": 1.0353,
|
| 3521 |
+
"eval_samples_per_second": 965.922,
|
| 3522 |
+
"eval_steps_per_second": 15.455,
|
| 3523 |
+
"step": 475000
|
| 3524 |
+
},
|
| 3525 |
+
{
|
| 3526 |
+
"epoch": 7.27,
|
| 3527 |
+
"learning_rate": 9.129323012627956e-05,
|
| 3528 |
+
"loss": 0.2693,
|
| 3529 |
+
"step": 476000
|
| 3530 |
+
},
|
| 3531 |
+
{
|
| 3532 |
+
"epoch": 7.28,
|
| 3533 |
+
"learning_rate": 9.106471543195244e-05,
|
| 3534 |
+
"loss": 0.2675,
|
| 3535 |
+
"step": 477000
|
| 3536 |
+
},
|
| 3537 |
+
{
|
| 3538 |
+
"epoch": 7.3,
|
| 3539 |
+
"learning_rate": 9.08360797356701e-05,
|
| 3540 |
+
"loss": 0.2679,
|
| 3541 |
+
"step": 478000
|
| 3542 |
+
},
|
| 3543 |
+
{
|
| 3544 |
+
"epoch": 7.31,
|
| 3545 |
+
"learning_rate": 9.060732553775582e-05,
|
| 3546 |
+
"loss": 0.2672,
|
| 3547 |
+
"step": 479000
|
| 3548 |
+
},
|
| 3549 |
+
{
|
| 3550 |
+
"epoch": 7.33,
|
| 3551 |
+
"learning_rate": 9.037845533982892e-05,
|
| 3552 |
+
"loss": 0.267,
|
| 3553 |
+
"step": 480000
|
| 3554 |
+
},
|
| 3555 |
+
{
|
| 3556 |
+
"epoch": 7.33,
|
| 3557 |
+
"eval_runtime": 1.0347,
|
| 3558 |
+
"eval_samples_per_second": 966.468,
|
| 3559 |
+
"eval_steps_per_second": 15.463,
|
| 3560 |
+
"step": 480000
|
| 3561 |
}
|
| 3562 |
],
|
| 3563 |
"max_steps": 1000000,
|
| 3564 |
"num_train_epochs": 16,
|
| 3565 |
+
"total_flos": 3.3648079299796217e+22,
|
| 3566 |
"trial_name": null,
|
| 3567 |
"trial_params": null
|
| 3568 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8cb09afc7f60da26cfbf13286b33ee5d8eaf949d0691655a730175e631e257c3
|
| 3 |
size 449471589
|