Training in progress, step 880000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f011531683b29e6010c43a1101dacea9ed4a18363936f0bda8424b0b64fd61d5
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:761b97519f5fabc1226f543fdfda1e57b01e50e22126717fe93929f009e6d948
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:935da3b045c8c8b8ce754d2c39cf0981b2085a82929dd47cd40a448687388e6a
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbdeea7578a634cee6754d432445c1d3c22f4d3700bcbfbce695f5cbf096739a
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6c5ff5b48c10910314f1846b5d56765a69ff07e0a1c2179950e7e708be45c72
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:538db9627b3319da412d82dd2d788799c67c9fd4db76e45825c3a463b4ec891a
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cefdc80aebec1bb7388820de673e9b9482e6730079e6308c4991cb6f1f6a1673
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:078a7ec7b0b49e632a586c20c54dd7869580bfe51e6466659e4a1c049cec1d5c
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0ee34e421a4579fb938367d875bd6d0e9cb1e7cc940564954160dbbd10ab58c
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7f6c3ac5b3d9ff9275bcc6f2b68cf34b2b33a4d26ba0d17a6dea955ffb4848b
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6c3b9739e151e083ac7876c028ada3eeb37ab066447f172eee69fac580c5323
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -10446,11 +10446,131 @@
|
|
| 10446 |
"learning_rate": 1.6369542546558626e-05,
|
| 10447 |
"loss": 0.2864,
|
| 10448 |
"step": 870000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10449 |
}
|
| 10450 |
],
|
| 10451 |
"max_steps": 1000000,
|
| 10452 |
"num_train_epochs": 2,
|
| 10453 |
-
"total_flos": 5.
|
| 10454 |
"trial_name": null,
|
| 10455 |
"trial_params": null
|
| 10456 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.2989453209078372,
|
| 5 |
+
"global_step": 880000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 10446 |
"learning_rate": 1.6369542546558626e-05,
|
| 10447 |
"loss": 0.2864,
|
| 10448 |
"step": 870000
|
| 10449 |
+
},
|
| 10450 |
+
{
|
| 10451 |
+
"epoch": 1.28,
|
| 10452 |
+
"learning_rate": 1.6321390014277996e-05,
|
| 10453 |
+
"loss": 0.2868,
|
| 10454 |
+
"step": 870500
|
| 10455 |
+
},
|
| 10456 |
+
{
|
| 10457 |
+
"epoch": 1.28,
|
| 10458 |
+
"learning_rate": 1.6273411576885517e-05,
|
| 10459 |
+
"loss": 0.2869,
|
| 10460 |
+
"step": 871000
|
| 10461 |
+
},
|
| 10462 |
+
{
|
| 10463 |
+
"epoch": 1.28,
|
| 10464 |
+
"learning_rate": 1.6225607365552378e-05,
|
| 10465 |
+
"loss": 0.2863,
|
| 10466 |
+
"step": 871500
|
| 10467 |
+
},
|
| 10468 |
+
{
|
| 10469 |
+
"epoch": 1.28,
|
| 10470 |
+
"learning_rate": 1.617797751097349e-05,
|
| 10471 |
+
"loss": 0.2866,
|
| 10472 |
+
"step": 872000
|
| 10473 |
+
},
|
| 10474 |
+
{
|
| 10475 |
+
"epoch": 1.28,
|
| 10476 |
+
"learning_rate": 1.6130522143367032e-05,
|
| 10477 |
+
"loss": 0.2868,
|
| 10478 |
+
"step": 872500
|
| 10479 |
+
},
|
| 10480 |
+
{
|
| 10481 |
+
"epoch": 1.28,
|
| 10482 |
+
"learning_rate": 1.608324139247421e-05,
|
| 10483 |
+
"loss": 0.2864,
|
| 10484 |
+
"step": 873000
|
| 10485 |
+
},
|
| 10486 |
+
{
|
| 10487 |
+
"epoch": 1.29,
|
| 10488 |
+
"learning_rate": 1.6036135387558756e-05,
|
| 10489 |
+
"loss": 0.2862,
|
| 10490 |
+
"step": 873500
|
| 10491 |
+
},
|
| 10492 |
+
{
|
| 10493 |
+
"epoch": 1.29,
|
| 10494 |
+
"learning_rate": 1.5989204257406693e-05,
|
| 10495 |
+
"loss": 0.286,
|
| 10496 |
+
"step": 874000
|
| 10497 |
+
},
|
| 10498 |
+
{
|
| 10499 |
+
"epoch": 1.29,
|
| 10500 |
+
"learning_rate": 1.594244813032595e-05,
|
| 10501 |
+
"loss": 0.286,
|
| 10502 |
+
"step": 874500
|
| 10503 |
+
},
|
| 10504 |
+
{
|
| 10505 |
+
"epoch": 1.29,
|
| 10506 |
+
"learning_rate": 1.5895867134145974e-05,
|
| 10507 |
+
"loss": 0.2861,
|
| 10508 |
+
"step": 875000
|
| 10509 |
+
},
|
| 10510 |
+
{
|
| 10511 |
+
"epoch": 1.29,
|
| 10512 |
+
"learning_rate": 1.5849461396217467e-05,
|
| 10513 |
+
"loss": 0.2855,
|
| 10514 |
+
"step": 875500
|
| 10515 |
+
},
|
| 10516 |
+
{
|
| 10517 |
+
"epoch": 1.29,
|
| 10518 |
+
"learning_rate": 1.5803231043411912e-05,
|
| 10519 |
+
"loss": 0.286,
|
| 10520 |
+
"step": 876000
|
| 10521 |
+
},
|
| 10522 |
+
{
|
| 10523 |
+
"epoch": 1.29,
|
| 10524 |
+
"learning_rate": 1.575717620212132e-05,
|
| 10525 |
+
"loss": 0.2867,
|
| 10526 |
+
"step": 876500
|
| 10527 |
+
},
|
| 10528 |
+
{
|
| 10529 |
+
"epoch": 1.29,
|
| 10530 |
+
"learning_rate": 1.5711296998257902e-05,
|
| 10531 |
+
"loss": 0.2867,
|
| 10532 |
+
"step": 877000
|
| 10533 |
+
},
|
| 10534 |
+
{
|
| 10535 |
+
"epoch": 1.29,
|
| 10536 |
+
"learning_rate": 1.5665593557253623e-05,
|
| 10537 |
+
"loss": 0.2865,
|
| 10538 |
+
"step": 877500
|
| 10539 |
+
},
|
| 10540 |
+
{
|
| 10541 |
+
"epoch": 1.29,
|
| 10542 |
+
"learning_rate": 1.562006600405996e-05,
|
| 10543 |
+
"loss": 0.286,
|
| 10544 |
+
"step": 878000
|
| 10545 |
+
},
|
| 10546 |
+
{
|
| 10547 |
+
"epoch": 1.3,
|
| 10548 |
+
"learning_rate": 1.5574714463147512e-05,
|
| 10549 |
+
"loss": 0.2857,
|
| 10550 |
+
"step": 878500
|
| 10551 |
+
},
|
| 10552 |
+
{
|
| 10553 |
+
"epoch": 1.3,
|
| 10554 |
+
"learning_rate": 1.5529539058505624e-05,
|
| 10555 |
+
"loss": 0.286,
|
| 10556 |
+
"step": 879000
|
| 10557 |
+
},
|
| 10558 |
+
{
|
| 10559 |
+
"epoch": 1.3,
|
| 10560 |
+
"learning_rate": 1.5484539913642175e-05,
|
| 10561 |
+
"loss": 0.2862,
|
| 10562 |
+
"step": 879500
|
| 10563 |
+
},
|
| 10564 |
+
{
|
| 10565 |
+
"epoch": 1.3,
|
| 10566 |
+
"learning_rate": 1.543971715158307e-05,
|
| 10567 |
+
"loss": 0.2863,
|
| 10568 |
+
"step": 880000
|
| 10569 |
}
|
| 10570 |
],
|
| 10571 |
"max_steps": 1000000,
|
| 10572 |
"num_train_epochs": 2,
|
| 10573 |
+
"total_flos": 5.949418752030229e+22,
|
| 10574 |
"trial_name": null,
|
| 10575 |
"trial_params": null
|
| 10576 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:761b97519f5fabc1226f543fdfda1e57b01e50e22126717fe93929f009e6d948
|
| 3 |
size 449450757
|