Training in progress, step 490000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 202194449
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bba175f94af3131ddc7e585c8d0c85376ebd1433f20a5a01a35d8488fc39885b
|
| 3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc6bd31ef6b75d2ff57b791613279c5afe6c8244312a64f00fb084519b8aaac6
|
| 3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d798ff13d72fe751bc0ea721c37eb1e98064dde5819b90f3504db53fdceee97
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 10.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -9606,11 +9606,211 @@
|
|
| 9606 |
"eval_samples_per_second": 1170.393,
|
| 9607 |
"eval_steps_per_second": 18.343,
|
| 9608 |
"step": 480000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9609 |
}
|
| 9610 |
],
|
| 9611 |
"max_steps": 500000,
|
| 9612 |
"num_train_epochs": 12,
|
| 9613 |
-
"total_flos": 1.
|
| 9614 |
"trial_name": null,
|
| 9615 |
"trial_params": null
|
| 9616 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 10.929693076374019,
|
| 5 |
+
"global_step": 490000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 9606 |
"eval_samples_per_second": 1170.393,
|
| 9607 |
"eval_steps_per_second": 18.343,
|
| 9608 |
"step": 480000
|
| 9609 |
+
},
|
| 9610 |
+
{
|
| 9611 |
+
"epoch": 10.72,
|
| 9612 |
+
"learning_rate": 1.1204252864868377e-05,
|
| 9613 |
+
"loss": 0.2538,
|
| 9614 |
+
"step": 480500
|
| 9615 |
+
},
|
| 9616 |
+
{
|
| 9617 |
+
"epoch": 10.73,
|
| 9618 |
+
"learning_rate": 1.1143368309400725e-05,
|
| 9619 |
+
"loss": 0.2539,
|
| 9620 |
+
"step": 481000
|
| 9621 |
+
},
|
| 9622 |
+
{
|
| 9623 |
+
"epoch": 10.73,
|
| 9624 |
+
"eval_loss": 0.23603801429271698,
|
| 9625 |
+
"eval_runtime": 1.9718,
|
| 9626 |
+
"eval_samples_per_second": 1164.912,
|
| 9627 |
+
"eval_steps_per_second": 18.257,
|
| 9628 |
+
"step": 481000
|
| 9629 |
+
},
|
| 9630 |
+
{
|
| 9631 |
+
"epoch": 10.74,
|
| 9632 |
+
"learning_rate": 1.1084056947009348e-05,
|
| 9633 |
+
"loss": 0.2538,
|
| 9634 |
+
"step": 481500
|
| 9635 |
+
},
|
| 9636 |
+
{
|
| 9637 |
+
"epoch": 10.75,
|
| 9638 |
+
"learning_rate": 1.1026319426313837e-05,
|
| 9639 |
+
"loss": 0.2538,
|
| 9640 |
+
"step": 482000
|
| 9641 |
+
},
|
| 9642 |
+
{
|
| 9643 |
+
"epoch": 10.75,
|
| 9644 |
+
"eval_loss": 0.23858527839183807,
|
| 9645 |
+
"eval_runtime": 1.961,
|
| 9646 |
+
"eval_samples_per_second": 1171.312,
|
| 9647 |
+
"eval_steps_per_second": 18.358,
|
| 9648 |
+
"step": 482000
|
| 9649 |
+
},
|
| 9650 |
+
{
|
| 9651 |
+
"epoch": 10.76,
|
| 9652 |
+
"learning_rate": 1.097015637872247e-05,
|
| 9653 |
+
"loss": 0.2538,
|
| 9654 |
+
"step": 482500
|
| 9655 |
+
},
|
| 9656 |
+
{
|
| 9657 |
+
"epoch": 10.77,
|
| 9658 |
+
"learning_rate": 1.0915568418425301e-05,
|
| 9659 |
+
"loss": 0.2537,
|
| 9660 |
+
"step": 483000
|
| 9661 |
+
},
|
| 9662 |
+
{
|
| 9663 |
+
"epoch": 10.77,
|
| 9664 |
+
"eval_loss": 0.23714858293533325,
|
| 9665 |
+
"eval_runtime": 2.009,
|
| 9666 |
+
"eval_samples_per_second": 1143.375,
|
| 9667 |
+
"eval_steps_per_second": 17.92,
|
| 9668 |
+
"step": 483000
|
| 9669 |
+
},
|
| 9670 |
+
{
|
| 9671 |
+
"epoch": 10.78,
|
| 9672 |
+
"learning_rate": 1.0862556142387571e-05,
|
| 9673 |
+
"loss": 0.2539,
|
| 9674 |
+
"step": 483500
|
| 9675 |
+
},
|
| 9676 |
+
{
|
| 9677 |
+
"epoch": 10.8,
|
| 9678 |
+
"learning_rate": 1.081112013034298e-05,
|
| 9679 |
+
"loss": 0.2537,
|
| 9680 |
+
"step": 484000
|
| 9681 |
+
},
|
| 9682 |
+
{
|
| 9683 |
+
"epoch": 10.8,
|
| 9684 |
+
"eval_loss": 0.23877692222595215,
|
| 9685 |
+
"eval_runtime": 1.9856,
|
| 9686 |
+
"eval_samples_per_second": 1156.824,
|
| 9687 |
+
"eval_steps_per_second": 18.13,
|
| 9688 |
+
"step": 484000
|
| 9689 |
+
},
|
| 9690 |
+
{
|
| 9691 |
+
"epoch": 10.81,
|
| 9692 |
+
"learning_rate": 1.0761260944787561e-05,
|
| 9693 |
+
"loss": 0.2551,
|
| 9694 |
+
"step": 484500
|
| 9695 |
+
},
|
| 9696 |
+
{
|
| 9697 |
+
"epoch": 10.82,
|
| 9698 |
+
"learning_rate": 1.0712979130973347e-05,
|
| 9699 |
+
"loss": 0.2542,
|
| 9700 |
+
"step": 485000
|
| 9701 |
+
},
|
| 9702 |
+
{
|
| 9703 |
+
"epoch": 10.82,
|
| 9704 |
+
"eval_loss": 0.23765695095062256,
|
| 9705 |
+
"eval_runtime": 1.9888,
|
| 9706 |
+
"eval_samples_per_second": 1154.965,
|
| 9707 |
+
"eval_steps_per_second": 18.101,
|
| 9708 |
+
"step": 485000
|
| 9709 |
+
},
|
| 9710 |
+
{
|
| 9711 |
+
"epoch": 10.83,
|
| 9712 |
+
"learning_rate": 1.0666275216902535e-05,
|
| 9713 |
+
"loss": 0.2539,
|
| 9714 |
+
"step": 485500
|
| 9715 |
+
},
|
| 9716 |
+
{
|
| 9717 |
+
"epoch": 10.84,
|
| 9718 |
+
"learning_rate": 1.0621149713321656e-05,
|
| 9719 |
+
"loss": 0.2539,
|
| 9720 |
+
"step": 486000
|
| 9721 |
+
},
|
| 9722 |
+
{
|
| 9723 |
+
"epoch": 10.84,
|
| 9724 |
+
"eval_loss": 0.23621481657028198,
|
| 9725 |
+
"eval_runtime": 1.9428,
|
| 9726 |
+
"eval_samples_per_second": 1182.329,
|
| 9727 |
+
"eval_steps_per_second": 18.53,
|
| 9728 |
+
"step": 486000
|
| 9729 |
+
},
|
| 9730 |
+
{
|
| 9731 |
+
"epoch": 10.85,
|
| 9732 |
+
"learning_rate": 1.0577603113715964e-05,
|
| 9733 |
+
"loss": 0.2539,
|
| 9734 |
+
"step": 486500
|
| 9735 |
+
},
|
| 9736 |
+
{
|
| 9737 |
+
"epoch": 10.86,
|
| 9738 |
+
"learning_rate": 1.0535635894304106e-05,
|
| 9739 |
+
"loss": 0.2535,
|
| 9740 |
+
"step": 487000
|
| 9741 |
+
},
|
| 9742 |
+
{
|
| 9743 |
+
"epoch": 10.86,
|
| 9744 |
+
"eval_loss": 0.23733575642108917,
|
| 9745 |
+
"eval_runtime": 1.9603,
|
| 9746 |
+
"eval_samples_per_second": 1171.773,
|
| 9747 |
+
"eval_steps_per_second": 18.365,
|
| 9748 |
+
"step": 487000
|
| 9749 |
+
},
|
| 9750 |
+
{
|
| 9751 |
+
"epoch": 10.87,
|
| 9752 |
+
"learning_rate": 1.0495248514032875e-05,
|
| 9753 |
+
"loss": 0.2539,
|
| 9754 |
+
"step": 487500
|
| 9755 |
+
},
|
| 9756 |
+
{
|
| 9757 |
+
"epoch": 10.89,
|
| 9758 |
+
"learning_rate": 1.045644141457218e-05,
|
| 9759 |
+
"loss": 0.2533,
|
| 9760 |
+
"step": 488000
|
| 9761 |
+
},
|
| 9762 |
+
{
|
| 9763 |
+
"epoch": 10.89,
|
| 9764 |
+
"eval_loss": 0.23612964153289795,
|
| 9765 |
+
"eval_runtime": 1.9923,
|
| 9766 |
+
"eval_samples_per_second": 1152.93,
|
| 9767 |
+
"eval_steps_per_second": 18.069,
|
| 9768 |
+
"step": 488000
|
| 9769 |
+
},
|
| 9770 |
+
{
|
| 9771 |
+
"epoch": 10.9,
|
| 9772 |
+
"learning_rate": 1.0419215020310254e-05,
|
| 9773 |
+
"loss": 0.2534,
|
| 9774 |
+
"step": 488500
|
| 9775 |
+
},
|
| 9776 |
+
{
|
| 9777 |
+
"epoch": 10.91,
|
| 9778 |
+
"learning_rate": 1.0383569738348988e-05,
|
| 9779 |
+
"loss": 0.2533,
|
| 9780 |
+
"step": 489000
|
| 9781 |
+
},
|
| 9782 |
+
{
|
| 9783 |
+
"epoch": 10.91,
|
| 9784 |
+
"eval_loss": 0.2368190884590149,
|
| 9785 |
+
"eval_runtime": 1.9507,
|
| 9786 |
+
"eval_samples_per_second": 1177.524,
|
| 9787 |
+
"eval_steps_per_second": 18.455,
|
| 9788 |
+
"step": 489000
|
| 9789 |
+
},
|
| 9790 |
+
{
|
| 9791 |
+
"epoch": 10.92,
|
| 9792 |
+
"learning_rate": 1.0349505958499436e-05,
|
| 9793 |
+
"loss": 0.2534,
|
| 9794 |
+
"step": 489500
|
| 9795 |
+
},
|
| 9796 |
+
{
|
| 9797 |
+
"epoch": 10.93,
|
| 9798 |
+
"learning_rate": 1.0317024053277693e-05,
|
| 9799 |
+
"loss": 0.2535,
|
| 9800 |
+
"step": 490000
|
| 9801 |
+
},
|
| 9802 |
+
{
|
| 9803 |
+
"epoch": 10.93,
|
| 9804 |
+
"eval_loss": 0.23948417603969574,
|
| 9805 |
+
"eval_runtime": 2.0351,
|
| 9806 |
+
"eval_samples_per_second": 1128.67,
|
| 9807 |
+
"eval_steps_per_second": 17.689,
|
| 9808 |
+
"step": 490000
|
| 9809 |
}
|
| 9810 |
],
|
| 9811 |
"max_steps": 500000,
|
| 9812 |
"num_train_epochs": 12,
|
| 9813 |
+
"total_flos": 1.5654585257336347e+22,
|
| 9814 |
"trial_name": null,
|
| 9815 |
"trial_params": null
|
| 9816 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc6bd31ef6b75d2ff57b791613279c5afe6c8244312a64f00fb084519b8aaac6
|
| 3 |
size 102501541
|