Training in progress, step 990000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb4cd9e789adbd1802119018bcfc4f0b6dba2541ced8918776537c19936d2aa3
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21e636c80ed6aaf4e2b5d21598685c1a08b0a8d8edf7041e56552898357162ca
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2358905887cd0ce80c53b6e8a0174e039c4c5bd62c6c91c86f0312f9b46fcf7
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -19606,11 +19606,211 @@
|
|
| 19606 |
"eval_samples_per_second": 887.915,
|
| 19607 |
"eval_steps_per_second": 13.916,
|
| 19608 |
"step": 980000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19609 |
}
|
| 19610 |
],
|
| 19611 |
"max_steps": 1000000,
|
| 19612 |
"num_train_epochs": 12,
|
| 19613 |
-
"total_flos": 6.
|
| 19614 |
"trial_name": null,
|
| 19615 |
"trial_params": null
|
| 19616 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 11.041343698069438,
|
| 5 |
+
"global_step": 990000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 19606 |
"eval_samples_per_second": 887.915,
|
| 19607 |
"eval_steps_per_second": 13.916,
|
| 19608 |
"step": 980000
|
| 19609 |
+
},
|
| 19610 |
+
{
|
| 19611 |
+
"epoch": 10.94,
|
| 19612 |
+
"learning_rate": 1.0145492062574731e-05,
|
| 19613 |
+
"loss": 0.1798,
|
| 19614 |
+
"step": 980500
|
| 19615 |
+
},
|
| 19616 |
+
{
|
| 19617 |
+
"epoch": 10.94,
|
| 19618 |
+
"learning_rate": 1.0138129010020992e-05,
|
| 19619 |
+
"loss": 0.1797,
|
| 19620 |
+
"step": 981000
|
| 19621 |
+
},
|
| 19622 |
+
{
|
| 19623 |
+
"epoch": 10.94,
|
| 19624 |
+
"eval_loss": 0.17310407757759094,
|
| 19625 |
+
"eval_runtime": 2.575,
|
| 19626 |
+
"eval_samples_per_second": 892.044,
|
| 19627 |
+
"eval_steps_per_second": 13.981,
|
| 19628 |
+
"step": 981000
|
| 19629 |
+
},
|
| 19630 |
+
{
|
| 19631 |
+
"epoch": 10.95,
|
| 19632 |
+
"learning_rate": 1.0130956957154867e-05,
|
| 19633 |
+
"loss": 0.1796,
|
| 19634 |
+
"step": 981500
|
| 19635 |
+
},
|
| 19636 |
+
{
|
| 19637 |
+
"epoch": 10.95,
|
| 19638 |
+
"learning_rate": 1.0123975923584488e-05,
|
| 19639 |
+
"loss": 0.1795,
|
| 19640 |
+
"step": 982000
|
| 19641 |
+
},
|
| 19642 |
+
{
|
| 19643 |
+
"epoch": 10.95,
|
| 19644 |
+
"eval_loss": 0.17133940756320953,
|
| 19645 |
+
"eval_runtime": 2.6289,
|
| 19646 |
+
"eval_samples_per_second": 873.748,
|
| 19647 |
+
"eval_steps_per_second": 13.694,
|
| 19648 |
+
"step": 982000
|
| 19649 |
+
},
|
| 19650 |
+
{
|
| 19651 |
+
"epoch": 10.96,
|
| 19652 |
+
"learning_rate": 1.0117185928395721e-05,
|
| 19653 |
+
"loss": 0.1797,
|
| 19654 |
+
"step": 982500
|
| 19655 |
+
},
|
| 19656 |
+
{
|
| 19657 |
+
"epoch": 10.96,
|
| 19658 |
+
"learning_rate": 1.0110586990152152e-05,
|
| 19659 |
+
"loss": 0.1796,
|
| 19660 |
+
"step": 983000
|
| 19661 |
+
},
|
| 19662 |
+
{
|
| 19663 |
+
"epoch": 10.96,
|
| 19664 |
+
"eval_loss": 0.17200584709644318,
|
| 19665 |
+
"eval_runtime": 2.6007,
|
| 19666 |
+
"eval_samples_per_second": 883.236,
|
| 19667 |
+
"eval_steps_per_second": 13.843,
|
| 19668 |
+
"step": 983000
|
| 19669 |
+
},
|
| 19670 |
+
{
|
| 19671 |
+
"epoch": 10.97,
|
| 19672 |
+
"learning_rate": 1.0104179126895039e-05,
|
| 19673 |
+
"loss": 0.1797,
|
| 19674 |
+
"step": 983500
|
| 19675 |
+
},
|
| 19676 |
+
{
|
| 19677 |
+
"epoch": 10.97,
|
| 19678 |
+
"learning_rate": 1.0097962356143219e-05,
|
| 19679 |
+
"loss": 0.1797,
|
| 19680 |
+
"step": 984000
|
| 19681 |
+
},
|
| 19682 |
+
{
|
| 19683 |
+
"epoch": 10.97,
|
| 19684 |
+
"eval_loss": 0.17025373876094818,
|
| 19685 |
+
"eval_runtime": 2.5803,
|
| 19686 |
+
"eval_samples_per_second": 890.197,
|
| 19687 |
+
"eval_steps_per_second": 13.952,
|
| 19688 |
+
"step": 984000
|
| 19689 |
+
},
|
| 19690 |
+
{
|
| 19691 |
+
"epoch": 10.98,
|
| 19692 |
+
"learning_rate": 1.009193669489312e-05,
|
| 19693 |
+
"loss": 0.1797,
|
| 19694 |
+
"step": 984500
|
| 19695 |
+
},
|
| 19696 |
+
{
|
| 19697 |
+
"epoch": 10.99,
|
| 19698 |
+
"learning_rate": 1.0086102159618668e-05,
|
| 19699 |
+
"loss": 0.1796,
|
| 19700 |
+
"step": 985000
|
| 19701 |
+
},
|
| 19702 |
+
{
|
| 19703 |
+
"epoch": 10.99,
|
| 19704 |
+
"eval_loss": 0.17083962261676788,
|
| 19705 |
+
"eval_runtime": 2.5712,
|
| 19706 |
+
"eval_samples_per_second": 893.348,
|
| 19707 |
+
"eval_steps_per_second": 14.001,
|
| 19708 |
+
"step": 985000
|
| 19709 |
+
},
|
| 19710 |
+
{
|
| 19711 |
+
"epoch": 10.99,
|
| 19712 |
+
"learning_rate": 1.0080458766271252e-05,
|
| 19713 |
+
"loss": 0.1798,
|
| 19714 |
+
"step": 985500
|
| 19715 |
+
},
|
| 19716 |
+
{
|
| 19717 |
+
"epoch": 11.0,
|
| 19718 |
+
"learning_rate": 1.0075006530279694e-05,
|
| 19719 |
+
"loss": 0.1797,
|
| 19720 |
+
"step": 986000
|
| 19721 |
+
},
|
| 19722 |
+
{
|
| 19723 |
+
"epoch": 11.0,
|
| 19724 |
+
"eval_loss": 0.1690717339515686,
|
| 19725 |
+
"eval_runtime": 2.588,
|
| 19726 |
+
"eval_samples_per_second": 887.569,
|
| 19727 |
+
"eval_steps_per_second": 13.911,
|
| 19728 |
+
"step": 986000
|
| 19729 |
+
},
|
| 19730 |
+
{
|
| 19731 |
+
"epoch": 11.0,
|
| 19732 |
+
"learning_rate": 1.0069745466550205e-05,
|
| 19733 |
+
"loss": 0.1794,
|
| 19734 |
+
"step": 986500
|
| 19735 |
+
},
|
| 19736 |
+
{
|
| 19737 |
+
"epoch": 11.01,
|
| 19738 |
+
"learning_rate": 1.0064675589466339e-05,
|
| 19739 |
+
"loss": 0.1796,
|
| 19740 |
+
"step": 987000
|
| 19741 |
+
},
|
| 19742 |
+
{
|
| 19743 |
+
"epoch": 11.01,
|
| 19744 |
+
"eval_loss": 0.16997100412845612,
|
| 19745 |
+
"eval_runtime": 2.5938,
|
| 19746 |
+
"eval_samples_per_second": 885.56,
|
| 19747 |
+
"eval_steps_per_second": 13.879,
|
| 19748 |
+
"step": 987000
|
| 19749 |
+
},
|
| 19750 |
+
{
|
| 19751 |
+
"epoch": 11.01,
|
| 19752 |
+
"learning_rate": 1.005979691288893e-05,
|
| 19753 |
+
"loss": 0.1795,
|
| 19754 |
+
"step": 987500
|
| 19755 |
+
},
|
| 19756 |
+
{
|
| 19757 |
+
"epoch": 11.02,
|
| 19758 |
+
"learning_rate": 1.0055109450156098e-05,
|
| 19759 |
+
"loss": 0.1791,
|
| 19760 |
+
"step": 988000
|
| 19761 |
+
},
|
| 19762 |
+
{
|
| 19763 |
+
"epoch": 11.02,
|
| 19764 |
+
"eval_loss": 0.1697554588317871,
|
| 19765 |
+
"eval_runtime": 2.5898,
|
| 19766 |
+
"eval_samples_per_second": 886.931,
|
| 19767 |
+
"eval_steps_per_second": 13.901,
|
| 19768 |
+
"step": 988000
|
| 19769 |
+
},
|
| 19770 |
+
{
|
| 19771 |
+
"epoch": 11.02,
|
| 19772 |
+
"learning_rate": 1.0050613214083197e-05,
|
| 19773 |
+
"loss": 0.1797,
|
| 19774 |
+
"step": 988500
|
| 19775 |
+
},
|
| 19776 |
+
{
|
| 19777 |
+
"epoch": 11.03,
|
| 19778 |
+
"learning_rate": 1.0046308216962759e-05,
|
| 19779 |
+
"loss": 0.1795,
|
| 19780 |
+
"step": 989000
|
| 19781 |
+
},
|
| 19782 |
+
{
|
| 19783 |
+
"epoch": 11.03,
|
| 19784 |
+
"eval_loss": 0.1698392927646637,
|
| 19785 |
+
"eval_runtime": 2.6586,
|
| 19786 |
+
"eval_samples_per_second": 863.992,
|
| 19787 |
+
"eval_steps_per_second": 13.541,
|
| 19788 |
+
"step": 989000
|
| 19789 |
+
},
|
| 19790 |
+
{
|
| 19791 |
+
"epoch": 11.04,
|
| 19792 |
+
"learning_rate": 1.0042194470564472e-05,
|
| 19793 |
+
"loss": 0.1796,
|
| 19794 |
+
"step": 989500
|
| 19795 |
+
},
|
| 19796 |
+
{
|
| 19797 |
+
"epoch": 11.04,
|
| 19798 |
+
"learning_rate": 1.0038271986135177e-05,
|
| 19799 |
+
"loss": 0.1799,
|
| 19800 |
+
"step": 990000
|
| 19801 |
+
},
|
| 19802 |
+
{
|
| 19803 |
+
"epoch": 11.04,
|
| 19804 |
+
"eval_loss": 0.16946464776992798,
|
| 19805 |
+
"eval_runtime": 2.6152,
|
| 19806 |
+
"eval_samples_per_second": 878.327,
|
| 19807 |
+
"eval_steps_per_second": 13.766,
|
| 19808 |
+
"step": 990000
|
| 19809 |
}
|
| 19810 |
],
|
| 19811 |
"max_steps": 1000000,
|
| 19812 |
"num_train_epochs": 12,
|
| 19813 |
+
"total_flos": 6.9398656010816955e+22,
|
| 19814 |
"trial_name": null,
|
| 19815 |
"trial_params": null
|
| 19816 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21e636c80ed6aaf4e2b5d21598685c1a08b0a8d8edf7041e56552898357162ca
|
| 3 |
size 449471589
|