Training in progress, step 990000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0c84155e298a5a6b7ee070a8179e83bcd232eb2e5869a2fab62635fe434a15b
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6119a41cc84dfc82b1ad8e160d2de4f6723c4c70df2567e7b05d5135b2ad413c
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d92331d191f9b54e8682c7179f1d46d8e4298cbee5bf860f80c01313d919cbf
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b3f3f7f2089f141dbb6bfd17083ac633b398668fbdd29b4239ed78cd16828ef
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a1e11f0bc52abbbd19f8d4cb2af2cb9140b19cf7c5678a2b90ba0e9593a2025
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bffc19433b361587fa636d9fe7f366ad621d886a3e88e7b929e925c972115064
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:baf40a9092f2a61c65fa8d4b01ad107ecd14742f4e2c480971b7e313c2ab669b
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35d8ccac7b1580c03e5bd6e78ceed5d6b1e846215ceb31ec6a8e03829af4c92e
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a564015df3012aed298103a98abad0123fee1b00b18b24191670937f37a6281
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3839c7b56c36b9afc190821fb76a047c055639f3ce0f32d8cdce397cba6fa5f
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8600853d84ba2918d905e70ba99b5bea80c1839bef1da439315875bb3ad462d
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -11766,11 +11766,131 @@
|
|
| 11766 |
"learning_rate": 1.0153046094685783e-05,
|
| 11767 |
"loss": 0.2831,
|
| 11768 |
"step": 980000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11769 |
}
|
| 11770 |
],
|
| 11771 |
"max_steps": 1000000,
|
| 11772 |
"num_train_epochs": 2,
|
| 11773 |
-
"total_flos": 6.
|
| 11774 |
"trial_name": null,
|
| 11775 |
"trial_params": null
|
| 11776 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.5181718895735843,
|
| 5 |
+
"global_step": 990000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 11766 |
"learning_rate": 1.0153046094685783e-05,
|
| 11767 |
"loss": 0.2831,
|
| 11768 |
"step": 980000
|
| 11769 |
+
},
|
| 11770 |
+
{
|
| 11771 |
+
"epoch": 1.5,
|
| 11772 |
+
"learning_rate": 1.0145492062574731e-05,
|
| 11773 |
+
"loss": 0.2826,
|
| 11774 |
+
"step": 980500
|
| 11775 |
+
},
|
| 11776 |
+
{
|
| 11777 |
+
"epoch": 1.5,
|
| 11778 |
+
"learning_rate": 1.0138129010020992e-05,
|
| 11779 |
+
"loss": 0.2831,
|
| 11780 |
+
"step": 981000
|
| 11781 |
+
},
|
| 11782 |
+
{
|
| 11783 |
+
"epoch": 1.5,
|
| 11784 |
+
"learning_rate": 1.0130956957154867e-05,
|
| 11785 |
+
"loss": 0.2831,
|
| 11786 |
+
"step": 981500
|
| 11787 |
+
},
|
| 11788 |
+
{
|
| 11789 |
+
"epoch": 1.5,
|
| 11790 |
+
"learning_rate": 1.0123975923584488e-05,
|
| 11791 |
+
"loss": 0.2824,
|
| 11792 |
+
"step": 982000
|
| 11793 |
+
},
|
| 11794 |
+
{
|
| 11795 |
+
"epoch": 1.5,
|
| 11796 |
+
"learning_rate": 1.0117185928395721e-05,
|
| 11797 |
+
"loss": 0.2828,
|
| 11798 |
+
"step": 982500
|
| 11799 |
+
},
|
| 11800 |
+
{
|
| 11801 |
+
"epoch": 1.5,
|
| 11802 |
+
"learning_rate": 1.0110586990152152e-05,
|
| 11803 |
+
"loss": 0.2831,
|
| 11804 |
+
"step": 983000
|
| 11805 |
+
},
|
| 11806 |
+
{
|
| 11807 |
+
"epoch": 1.51,
|
| 11808 |
+
"learning_rate": 1.0104179126895039e-05,
|
| 11809 |
+
"loss": 0.2829,
|
| 11810 |
+
"step": 983500
|
| 11811 |
+
},
|
| 11812 |
+
{
|
| 11813 |
+
"epoch": 1.51,
|
| 11814 |
+
"learning_rate": 1.0097962356143219e-05,
|
| 11815 |
+
"loss": 0.2831,
|
| 11816 |
+
"step": 984000
|
| 11817 |
+
},
|
| 11818 |
+
{
|
| 11819 |
+
"epoch": 1.51,
|
| 11820 |
+
"learning_rate": 1.009193669489312e-05,
|
| 11821 |
+
"loss": 0.2834,
|
| 11822 |
+
"step": 984500
|
| 11823 |
+
},
|
| 11824 |
+
{
|
| 11825 |
+
"epoch": 1.51,
|
| 11826 |
+
"learning_rate": 1.0086102159618668e-05,
|
| 11827 |
+
"loss": 0.2834,
|
| 11828 |
+
"step": 985000
|
| 11829 |
+
},
|
| 11830 |
+
{
|
| 11831 |
+
"epoch": 1.51,
|
| 11832 |
+
"learning_rate": 1.0080458766271252e-05,
|
| 11833 |
+
"loss": 0.2832,
|
| 11834 |
+
"step": 985500
|
| 11835 |
+
},
|
| 11836 |
+
{
|
| 11837 |
+
"epoch": 1.51,
|
| 11838 |
+
"learning_rate": 1.0075006530279694e-05,
|
| 11839 |
+
"loss": 0.2833,
|
| 11840 |
+
"step": 986000
|
| 11841 |
+
},
|
| 11842 |
+
{
|
| 11843 |
+
"epoch": 1.51,
|
| 11844 |
+
"learning_rate": 1.0069745466550205e-05,
|
| 11845 |
+
"loss": 0.2827,
|
| 11846 |
+
"step": 986500
|
| 11847 |
+
},
|
| 11848 |
+
{
|
| 11849 |
+
"epoch": 1.51,
|
| 11850 |
+
"learning_rate": 1.0064675589466339e-05,
|
| 11851 |
+
"loss": 0.2821,
|
| 11852 |
+
"step": 987000
|
| 11853 |
+
},
|
| 11854 |
+
{
|
| 11855 |
+
"epoch": 1.51,
|
| 11856 |
+
"learning_rate": 1.005979691288893e-05,
|
| 11857 |
+
"loss": 0.2834,
|
| 11858 |
+
"step": 987500
|
| 11859 |
+
},
|
| 11860 |
+
{
|
| 11861 |
+
"epoch": 1.51,
|
| 11862 |
+
"learning_rate": 1.0055109450156098e-05,
|
| 11863 |
+
"loss": 0.2831,
|
| 11864 |
+
"step": 988000
|
| 11865 |
+
},
|
| 11866 |
+
{
|
| 11867 |
+
"epoch": 1.52,
|
| 11868 |
+
"learning_rate": 1.0050613214083197e-05,
|
| 11869 |
+
"loss": 0.2822,
|
| 11870 |
+
"step": 988500
|
| 11871 |
+
},
|
| 11872 |
+
{
|
| 11873 |
+
"epoch": 1.52,
|
| 11874 |
+
"learning_rate": 1.0046308216962759e-05,
|
| 11875 |
+
"loss": 0.2826,
|
| 11876 |
+
"step": 989000
|
| 11877 |
+
},
|
| 11878 |
+
{
|
| 11879 |
+
"epoch": 1.52,
|
| 11880 |
+
"learning_rate": 1.0042194470564472e-05,
|
| 11881 |
+
"loss": 0.2824,
|
| 11882 |
+
"step": 989500
|
| 11883 |
+
},
|
| 11884 |
+
{
|
| 11885 |
+
"epoch": 1.52,
|
| 11886 |
+
"learning_rate": 1.0038271986135177e-05,
|
| 11887 |
+
"loss": 0.2827,
|
| 11888 |
+
"step": 990000
|
| 11889 |
}
|
| 11890 |
],
|
| 11891 |
"max_steps": 1000000,
|
| 11892 |
"num_train_epochs": 2,
|
| 11893 |
+
"total_flos": 6.693099686599179e+22,
|
| 11894 |
"trial_name": null,
|
| 11895 |
"trial_params": null
|
| 11896 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6119a41cc84dfc82b1ad8e160d2de4f6723c4c70df2567e7b05d5135b2ad413c
|
| 3 |
size 449450757
|