Training in progress, step 930000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91affa27b1d7efbf8500bedc38da98f478db48755c9fd801061dc5b4d75ce322
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87aca36ccecce90e6bf8bad654e975fd48ce737fd544cee69dc518d7e58bab00
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3d510ecd6732523942dd3424a57ac538e5be0c4138342f6ae56c3919c1d5b02
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:574b715a429374590efb47af45146a59e5fe229195a8d8f19378988db5b1824a
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd2a278c4b4e6f37059e27f47d47f9a954e78837662918db9679befac603ce60
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5bdc5281c9ba61c674d20be79979807c56cc05caf6ef31ad1992614f01419db4
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b0e1b3397ffbcdba72f77e0e72529212805b3efe290aff36c3ecd969d87bca4
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 14.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -6814,11 +6814,85 @@
|
|
| 6814 |
"eval_samples_per_second": 1300.053,
|
| 6815 |
"eval_steps_per_second": 20.801,
|
| 6816 |
"step": 920000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6817 |
}
|
| 6818 |
],
|
| 6819 |
"max_steps": 1000000,
|
| 6820 |
"num_train_epochs": 16,
|
| 6821 |
-
"total_flos": 6.
|
| 6822 |
"trial_name": null,
|
| 6823 |
"trial_params": null
|
| 6824 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 14.201291859453022,
|
| 5 |
+
"global_step": 930000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 6814 |
"eval_samples_per_second": 1300.053,
|
| 6815 |
"eval_steps_per_second": 20.801,
|
| 6816 |
"step": 920000
|
| 6817 |
+
},
|
| 6818 |
+
{
|
| 6819 |
+
"epoch": 14.06,
|
| 6820 |
+
"learning_rate": 1.2375216942216713e-05,
|
| 6821 |
+
"loss": 0.2306,
|
| 6822 |
+
"step": 921000
|
| 6823 |
+
},
|
| 6824 |
+
{
|
| 6825 |
+
"epoch": 14.08,
|
| 6826 |
+
"learning_rate": 1.2315797137640906e-05,
|
| 6827 |
+
"loss": 0.2298,
|
| 6828 |
+
"step": 922000
|
| 6829 |
+
},
|
| 6830 |
+
{
|
| 6831 |
+
"epoch": 14.09,
|
| 6832 |
+
"learning_rate": 1.225711751666363e-05,
|
| 6833 |
+
"loss": 0.2295,
|
| 6834 |
+
"step": 923000
|
| 6835 |
+
},
|
| 6836 |
+
{
|
| 6837 |
+
"epoch": 14.11,
|
| 6838 |
+
"learning_rate": 1.2199178720995825e-05,
|
| 6839 |
+
"loss": 0.2299,
|
| 6840 |
+
"step": 924000
|
| 6841 |
+
},
|
| 6842 |
+
{
|
| 6843 |
+
"epoch": 14.12,
|
| 6844 |
+
"learning_rate": 1.2141981384246874e-05,
|
| 6845 |
+
"loss": 0.23,
|
| 6846 |
+
"step": 925000
|
| 6847 |
+
},
|
| 6848 |
+
{
|
| 6849 |
+
"epoch": 14.12,
|
| 6850 |
+
"eval_runtime": 0.827,
|
| 6851 |
+
"eval_samples_per_second": 1209.23,
|
| 6852 |
+
"eval_steps_per_second": 19.348,
|
| 6853 |
+
"step": 925000
|
| 6854 |
+
},
|
| 6855 |
+
{
|
| 6856 |
+
"epoch": 14.14,
|
| 6857 |
+
"learning_rate": 1.2085526131917685e-05,
|
| 6858 |
+
"loss": 0.2294,
|
| 6859 |
+
"step": 926000
|
| 6860 |
+
},
|
| 6861 |
+
{
|
| 6862 |
+
"epoch": 14.16,
|
| 6863 |
+
"learning_rate": 1.2029813581393866e-05,
|
| 6864 |
+
"loss": 0.2289,
|
| 6865 |
+
"step": 927000
|
| 6866 |
+
},
|
| 6867 |
+
{
|
| 6868 |
+
"epoch": 14.17,
|
| 6869 |
+
"learning_rate": 1.197484434193893e-05,
|
| 6870 |
+
"loss": 0.2295,
|
| 6871 |
+
"step": 928000
|
| 6872 |
+
},
|
| 6873 |
+
{
|
| 6874 |
+
"epoch": 14.19,
|
| 6875 |
+
"learning_rate": 1.192061901468768e-05,
|
| 6876 |
+
"loss": 0.2293,
|
| 6877 |
+
"step": 929000
|
| 6878 |
+
},
|
| 6879 |
+
{
|
| 6880 |
+
"epoch": 14.2,
|
| 6881 |
+
"learning_rate": 1.1867138192639601e-05,
|
| 6882 |
+
"loss": 0.2293,
|
| 6883 |
+
"step": 930000
|
| 6884 |
+
},
|
| 6885 |
+
{
|
| 6886 |
+
"epoch": 14.2,
|
| 6887 |
+
"eval_runtime": 0.9644,
|
| 6888 |
+
"eval_samples_per_second": 1036.936,
|
| 6889 |
+
"eval_steps_per_second": 16.591,
|
| 6890 |
+
"step": 930000
|
| 6891 |
}
|
| 6892 |
],
|
| 6893 |
"max_steps": 1000000,
|
| 6894 |
"num_train_epochs": 16,
|
| 6895 |
+
"total_flos": 6.519315076868202e+22,
|
| 6896 |
"trial_name": null,
|
| 6897 |
"trial_params": null
|
| 6898 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87aca36ccecce90e6bf8bad654e975fd48ce737fd544cee69dc518d7e58bab00
|
| 3 |
size 449471589
|