Training in progress, step 45000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +103 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893438545
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:974a1e790d3ca33c068dbaca3cc4297bfcbca347437b8b9cb62025728f09e96e
|
| 3 |
size 893438545
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5a9d277649a19c26574f85a21d72156907a1fa49083c31e96d8eb40d2455fc4
|
| 3 |
size 449471589
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15523
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe764e4b14b1a41f602255c5ad661b82e79519ef62fb0a01c7236478ec943d57
|
| 3 |
size 15523
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3c1e2756d4a4685b9df2f62aad4f5988b6c0b032f0fc7cb98d4e77d5c23a8e8
|
| 3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:584cc9786ad375313cf5cd3cfaf9ab2fa5956cf7f817f1132bf8fc5dbd46f871
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 3.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -806,11 +806,111 @@
|
|
| 806 |
"eval_samples_per_second": 32.806,
|
| 807 |
"eval_steps_per_second": 1.05,
|
| 808 |
"step": 40000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 809 |
}
|
| 810 |
],
|
| 811 |
"max_steps": 1000000,
|
| 812 |
"num_train_epochs": 86,
|
| 813 |
-
"total_flos":
|
| 814 |
"trial_name": null,
|
| 815 |
"trial_params": null
|
| 816 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 3.8278325961211297,
|
| 5 |
+
"global_step": 45000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 806 |
"eval_samples_per_second": 32.806,
|
| 807 |
"eval_steps_per_second": 1.05,
|
| 808 |
"step": 40000
|
| 809 |
+
},
|
| 810 |
+
{
|
| 811 |
+
"epoch": 3.45,
|
| 812 |
+
"learning_rate": 9.999999999999999e-06,
|
| 813 |
+
"loss": 0.4167,
|
| 814 |
+
"step": 40500
|
| 815 |
+
},
|
| 816 |
+
{
|
| 817 |
+
"epoch": 3.49,
|
| 818 |
+
"learning_rate": 9.999999999999999e-06,
|
| 819 |
+
"loss": 0.417,
|
| 820 |
+
"step": 41000
|
| 821 |
+
},
|
| 822 |
+
{
|
| 823 |
+
"epoch": 3.49,
|
| 824 |
+
"eval_loss": 0.38663551211357117,
|
| 825 |
+
"eval_runtime": 24.5074,
|
| 826 |
+
"eval_samples_per_second": 20.402,
|
| 827 |
+
"eval_steps_per_second": 0.653,
|
| 828 |
+
"step": 41000
|
| 829 |
+
},
|
| 830 |
+
{
|
| 831 |
+
"epoch": 3.53,
|
| 832 |
+
"learning_rate": 9.999999999999999e-06,
|
| 833 |
+
"loss": 0.4168,
|
| 834 |
+
"step": 41500
|
| 835 |
+
},
|
| 836 |
+
{
|
| 837 |
+
"epoch": 3.57,
|
| 838 |
+
"learning_rate": 9.999999999999999e-06,
|
| 839 |
+
"loss": 0.4159,
|
| 840 |
+
"step": 42000
|
| 841 |
+
},
|
| 842 |
+
{
|
| 843 |
+
"epoch": 3.57,
|
| 844 |
+
"eval_loss": 0.38440173864364624,
|
| 845 |
+
"eval_runtime": 30.9795,
|
| 846 |
+
"eval_samples_per_second": 16.14,
|
| 847 |
+
"eval_steps_per_second": 0.516,
|
| 848 |
+
"step": 42000
|
| 849 |
+
},
|
| 850 |
+
{
|
| 851 |
+
"epoch": 3.62,
|
| 852 |
+
"learning_rate": 9.999999999999999e-06,
|
| 853 |
+
"loss": 0.4151,
|
| 854 |
+
"step": 42500
|
| 855 |
+
},
|
| 856 |
+
{
|
| 857 |
+
"epoch": 3.66,
|
| 858 |
+
"learning_rate": 9.999999999999999e-06,
|
| 859 |
+
"loss": 0.4155,
|
| 860 |
+
"step": 43000
|
| 861 |
+
},
|
| 862 |
+
{
|
| 863 |
+
"epoch": 3.66,
|
| 864 |
+
"eval_loss": 0.3864738941192627,
|
| 865 |
+
"eval_runtime": 24.9969,
|
| 866 |
+
"eval_samples_per_second": 20.002,
|
| 867 |
+
"eval_steps_per_second": 0.64,
|
| 868 |
+
"step": 43000
|
| 869 |
+
},
|
| 870 |
+
{
|
| 871 |
+
"epoch": 3.7,
|
| 872 |
+
"learning_rate": 9.999999999999999e-06,
|
| 873 |
+
"loss": 0.4157,
|
| 874 |
+
"step": 43500
|
| 875 |
+
},
|
| 876 |
+
{
|
| 877 |
+
"epoch": 3.74,
|
| 878 |
+
"learning_rate": 9.999999999999999e-06,
|
| 879 |
+
"loss": 0.4158,
|
| 880 |
+
"step": 44000
|
| 881 |
+
},
|
| 882 |
+
{
|
| 883 |
+
"epoch": 3.74,
|
| 884 |
+
"eval_loss": 0.3862515091896057,
|
| 885 |
+
"eval_runtime": 28.5688,
|
| 886 |
+
"eval_samples_per_second": 17.502,
|
| 887 |
+
"eval_steps_per_second": 0.56,
|
| 888 |
+
"step": 44000
|
| 889 |
+
},
|
| 890 |
+
{
|
| 891 |
+
"epoch": 3.79,
|
| 892 |
+
"learning_rate": 9.999999999999999e-06,
|
| 893 |
+
"loss": 0.4147,
|
| 894 |
+
"step": 44500
|
| 895 |
+
},
|
| 896 |
+
{
|
| 897 |
+
"epoch": 3.83,
|
| 898 |
+
"learning_rate": 9.999999999999999e-06,
|
| 899 |
+
"loss": 0.4134,
|
| 900 |
+
"step": 45000
|
| 901 |
+
},
|
| 902 |
+
{
|
| 903 |
+
"epoch": 3.83,
|
| 904 |
+
"eval_loss": 0.38480713963508606,
|
| 905 |
+
"eval_runtime": 27.3513,
|
| 906 |
+
"eval_samples_per_second": 18.281,
|
| 907 |
+
"eval_steps_per_second": 0.585,
|
| 908 |
+
"step": 45000
|
| 909 |
}
|
| 910 |
],
|
| 911 |
"max_steps": 1000000,
|
| 912 |
"num_train_epochs": 86,
|
| 913 |
+
"total_flos": 2.0700998918659003e+21,
|
| 914 |
"trial_name": null,
|
| 915 |
"trial_params": null
|
| 916 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5a9d277649a19c26574f85a21d72156907a1fa49083c31e96d8eb40d2455fc4
|
| 3 |
size 449471589
|