Training in progress, step 910000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:833419c3e72d7f9d08d84324dff99c4df5021ccb89fb201b7ceab844fd1fe7de
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:178bde3b89657b90235cfe9d9ea492b4c3952edf2d20ecb0ed27ebbb80775260
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44cbeb264fb2a81130e0bac112a18c08ec1d410c1c39e23c023c635c67938a81
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3dbfbe0d14b5c95909c74387495649181894d175bc5bdbfa086f89dc9ba7b8e5
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61414351d7a2b424bf69a03902f1cdb6a014111a0704f27525f615bba9831275
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8736baa79faca75f1de8fbb0415f2f8cedffbb08cddfa2e1e02291f66849ffd
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43bc53bd3230780ced8543e570f3a1576c876dc5bb9da2beb20be127155a27a1
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:425858a9ea3774cea4e80f4044a1828fb43f83f6100a0fd4aa4ea1985d9d8afe
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e99df6b45e45b719db86866dd43b0e8f476f0d4749f66d2f42f7183415384c8
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ffd4b3aa19562ff729663fcb259b8c71f90c0398d12b236c6885ec68255bc91
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa4db73f02c4f7c376b4a473b74af0d764f1f4d822f644308482e4f6b4c6cad2
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -10806,11 +10806,131 @@
|
|
| 10806 |
"learning_rate": 1.3792793080955574e-05,
|
| 10807 |
"loss": 0.2852,
|
| 10808 |
"step": 900000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10809 |
}
|
| 10810 |
],
|
| 10811 |
"max_steps": 1000000,
|
| 10812 |
"num_train_epochs": 2,
|
| 10813 |
-
"total_flos": 6.
|
| 10814 |
"trial_name": null,
|
| 10815 |
"trial_params": null
|
| 10816 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.3587343850894045,
|
| 5 |
+
"global_step": 910000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 10806 |
"learning_rate": 1.3792793080955574e-05,
|
| 10807 |
"loss": 0.2852,
|
| 10808 |
"step": 900000
|
| 10809 |
+
},
|
| 10810 |
+
{
|
| 10811 |
+
"epoch": 1.34,
|
| 10812 |
+
"learning_rate": 1.3755301948017599e-05,
|
| 10813 |
+
"loss": 0.2853,
|
| 10814 |
+
"step": 900500
|
| 10815 |
+
},
|
| 10816 |
+
{
|
| 10817 |
+
"epoch": 1.34,
|
| 10818 |
+
"learning_rate": 1.3717991925554562e-05,
|
| 10819 |
+
"loss": 0.285,
|
| 10820 |
+
"step": 901000
|
| 10821 |
+
},
|
| 10822 |
+
{
|
| 10823 |
+
"epoch": 1.34,
|
| 10824 |
+
"learning_rate": 1.368086311557062e-05,
|
| 10825 |
+
"loss": 0.2854,
|
| 10826 |
+
"step": 901500
|
| 10827 |
+
},
|
| 10828 |
+
{
|
| 10829 |
+
"epoch": 1.34,
|
| 10830 |
+
"learning_rate": 1.3643915619574529e-05,
|
| 10831 |
+
"loss": 0.2849,
|
| 10832 |
+
"step": 902000
|
| 10833 |
+
},
|
| 10834 |
+
{
|
| 10835 |
+
"epoch": 1.34,
|
| 10836 |
+
"learning_rate": 1.3607149538579341e-05,
|
| 10837 |
+
"loss": 0.2853,
|
| 10838 |
+
"step": 902500
|
| 10839 |
+
},
|
| 10840 |
+
{
|
| 10841 |
+
"epoch": 1.34,
|
| 10842 |
+
"learning_rate": 1.35705649731021e-05,
|
| 10843 |
+
"loss": 0.2849,
|
| 10844 |
+
"step": 903000
|
| 10845 |
+
},
|
| 10846 |
+
{
|
| 10847 |
+
"epoch": 1.35,
|
| 10848 |
+
"learning_rate": 1.3534162023163642e-05,
|
| 10849 |
+
"loss": 0.2853,
|
| 10850 |
+
"step": 903500
|
| 10851 |
+
},
|
| 10852 |
+
{
|
| 10853 |
+
"epoch": 1.35,
|
| 10854 |
+
"learning_rate": 1.3497940788288195e-05,
|
| 10855 |
+
"loss": 0.2853,
|
| 10856 |
+
"step": 904000
|
| 10857 |
+
},
|
| 10858 |
+
{
|
| 10859 |
+
"epoch": 1.35,
|
| 10860 |
+
"learning_rate": 1.3461901367503262e-05,
|
| 10861 |
+
"loss": 0.2858,
|
| 10862 |
+
"step": 904500
|
| 10863 |
+
},
|
| 10864 |
+
{
|
| 10865 |
+
"epoch": 1.35,
|
| 10866 |
+
"learning_rate": 1.3426043859339253e-05,
|
| 10867 |
+
"loss": 0.2854,
|
| 10868 |
+
"step": 905000
|
| 10869 |
+
},
|
| 10870 |
+
{
|
| 10871 |
+
"epoch": 1.35,
|
| 10872 |
+
"learning_rate": 1.3390368361829197e-05,
|
| 10873 |
+
"loss": 0.2853,
|
| 10874 |
+
"step": 905500
|
| 10875 |
+
},
|
| 10876 |
+
{
|
| 10877 |
+
"epoch": 1.35,
|
| 10878 |
+
"learning_rate": 1.3354874972508582e-05,
|
| 10879 |
+
"loss": 0.2851,
|
| 10880 |
+
"step": 906000
|
| 10881 |
+
},
|
| 10882 |
+
{
|
| 10883 |
+
"epoch": 1.35,
|
| 10884 |
+
"learning_rate": 1.3319563788414934e-05,
|
| 10885 |
+
"loss": 0.2847,
|
| 10886 |
+
"step": 906500
|
| 10887 |
+
},
|
| 10888 |
+
{
|
| 10889 |
+
"epoch": 1.35,
|
| 10890 |
+
"learning_rate": 1.3284434906087695e-05,
|
| 10891 |
+
"loss": 0.2846,
|
| 10892 |
+
"step": 907000
|
| 10893 |
+
},
|
| 10894 |
+
{
|
| 10895 |
+
"epoch": 1.35,
|
| 10896 |
+
"learning_rate": 1.3249488421567911e-05,
|
| 10897 |
+
"loss": 0.2851,
|
| 10898 |
+
"step": 907500
|
| 10899 |
+
},
|
| 10900 |
+
{
|
| 10901 |
+
"epoch": 1.35,
|
| 10902 |
+
"learning_rate": 1.3214724430397915e-05,
|
| 10903 |
+
"loss": 0.285,
|
| 10904 |
+
"step": 908000
|
| 10905 |
+
},
|
| 10906 |
+
{
|
| 10907 |
+
"epoch": 1.36,
|
| 10908 |
+
"learning_rate": 1.3180143027621145e-05,
|
| 10909 |
+
"loss": 0.2852,
|
| 10910 |
+
"step": 908500
|
| 10911 |
+
},
|
| 10912 |
+
{
|
| 10913 |
+
"epoch": 1.36,
|
| 10914 |
+
"learning_rate": 1.314574430778182e-05,
|
| 10915 |
+
"loss": 0.2844,
|
| 10916 |
+
"step": 909000
|
| 10917 |
+
},
|
| 10918 |
+
{
|
| 10919 |
+
"epoch": 1.36,
|
| 10920 |
+
"learning_rate": 1.311152836492473e-05,
|
| 10921 |
+
"loss": 0.2848,
|
| 10922 |
+
"step": 909500
|
| 10923 |
+
},
|
| 10924 |
+
{
|
| 10925 |
+
"epoch": 1.36,
|
| 10926 |
+
"learning_rate": 1.3077495292594966e-05,
|
| 10927 |
+
"loss": 0.285,
|
| 10928 |
+
"step": 910000
|
| 10929 |
}
|
| 10930 |
],
|
| 10931 |
"max_steps": 1000000,
|
| 10932 |
"num_train_epochs": 2,
|
| 10933 |
+
"total_flos": 6.152247724689881e+22,
|
| 10934 |
"trial_name": null,
|
| 10935 |
"trial_params": null
|
| 10936 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:178bde3b89657b90235cfe9d9ea492b4c3952edf2d20ecb0ed27ebbb80775260
|
| 3 |
size 449450757
|