Training in progress, step 69000, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3
- last-checkpoint/training_args.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 304481530
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e12ef6a026db88916d2bccaa887c346b617f8bf524f61c49a560d4c1854fb6f1
|
| 3 |
size 304481530
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 402029570
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:287d41aa21a4eb242c8834811d8cbeecb6b0fd5e8162f8a93804fb2ec7aa6398
|
| 3 |
size 402029570
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9b12b8c5c5c31953b64891eb7a5a87fe3243666cbd4801ead4f6238d85d2c9c
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17944b85b9d02378f311e5505f3d2beb901e13fa7a7306f1d0d6ef90c3394bf6
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c35b076a46134f931f65ad614d3a133b44af15affbef2c6984eacb0867534788
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00652d787b0217457a14651c8e87f0d8ab4c5f0af3727292f0c3f9d4e718cb0a
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc0d2189df36213ee36dacfa0f47fda988de8257ffa315d320b6c0176d420bf1
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -23808,6 +23808,356 @@
|
|
| 23808 |
"learning_rate": 0.00048333134129621366,
|
| 23809 |
"loss": 16.5557,
|
| 23810 |
"step": 68000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23811 |
}
|
| 23812 |
],
|
| 23813 |
"logging_steps": 20,
|
|
@@ -23827,7 +24177,7 @@
|
|
| 23827 |
"attributes": {}
|
| 23828 |
}
|
| 23829 |
},
|
| 23830 |
-
"total_flos":
|
| 23831 |
"train_batch_size": 48,
|
| 23832 |
"trial_name": null,
|
| 23833 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.10221071405293626,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 69000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 23808 |
"learning_rate": 0.00048333134129621366,
|
| 23809 |
"loss": 16.5557,
|
| 23810 |
"step": 68000
|
| 23811 |
+
},
|
| 23812 |
+
{
|
| 23813 |
+
"epoch": 0.10075902565044528,
|
| 23814 |
+
"grad_norm": 7.46875,
|
| 23815 |
+
"learning_rate": 0.00048332640236120605,
|
| 23816 |
+
"loss": 16.6099,
|
| 23817 |
+
"step": 68020
|
| 23818 |
+
},
|
| 23819 |
+
{
|
| 23820 |
+
"epoch": 0.10078865194437367,
|
| 23821 |
+
"grad_norm": 6.84375,
|
| 23822 |
+
"learning_rate": 0.0004833214634261985,
|
| 23823 |
+
"loss": 16.6517,
|
| 23824 |
+
"step": 68040
|
| 23825 |
+
},
|
| 23826 |
+
{
|
| 23827 |
+
"epoch": 0.10081827823830206,
|
| 23828 |
+
"grad_norm": 7.4375,
|
| 23829 |
+
"learning_rate": 0.0004833165244911909,
|
| 23830 |
+
"loss": 16.5766,
|
| 23831 |
+
"step": 68060
|
| 23832 |
+
},
|
| 23833 |
+
{
|
| 23834 |
+
"epoch": 0.10084790453223044,
|
| 23835 |
+
"grad_norm": 6.71875,
|
| 23836 |
+
"learning_rate": 0.0004833115855561834,
|
| 23837 |
+
"loss": 16.606,
|
| 23838 |
+
"step": 68080
|
| 23839 |
+
},
|
| 23840 |
+
{
|
| 23841 |
+
"epoch": 0.10087753082615883,
|
| 23842 |
+
"grad_norm": 6.59375,
|
| 23843 |
+
"learning_rate": 0.0004833066466211758,
|
| 23844 |
+
"loss": 16.5779,
|
| 23845 |
+
"step": 68100
|
| 23846 |
+
},
|
| 23847 |
+
{
|
| 23848 |
+
"epoch": 0.10090715712008722,
|
| 23849 |
+
"grad_norm": 6.8125,
|
| 23850 |
+
"learning_rate": 0.00048330170768616824,
|
| 23851 |
+
"loss": 16.6219,
|
| 23852 |
+
"step": 68120
|
| 23853 |
+
},
|
| 23854 |
+
{
|
| 23855 |
+
"epoch": 0.1009367834140156,
|
| 23856 |
+
"grad_norm": 7.15625,
|
| 23857 |
+
"learning_rate": 0.0004832967687511607,
|
| 23858 |
+
"loss": 16.5954,
|
| 23859 |
+
"step": 68140
|
| 23860 |
+
},
|
| 23861 |
+
{
|
| 23862 |
+
"epoch": 0.10096640970794399,
|
| 23863 |
+
"grad_norm": 7.0,
|
| 23864 |
+
"learning_rate": 0.0004832918298161531,
|
| 23865 |
+
"loss": 16.5884,
|
| 23866 |
+
"step": 68160
|
| 23867 |
+
},
|
| 23868 |
+
{
|
| 23869 |
+
"epoch": 0.10099603600187239,
|
| 23870 |
+
"grad_norm": 6.78125,
|
| 23871 |
+
"learning_rate": 0.00048328689088114553,
|
| 23872 |
+
"loss": 16.5813,
|
| 23873 |
+
"step": 68180
|
| 23874 |
+
},
|
| 23875 |
+
{
|
| 23876 |
+
"epoch": 0.10102566229580077,
|
| 23877 |
+
"grad_norm": 6.46875,
|
| 23878 |
+
"learning_rate": 0.0004832819519461379,
|
| 23879 |
+
"loss": 16.6294,
|
| 23880 |
+
"step": 68200
|
| 23881 |
+
},
|
| 23882 |
+
{
|
| 23883 |
+
"epoch": 0.10105528858972916,
|
| 23884 |
+
"grad_norm": 7.34375,
|
| 23885 |
+
"learning_rate": 0.0004832770130111304,
|
| 23886 |
+
"loss": 16.627,
|
| 23887 |
+
"step": 68220
|
| 23888 |
+
},
|
| 23889 |
+
{
|
| 23890 |
+
"epoch": 0.10108491488365755,
|
| 23891 |
+
"grad_norm": 7.0,
|
| 23892 |
+
"learning_rate": 0.0004832720740761228,
|
| 23893 |
+
"loss": 16.5516,
|
| 23894 |
+
"step": 68240
|
| 23895 |
+
},
|
| 23896 |
+
{
|
| 23897 |
+
"epoch": 0.10111454117758593,
|
| 23898 |
+
"grad_norm": 7.40625,
|
| 23899 |
+
"learning_rate": 0.00048326713514111526,
|
| 23900 |
+
"loss": 16.6234,
|
| 23901 |
+
"step": 68260
|
| 23902 |
+
},
|
| 23903 |
+
{
|
| 23904 |
+
"epoch": 0.10114416747151432,
|
| 23905 |
+
"grad_norm": 7.1875,
|
| 23906 |
+
"learning_rate": 0.00048326219620610766,
|
| 23907 |
+
"loss": 16.5566,
|
| 23908 |
+
"step": 68280
|
| 23909 |
+
},
|
| 23910 |
+
{
|
| 23911 |
+
"epoch": 0.10117379376544271,
|
| 23912 |
+
"grad_norm": 7.1875,
|
| 23913 |
+
"learning_rate": 0.00048325725727110016,
|
| 23914 |
+
"loss": 16.6184,
|
| 23915 |
+
"step": 68300
|
| 23916 |
+
},
|
| 23917 |
+
{
|
| 23918 |
+
"epoch": 0.1012034200593711,
|
| 23919 |
+
"grad_norm": 7.46875,
|
| 23920 |
+
"learning_rate": 0.00048325231833609255,
|
| 23921 |
+
"loss": 16.6634,
|
| 23922 |
+
"step": 68320
|
| 23923 |
+
},
|
| 23924 |
+
{
|
| 23925 |
+
"epoch": 0.10123304635329948,
|
| 23926 |
+
"grad_norm": 7.75,
|
| 23927 |
+
"learning_rate": 0.000483247379401085,
|
| 23928 |
+
"loss": 16.5374,
|
| 23929 |
+
"step": 68340
|
| 23930 |
+
},
|
| 23931 |
+
{
|
| 23932 |
+
"epoch": 0.10126267264722787,
|
| 23933 |
+
"grad_norm": 7.40625,
|
| 23934 |
+
"learning_rate": 0.0004832424404660774,
|
| 23935 |
+
"loss": 16.6118,
|
| 23936 |
+
"step": 68360
|
| 23937 |
+
},
|
| 23938 |
+
{
|
| 23939 |
+
"epoch": 0.10129229894115625,
|
| 23940 |
+
"grad_norm": 7.03125,
|
| 23941 |
+
"learning_rate": 0.0004832375015310699,
|
| 23942 |
+
"loss": 16.5952,
|
| 23943 |
+
"step": 68380
|
| 23944 |
+
},
|
| 23945 |
+
{
|
| 23946 |
+
"epoch": 0.10132192523508464,
|
| 23947 |
+
"grad_norm": 7.0,
|
| 23948 |
+
"learning_rate": 0.0004832325625960623,
|
| 23949 |
+
"loss": 16.5866,
|
| 23950 |
+
"step": 68400
|
| 23951 |
+
},
|
| 23952 |
+
{
|
| 23953 |
+
"epoch": 0.10135155152901303,
|
| 23954 |
+
"grad_norm": 7.09375,
|
| 23955 |
+
"learning_rate": 0.00048322762366105474,
|
| 23956 |
+
"loss": 16.5999,
|
| 23957 |
+
"step": 68420
|
| 23958 |
+
},
|
| 23959 |
+
{
|
| 23960 |
+
"epoch": 0.10138117782294141,
|
| 23961 |
+
"grad_norm": 6.9375,
|
| 23962 |
+
"learning_rate": 0.0004832226847260472,
|
| 23963 |
+
"loss": 16.553,
|
| 23964 |
+
"step": 68440
|
| 23965 |
+
},
|
| 23966 |
+
{
|
| 23967 |
+
"epoch": 0.1014108041168698,
|
| 23968 |
+
"grad_norm": 6.4375,
|
| 23969 |
+
"learning_rate": 0.00048321774579103963,
|
| 23970 |
+
"loss": 16.6224,
|
| 23971 |
+
"step": 68460
|
| 23972 |
+
},
|
| 23973 |
+
{
|
| 23974 |
+
"epoch": 0.10144043041079819,
|
| 23975 |
+
"grad_norm": 7.09375,
|
| 23976 |
+
"learning_rate": 0.00048321280685603203,
|
| 23977 |
+
"loss": 16.5789,
|
| 23978 |
+
"step": 68480
|
| 23979 |
+
},
|
| 23980 |
+
{
|
| 23981 |
+
"epoch": 0.10147005670472659,
|
| 23982 |
+
"grad_norm": 6.96875,
|
| 23983 |
+
"learning_rate": 0.0004832078679210244,
|
| 23984 |
+
"loss": 16.6379,
|
| 23985 |
+
"step": 68500
|
| 23986 |
+
},
|
| 23987 |
+
{
|
| 23988 |
+
"epoch": 0.10149968299865497,
|
| 23989 |
+
"grad_norm": 6.0625,
|
| 23990 |
+
"learning_rate": 0.0004832029289860169,
|
| 23991 |
+
"loss": 16.5628,
|
| 23992 |
+
"step": 68520
|
| 23993 |
+
},
|
| 23994 |
+
{
|
| 23995 |
+
"epoch": 0.10152930929258336,
|
| 23996 |
+
"grad_norm": 6.875,
|
| 23997 |
+
"learning_rate": 0.0004831979900510093,
|
| 23998 |
+
"loss": 16.5675,
|
| 23999 |
+
"step": 68540
|
| 24000 |
+
},
|
| 24001 |
+
{
|
| 24002 |
+
"epoch": 0.10155893558651174,
|
| 24003 |
+
"grad_norm": 6.96875,
|
| 24004 |
+
"learning_rate": 0.00048319305111600176,
|
| 24005 |
+
"loss": 16.591,
|
| 24006 |
+
"step": 68560
|
| 24007 |
+
},
|
| 24008 |
+
{
|
| 24009 |
+
"epoch": 0.10158856188044013,
|
| 24010 |
+
"grad_norm": 6.8125,
|
| 24011 |
+
"learning_rate": 0.00048318811218099416,
|
| 24012 |
+
"loss": 16.5641,
|
| 24013 |
+
"step": 68580
|
| 24014 |
+
},
|
| 24015 |
+
{
|
| 24016 |
+
"epoch": 0.10161818817436852,
|
| 24017 |
+
"grad_norm": 6.5625,
|
| 24018 |
+
"learning_rate": 0.00048318317324598666,
|
| 24019 |
+
"loss": 16.5867,
|
| 24020 |
+
"step": 68600
|
| 24021 |
+
},
|
| 24022 |
+
{
|
| 24023 |
+
"epoch": 0.1016478144682969,
|
| 24024 |
+
"grad_norm": 6.4375,
|
| 24025 |
+
"learning_rate": 0.00048317823431097905,
|
| 24026 |
+
"loss": 16.5886,
|
| 24027 |
+
"step": 68620
|
| 24028 |
+
},
|
| 24029 |
+
{
|
| 24030 |
+
"epoch": 0.10167744076222529,
|
| 24031 |
+
"grad_norm": 7.625,
|
| 24032 |
+
"learning_rate": 0.0004831732953759715,
|
| 24033 |
+
"loss": 16.5648,
|
| 24034 |
+
"step": 68640
|
| 24035 |
+
},
|
| 24036 |
+
{
|
| 24037 |
+
"epoch": 0.10170706705615368,
|
| 24038 |
+
"grad_norm": 6.46875,
|
| 24039 |
+
"learning_rate": 0.0004831683564409639,
|
| 24040 |
+
"loss": 16.5727,
|
| 24041 |
+
"step": 68660
|
| 24042 |
+
},
|
| 24043 |
+
{
|
| 24044 |
+
"epoch": 0.10173669335008206,
|
| 24045 |
+
"grad_norm": 6.75,
|
| 24046 |
+
"learning_rate": 0.0004831634175059564,
|
| 24047 |
+
"loss": 16.5931,
|
| 24048 |
+
"step": 68680
|
| 24049 |
+
},
|
| 24050 |
+
{
|
| 24051 |
+
"epoch": 0.10176631964401045,
|
| 24052 |
+
"grad_norm": 6.6875,
|
| 24053 |
+
"learning_rate": 0.0004831584785709488,
|
| 24054 |
+
"loss": 16.6137,
|
| 24055 |
+
"step": 68700
|
| 24056 |
+
},
|
| 24057 |
+
{
|
| 24058 |
+
"epoch": 0.10179594593793884,
|
| 24059 |
+
"grad_norm": 7.15625,
|
| 24060 |
+
"learning_rate": 0.00048315353963594124,
|
| 24061 |
+
"loss": 16.5733,
|
| 24062 |
+
"step": 68720
|
| 24063 |
+
},
|
| 24064 |
+
{
|
| 24065 |
+
"epoch": 0.10182557223186722,
|
| 24066 |
+
"grad_norm": 7.15625,
|
| 24067 |
+
"learning_rate": 0.0004831486007009337,
|
| 24068 |
+
"loss": 16.5379,
|
| 24069 |
+
"step": 68740
|
| 24070 |
+
},
|
| 24071 |
+
{
|
| 24072 |
+
"epoch": 0.10185519852579561,
|
| 24073 |
+
"grad_norm": 5.59375,
|
| 24074 |
+
"learning_rate": 0.00048314366176592613,
|
| 24075 |
+
"loss": 16.6031,
|
| 24076 |
+
"step": 68760
|
| 24077 |
+
},
|
| 24078 |
+
{
|
| 24079 |
+
"epoch": 0.101884824819724,
|
| 24080 |
+
"grad_norm": 6.6875,
|
| 24081 |
+
"learning_rate": 0.00048313872283091853,
|
| 24082 |
+
"loss": 16.5457,
|
| 24083 |
+
"step": 68780
|
| 24084 |
+
},
|
| 24085 |
+
{
|
| 24086 |
+
"epoch": 0.10191445111365238,
|
| 24087 |
+
"grad_norm": 6.125,
|
| 24088 |
+
"learning_rate": 0.000483133783895911,
|
| 24089 |
+
"loss": 16.5053,
|
| 24090 |
+
"step": 68800
|
| 24091 |
+
},
|
| 24092 |
+
{
|
| 24093 |
+
"epoch": 0.10194407740758078,
|
| 24094 |
+
"grad_norm": 7.1875,
|
| 24095 |
+
"learning_rate": 0.0004831288449609034,
|
| 24096 |
+
"loss": 16.576,
|
| 24097 |
+
"step": 68820
|
| 24098 |
+
},
|
| 24099 |
+
{
|
| 24100 |
+
"epoch": 0.10197370370150917,
|
| 24101 |
+
"grad_norm": 6.875,
|
| 24102 |
+
"learning_rate": 0.0004831239060258958,
|
| 24103 |
+
"loss": 16.6092,
|
| 24104 |
+
"step": 68840
|
| 24105 |
+
},
|
| 24106 |
+
{
|
| 24107 |
+
"epoch": 0.10200332999543756,
|
| 24108 |
+
"grad_norm": 6.5,
|
| 24109 |
+
"learning_rate": 0.00048311896709088827,
|
| 24110 |
+
"loss": 16.598,
|
| 24111 |
+
"step": 68860
|
| 24112 |
+
},
|
| 24113 |
+
{
|
| 24114 |
+
"epoch": 0.10203295628936594,
|
| 24115 |
+
"grad_norm": 6.3125,
|
| 24116 |
+
"learning_rate": 0.00048311402815588066,
|
| 24117 |
+
"loss": 16.5953,
|
| 24118 |
+
"step": 68880
|
| 24119 |
+
},
|
| 24120 |
+
{
|
| 24121 |
+
"epoch": 0.10206258258329433,
|
| 24122 |
+
"grad_norm": 6.34375,
|
| 24123 |
+
"learning_rate": 0.00048310908922087316,
|
| 24124 |
+
"loss": 16.5686,
|
| 24125 |
+
"step": 68900
|
| 24126 |
+
},
|
| 24127 |
+
{
|
| 24128 |
+
"epoch": 0.10209220887722271,
|
| 24129 |
+
"grad_norm": 7.3125,
|
| 24130 |
+
"learning_rate": 0.00048310415028586555,
|
| 24131 |
+
"loss": 16.5649,
|
| 24132 |
+
"step": 68920
|
| 24133 |
+
},
|
| 24134 |
+
{
|
| 24135 |
+
"epoch": 0.1021218351711511,
|
| 24136 |
+
"grad_norm": 7.15625,
|
| 24137 |
+
"learning_rate": 0.000483099211350858,
|
| 24138 |
+
"loss": 16.5833,
|
| 24139 |
+
"step": 68940
|
| 24140 |
+
},
|
| 24141 |
+
{
|
| 24142 |
+
"epoch": 0.10215146146507949,
|
| 24143 |
+
"grad_norm": 7.0,
|
| 24144 |
+
"learning_rate": 0.0004830942724158504,
|
| 24145 |
+
"loss": 16.5813,
|
| 24146 |
+
"step": 68960
|
| 24147 |
+
},
|
| 24148 |
+
{
|
| 24149 |
+
"epoch": 0.10218108775900787,
|
| 24150 |
+
"grad_norm": 7.125,
|
| 24151 |
+
"learning_rate": 0.0004830893334808429,
|
| 24152 |
+
"loss": 16.6435,
|
| 24153 |
+
"step": 68980
|
| 24154 |
+
},
|
| 24155 |
+
{
|
| 24156 |
+
"epoch": 0.10221071405293626,
|
| 24157 |
+
"grad_norm": 6.6875,
|
| 24158 |
+
"learning_rate": 0.0004830843945458353,
|
| 24159 |
+
"loss": 16.6299,
|
| 24160 |
+
"step": 69000
|
| 24161 |
}
|
| 24162 |
],
|
| 24163 |
"logging_steps": 20,
|
|
|
|
| 24177 |
"attributes": {}
|
| 24178 |
}
|
| 24179 |
},
|
| 24180 |
+
"total_flos": 5.073098616395845e+19,
|
| 24181 |
"train_batch_size": 48,
|
| 24182 |
"trial_name": null,
|
| 24183 |
"trial_params": null
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5432
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc3f551404b0d7edd833494ee70d9c95a722ebd26deaead78190bce345559dbd
|
| 3 |
size 5432
|