Training in progress, step 2200, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2066752
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34e7dd2d9b6f0970cb6393fa01c4d5b46e08a118b2ca11c92398326a18aca9b6
|
| 3 |
size 2066752
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4121235
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ea6c445c08f665e093952010dc41c9cfe5bc6fd09fae8a9ddc99dbd25132738
|
| 3 |
size 4121235
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14391
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:606f8ae83137b5e17dffec803b5eb8d484f9023ac65a91db2b3909da806f7963
|
| 3 |
size 14391
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93112c230b7ca5a82c24435d90248d5e745b06d96f80a988308b962666674dd0
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 100,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -14876,6 +14876,714 @@
|
|
| 14876 |
"eval_samples_per_second": 1.709,
|
| 14877 |
"eval_steps_per_second": 0.214,
|
| 14878 |
"step": 2100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14879 |
}
|
| 14880 |
],
|
| 14881 |
"logging_steps": 1,
|
|
@@ -14895,7 +15603,7 @@
|
|
| 14895 |
"attributes": {}
|
| 14896 |
}
|
| 14897 |
},
|
| 14898 |
-
"total_flos":
|
| 14899 |
"train_batch_size": 1,
|
| 14900 |
"trial_name": null,
|
| 14901 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.09501597996026605,
|
| 6 |
"eval_steps": 100,
|
| 7 |
+
"global_step": 2200,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 14876 |
"eval_samples_per_second": 1.709,
|
| 14877 |
"eval_steps_per_second": 0.214,
|
| 14878 |
"step": 2100
|
| 14879 |
+
},
|
| 14880 |
+
{
|
| 14881 |
+
"epoch": 0.09074026086205407,
|
| 14882 |
+
"grad_norm": 0.55078125,
|
| 14883 |
+
"learning_rate": 0.0009954814517481774,
|
| 14884 |
+
"loss": 8.298,
|
| 14885 |
+
"step": 2101
|
| 14886 |
+
},
|
| 14887 |
+
{
|
| 14888 |
+
"epoch": 0.09078344994385419,
|
| 14889 |
+
"grad_norm": 0.455078125,
|
| 14890 |
+
"learning_rate": 0.0009954718676495817,
|
| 14891 |
+
"loss": 8.5746,
|
| 14892 |
+
"step": 2102
|
| 14893 |
+
},
|
| 14894 |
+
{
|
| 14895 |
+
"epoch": 0.09082663902565431,
|
| 14896 |
+
"grad_norm": 0.609375,
|
| 14897 |
+
"learning_rate": 0.0009954622734437753,
|
| 14898 |
+
"loss": 8.4441,
|
| 14899 |
+
"step": 2103
|
| 14900 |
+
},
|
| 14901 |
+
{
|
| 14902 |
+
"epoch": 0.09086982810745443,
|
| 14903 |
+
"grad_norm": 0.5546875,
|
| 14904 |
+
"learning_rate": 0.000995452669130954,
|
| 14905 |
+
"loss": 8.5023,
|
| 14906 |
+
"step": 2104
|
| 14907 |
+
},
|
| 14908 |
+
{
|
| 14909 |
+
"epoch": 0.09091301718925456,
|
| 14910 |
+
"grad_norm": 0.8203125,
|
| 14911 |
+
"learning_rate": 0.0009954430547113133,
|
| 14912 |
+
"loss": 7.9511,
|
| 14913 |
+
"step": 2105
|
| 14914 |
+
},
|
| 14915 |
+
{
|
| 14916 |
+
"epoch": 0.09095620627105468,
|
| 14917 |
+
"grad_norm": 0.625,
|
| 14918 |
+
"learning_rate": 0.0009954334301850497,
|
| 14919 |
+
"loss": 7.9691,
|
| 14920 |
+
"step": 2106
|
| 14921 |
+
},
|
| 14922 |
+
{
|
| 14923 |
+
"epoch": 0.0909993953528548,
|
| 14924 |
+
"grad_norm": 0.54296875,
|
| 14925 |
+
"learning_rate": 0.0009954237955523593,
|
| 14926 |
+
"loss": 8.1564,
|
| 14927 |
+
"step": 2107
|
| 14928 |
+
},
|
| 14929 |
+
{
|
| 14930 |
+
"epoch": 0.09104258443465492,
|
| 14931 |
+
"grad_norm": 0.58984375,
|
| 14932 |
+
"learning_rate": 0.0009954141508134388,
|
| 14933 |
+
"loss": 8.5454,
|
| 14934 |
+
"step": 2108
|
| 14935 |
+
},
|
| 14936 |
+
{
|
| 14937 |
+
"epoch": 0.09108577351645504,
|
| 14938 |
+
"grad_norm": 0.66015625,
|
| 14939 |
+
"learning_rate": 0.000995404495968485,
|
| 14940 |
+
"loss": 8.4482,
|
| 14941 |
+
"step": 2109
|
| 14942 |
+
},
|
| 14943 |
+
{
|
| 14944 |
+
"epoch": 0.09112896259825516,
|
| 14945 |
+
"grad_norm": 0.7734375,
|
| 14946 |
+
"learning_rate": 0.0009953948310176945,
|
| 14947 |
+
"loss": 8.6743,
|
| 14948 |
+
"step": 2110
|
| 14949 |
+
},
|
| 14950 |
+
{
|
| 14951 |
+
"epoch": 0.09117215168005528,
|
| 14952 |
+
"grad_norm": 0.65625,
|
| 14953 |
+
"learning_rate": 0.0009953851559612648,
|
| 14954 |
+
"loss": 8.4618,
|
| 14955 |
+
"step": 2111
|
| 14956 |
+
},
|
| 14957 |
+
{
|
| 14958 |
+
"epoch": 0.0912153407618554,
|
| 14959 |
+
"grad_norm": 0.4609375,
|
| 14960 |
+
"learning_rate": 0.0009953754707993931,
|
| 14961 |
+
"loss": 8.4147,
|
| 14962 |
+
"step": 2112
|
| 14963 |
+
},
|
| 14964 |
+
{
|
| 14965 |
+
"epoch": 0.09125852984365553,
|
| 14966 |
+
"grad_norm": 0.5546875,
|
| 14967 |
+
"learning_rate": 0.000995365775532277,
|
| 14968 |
+
"loss": 8.43,
|
| 14969 |
+
"step": 2113
|
| 14970 |
+
},
|
| 14971 |
+
{
|
| 14972 |
+
"epoch": 0.09130171892545565,
|
| 14973 |
+
"grad_norm": 0.609375,
|
| 14974 |
+
"learning_rate": 0.0009953560701601145,
|
| 14975 |
+
"loss": 8.3902,
|
| 14976 |
+
"step": 2114
|
| 14977 |
+
},
|
| 14978 |
+
{
|
| 14979 |
+
"epoch": 0.09134490800725577,
|
| 14980 |
+
"grad_norm": 0.7578125,
|
| 14981 |
+
"learning_rate": 0.0009953463546831032,
|
| 14982 |
+
"loss": 8.3769,
|
| 14983 |
+
"step": 2115
|
| 14984 |
+
},
|
| 14985 |
+
{
|
| 14986 |
+
"epoch": 0.09138809708905589,
|
| 14987 |
+
"grad_norm": 0.71484375,
|
| 14988 |
+
"learning_rate": 0.0009953366291014414,
|
| 14989 |
+
"loss": 8.1538,
|
| 14990 |
+
"step": 2116
|
| 14991 |
+
},
|
| 14992 |
+
{
|
| 14993 |
+
"epoch": 0.09143128617085601,
|
| 14994 |
+
"grad_norm": 0.5546875,
|
| 14995 |
+
"learning_rate": 0.0009953268934153278,
|
| 14996 |
+
"loss": 8.4314,
|
| 14997 |
+
"step": 2117
|
| 14998 |
+
},
|
| 14999 |
+
{
|
| 15000 |
+
"epoch": 0.09147447525265613,
|
| 15001 |
+
"grad_norm": 0.6171875,
|
| 15002 |
+
"learning_rate": 0.0009953171476249606,
|
| 15003 |
+
"loss": 8.3317,
|
| 15004 |
+
"step": 2118
|
| 15005 |
+
},
|
| 15006 |
+
{
|
| 15007 |
+
"epoch": 0.09151766433445625,
|
| 15008 |
+
"grad_norm": 0.734375,
|
| 15009 |
+
"learning_rate": 0.0009953073917305386,
|
| 15010 |
+
"loss": 8.5086,
|
| 15011 |
+
"step": 2119
|
| 15012 |
+
},
|
| 15013 |
+
{
|
| 15014 |
+
"epoch": 0.09156085341625637,
|
| 15015 |
+
"grad_norm": 0.66796875,
|
| 15016 |
+
"learning_rate": 0.0009952976257322612,
|
| 15017 |
+
"loss": 8.4208,
|
| 15018 |
+
"step": 2120
|
| 15019 |
+
},
|
| 15020 |
+
{
|
| 15021 |
+
"epoch": 0.0916040424980565,
|
| 15022 |
+
"grad_norm": 0.8828125,
|
| 15023 |
+
"learning_rate": 0.0009952878496303273,
|
| 15024 |
+
"loss": 8.0259,
|
| 15025 |
+
"step": 2121
|
| 15026 |
+
},
|
| 15027 |
+
{
|
| 15028 |
+
"epoch": 0.09164723157985662,
|
| 15029 |
+
"grad_norm": 0.6015625,
|
| 15030 |
+
"learning_rate": 0.0009952780634249366,
|
| 15031 |
+
"loss": 8.3618,
|
| 15032 |
+
"step": 2122
|
| 15033 |
+
},
|
| 15034 |
+
{
|
| 15035 |
+
"epoch": 0.09169042066165674,
|
| 15036 |
+
"grad_norm": 0.546875,
|
| 15037 |
+
"learning_rate": 0.0009952682671162884,
|
| 15038 |
+
"loss": 8.4246,
|
| 15039 |
+
"step": 2123
|
| 15040 |
+
},
|
| 15041 |
+
{
|
| 15042 |
+
"epoch": 0.09173360974345686,
|
| 15043 |
+
"grad_norm": 0.73046875,
|
| 15044 |
+
"learning_rate": 0.0009952584607045827,
|
| 15045 |
+
"loss": 8.4246,
|
| 15046 |
+
"step": 2124
|
| 15047 |
+
},
|
| 15048 |
+
{
|
| 15049 |
+
"epoch": 0.09177679882525698,
|
| 15050 |
+
"grad_norm": 0.4453125,
|
| 15051 |
+
"learning_rate": 0.0009952486441900195,
|
| 15052 |
+
"loss": 8.729,
|
| 15053 |
+
"step": 2125
|
| 15054 |
+
},
|
| 15055 |
+
{
|
| 15056 |
+
"epoch": 0.0918199879070571,
|
| 15057 |
+
"grad_norm": 0.61328125,
|
| 15058 |
+
"learning_rate": 0.000995238817572799,
|
| 15059 |
+
"loss": 8.2617,
|
| 15060 |
+
"step": 2126
|
| 15061 |
+
},
|
| 15062 |
+
{
|
| 15063 |
+
"epoch": 0.09186317698885722,
|
| 15064 |
+
"grad_norm": 0.796875,
|
| 15065 |
+
"learning_rate": 0.000995228980853122,
|
| 15066 |
+
"loss": 7.8786,
|
| 15067 |
+
"step": 2127
|
| 15068 |
+
},
|
| 15069 |
+
{
|
| 15070 |
+
"epoch": 0.09190636607065734,
|
| 15071 |
+
"grad_norm": 0.5703125,
|
| 15072 |
+
"learning_rate": 0.0009952191340311886,
|
| 15073 |
+
"loss": 8.3024,
|
| 15074 |
+
"step": 2128
|
| 15075 |
+
},
|
| 15076 |
+
{
|
| 15077 |
+
"epoch": 0.09194955515245747,
|
| 15078 |
+
"grad_norm": 0.60546875,
|
| 15079 |
+
"learning_rate": 0.0009952092771072002,
|
| 15080 |
+
"loss": 8.3837,
|
| 15081 |
+
"step": 2129
|
| 15082 |
+
},
|
| 15083 |
+
{
|
| 15084 |
+
"epoch": 0.09199274423425757,
|
| 15085 |
+
"grad_norm": 0.73828125,
|
| 15086 |
+
"learning_rate": 0.0009951994100813575,
|
| 15087 |
+
"loss": 8.2394,
|
| 15088 |
+
"step": 2130
|
| 15089 |
+
},
|
| 15090 |
+
{
|
| 15091 |
+
"epoch": 0.0920359333160577,
|
| 15092 |
+
"grad_norm": 0.76953125,
|
| 15093 |
+
"learning_rate": 0.0009951895329538619,
|
| 15094 |
+
"loss": 8.3308,
|
| 15095 |
+
"step": 2131
|
| 15096 |
+
},
|
| 15097 |
+
{
|
| 15098 |
+
"epoch": 0.09207912239785782,
|
| 15099 |
+
"grad_norm": 0.484375,
|
| 15100 |
+
"learning_rate": 0.0009951796457249148,
|
| 15101 |
+
"loss": 8.1792,
|
| 15102 |
+
"step": 2132
|
| 15103 |
+
},
|
| 15104 |
+
{
|
| 15105 |
+
"epoch": 0.09212231147965794,
|
| 15106 |
+
"grad_norm": 0.439453125,
|
| 15107 |
+
"learning_rate": 0.0009951697483947181,
|
| 15108 |
+
"loss": 8.358,
|
| 15109 |
+
"step": 2133
|
| 15110 |
+
},
|
| 15111 |
+
{
|
| 15112 |
+
"epoch": 0.09216550056145806,
|
| 15113 |
+
"grad_norm": 0.640625,
|
| 15114 |
+
"learning_rate": 0.0009951598409634738,
|
| 15115 |
+
"loss": 8.2675,
|
| 15116 |
+
"step": 2134
|
| 15117 |
+
},
|
| 15118 |
+
{
|
| 15119 |
+
"epoch": 0.09220868964325818,
|
| 15120 |
+
"grad_norm": 0.51953125,
|
| 15121 |
+
"learning_rate": 0.0009951499234313835,
|
| 15122 |
+
"loss": 8.6303,
|
| 15123 |
+
"step": 2135
|
| 15124 |
+
},
|
| 15125 |
+
{
|
| 15126 |
+
"epoch": 0.0922518787250583,
|
| 15127 |
+
"grad_norm": 0.60546875,
|
| 15128 |
+
"learning_rate": 0.00099513999579865,
|
| 15129 |
+
"loss": 8.2709,
|
| 15130 |
+
"step": 2136
|
| 15131 |
+
},
|
| 15132 |
+
{
|
| 15133 |
+
"epoch": 0.09229506780685842,
|
| 15134 |
+
"grad_norm": 0.73828125,
|
| 15135 |
+
"learning_rate": 0.0009951300580654755,
|
| 15136 |
+
"loss": 8.5636,
|
| 15137 |
+
"step": 2137
|
| 15138 |
+
},
|
| 15139 |
+
{
|
| 15140 |
+
"epoch": 0.09233825688865854,
|
| 15141 |
+
"grad_norm": 0.466796875,
|
| 15142 |
+
"learning_rate": 0.0009951201102320628,
|
| 15143 |
+
"loss": 8.3999,
|
| 15144 |
+
"step": 2138
|
| 15145 |
+
},
|
| 15146 |
+
{
|
| 15147 |
+
"epoch": 0.09238144597045866,
|
| 15148 |
+
"grad_norm": 3.6875,
|
| 15149 |
+
"learning_rate": 0.0009951101522986147,
|
| 15150 |
+
"loss": 8.4522,
|
| 15151 |
+
"step": 2139
|
| 15152 |
+
},
|
| 15153 |
+
{
|
| 15154 |
+
"epoch": 0.09242463505225879,
|
| 15155 |
+
"grad_norm": 0.640625,
|
| 15156 |
+
"learning_rate": 0.0009951001842653348,
|
| 15157 |
+
"loss": 8.4834,
|
| 15158 |
+
"step": 2140
|
| 15159 |
+
},
|
| 15160 |
+
{
|
| 15161 |
+
"epoch": 0.0924678241340589,
|
| 15162 |
+
"grad_norm": 0.6484375,
|
| 15163 |
+
"learning_rate": 0.0009950902061324261,
|
| 15164 |
+
"loss": 8.6161,
|
| 15165 |
+
"step": 2141
|
| 15166 |
+
},
|
| 15167 |
+
{
|
| 15168 |
+
"epoch": 0.09251101321585903,
|
| 15169 |
+
"grad_norm": 0.40625,
|
| 15170 |
+
"learning_rate": 0.0009950802179000923,
|
| 15171 |
+
"loss": 8.5365,
|
| 15172 |
+
"step": 2142
|
| 15173 |
+
},
|
| 15174 |
+
{
|
| 15175 |
+
"epoch": 0.09255420229765915,
|
| 15176 |
+
"grad_norm": 0.4765625,
|
| 15177 |
+
"learning_rate": 0.0009950702195685366,
|
| 15178 |
+
"loss": 8.1847,
|
| 15179 |
+
"step": 2143
|
| 15180 |
+
},
|
| 15181 |
+
{
|
| 15182 |
+
"epoch": 0.09259739137945927,
|
| 15183 |
+
"grad_norm": 0.83203125,
|
| 15184 |
+
"learning_rate": 0.0009950602111379635,
|
| 15185 |
+
"loss": 8.4196,
|
| 15186 |
+
"step": 2144
|
| 15187 |
+
},
|
| 15188 |
+
{
|
| 15189 |
+
"epoch": 0.09264058046125939,
|
| 15190 |
+
"grad_norm": 0.482421875,
|
| 15191 |
+
"learning_rate": 0.0009950501926085772,
|
| 15192 |
+
"loss": 8.6901,
|
| 15193 |
+
"step": 2145
|
| 15194 |
+
},
|
| 15195 |
+
{
|
| 15196 |
+
"epoch": 0.09268376954305951,
|
| 15197 |
+
"grad_norm": 0.54296875,
|
| 15198 |
+
"learning_rate": 0.0009950401639805821,
|
| 15199 |
+
"loss": 8.2957,
|
| 15200 |
+
"step": 2146
|
| 15201 |
+
},
|
| 15202 |
+
{
|
| 15203 |
+
"epoch": 0.09272695862485963,
|
| 15204 |
+
"grad_norm": 0.62109375,
|
| 15205 |
+
"learning_rate": 0.0009950301252541823,
|
| 15206 |
+
"loss": 8.0223,
|
| 15207 |
+
"step": 2147
|
| 15208 |
+
},
|
| 15209 |
+
{
|
| 15210 |
+
"epoch": 0.09277014770665976,
|
| 15211 |
+
"grad_norm": 0.6484375,
|
| 15212 |
+
"learning_rate": 0.000995020076429583,
|
| 15213 |
+
"loss": 8.5349,
|
| 15214 |
+
"step": 2148
|
| 15215 |
+
},
|
| 15216 |
+
{
|
| 15217 |
+
"epoch": 0.09281333678845988,
|
| 15218 |
+
"grad_norm": 0.466796875,
|
| 15219 |
+
"learning_rate": 0.000995010017506989,
|
| 15220 |
+
"loss": 8.454,
|
| 15221 |
+
"step": 2149
|
| 15222 |
+
},
|
| 15223 |
+
{
|
| 15224 |
+
"epoch": 0.09285652587026,
|
| 15225 |
+
"grad_norm": 0.4609375,
|
| 15226 |
+
"learning_rate": 0.0009949999484866058,
|
| 15227 |
+
"loss": 8.4627,
|
| 15228 |
+
"step": 2150
|
| 15229 |
+
},
|
| 15230 |
+
{
|
| 15231 |
+
"epoch": 0.09289971495206012,
|
| 15232 |
+
"grad_norm": 0.5390625,
|
| 15233 |
+
"learning_rate": 0.0009949898693686384,
|
| 15234 |
+
"loss": 8.4625,
|
| 15235 |
+
"step": 2151
|
| 15236 |
+
},
|
| 15237 |
+
{
|
| 15238 |
+
"epoch": 0.09294290403386024,
|
| 15239 |
+
"grad_norm": 0.4609375,
|
| 15240 |
+
"learning_rate": 0.0009949797801532924,
|
| 15241 |
+
"loss": 8.3837,
|
| 15242 |
+
"step": 2152
|
| 15243 |
+
},
|
| 15244 |
+
{
|
| 15245 |
+
"epoch": 0.09298609311566036,
|
| 15246 |
+
"grad_norm": 0.462890625,
|
| 15247 |
+
"learning_rate": 0.0009949696808407738,
|
| 15248 |
+
"loss": 8.3657,
|
| 15249 |
+
"step": 2153
|
| 15250 |
+
},
|
| 15251 |
+
{
|
| 15252 |
+
"epoch": 0.09302928219746048,
|
| 15253 |
+
"grad_norm": 0.55859375,
|
| 15254 |
+
"learning_rate": 0.0009949595714312887,
|
| 15255 |
+
"loss": 8.4035,
|
| 15256 |
+
"step": 2154
|
| 15257 |
+
},
|
| 15258 |
+
{
|
| 15259 |
+
"epoch": 0.0930724712792606,
|
| 15260 |
+
"grad_norm": 0.76953125,
|
| 15261 |
+
"learning_rate": 0.0009949494519250434,
|
| 15262 |
+
"loss": 8.3168,
|
| 15263 |
+
"step": 2155
|
| 15264 |
+
},
|
| 15265 |
+
{
|
| 15266 |
+
"epoch": 0.09311566036106073,
|
| 15267 |
+
"grad_norm": 0.421875,
|
| 15268 |
+
"learning_rate": 0.000994939322322244,
|
| 15269 |
+
"loss": 8.3687,
|
| 15270 |
+
"step": 2156
|
| 15271 |
+
},
|
| 15272 |
+
{
|
| 15273 |
+
"epoch": 0.09315884944286085,
|
| 15274 |
+
"grad_norm": 0.68359375,
|
| 15275 |
+
"learning_rate": 0.000994929182623097,
|
| 15276 |
+
"loss": 8.1541,
|
| 15277 |
+
"step": 2157
|
| 15278 |
+
},
|
| 15279 |
+
{
|
| 15280 |
+
"epoch": 0.09320203852466097,
|
| 15281 |
+
"grad_norm": 0.69140625,
|
| 15282 |
+
"learning_rate": 0.0009949190328278098,
|
| 15283 |
+
"loss": 8.3242,
|
| 15284 |
+
"step": 2158
|
| 15285 |
+
},
|
| 15286 |
+
{
|
| 15287 |
+
"epoch": 0.09324522760646109,
|
| 15288 |
+
"grad_norm": 0.490234375,
|
| 15289 |
+
"learning_rate": 0.0009949088729365894,
|
| 15290 |
+
"loss": 8.7448,
|
| 15291 |
+
"step": 2159
|
| 15292 |
+
},
|
| 15293 |
+
{
|
| 15294 |
+
"epoch": 0.09328841668826121,
|
| 15295 |
+
"grad_norm": 0.494140625,
|
| 15296 |
+
"learning_rate": 0.0009948987029496426,
|
| 15297 |
+
"loss": 8.3553,
|
| 15298 |
+
"step": 2160
|
| 15299 |
+
},
|
| 15300 |
+
{
|
| 15301 |
+
"epoch": 0.09333160577006133,
|
| 15302 |
+
"grad_norm": 0.6875,
|
| 15303 |
+
"learning_rate": 0.0009948885228671768,
|
| 15304 |
+
"loss": 8.2736,
|
| 15305 |
+
"step": 2161
|
| 15306 |
+
},
|
| 15307 |
+
{
|
| 15308 |
+
"epoch": 0.09337479485186145,
|
| 15309 |
+
"grad_norm": 0.49609375,
|
| 15310 |
+
"learning_rate": 0.0009948783326894004,
|
| 15311 |
+
"loss": 8.3592,
|
| 15312 |
+
"step": 2162
|
| 15313 |
+
},
|
| 15314 |
+
{
|
| 15315 |
+
"epoch": 0.09341798393366157,
|
| 15316 |
+
"grad_norm": 0.76171875,
|
| 15317 |
+
"learning_rate": 0.0009948681324165206,
|
| 15318 |
+
"loss": 8.5712,
|
| 15319 |
+
"step": 2163
|
| 15320 |
+
},
|
| 15321 |
+
{
|
| 15322 |
+
"epoch": 0.0934611730154617,
|
| 15323 |
+
"grad_norm": 0.455078125,
|
| 15324 |
+
"learning_rate": 0.0009948579220487458,
|
| 15325 |
+
"loss": 8.4489,
|
| 15326 |
+
"step": 2164
|
| 15327 |
+
},
|
| 15328 |
+
{
|
| 15329 |
+
"epoch": 0.09350436209726182,
|
| 15330 |
+
"grad_norm": 0.48828125,
|
| 15331 |
+
"learning_rate": 0.000994847701586284,
|
| 15332 |
+
"loss": 8.3211,
|
| 15333 |
+
"step": 2165
|
| 15334 |
+
},
|
| 15335 |
+
{
|
| 15336 |
+
"epoch": 0.09354755117906194,
|
| 15337 |
+
"grad_norm": 0.6875,
|
| 15338 |
+
"learning_rate": 0.0009948374710293442,
|
| 15339 |
+
"loss": 8.5147,
|
| 15340 |
+
"step": 2166
|
| 15341 |
+
},
|
| 15342 |
+
{
|
| 15343 |
+
"epoch": 0.09359074026086206,
|
| 15344 |
+
"grad_norm": 0.51953125,
|
| 15345 |
+
"learning_rate": 0.0009948272303781345,
|
| 15346 |
+
"loss": 8.5106,
|
| 15347 |
+
"step": 2167
|
| 15348 |
+
},
|
| 15349 |
+
{
|
| 15350 |
+
"epoch": 0.09363392934266218,
|
| 15351 |
+
"grad_norm": 0.5546875,
|
| 15352 |
+
"learning_rate": 0.0009948169796328641,
|
| 15353 |
+
"loss": 8.3855,
|
| 15354 |
+
"step": 2168
|
| 15355 |
+
},
|
| 15356 |
+
{
|
| 15357 |
+
"epoch": 0.0936771184244623,
|
| 15358 |
+
"grad_norm": 0.54296875,
|
| 15359 |
+
"learning_rate": 0.000994806718793742,
|
| 15360 |
+
"loss": 8.5642,
|
| 15361 |
+
"step": 2169
|
| 15362 |
+
},
|
| 15363 |
+
{
|
| 15364 |
+
"epoch": 0.09372030750626242,
|
| 15365 |
+
"grad_norm": 0.671875,
|
| 15366 |
+
"learning_rate": 0.0009947964478609777,
|
| 15367 |
+
"loss": 8.4041,
|
| 15368 |
+
"step": 2170
|
| 15369 |
+
},
|
| 15370 |
+
{
|
| 15371 |
+
"epoch": 0.09376349658806253,
|
| 15372 |
+
"grad_norm": 0.41015625,
|
| 15373 |
+
"learning_rate": 0.0009947861668347807,
|
| 15374 |
+
"loss": 8.6566,
|
| 15375 |
+
"step": 2171
|
| 15376 |
+
},
|
| 15377 |
+
{
|
| 15378 |
+
"epoch": 0.09380668566986265,
|
| 15379 |
+
"grad_norm": 0.45703125,
|
| 15380 |
+
"learning_rate": 0.0009947758757153605,
|
| 15381 |
+
"loss": 8.3748,
|
| 15382 |
+
"step": 2172
|
| 15383 |
+
},
|
| 15384 |
+
{
|
| 15385 |
+
"epoch": 0.09384987475166277,
|
| 15386 |
+
"grad_norm": 0.455078125,
|
| 15387 |
+
"learning_rate": 0.000994765574502927,
|
| 15388 |
+
"loss": 8.4505,
|
| 15389 |
+
"step": 2173
|
| 15390 |
+
},
|
| 15391 |
+
{
|
| 15392 |
+
"epoch": 0.0938930638334629,
|
| 15393 |
+
"grad_norm": 0.76171875,
|
| 15394 |
+
"learning_rate": 0.0009947552631976908,
|
| 15395 |
+
"loss": 8.6221,
|
| 15396 |
+
"step": 2174
|
| 15397 |
+
},
|
| 15398 |
+
{
|
| 15399 |
+
"epoch": 0.09393625291526302,
|
| 15400 |
+
"grad_norm": 0.484375,
|
| 15401 |
+
"learning_rate": 0.0009947449417998617,
|
| 15402 |
+
"loss": 8.1772,
|
| 15403 |
+
"step": 2175
|
| 15404 |
+
},
|
| 15405 |
+
{
|
| 15406 |
+
"epoch": 0.09397944199706314,
|
| 15407 |
+
"grad_norm": 0.62890625,
|
| 15408 |
+
"learning_rate": 0.0009947346103096506,
|
| 15409 |
+
"loss": 8.2067,
|
| 15410 |
+
"step": 2176
|
| 15411 |
+
},
|
| 15412 |
+
{
|
| 15413 |
+
"epoch": 0.09402263107886326,
|
| 15414 |
+
"grad_norm": 1.5078125,
|
| 15415 |
+
"learning_rate": 0.000994724268727268,
|
| 15416 |
+
"loss": 8.7464,
|
| 15417 |
+
"step": 2177
|
| 15418 |
+
},
|
| 15419 |
+
{
|
| 15420 |
+
"epoch": 0.09406582016066338,
|
| 15421 |
+
"grad_norm": 0.56640625,
|
| 15422 |
+
"learning_rate": 0.000994713917052925,
|
| 15423 |
+
"loss": 8.4471,
|
| 15424 |
+
"step": 2178
|
| 15425 |
+
},
|
| 15426 |
+
{
|
| 15427 |
+
"epoch": 0.0941090092424635,
|
| 15428 |
+
"grad_norm": 0.59765625,
|
| 15429 |
+
"learning_rate": 0.000994703555286833,
|
| 15430 |
+
"loss": 8.1524,
|
| 15431 |
+
"step": 2179
|
| 15432 |
+
},
|
| 15433 |
+
{
|
| 15434 |
+
"epoch": 0.09415219832426362,
|
| 15435 |
+
"grad_norm": 0.640625,
|
| 15436 |
+
"learning_rate": 0.000994693183429203,
|
| 15437 |
+
"loss": 8.7342,
|
| 15438 |
+
"step": 2180
|
| 15439 |
+
},
|
| 15440 |
+
{
|
| 15441 |
+
"epoch": 0.09419538740606374,
|
| 15442 |
+
"grad_norm": 0.48046875,
|
| 15443 |
+
"learning_rate": 0.0009946828014802467,
|
| 15444 |
+
"loss": 8.5416,
|
| 15445 |
+
"step": 2181
|
| 15446 |
+
},
|
| 15447 |
+
{
|
| 15448 |
+
"epoch": 0.09423857648786386,
|
| 15449 |
+
"grad_norm": 0.734375,
|
| 15450 |
+
"learning_rate": 0.000994672409440176,
|
| 15451 |
+
"loss": 8.4049,
|
| 15452 |
+
"step": 2182
|
| 15453 |
+
},
|
| 15454 |
+
{
|
| 15455 |
+
"epoch": 0.09428176556966399,
|
| 15456 |
+
"grad_norm": 0.68359375,
|
| 15457 |
+
"learning_rate": 0.0009946620073092026,
|
| 15458 |
+
"loss": 8.5609,
|
| 15459 |
+
"step": 2183
|
| 15460 |
+
},
|
| 15461 |
+
{
|
| 15462 |
+
"epoch": 0.09432495465146411,
|
| 15463 |
+
"grad_norm": 0.578125,
|
| 15464 |
+
"learning_rate": 0.000994651595087539,
|
| 15465 |
+
"loss": 8.3572,
|
| 15466 |
+
"step": 2184
|
| 15467 |
+
},
|
| 15468 |
+
{
|
| 15469 |
+
"epoch": 0.09436814373326423,
|
| 15470 |
+
"grad_norm": 0.49609375,
|
| 15471 |
+
"learning_rate": 0.0009946411727753974,
|
| 15472 |
+
"loss": 8.262,
|
| 15473 |
+
"step": 2185
|
| 15474 |
+
},
|
| 15475 |
+
{
|
| 15476 |
+
"epoch": 0.09441133281506435,
|
| 15477 |
+
"grad_norm": 0.51953125,
|
| 15478 |
+
"learning_rate": 0.0009946307403729906,
|
| 15479 |
+
"loss": 8.6386,
|
| 15480 |
+
"step": 2186
|
| 15481 |
+
},
|
| 15482 |
+
{
|
| 15483 |
+
"epoch": 0.09445452189686447,
|
| 15484 |
+
"grad_norm": 0.78515625,
|
| 15485 |
+
"learning_rate": 0.0009946202978805313,
|
| 15486 |
+
"loss": 8.4235,
|
| 15487 |
+
"step": 2187
|
| 15488 |
+
},
|
| 15489 |
+
{
|
| 15490 |
+
"epoch": 0.09449771097866459,
|
| 15491 |
+
"grad_norm": 0.79296875,
|
| 15492 |
+
"learning_rate": 0.0009946098452982325,
|
| 15493 |
+
"loss": 8.6327,
|
| 15494 |
+
"step": 2188
|
| 15495 |
+
},
|
| 15496 |
+
{
|
| 15497 |
+
"epoch": 0.09454090006046471,
|
| 15498 |
+
"grad_norm": 0.78125,
|
| 15499 |
+
"learning_rate": 0.0009945993826263072,
|
| 15500 |
+
"loss": 8.3352,
|
| 15501 |
+
"step": 2189
|
| 15502 |
+
},
|
| 15503 |
+
{
|
| 15504 |
+
"epoch": 0.09458408914226483,
|
| 15505 |
+
"grad_norm": 0.62109375,
|
| 15506 |
+
"learning_rate": 0.0009945889098649692,
|
| 15507 |
+
"loss": 8.0024,
|
| 15508 |
+
"step": 2190
|
| 15509 |
+
},
|
| 15510 |
+
{
|
| 15511 |
+
"epoch": 0.09462727822406496,
|
| 15512 |
+
"grad_norm": 0.64453125,
|
| 15513 |
+
"learning_rate": 0.0009945784270144321,
|
| 15514 |
+
"loss": 8.2995,
|
| 15515 |
+
"step": 2191
|
| 15516 |
+
},
|
| 15517 |
+
{
|
| 15518 |
+
"epoch": 0.09467046730586508,
|
| 15519 |
+
"grad_norm": 0.76171875,
|
| 15520 |
+
"learning_rate": 0.0009945679340749097,
|
| 15521 |
+
"loss": 8.4291,
|
| 15522 |
+
"step": 2192
|
| 15523 |
+
},
|
| 15524 |
+
{
|
| 15525 |
+
"epoch": 0.0947136563876652,
|
| 15526 |
+
"grad_norm": 0.7109375,
|
| 15527 |
+
"learning_rate": 0.0009945574310466158,
|
| 15528 |
+
"loss": 8.2569,
|
| 15529 |
+
"step": 2193
|
| 15530 |
+
},
|
| 15531 |
+
{
|
| 15532 |
+
"epoch": 0.09475684546946532,
|
| 15533 |
+
"grad_norm": 0.55078125,
|
| 15534 |
+
"learning_rate": 0.000994546917929765,
|
| 15535 |
+
"loss": 8.2783,
|
| 15536 |
+
"step": 2194
|
| 15537 |
+
},
|
| 15538 |
+
{
|
| 15539 |
+
"epoch": 0.09480003455126544,
|
| 15540 |
+
"grad_norm": 0.640625,
|
| 15541 |
+
"learning_rate": 0.0009945363947245717,
|
| 15542 |
+
"loss": 8.2944,
|
| 15543 |
+
"step": 2195
|
| 15544 |
+
},
|
| 15545 |
+
{
|
| 15546 |
+
"epoch": 0.09484322363306556,
|
| 15547 |
+
"grad_norm": 0.578125,
|
| 15548 |
+
"learning_rate": 0.0009945258614312501,
|
| 15549 |
+
"loss": 8.2435,
|
| 15550 |
+
"step": 2196
|
| 15551 |
+
},
|
| 15552 |
+
{
|
| 15553 |
+
"epoch": 0.09488641271486568,
|
| 15554 |
+
"grad_norm": 0.5078125,
|
| 15555 |
+
"learning_rate": 0.0009945153180500156,
|
| 15556 |
+
"loss": 8.4701,
|
| 15557 |
+
"step": 2197
|
| 15558 |
+
},
|
| 15559 |
+
{
|
| 15560 |
+
"epoch": 0.0949296017966658,
|
| 15561 |
+
"grad_norm": 0.63671875,
|
| 15562 |
+
"learning_rate": 0.0009945047645810831,
|
| 15563 |
+
"loss": 8.3213,
|
| 15564 |
+
"step": 2198
|
| 15565 |
+
},
|
| 15566 |
+
{
|
| 15567 |
+
"epoch": 0.09497279087846593,
|
| 15568 |
+
"grad_norm": 0.5234375,
|
| 15569 |
+
"learning_rate": 0.0009944942010246681,
|
| 15570 |
+
"loss": 8.3223,
|
| 15571 |
+
"step": 2199
|
| 15572 |
+
},
|
| 15573 |
+
{
|
| 15574 |
+
"epoch": 0.09501597996026605,
|
| 15575 |
+
"grad_norm": 0.578125,
|
| 15576 |
+
"learning_rate": 0.0009944836273809857,
|
| 15577 |
+
"loss": 8.3426,
|
| 15578 |
+
"step": 2200
|
| 15579 |
+
},
|
| 15580 |
+
{
|
| 15581 |
+
"epoch": 0.09501597996026605,
|
| 15582 |
+
"eval_loss": 8.426615715026855,
|
| 15583 |
+
"eval_runtime": 13.965,
|
| 15584 |
+
"eval_samples_per_second": 1.719,
|
| 15585 |
+
"eval_steps_per_second": 0.215,
|
| 15586 |
+
"step": 2200
|
| 15587 |
}
|
| 15588 |
],
|
| 15589 |
"logging_steps": 1,
|
|
|
|
| 15603 |
"attributes": {}
|
| 15604 |
}
|
| 15605 |
},
|
| 15606 |
+
"total_flos": 7032412569600.0,
|
| 15607 |
"train_batch_size": 1,
|
| 15608 |
"trial_name": null,
|
| 15609 |
"trial_params": null
|