Training in progress, step 19000, checkpoint
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +703 -3
- last-checkpoint/training_args.bin +1 -1
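
Together these files are the standard 🤗 Transformers Trainer checkpoint layout: LFS-tracked model weights (pytorch_model.bin), optimizer and LR-scheduler state, one RNG state file per data-parallel process (rng_state_0–3.pth, i.e. four processes), the trainer state log, and the serialized training arguments. As a minimal illustrative sketch only (the model class, dataset, and output directory below are placeholders, not read from this commit), a directory like this is typically consumed by passing it to resume_from_checkpoint:

    # Sketch, not this repo's actual training script; model/dataset names are placeholders.
    from transformers import AutoModelForMaskedLM, Trainer, TrainingArguments

    model = AutoModelForMaskedLM.from_pretrained("your-base-model")   # placeholder architecture
    args = TrainingArguments(
        output_dir="out",
        per_device_train_batch_size=48,   # matches "train_batch_size": 48 in trainer_state.json
        logging_steps=20,                 # matches "logging_steps": 20 in trainer_state.json
    )
    trainer = Trainer(model=model, args=args, train_dataset=your_train_dataset)
    # Restores weights, optimizer, scheduler, per-process RNG states and trainer_state.json:
    trainer.train(resume_from_checkpoint="last-checkpoint")
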
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ab7e50a79158952ab424ab7ae47a767f43dd77ab5efa58f1c20364aafc8f4444
 size 319352826

last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:283bafb4e9fbde7c821fb56a77898801608d89a269482dd42ff3b049de3b174d
 size 900372486

last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c58e23cb46599bfa6012c57f3293aeed5f59a390ff6d37d78d6c6802513b8457
 size 14960

last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9c3045e09a7577232ab7444c86fce4c0ecee225103bf65652dd401b98c1eae36
 size 14960

last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:250944245eda57266dafa26260cf889e09b45cb83fffef97827f1db3bbce4e82
 size 14960

last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2fed5f1522f5929dc84aace87a480ae5413e80fdd410689b437d7ddd60a1a3b7
 size 14960

last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b27f15f40a69175f98dea375bde05ce9bab4efa689d0532fb9627215e3ffe02e
 size 1064

last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.028144979231967956,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 19000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5958,6 +5958,706 @@
       "learning_rate": 4.959256255655081e-05,
       "loss": 24.1711,
       "step": 17000
+    },
+    { "epoch": 0.02521197613305761, "grad_norm": 7.59375, "learning_rate": 4.9592068663050054e-05, "loss": 24.1804, "step": 17020 },
+    { "epoch": 0.025241602426985997, "grad_norm": 7.71875, "learning_rate": 4.959157476954929e-05, "loss": 24.0944, "step": 17040 },
+    { "epoch": 0.025271228720914387, "grad_norm": 6.6875, "learning_rate": 4.959108087604854e-05, "loss": 24.0414, "step": 17060 },
+    { "epoch": 0.025300855014842773, "grad_norm": 10.3125, "learning_rate": 4.9590586982547784e-05, "loss": 24.1683, "step": 17080 },
+    { "epoch": 0.02533048130877116, "grad_norm": 10.625, "learning_rate": 4.959009308904702e-05, "loss": 24.1671, "step": 17100 },
+    { "epoch": 0.025360107602699546, "grad_norm": 7.15625, "learning_rate": 4.9589599195546264e-05, "loss": 24.1452, "step": 17120 },
+    { "epoch": 0.025389733896627936, "grad_norm": 8.5, "learning_rate": 4.9589105302045515e-05, "loss": 24.245, "step": 17140 },
+    { "epoch": 0.025419360190556323, "grad_norm": 7.8125, "learning_rate": 4.958861140854476e-05, "loss": 24.1189, "step": 17160 },
+    { "epoch": 0.02544898648448471, "grad_norm": 7.90625, "learning_rate": 4.9588117515043995e-05, "loss": 24.064, "step": 17180 },
+    { "epoch": 0.025478612778413096, "grad_norm": 7.96875, "learning_rate": 4.9587623621543245e-05, "loss": 24.2102, "step": 17200 },
+    { "epoch": 0.025508239072341485, "grad_norm": 8.5625, "learning_rate": 4.958712972804249e-05, "loss": 24.1539, "step": 17220 },
+    { "epoch": 0.025537865366269872, "grad_norm": 8.0625, "learning_rate": 4.9586635834541725e-05, "loss": 24.1354, "step": 17240 },
+    { "epoch": 0.02556749166019826, "grad_norm": 12.5625, "learning_rate": 4.958614194104097e-05, "loss": 24.1707, "step": 17260 },
+    { "epoch": 0.025597117954126645, "grad_norm": 10.5, "learning_rate": 4.958564804754022e-05, "loss": 24.0564, "step": 17280 },
+    { "epoch": 0.025626744248055035, "grad_norm": 8.375, "learning_rate": 4.958515415403946e-05, "loss": 24.0725, "step": 17300 },
+    { "epoch": 0.02565637054198342, "grad_norm": 8.3125, "learning_rate": 4.95846602605387e-05, "loss": 24.176, "step": 17320 },
+    { "epoch": 0.025685996835911808, "grad_norm": 7.65625, "learning_rate": 4.958416636703794e-05, "loss": 24.1942, "step": 17340 },
+    { "epoch": 0.025715623129840194, "grad_norm": 8.375, "learning_rate": 4.958367247353719e-05, "loss": 24.0173, "step": 17360 },
+    { "epoch": 0.025745249423768584, "grad_norm": 8.25, "learning_rate": 4.9583178580036436e-05, "loss": 24.0805, "step": 17380 },
+    { "epoch": 0.02577487571769697, "grad_norm": 8.6875, "learning_rate": 4.958268468653567e-05, "loss": 24.1161, "step": 17400 },
+    { "epoch": 0.025804502011625357, "grad_norm": 7.78125, "learning_rate": 4.9582190793034916e-05, "loss": 24.1395, "step": 17420 },
+    { "epoch": 0.025834128305553743, "grad_norm": 8.4375, "learning_rate": 4.9581696899534166e-05, "loss": 24.1628, "step": 17440 },
+    { "epoch": 0.025863754599482133, "grad_norm": 8.25, "learning_rate": 4.95812030060334e-05, "loss": 24.1601, "step": 17460 },
+    { "epoch": 0.02589338089341052, "grad_norm": 8.1875, "learning_rate": 4.9580709112532646e-05, "loss": 24.1186, "step": 17480 },
+    { "epoch": 0.025923007187338906, "grad_norm": 7.625, "learning_rate": 4.9580215219031896e-05, "loss": 24.1463, "step": 17500 },
+    { "epoch": 0.025952633481267293, "grad_norm": 8.875, "learning_rate": 4.957972132553114e-05, "loss": 24.142, "step": 17520 },
+    { "epoch": 0.025982259775195683, "grad_norm": 9.1875, "learning_rate": 4.9579227432030376e-05, "loss": 24.1378, "step": 17540 },
+    { "epoch": 0.02601188606912407, "grad_norm": 9.1875, "learning_rate": 4.957873353852962e-05, "loss": 24.0645, "step": 17560 },
+    { "epoch": 0.026041512363052455, "grad_norm": 10.9375, "learning_rate": 4.957823964502887e-05, "loss": 24.0567, "step": 17580 },
+    { "epoch": 0.026071138656980845, "grad_norm": 7.71875, "learning_rate": 4.9577745751528107e-05, "loss": 24.0838, "step": 17600 },
+    { "epoch": 0.026100764950909232, "grad_norm": 8.125, "learning_rate": 4.957725185802735e-05, "loss": 24.124, "step": 17620 },
+    { "epoch": 0.02613039124483762, "grad_norm": 7.78125, "learning_rate": 4.9576757964526593e-05, "loss": 24.0202, "step": 17640 },
+    { "epoch": 0.026160017538766005, "grad_norm": 8.0, "learning_rate": 4.9576264071025844e-05, "loss": 24.1524, "step": 17660 },
+    { "epoch": 0.026189643832694395, "grad_norm": 7.90625, "learning_rate": 4.957577017752508e-05, "loss": 24.1435, "step": 17680 },
+    { "epoch": 0.02621927012662278, "grad_norm": 7.71875, "learning_rate": 4.9575276284024324e-05, "loss": 24.0919, "step": 17700 },
+    { "epoch": 0.026248896420551168, "grad_norm": 8.125, "learning_rate": 4.957478239052357e-05, "loss": 24.1923, "step": 17720 },
+    { "epoch": 0.026278522714479554, "grad_norm": 7.59375, "learning_rate": 4.957428849702281e-05, "loss": 24.1385, "step": 17740 },
+    { "epoch": 0.026308149008407944, "grad_norm": 8.25, "learning_rate": 4.9573794603522054e-05, "loss": 24.0698, "step": 17760 },
+    { "epoch": 0.02633777530233633, "grad_norm": 7.8125, "learning_rate": 4.95733007100213e-05, "loss": 24.0787, "step": 17780 },
+    { "epoch": 0.026367401596264717, "grad_norm": 8.5625, "learning_rate": 4.957280681652055e-05, "loss": 24.0752, "step": 17800 },
+    { "epoch": 0.026397027890193103, "grad_norm": 6.9375, "learning_rate": 4.9572312923019784e-05, "loss": 24.0701, "step": 17820 },
+    { "epoch": 0.026426654184121493, "grad_norm": 9.5, "learning_rate": 4.957181902951903e-05, "loss": 24.1038, "step": 17840 },
+    { "epoch": 0.02645628047804988, "grad_norm": 8.125, "learning_rate": 4.957132513601827e-05, "loss": 24.0736, "step": 17860 },
+    { "epoch": 0.026485906771978266, "grad_norm": 8.25, "learning_rate": 4.957083124251752e-05, "loss": 24.049, "step": 17880 },
+    { "epoch": 0.026515533065906653, "grad_norm": 8.75, "learning_rate": 4.957033734901676e-05, "loss": 24.1319, "step": 17900 },
+    { "epoch": 0.026545159359835042, "grad_norm": 7.25, "learning_rate": 4.9569843455516e-05, "loss": 24.1071, "step": 17920 },
+    { "epoch": 0.02657478565376343, "grad_norm": 10.125, "learning_rate": 4.9569349562015245e-05, "loss": 24.1587, "step": 17940 },
+    { "epoch": 0.026604411947691815, "grad_norm": 8.9375, "learning_rate": 4.956885566851449e-05, "loss": 24.0234, "step": 17960 },
+    { "epoch": 0.026634038241620202, "grad_norm": 9.25, "learning_rate": 4.956836177501373e-05, "loss": 24.0215, "step": 17980 },
+    { "epoch": 0.02666366453554859, "grad_norm": 9.625, "learning_rate": 4.9567867881512975e-05, "loss": 24.0976, "step": 18000 },
+    { "epoch": 0.026693290829476978, "grad_norm": 7.65625, "learning_rate": 4.956737398801222e-05, "loss": 24.0219, "step": 18020 },
+    { "epoch": 0.026722917123405365, "grad_norm": 7.15625, "learning_rate": 4.956688009451146e-05, "loss": 24.0564, "step": 18040 },
+    { "epoch": 0.02675254341733375, "grad_norm": 7.90625, "learning_rate": 4.9566386201010705e-05, "loss": 24.0368, "step": 18060 },
+    { "epoch": 0.02678216971126214, "grad_norm": 8.75, "learning_rate": 4.956589230750995e-05, "loss": 23.9664, "step": 18080 },
+    { "epoch": 0.026811796005190527, "grad_norm": 7.03125, "learning_rate": 4.956539841400919e-05, "loss": 24.0162, "step": 18100 },
+    { "epoch": 0.026841422299118914, "grad_norm": 7.21875, "learning_rate": 4.9564904520508436e-05, "loss": 24.0582, "step": 18120 },
+    { "epoch": 0.0268710485930473, "grad_norm": 8.1875, "learning_rate": 4.956441062700768e-05, "loss": 24.0407, "step": 18140 },
+    { "epoch": 0.02690067488697569, "grad_norm": 8.625, "learning_rate": 4.956391673350692e-05, "loss": 24.0271, "step": 18160 },
+    { "epoch": 0.026930301180904077, "grad_norm": 7.84375, "learning_rate": 4.9563422840006166e-05, "loss": 24.0241, "step": 18180 },
+    { "epoch": 0.026959927474832463, "grad_norm": 7.75, "learning_rate": 4.956292894650541e-05, "loss": 24.0187, "step": 18200 },
+    { "epoch": 0.02698955376876085, "grad_norm": 6.875, "learning_rate": 4.956243505300465e-05, "loss": 24.0737, "step": 18220 },
+    { "epoch": 0.02701918006268924, "grad_norm": 7.6875, "learning_rate": 4.9561941159503896e-05, "loss": 24.0664, "step": 18240 },
+    { "epoch": 0.027048806356617626, "grad_norm": 8.0625, "learning_rate": 4.956144726600314e-05, "loss": 23.9322, "step": 18260 },
+    { "epoch": 0.027078432650546012, "grad_norm": 9.4375, "learning_rate": 4.956095337250238e-05, "loss": 24.0489, "step": 18280 },
+    { "epoch": 0.0271080589444744, "grad_norm": 8.6875, "learning_rate": 4.9560459479001626e-05, "loss": 24.0831, "step": 18300 },
+    { "epoch": 0.02713768523840279, "grad_norm": 8.5625, "learning_rate": 4.955996558550087e-05, "loss": 24.0794, "step": 18320 },
+    { "epoch": 0.027167311532331175, "grad_norm": 7.59375, "learning_rate": 4.955947169200011e-05, "loss": 23.9772, "step": 18340 },
+    { "epoch": 0.02719693782625956, "grad_norm": 7.375, "learning_rate": 4.955897779849936e-05, "loss": 24.1268, "step": 18360 },
+    { "epoch": 0.027226564120187948, "grad_norm": 7.625, "learning_rate": 4.95584839049986e-05, "loss": 24.0586, "step": 18380 },
+    { "epoch": 0.027256190414116338, "grad_norm": 7.5, "learning_rate": 4.9557990011497844e-05, "loss": 24.0107, "step": 18400 },
+    { "epoch": 0.027285816708044724, "grad_norm": 8.3125, "learning_rate": 4.955749611799709e-05, "loss": 24.0178, "step": 18420 },
+    { "epoch": 0.02731544300197311, "grad_norm": 7.40625, "learning_rate": 4.955700222449633e-05, "loss": 24.0978, "step": 18440 },
+    { "epoch": 0.027345069295901497, "grad_norm": 7.53125, "learning_rate": 4.9556508330995574e-05, "loss": 24.046, "step": 18460 },
+    { "epoch": 0.027374695589829887, "grad_norm": 8.75, "learning_rate": 4.955601443749482e-05, "loss": 23.984, "step": 18480 },
+    { "epoch": 0.027404321883758274, "grad_norm": 10.1875, "learning_rate": 4.955552054399406e-05, "loss": 23.9507, "step": 18500 },
+    { "epoch": 0.02743394817768666, "grad_norm": 8.0625, "learning_rate": 4.9555026650493304e-05, "loss": 24.0807, "step": 18520 },
+    { "epoch": 0.027463574471615047, "grad_norm": 8.9375, "learning_rate": 4.955453275699255e-05, "loss": 23.9955, "step": 18540 },
+    { "epoch": 0.027493200765543437, "grad_norm": 8.625, "learning_rate": 4.955403886349179e-05, "loss": 23.9839, "step": 18560 },
+    { "epoch": 0.027522827059471823, "grad_norm": 7.90625, "learning_rate": 4.9553544969991034e-05, "loss": 23.9817, "step": 18580 },
+    { "epoch": 0.02755245335340021, "grad_norm": 8.1875, "learning_rate": 4.955305107649028e-05, "loss": 23.9895, "step": 18600 },
+    { "epoch": 0.027582079647328596, "grad_norm": 7.375, "learning_rate": 4.9552557182989514e-05, "loss": 24.0384, "step": 18620 },
+    { "epoch": 0.027611705941256986, "grad_norm": 8.6875, "learning_rate": 4.9552063289488765e-05, "loss": 23.9931, "step": 18640 },
+    { "epoch": 0.027641332235185372, "grad_norm": 9.375, "learning_rate": 4.955156939598801e-05, "loss": 24.0053, "step": 18660 },
+    { "epoch": 0.02767095852911376, "grad_norm": 6.9375, "learning_rate": 4.955107550248725e-05, "loss": 23.9771, "step": 18680 },
+    { "epoch": 0.027700584823042145, "grad_norm": 7.125, "learning_rate": 4.9550581608986495e-05, "loss": 23.9985, "step": 18700 },
+    { "epoch": 0.027730211116970535, "grad_norm": 8.625, "learning_rate": 4.955008771548574e-05, "loss": 23.929, "step": 18720 },
+    { "epoch": 0.02775983741089892, "grad_norm": 9.125, "learning_rate": 4.954959382198498e-05, "loss": 24.1382, "step": 18740 },
+    { "epoch": 0.027789463704827308, "grad_norm": 7.46875, "learning_rate": 4.954909992848422e-05, "loss": 24.0646, "step": 18760 },
+    { "epoch": 0.027819089998755694, "grad_norm": 10.75, "learning_rate": 4.954860603498347e-05, "loss": 23.9292, "step": 18780 },
+    { "epoch": 0.027848716292684084, "grad_norm": 8.875, "learning_rate": 4.954811214148271e-05, "loss": 23.9326, "step": 18800 },
+    { "epoch": 0.02787834258661247, "grad_norm": 7.71875, "learning_rate": 4.9547618247981956e-05, "loss": 23.9914, "step": 18820 },
+    { "epoch": 0.027907968880540857, "grad_norm": 8.75, "learning_rate": 4.954712435448119e-05, "loss": 23.9243, "step": 18840 },
+    { "epoch": 0.027937595174469244, "grad_norm": 9.1875, "learning_rate": 4.954663046098044e-05, "loss": 23.9705, "step": 18860 },
+    { "epoch": 0.027967221468397634, "grad_norm": 7.71875, "learning_rate": 4.9546136567479686e-05, "loss": 23.9516, "step": 18880 },
+    { "epoch": 0.02799684776232602, "grad_norm": 7.90625, "learning_rate": 4.954564267397893e-05, "loss": 23.9473, "step": 18900 },
+    { "epoch": 0.028026474056254407, "grad_norm": 7.09375, "learning_rate": 4.9545148780478166e-05, "loss": 23.9625, "step": 18920 },
+    { "epoch": 0.028056100350182793, "grad_norm": 9.3125, "learning_rate": 4.9544654886977416e-05, "loss": 23.9841, "step": 18940 },
+    { "epoch": 0.028085726644111183, "grad_norm": 8.1875, "learning_rate": 4.954416099347666e-05, "loss": 23.9801, "step": 18960 },
+    { "epoch": 0.02811535293803957, "grad_norm": 7.21875, "learning_rate": 4.9543667099975896e-05, "loss": 23.9958, "step": 18980 },
+    { "epoch": 0.028144979231967956, "grad_norm": 8.875, "learning_rate": 4.9543173206475146e-05, "loss": 23.9279, "step": 19000
     }
   ],
   "logging_steps": 20,
@@ -5977,7 +6677,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.
+  "total_flos": 3.5154933639955874e+19,
  "train_batch_size": 48,
  "trial_name": null,
  "trial_params": null

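The added entries above are the Trainer's periodic log records, one every 20 optimizer steps (matching "logging_steps": 20), each carrying the fractional epoch, gradient norm, learning rate, loss, and step; the header fields advance from the previous checkpoint at step 17000 to step 19000. A small sketch for inspecting the file once the checkpoint is downloaded locally; it assumes the usual Trainer layout in which these records sit under a "log_history" list:

    import json

    # Assumes last-checkpoint/ has been downloaded from this repository.
    with open("last-checkpoint/trainer_state.json") as f:
        state = json.load(f)

    print(state["global_step"], state["epoch"])   # 19000, 0.028144979231967956
    latest = state["log_history"][-1]             # assumed key; last logged record (step 19000 above)
    print(latest["loss"], latest["grad_norm"])    # 23.9279, 8.875

    # Scale implied by the header: 19000 / 0.02814... ≈ 6.75e5 optimizer steps per epoch.
    print(round(state["global_step"] / state["epoch"]))
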
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:bfb002641c9997dcb364a38bb417a9ff3d31f1f51c9f2ad8e398d4de90a740cf
 size 5432

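
training_args.bin is the pickled TrainingArguments object that the Trainer saves next to each checkpoint, which is why its size stays at 5432 bytes while the oid changes. A brief sketch of reading it back, assuming a compatible transformers/torch install is available to unpickle the class:

    import torch

    # weights_only=False because this file holds a pickled TrainingArguments object,
    # not a plain tensor state dict (explicitly required on recent PyTorch versions).
    args = torch.load("last-checkpoint/training_args.bin", weights_only=False)
    print(args.per_device_train_batch_size, args.logging_steps)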