CocoRoF commited on
Commit
c3541ea
·
verified ·
1 Parent(s): 46a889b

Training in progress, step 30000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba4ebc8d324592b24aa466cb1a17beb4eb518d5cd7415ad4d10867a1f113452a
3
  size 306619286
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:877ef9b1ef203d1c02f31d1f5d81565b3878eb39121c8153f356847a2fd8eef8
3
  size 306619286
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9684e35dfc059389b032c609c2d17105dd7d52f3b875814129afa1ef90d3e36
3
  size 919972410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78438408549281a57107eeb546cb6b697cb5ef9532b1f55bd2b74f24bbf771c3
3
  size 919972410
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69ec6e3926fa071bede113523efa3dc6e630c3c7958c54a9ca321cf4d62ed145
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb18ac8d6db3307b1c242f7cb069fc8b8dab957434ddfcafcac997cfd6a43abf
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6127ee4f0c13500ec5038fce65af8f7beec63c137c7d4b7c157aa6303cf5879
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bdab708057b5f34a402d9a2b4443f5f93a8e8ee2ddb66d955f0a15ad394ecc5
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da01d1c5eb2cc3a323f97c1f590d13ccfac2a4c5b1479bd378b4e643304f5a4f
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:599882a30c163a5a2a000c4e74b320ecc4a55aa1b079882fd66aa3d2559d19e7
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49a3f04d76c0d3acc7d3dd95a04215f368f35a451ae8cba8a2fdba38cda9ca0a
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:567c3b482c209c2778fc017e39a38642c488edda20673ef29f571ef7177ad81e
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df7d2c9825dba80cb544920f8cc0c72122f96514e6cd259052a8765b034393e2
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f9ffe9a916e778423aaed4ec842923c9ccfdd3d7a4fbad10dc6a3bfc278fb8e
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a20a42d44ff48cc162224010190e898fe28598ddad8cd1896d330a3bb1d8ec3
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7ede8a81aa3c780fb9c3cb57537752a782c4aed1dcecb7aafd6ca5a7ea90252
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18ac0dc4f09f25179860561fcea7c5c8f997aabdc46a170665f9dc5a72bc27c6
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b1c5c0c0afa907d332467e631e6cee80ba476689aa0caa77689ca273d83b3e4
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a16fcb5411ff961b47eff7378d85105fe9837e0492d19ea5ce3b7c4b77aa3b6
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73025ac422abb13303ee974109cf39f6f848de7f7013e828d04aa4e2ec0e6757
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6276b39eb0a6a4f547784c30a100b3eee72c8aefbe6f0f7bb1ca7dca8f60dc4b
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1909da172bf01f799e2ef8934ccdab3f4895ac7509eafdfbd8adccaf29c2176
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.46393271119956764,
5
  "eval_steps": 5000,
6
- "global_step": 25000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -17547,6 +17547,3514 @@
17547
  "eval_samples_per_second": 3202.139,
17548
  "eval_steps_per_second": 50.035,
17549
  "step": 25000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17550
  }
17551
  ],
17552
  "logging_steps": 10,
@@ -17566,7 +21074,7 @@
17566
  "attributes": {}
17567
  }
17568
  },
17569
- "total_flos": 4.36380689760256e+18,
17570
  "train_batch_size": 8,
17571
  "trial_name": null,
17572
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5567192534394811,
5
  "eval_steps": 5000,
6
+ "global_step": 30000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
17547
  "eval_samples_per_second": 3202.139,
17548
  "eval_steps_per_second": 50.035,
17549
  "step": 25000
17550
+ },
17551
+ {
17552
+ "epoch": 0.46411828428404744,
17553
+ "grad_norm": 37.1875,
17554
+ "learning_rate": 9.927481539107923e-06,
17555
+ "loss": 21.032,
17556
+ "step": 25010
17557
+ },
17558
+ {
17559
+ "epoch": 0.46430385736852725,
17560
+ "grad_norm": 34.0,
17561
+ "learning_rate": 9.927452543321879e-06,
17562
+ "loss": 20.8725,
17563
+ "step": 25020
17564
+ },
17565
+ {
17566
+ "epoch": 0.4644894304530071,
17567
+ "grad_norm": 35.34375,
17568
+ "learning_rate": 9.927423547535836e-06,
17569
+ "loss": 20.9236,
17570
+ "step": 25030
17571
+ },
17572
+ {
17573
+ "epoch": 0.4646750035374869,
17574
+ "grad_norm": 35.90625,
17575
+ "learning_rate": 9.927394551749796e-06,
17576
+ "loss": 20.7448,
17577
+ "step": 25040
17578
+ },
17579
+ {
17580
+ "epoch": 0.4648605766219667,
17581
+ "grad_norm": 33.875,
17582
+ "learning_rate": 9.927365555963753e-06,
17583
+ "loss": 21.2387,
17584
+ "step": 25050
17585
+ },
17586
+ {
17587
+ "epoch": 0.4650461497064466,
17588
+ "grad_norm": 34.8125,
17589
+ "learning_rate": 9.92733656017771e-06,
17590
+ "loss": 20.801,
17591
+ "step": 25060
17592
+ },
17593
+ {
17594
+ "epoch": 0.4652317227909264,
17595
+ "grad_norm": 36.1875,
17596
+ "learning_rate": 9.927307564391668e-06,
17597
+ "loss": 20.7705,
17598
+ "step": 25070
17599
+ },
17600
+ {
17601
+ "epoch": 0.46541729587540626,
17602
+ "grad_norm": 34.53125,
17603
+ "learning_rate": 9.927278568605625e-06,
17604
+ "loss": 20.7148,
17605
+ "step": 25080
17606
+ },
17607
+ {
17608
+ "epoch": 0.46560286895988606,
17609
+ "grad_norm": 34.875,
17610
+ "learning_rate": 9.927249572819583e-06,
17611
+ "loss": 20.4595,
17612
+ "step": 25090
17613
+ },
17614
+ {
17615
+ "epoch": 0.46578844204436587,
17616
+ "grad_norm": 35.46875,
17617
+ "learning_rate": 9.92722057703354e-06,
17618
+ "loss": 20.9314,
17619
+ "step": 25100
17620
+ },
17621
+ {
17622
+ "epoch": 0.46597401512884573,
17623
+ "grad_norm": 33.90625,
17624
+ "learning_rate": 9.927191581247499e-06,
17625
+ "loss": 20.3269,
17626
+ "step": 25110
17627
+ },
17628
+ {
17629
+ "epoch": 0.46615958821332554,
17630
+ "grad_norm": 36.3125,
17631
+ "learning_rate": 9.927162585461456e-06,
17632
+ "loss": 21.2732,
17633
+ "step": 25120
17634
+ },
17635
+ {
17636
+ "epoch": 0.46634516129780534,
17637
+ "grad_norm": 33.46875,
17638
+ "learning_rate": 9.927133589675412e-06,
17639
+ "loss": 21.0565,
17640
+ "step": 25130
17641
+ },
17642
+ {
17643
+ "epoch": 0.4665307343822852,
17644
+ "grad_norm": 35.53125,
17645
+ "learning_rate": 9.927104593889371e-06,
17646
+ "loss": 21.2097,
17647
+ "step": 25140
17648
+ },
17649
+ {
17650
+ "epoch": 0.466716307466765,
17651
+ "grad_norm": 34.78125,
17652
+ "learning_rate": 9.927075598103329e-06,
17653
+ "loss": 20.6643,
17654
+ "step": 25150
17655
+ },
17656
+ {
17657
+ "epoch": 0.4669018805512449,
17658
+ "grad_norm": 33.71875,
17659
+ "learning_rate": 9.927046602317286e-06,
17660
+ "loss": 20.9921,
17661
+ "step": 25160
17662
+ },
17663
+ {
17664
+ "epoch": 0.4670874536357247,
17665
+ "grad_norm": 31.984375,
17666
+ "learning_rate": 9.927017606531244e-06,
17667
+ "loss": 20.5886,
17668
+ "step": 25170
17669
+ },
17670
+ {
17671
+ "epoch": 0.4672730267202045,
17672
+ "grad_norm": 33.90625,
17673
+ "learning_rate": 9.926988610745201e-06,
17674
+ "loss": 20.725,
17675
+ "step": 25180
17676
+ },
17677
+ {
17678
+ "epoch": 0.46745859980468435,
17679
+ "grad_norm": 35.1875,
17680
+ "learning_rate": 9.926959614959158e-06,
17681
+ "loss": 20.9366,
17682
+ "step": 25190
17683
+ },
17684
+ {
17685
+ "epoch": 0.46764417288916416,
17686
+ "grad_norm": 34.6875,
17687
+ "learning_rate": 9.926930619173116e-06,
17688
+ "loss": 21.0935,
17689
+ "step": 25200
17690
+ },
17691
+ {
17692
+ "epoch": 0.46782974597364396,
17693
+ "grad_norm": 34.6875,
17694
+ "learning_rate": 9.926901623387075e-06,
17695
+ "loss": 20.3594,
17696
+ "step": 25210
17697
+ },
17698
+ {
17699
+ "epoch": 0.4680153190581238,
17700
+ "grad_norm": 36.96875,
17701
+ "learning_rate": 9.926872627601032e-06,
17702
+ "loss": 20.7214,
17703
+ "step": 25220
17704
+ },
17705
+ {
17706
+ "epoch": 0.46820089214260363,
17707
+ "grad_norm": 37.375,
17708
+ "learning_rate": 9.92684363181499e-06,
17709
+ "loss": 20.9528,
17710
+ "step": 25230
17711
+ },
17712
+ {
17713
+ "epoch": 0.46838646522708344,
17714
+ "grad_norm": 34.1875,
17715
+ "learning_rate": 9.926814636028947e-06,
17716
+ "loss": 20.7266,
17717
+ "step": 25240
17718
+ },
17719
+ {
17720
+ "epoch": 0.4685720383115633,
17721
+ "grad_norm": 34.5625,
17722
+ "learning_rate": 9.926785640242904e-06,
17723
+ "loss": 20.522,
17724
+ "step": 25250
17725
+ },
17726
+ {
17727
+ "epoch": 0.4687576113960431,
17728
+ "grad_norm": 33.65625,
17729
+ "learning_rate": 9.926756644456862e-06,
17730
+ "loss": 20.7444,
17731
+ "step": 25260
17732
+ },
17733
+ {
17734
+ "epoch": 0.46894318448052297,
17735
+ "grad_norm": 32.21875,
17736
+ "learning_rate": 9.92672764867082e-06,
17737
+ "loss": 20.1346,
17738
+ "step": 25270
17739
+ },
17740
+ {
17741
+ "epoch": 0.4691287575650028,
17742
+ "grad_norm": 34.96875,
17743
+ "learning_rate": 9.926698652884777e-06,
17744
+ "loss": 20.6407,
17745
+ "step": 25280
17746
+ },
17747
+ {
17748
+ "epoch": 0.4693143306494826,
17749
+ "grad_norm": 33.875,
17750
+ "learning_rate": 9.926669657098734e-06,
17751
+ "loss": 20.6497,
17752
+ "step": 25290
17753
+ },
17754
+ {
17755
+ "epoch": 0.46949990373396244,
17756
+ "grad_norm": 34.75,
17757
+ "learning_rate": 9.926640661312692e-06,
17758
+ "loss": 20.4089,
17759
+ "step": 25300
17760
+ },
17761
+ {
17762
+ "epoch": 0.46968547681844225,
17763
+ "grad_norm": 34.8125,
17764
+ "learning_rate": 9.92661166552665e-06,
17765
+ "loss": 20.2013,
17766
+ "step": 25310
17767
+ },
17768
+ {
17769
+ "epoch": 0.46987104990292206,
17770
+ "grad_norm": 34.84375,
17771
+ "learning_rate": 9.926582669740608e-06,
17772
+ "loss": 20.8357,
17773
+ "step": 25320
17774
+ },
17775
+ {
17776
+ "epoch": 0.4700566229874019,
17777
+ "grad_norm": 34.375,
17778
+ "learning_rate": 9.926553673954565e-06,
17779
+ "loss": 20.3461,
17780
+ "step": 25330
17781
+ },
17782
+ {
17783
+ "epoch": 0.4702421960718817,
17784
+ "grad_norm": 33.78125,
17785
+ "learning_rate": 9.926524678168523e-06,
17786
+ "loss": 20.1499,
17787
+ "step": 25340
17788
+ },
17789
+ {
17790
+ "epoch": 0.4704277691563616,
17791
+ "grad_norm": 35.71875,
17792
+ "learning_rate": 9.92649568238248e-06,
17793
+ "loss": 20.4552,
17794
+ "step": 25350
17795
+ },
17796
+ {
17797
+ "epoch": 0.4706133422408414,
17798
+ "grad_norm": 34.28125,
17799
+ "learning_rate": 9.926466686596438e-06,
17800
+ "loss": 20.6206,
17801
+ "step": 25360
17802
+ },
17803
+ {
17804
+ "epoch": 0.4707989153253212,
17805
+ "grad_norm": 35.28125,
17806
+ "learning_rate": 9.926437690810395e-06,
17807
+ "loss": 20.7498,
17808
+ "step": 25370
17809
+ },
17810
+ {
17811
+ "epoch": 0.47098448840980106,
17812
+ "grad_norm": 34.1875,
17813
+ "learning_rate": 9.926408695024352e-06,
17814
+ "loss": 20.6467,
17815
+ "step": 25380
17816
+ },
17817
+ {
17818
+ "epoch": 0.47117006149428087,
17819
+ "grad_norm": 33.0625,
17820
+ "learning_rate": 9.926379699238312e-06,
17821
+ "loss": 20.624,
17822
+ "step": 25390
17823
+ },
17824
+ {
17825
+ "epoch": 0.4713556345787607,
17826
+ "grad_norm": 33.21875,
17827
+ "learning_rate": 9.926350703452267e-06,
17828
+ "loss": 20.5546,
17829
+ "step": 25400
17830
+ },
17831
+ {
17832
+ "epoch": 0.47154120766324054,
17833
+ "grad_norm": 35.875,
17834
+ "learning_rate": 9.926321707666225e-06,
17835
+ "loss": 20.8728,
17836
+ "step": 25410
17837
+ },
17838
+ {
17839
+ "epoch": 0.47172678074772034,
17840
+ "grad_norm": 35.03125,
17841
+ "learning_rate": 9.926292711880184e-06,
17842
+ "loss": 20.7638,
17843
+ "step": 25420
17844
+ },
17845
+ {
17846
+ "epoch": 0.4719123538322002,
17847
+ "grad_norm": 33.6875,
17848
+ "learning_rate": 9.926263716094141e-06,
17849
+ "loss": 20.8226,
17850
+ "step": 25430
17851
+ },
17852
+ {
17853
+ "epoch": 0.47209792691668,
17854
+ "grad_norm": 35.09375,
17855
+ "learning_rate": 9.926234720308099e-06,
17856
+ "loss": 20.8098,
17857
+ "step": 25440
17858
+ },
17859
+ {
17860
+ "epoch": 0.4722835000011598,
17861
+ "grad_norm": 35.90625,
17862
+ "learning_rate": 9.926205724522056e-06,
17863
+ "loss": 20.0728,
17864
+ "step": 25450
17865
+ },
17866
+ {
17867
+ "epoch": 0.4724690730856397,
17868
+ "grad_norm": 35.65625,
17869
+ "learning_rate": 9.926176728736013e-06,
17870
+ "loss": 20.8324,
17871
+ "step": 25460
17872
+ },
17873
+ {
17874
+ "epoch": 0.4726546461701195,
17875
+ "grad_norm": 36.15625,
17876
+ "learning_rate": 9.92614773294997e-06,
17877
+ "loss": 20.5694,
17878
+ "step": 25470
17879
+ },
17880
+ {
17881
+ "epoch": 0.4728402192545993,
17882
+ "grad_norm": 34.4375,
17883
+ "learning_rate": 9.926118737163928e-06,
17884
+ "loss": 21.1526,
17885
+ "step": 25480
17886
+ },
17887
+ {
17888
+ "epoch": 0.47302579233907915,
17889
+ "grad_norm": 35.875,
17890
+ "learning_rate": 9.926089741377887e-06,
17891
+ "loss": 21.0011,
17892
+ "step": 25490
17893
+ },
17894
+ {
17895
+ "epoch": 0.47321136542355896,
17896
+ "grad_norm": 36.5625,
17897
+ "learning_rate": 9.926060745591843e-06,
17898
+ "loss": 20.2465,
17899
+ "step": 25500
17900
+ },
17901
+ {
17902
+ "epoch": 0.47339693850803877,
17903
+ "grad_norm": 37.40625,
17904
+ "learning_rate": 9.9260317498058e-06,
17905
+ "loss": 20.8743,
17906
+ "step": 25510
17907
+ },
17908
+ {
17909
+ "epoch": 0.47358251159251863,
17910
+ "grad_norm": 35.4375,
17911
+ "learning_rate": 9.92600275401976e-06,
17912
+ "loss": 20.9921,
17913
+ "step": 25520
17914
+ },
17915
+ {
17916
+ "epoch": 0.47376808467699844,
17917
+ "grad_norm": 33.03125,
17918
+ "learning_rate": 9.925973758233717e-06,
17919
+ "loss": 20.6059,
17920
+ "step": 25530
17921
+ },
17922
+ {
17923
+ "epoch": 0.4739536577614783,
17924
+ "grad_norm": 35.09375,
17925
+ "learning_rate": 9.925944762447674e-06,
17926
+ "loss": 20.8631,
17927
+ "step": 25540
17928
+ },
17929
+ {
17930
+ "epoch": 0.4741392308459581,
17931
+ "grad_norm": 35.0,
17932
+ "learning_rate": 9.925915766661632e-06,
17933
+ "loss": 20.3443,
17934
+ "step": 25550
17935
+ },
17936
+ {
17937
+ "epoch": 0.4743248039304379,
17938
+ "grad_norm": 36.75,
17939
+ "learning_rate": 9.92588677087559e-06,
17940
+ "loss": 20.7723,
17941
+ "step": 25560
17942
+ },
17943
+ {
17944
+ "epoch": 0.4745103770149178,
17945
+ "grad_norm": 34.6875,
17946
+ "learning_rate": 9.925857775089547e-06,
17947
+ "loss": 20.4458,
17948
+ "step": 25570
17949
+ },
17950
+ {
17951
+ "epoch": 0.4746959500993976,
17952
+ "grad_norm": 35.96875,
17953
+ "learning_rate": 9.925828779303504e-06,
17954
+ "loss": 20.3749,
17955
+ "step": 25580
17956
+ },
17957
+ {
17958
+ "epoch": 0.4748815231838774,
17959
+ "grad_norm": 36.625,
17960
+ "learning_rate": 9.925799783517463e-06,
17961
+ "loss": 20.3563,
17962
+ "step": 25590
17963
+ },
17964
+ {
17965
+ "epoch": 0.47506709626835725,
17966
+ "grad_norm": 34.875,
17967
+ "learning_rate": 9.92577078773142e-06,
17968
+ "loss": 20.9151,
17969
+ "step": 25600
17970
+ },
17971
+ {
17972
+ "epoch": 0.47525266935283705,
17973
+ "grad_norm": 35.28125,
17974
+ "learning_rate": 9.925741791945376e-06,
17975
+ "loss": 20.473,
17976
+ "step": 25610
17977
+ },
17978
+ {
17979
+ "epoch": 0.4754382424373169,
17980
+ "grad_norm": 36.3125,
17981
+ "learning_rate": 9.925712796159335e-06,
17982
+ "loss": 20.9318,
17983
+ "step": 25620
17984
+ },
17985
+ {
17986
+ "epoch": 0.4756238155217967,
17987
+ "grad_norm": 36.28125,
17988
+ "learning_rate": 9.925683800373293e-06,
17989
+ "loss": 20.9008,
17990
+ "step": 25630
17991
+ },
17992
+ {
17993
+ "epoch": 0.47580938860627653,
17994
+ "grad_norm": 35.71875,
17995
+ "learning_rate": 9.92565480458725e-06,
17996
+ "loss": 20.6804,
17997
+ "step": 25640
17998
+ },
17999
+ {
18000
+ "epoch": 0.4759949616907564,
18001
+ "grad_norm": 36.625,
18002
+ "learning_rate": 9.925625808801208e-06,
18003
+ "loss": 20.6793,
18004
+ "step": 25650
18005
+ },
18006
+ {
18007
+ "epoch": 0.4761805347752362,
18008
+ "grad_norm": 36.40625,
18009
+ "learning_rate": 9.925596813015167e-06,
18010
+ "loss": 20.6986,
18011
+ "step": 25660
18012
+ },
18013
+ {
18014
+ "epoch": 0.476366107859716,
18015
+ "grad_norm": 36.0,
18016
+ "learning_rate": 9.925567817229122e-06,
18017
+ "loss": 20.4197,
18018
+ "step": 25670
18019
+ },
18020
+ {
18021
+ "epoch": 0.47655168094419587,
18022
+ "grad_norm": 34.21875,
18023
+ "learning_rate": 9.92553882144308e-06,
18024
+ "loss": 20.203,
18025
+ "step": 25680
18026
+ },
18027
+ {
18028
+ "epoch": 0.47673725402867567,
18029
+ "grad_norm": 37.375,
18030
+ "learning_rate": 9.925509825657039e-06,
18031
+ "loss": 21.2316,
18032
+ "step": 25690
18033
+ },
18034
+ {
18035
+ "epoch": 0.47692282711315553,
18036
+ "grad_norm": 36.03125,
18037
+ "learning_rate": 9.925480829870996e-06,
18038
+ "loss": 20.5119,
18039
+ "step": 25700
18040
+ },
18041
+ {
18042
+ "epoch": 0.47710840019763534,
18043
+ "grad_norm": 34.25,
18044
+ "learning_rate": 9.925451834084954e-06,
18045
+ "loss": 20.4633,
18046
+ "step": 25710
18047
+ },
18048
+ {
18049
+ "epoch": 0.47729397328211515,
18050
+ "grad_norm": 36.15625,
18051
+ "learning_rate": 9.925422838298911e-06,
18052
+ "loss": 20.5057,
18053
+ "step": 25720
18054
+ },
18055
+ {
18056
+ "epoch": 0.477479546366595,
18057
+ "grad_norm": 35.3125,
18058
+ "learning_rate": 9.925393842512868e-06,
18059
+ "loss": 20.8923,
18060
+ "step": 25730
18061
+ },
18062
+ {
18063
+ "epoch": 0.4776651194510748,
18064
+ "grad_norm": 33.59375,
18065
+ "learning_rate": 9.925364846726826e-06,
18066
+ "loss": 20.7508,
18067
+ "step": 25740
18068
+ },
18069
+ {
18070
+ "epoch": 0.4778506925355546,
18071
+ "grad_norm": 34.5625,
18072
+ "learning_rate": 9.925335850940783e-06,
18073
+ "loss": 20.3849,
18074
+ "step": 25750
18075
+ },
18076
+ {
18077
+ "epoch": 0.4780362656200345,
18078
+ "grad_norm": 34.875,
18079
+ "learning_rate": 9.925306855154742e-06,
18080
+ "loss": 21.1023,
18081
+ "step": 25760
18082
+ },
18083
+ {
18084
+ "epoch": 0.4782218387045143,
18085
+ "grad_norm": 35.25,
18086
+ "learning_rate": 9.925277859368698e-06,
18087
+ "loss": 20.5539,
18088
+ "step": 25770
18089
+ },
18090
+ {
18091
+ "epoch": 0.4784074117889941,
18092
+ "grad_norm": 35.75,
18093
+ "learning_rate": 9.925248863582656e-06,
18094
+ "loss": 20.4852,
18095
+ "step": 25780
18096
+ },
18097
+ {
18098
+ "epoch": 0.47859298487347396,
18099
+ "grad_norm": 33.40625,
18100
+ "learning_rate": 9.925219867796615e-06,
18101
+ "loss": 20.5779,
18102
+ "step": 25790
18103
+ },
18104
+ {
18105
+ "epoch": 0.47877855795795377,
18106
+ "grad_norm": 34.96875,
18107
+ "learning_rate": 9.925190872010572e-06,
18108
+ "loss": 20.5704,
18109
+ "step": 25800
18110
+ },
18111
+ {
18112
+ "epoch": 0.4789641310424336,
18113
+ "grad_norm": 34.78125,
18114
+ "learning_rate": 9.92516187622453e-06,
18115
+ "loss": 20.5373,
18116
+ "step": 25810
18117
+ },
18118
+ {
18119
+ "epoch": 0.47914970412691343,
18120
+ "grad_norm": 34.5,
18121
+ "learning_rate": 9.925132880438487e-06,
18122
+ "loss": 20.758,
18123
+ "step": 25820
18124
+ },
18125
+ {
18126
+ "epoch": 0.47933527721139324,
18127
+ "grad_norm": 35.9375,
18128
+ "learning_rate": 9.925103884652444e-06,
18129
+ "loss": 20.5638,
18130
+ "step": 25830
18131
+ },
18132
+ {
18133
+ "epoch": 0.4795208502958731,
18134
+ "grad_norm": 33.5625,
18135
+ "learning_rate": 9.925074888866402e-06,
18136
+ "loss": 20.8422,
18137
+ "step": 25840
18138
+ },
18139
+ {
18140
+ "epoch": 0.4797064233803529,
18141
+ "grad_norm": 33.5625,
18142
+ "learning_rate": 9.925045893080359e-06,
18143
+ "loss": 20.8382,
18144
+ "step": 25850
18145
+ },
18146
+ {
18147
+ "epoch": 0.4798919964648327,
18148
+ "grad_norm": 35.375,
18149
+ "learning_rate": 9.925016897294316e-06,
18150
+ "loss": 20.6228,
18151
+ "step": 25860
18152
+ },
18153
+ {
18154
+ "epoch": 0.4800775695493126,
18155
+ "grad_norm": 33.96875,
18156
+ "learning_rate": 9.924987901508276e-06,
18157
+ "loss": 20.3393,
18158
+ "step": 25870
18159
+ },
18160
+ {
18161
+ "epoch": 0.4802631426337924,
18162
+ "grad_norm": 35.09375,
18163
+ "learning_rate": 9.924958905722231e-06,
18164
+ "loss": 20.7853,
18165
+ "step": 25880
18166
+ },
18167
+ {
18168
+ "epoch": 0.48044871571827225,
18169
+ "grad_norm": 33.53125,
18170
+ "learning_rate": 9.92492990993619e-06,
18171
+ "loss": 21.0041,
18172
+ "step": 25890
18173
+ },
18174
+ {
18175
+ "epoch": 0.48063428880275205,
18176
+ "grad_norm": 38.21875,
18177
+ "learning_rate": 9.924900914150148e-06,
18178
+ "loss": 20.4117,
18179
+ "step": 25900
18180
+ },
18181
+ {
18182
+ "epoch": 0.48081986188723186,
18183
+ "grad_norm": 34.0625,
18184
+ "learning_rate": 9.924871918364105e-06,
18185
+ "loss": 20.6509,
18186
+ "step": 25910
18187
+ },
18188
+ {
18189
+ "epoch": 0.4810054349717117,
18190
+ "grad_norm": 36.0,
18191
+ "learning_rate": 9.924842922578063e-06,
18192
+ "loss": 20.895,
18193
+ "step": 25920
18194
+ },
18195
+ {
18196
+ "epoch": 0.4811910080561915,
18197
+ "grad_norm": 36.28125,
18198
+ "learning_rate": 9.92481392679202e-06,
18199
+ "loss": 20.8673,
18200
+ "step": 25930
18201
+ },
18202
+ {
18203
+ "epoch": 0.48137658114067133,
18204
+ "grad_norm": 34.25,
18205
+ "learning_rate": 9.924784931005977e-06,
18206
+ "loss": 20.5624,
18207
+ "step": 25940
18208
+ },
18209
+ {
18210
+ "epoch": 0.4815621542251512,
18211
+ "grad_norm": 35.125,
18212
+ "learning_rate": 9.924755935219935e-06,
18213
+ "loss": 21.1527,
18214
+ "step": 25950
18215
+ },
18216
+ {
18217
+ "epoch": 0.481747727309631,
18218
+ "grad_norm": 36.71875,
18219
+ "learning_rate": 9.924726939433892e-06,
18220
+ "loss": 20.7524,
18221
+ "step": 25960
18222
+ },
18223
+ {
18224
+ "epoch": 0.48193330039411086,
18225
+ "grad_norm": 33.65625,
18226
+ "learning_rate": 9.924697943647851e-06,
18227
+ "loss": 21.1041,
18228
+ "step": 25970
18229
+ },
18230
+ {
18231
+ "epoch": 0.48211887347859067,
18232
+ "grad_norm": 36.59375,
18233
+ "learning_rate": 9.924668947861809e-06,
18234
+ "loss": 20.6638,
18235
+ "step": 25980
18236
+ },
18237
+ {
18238
+ "epoch": 0.4823044465630705,
18239
+ "grad_norm": 33.59375,
18240
+ "learning_rate": 9.924639952075764e-06,
18241
+ "loss": 20.6987,
18242
+ "step": 25990
18243
+ },
18244
+ {
18245
+ "epoch": 0.48249001964755034,
18246
+ "grad_norm": 34.625,
18247
+ "learning_rate": 9.924610956289724e-06,
18248
+ "loss": 20.7549,
18249
+ "step": 26000
18250
+ },
18251
+ {
18252
+ "epoch": 0.48267559273203015,
18253
+ "grad_norm": 36.125,
18254
+ "learning_rate": 9.924581960503681e-06,
18255
+ "loss": 20.5746,
18256
+ "step": 26010
18257
+ },
18258
+ {
18259
+ "epoch": 0.48286116581650995,
18260
+ "grad_norm": 33.40625,
18261
+ "learning_rate": 9.924552964717638e-06,
18262
+ "loss": 20.6575,
18263
+ "step": 26020
18264
+ },
18265
+ {
18266
+ "epoch": 0.4830467389009898,
18267
+ "grad_norm": 34.0625,
18268
+ "learning_rate": 9.924523968931596e-06,
18269
+ "loss": 20.1084,
18270
+ "step": 26030
18271
+ },
18272
+ {
18273
+ "epoch": 0.4832323119854696,
18274
+ "grad_norm": 37.03125,
18275
+ "learning_rate": 9.924494973145553e-06,
18276
+ "loss": 20.3671,
18277
+ "step": 26040
18278
+ },
18279
+ {
18280
+ "epoch": 0.4834178850699495,
18281
+ "grad_norm": 34.125,
18282
+ "learning_rate": 9.92446597735951e-06,
18283
+ "loss": 20.4181,
18284
+ "step": 26050
18285
+ },
18286
+ {
18287
+ "epoch": 0.4836034581544293,
18288
+ "grad_norm": 35.875,
18289
+ "learning_rate": 9.924436981573468e-06,
18290
+ "loss": 20.5477,
18291
+ "step": 26060
18292
+ },
18293
+ {
18294
+ "epoch": 0.4837890312389091,
18295
+ "grad_norm": 34.90625,
18296
+ "learning_rate": 9.924407985787427e-06,
18297
+ "loss": 20.2599,
18298
+ "step": 26070
18299
+ },
18300
+ {
18301
+ "epoch": 0.48397460432338896,
18302
+ "grad_norm": 33.1875,
18303
+ "learning_rate": 9.924378990001385e-06,
18304
+ "loss": 20.6862,
18305
+ "step": 26080
18306
+ },
18307
+ {
18308
+ "epoch": 0.48416017740786876,
18309
+ "grad_norm": 35.1875,
18310
+ "learning_rate": 9.92434999421534e-06,
18311
+ "loss": 20.6373,
18312
+ "step": 26090
18313
+ },
18314
+ {
18315
+ "epoch": 0.48434575049234857,
18316
+ "grad_norm": 34.8125,
18317
+ "learning_rate": 9.9243209984293e-06,
18318
+ "loss": 20.8698,
18319
+ "step": 26100
18320
+ },
18321
+ {
18322
+ "epoch": 0.48453132357682843,
18323
+ "grad_norm": 35.5625,
18324
+ "learning_rate": 9.924292002643257e-06,
18325
+ "loss": 20.5171,
18326
+ "step": 26110
18327
+ },
18328
+ {
18329
+ "epoch": 0.48471689666130824,
18330
+ "grad_norm": 36.0625,
18331
+ "learning_rate": 9.924263006857214e-06,
18332
+ "loss": 20.0415,
18333
+ "step": 26120
18334
+ },
18335
+ {
18336
+ "epoch": 0.48490246974578805,
18337
+ "grad_norm": 34.46875,
18338
+ "learning_rate": 9.924234011071172e-06,
18339
+ "loss": 20.0809,
18340
+ "step": 26130
18341
+ },
18342
+ {
18343
+ "epoch": 0.4850880428302679,
18344
+ "grad_norm": 35.5625,
18345
+ "learning_rate": 9.92420501528513e-06,
18346
+ "loss": 20.8953,
18347
+ "step": 26140
18348
+ },
18349
+ {
18350
+ "epoch": 0.4852736159147477,
18351
+ "grad_norm": 33.59375,
18352
+ "learning_rate": 9.924176019499086e-06,
18353
+ "loss": 20.5997,
18354
+ "step": 26150
18355
+ },
18356
+ {
18357
+ "epoch": 0.4854591889992276,
18358
+ "grad_norm": 33.6875,
18359
+ "learning_rate": 9.924147023713044e-06,
18360
+ "loss": 20.538,
18361
+ "step": 26160
18362
+ },
18363
+ {
18364
+ "epoch": 0.4856447620837074,
18365
+ "grad_norm": 32.59375,
18366
+ "learning_rate": 9.924118027927003e-06,
18367
+ "loss": 20.5563,
18368
+ "step": 26170
18369
+ },
18370
+ {
18371
+ "epoch": 0.4858303351681872,
18372
+ "grad_norm": 35.28125,
18373
+ "learning_rate": 9.92408903214096e-06,
18374
+ "loss": 20.3517,
18375
+ "step": 26180
18376
+ },
18377
+ {
18378
+ "epoch": 0.48601590825266705,
18379
+ "grad_norm": 34.34375,
18380
+ "learning_rate": 9.924060036354918e-06,
18381
+ "loss": 20.4909,
18382
+ "step": 26190
18383
+ },
18384
+ {
18385
+ "epoch": 0.48620148133714686,
18386
+ "grad_norm": 36.0625,
18387
+ "learning_rate": 9.924031040568875e-06,
18388
+ "loss": 20.3212,
18389
+ "step": 26200
18390
+ },
18391
+ {
18392
+ "epoch": 0.48638705442162666,
18393
+ "grad_norm": 33.84375,
18394
+ "learning_rate": 9.924002044782832e-06,
18395
+ "loss": 20.7427,
18396
+ "step": 26210
18397
+ },
18398
+ {
18399
+ "epoch": 0.4865726275061065,
18400
+ "grad_norm": 35.1875,
18401
+ "learning_rate": 9.92397304899679e-06,
18402
+ "loss": 20.5698,
18403
+ "step": 26220
18404
+ },
18405
+ {
18406
+ "epoch": 0.48675820059058633,
18407
+ "grad_norm": 38.0625,
18408
+ "learning_rate": 9.923944053210747e-06,
18409
+ "loss": 20.6616,
18410
+ "step": 26230
18411
+ },
18412
+ {
18413
+ "epoch": 0.4869437736750662,
18414
+ "grad_norm": 33.6875,
18415
+ "learning_rate": 9.923915057424706e-06,
18416
+ "loss": 20.3389,
18417
+ "step": 26240
18418
+ },
18419
+ {
18420
+ "epoch": 0.487129346759546,
18421
+ "grad_norm": 35.59375,
18422
+ "learning_rate": 9.923886061638664e-06,
18423
+ "loss": 20.3957,
18424
+ "step": 26250
18425
+ },
18426
+ {
18427
+ "epoch": 0.4873149198440258,
18428
+ "grad_norm": 34.25,
18429
+ "learning_rate": 9.92385706585262e-06,
18430
+ "loss": 20.4382,
18431
+ "step": 26260
18432
+ },
18433
+ {
18434
+ "epoch": 0.48750049292850567,
18435
+ "grad_norm": 34.84375,
18436
+ "learning_rate": 9.923828070066579e-06,
18437
+ "loss": 20.4939,
18438
+ "step": 26270
18439
+ },
18440
+ {
18441
+ "epoch": 0.4876860660129855,
18442
+ "grad_norm": 34.875,
18443
+ "learning_rate": 9.923799074280536e-06,
18444
+ "loss": 20.4227,
18445
+ "step": 26280
18446
+ },
18447
+ {
18448
+ "epoch": 0.4878716390974653,
18449
+ "grad_norm": 34.875,
18450
+ "learning_rate": 9.923770078494493e-06,
18451
+ "loss": 20.8318,
18452
+ "step": 26290
18453
+ },
18454
+ {
18455
+ "epoch": 0.48805721218194514,
18456
+ "grad_norm": 33.09375,
18457
+ "learning_rate": 9.923741082708451e-06,
18458
+ "loss": 20.5788,
18459
+ "step": 26300
18460
+ },
18461
+ {
18462
+ "epoch": 0.48824278526642495,
18463
+ "grad_norm": 35.0625,
18464
+ "learning_rate": 9.923712086922408e-06,
18465
+ "loss": 20.7702,
18466
+ "step": 26310
18467
+ },
18468
+ {
18469
+ "epoch": 0.4884283583509048,
18470
+ "grad_norm": 35.40625,
18471
+ "learning_rate": 9.923683091136366e-06,
18472
+ "loss": 20.8507,
18473
+ "step": 26320
18474
+ },
18475
+ {
18476
+ "epoch": 0.4886139314353846,
18477
+ "grad_norm": 34.75,
18478
+ "learning_rate": 9.923654095350323e-06,
18479
+ "loss": 20.8756,
18480
+ "step": 26330
18481
+ },
18482
+ {
18483
+ "epoch": 0.4887995045198644,
18484
+ "grad_norm": 34.0625,
18485
+ "learning_rate": 9.923625099564282e-06,
18486
+ "loss": 20.7579,
18487
+ "step": 26340
18488
+ },
18489
+ {
18490
+ "epoch": 0.4889850776043443,
18491
+ "grad_norm": 34.25,
18492
+ "learning_rate": 9.92359610377824e-06,
18493
+ "loss": 20.6045,
18494
+ "step": 26350
18495
+ },
18496
+ {
18497
+ "epoch": 0.4891706506888241,
18498
+ "grad_norm": 35.3125,
18499
+ "learning_rate": 9.923567107992195e-06,
18500
+ "loss": 20.2022,
18501
+ "step": 26360
18502
+ },
18503
+ {
18504
+ "epoch": 0.4893562237733039,
18505
+ "grad_norm": 34.0625,
18506
+ "learning_rate": 9.923538112206154e-06,
18507
+ "loss": 20.2575,
18508
+ "step": 26370
18509
+ },
18510
+ {
18511
+ "epoch": 0.48954179685778376,
18512
+ "grad_norm": 35.0625,
18513
+ "learning_rate": 9.923509116420112e-06,
18514
+ "loss": 20.9015,
18515
+ "step": 26380
18516
+ },
18517
+ {
18518
+ "epoch": 0.48972736994226357,
18519
+ "grad_norm": 33.875,
18520
+ "learning_rate": 9.92348012063407e-06,
18521
+ "loss": 20.532,
18522
+ "step": 26390
18523
+ },
18524
+ {
18525
+ "epoch": 0.4899129430267434,
18526
+ "grad_norm": 33.65625,
18527
+ "learning_rate": 9.923451124848027e-06,
18528
+ "loss": 20.6322,
18529
+ "step": 26400
18530
+ },
18531
+ {
18532
+ "epoch": 0.49009851611122324,
18533
+ "grad_norm": 34.5625,
18534
+ "learning_rate": 9.923422129061984e-06,
18535
+ "loss": 20.2689,
18536
+ "step": 26410
18537
+ },
18538
+ {
18539
+ "epoch": 0.49028408919570304,
18540
+ "grad_norm": 33.9375,
18541
+ "learning_rate": 9.923393133275941e-06,
18542
+ "loss": 20.7293,
18543
+ "step": 26420
18544
+ },
18545
+ {
18546
+ "epoch": 0.4904696622801829,
18547
+ "grad_norm": 34.5,
18548
+ "learning_rate": 9.923364137489899e-06,
18549
+ "loss": 20.89,
18550
+ "step": 26430
18551
+ },
18552
+ {
18553
+ "epoch": 0.4906552353646627,
18554
+ "grad_norm": 33.5625,
18555
+ "learning_rate": 9.923335141703856e-06,
18556
+ "loss": 19.9809,
18557
+ "step": 26440
18558
+ },
18559
+ {
18560
+ "epoch": 0.4908408084491425,
18561
+ "grad_norm": 36.375,
18562
+ "learning_rate": 9.923306145917815e-06,
18563
+ "loss": 20.7523,
18564
+ "step": 26450
18565
+ },
18566
+ {
18567
+ "epoch": 0.4910263815336224,
18568
+ "grad_norm": 36.5,
18569
+ "learning_rate": 9.923277150131773e-06,
18570
+ "loss": 20.9323,
18571
+ "step": 26460
18572
+ },
18573
+ {
18574
+ "epoch": 0.4912119546181022,
18575
+ "grad_norm": 37.65625,
18576
+ "learning_rate": 9.923248154345728e-06,
18577
+ "loss": 20.9648,
18578
+ "step": 26470
18579
+ },
18580
+ {
18581
+ "epoch": 0.491397527702582,
18582
+ "grad_norm": 33.28125,
18583
+ "learning_rate": 9.923219158559688e-06,
18584
+ "loss": 20.694,
18585
+ "step": 26480
18586
+ },
18587
+ {
18588
+ "epoch": 0.49158310078706186,
18589
+ "grad_norm": 34.75,
18590
+ "learning_rate": 9.923190162773645e-06,
18591
+ "loss": 20.6665,
18592
+ "step": 26490
18593
+ },
18594
+ {
18595
+ "epoch": 0.49176867387154166,
18596
+ "grad_norm": 34.25,
18597
+ "learning_rate": 9.923161166987602e-06,
18598
+ "loss": 20.4606,
18599
+ "step": 26500
18600
+ },
18601
+ {
18602
+ "epoch": 0.4919542469560215,
18603
+ "grad_norm": 35.375,
18604
+ "learning_rate": 9.92313217120156e-06,
18605
+ "loss": 20.5022,
18606
+ "step": 26510
18607
+ },
18608
+ {
18609
+ "epoch": 0.49213982004050133,
18610
+ "grad_norm": 34.9375,
18611
+ "learning_rate": 9.923103175415517e-06,
18612
+ "loss": 20.5076,
18613
+ "step": 26520
18614
+ },
18615
+ {
18616
+ "epoch": 0.49232539312498114,
18617
+ "grad_norm": 33.3125,
18618
+ "learning_rate": 9.923074179629475e-06,
18619
+ "loss": 20.7874,
18620
+ "step": 26530
18621
+ },
18622
+ {
18623
+ "epoch": 0.492510966209461,
18624
+ "grad_norm": 35.4375,
18625
+ "learning_rate": 9.923045183843432e-06,
18626
+ "loss": 20.9914,
18627
+ "step": 26540
18628
+ },
18629
+ {
18630
+ "epoch": 0.4926965392939408,
18631
+ "grad_norm": 35.25,
18632
+ "learning_rate": 9.923016188057391e-06,
18633
+ "loss": 19.7607,
18634
+ "step": 26550
18635
+ },
18636
+ {
18637
+ "epoch": 0.4928821123784206,
18638
+ "grad_norm": 34.9375,
18639
+ "learning_rate": 9.922987192271349e-06,
18640
+ "loss": 21.0187,
18641
+ "step": 26560
18642
+ },
18643
+ {
18644
+ "epoch": 0.4930676854629005,
18645
+ "grad_norm": 35.96875,
18646
+ "learning_rate": 9.922958196485306e-06,
18647
+ "loss": 20.2126,
18648
+ "step": 26570
18649
+ },
18650
+ {
18651
+ "epoch": 0.4932532585473803,
18652
+ "grad_norm": 35.0,
18653
+ "learning_rate": 9.922929200699263e-06,
18654
+ "loss": 20.2824,
18655
+ "step": 26580
18656
+ },
18657
+ {
18658
+ "epoch": 0.49343883163186014,
18659
+ "grad_norm": 37.78125,
18660
+ "learning_rate": 9.92290020491322e-06,
18661
+ "loss": 21.1446,
18662
+ "step": 26590
18663
+ },
18664
+ {
18665
+ "epoch": 0.49362440471633995,
18666
+ "grad_norm": 35.9375,
18667
+ "learning_rate": 9.922871209127178e-06,
18668
+ "loss": 20.6024,
18669
+ "step": 26600
18670
+ },
18671
+ {
18672
+ "epoch": 0.49380997780081975,
18673
+ "grad_norm": 34.75,
18674
+ "learning_rate": 9.922842213341136e-06,
18675
+ "loss": 20.4418,
18676
+ "step": 26610
18677
+ },
18678
+ {
18679
+ "epoch": 0.4939955508852996,
18680
+ "grad_norm": 33.78125,
18681
+ "learning_rate": 9.922813217555095e-06,
18682
+ "loss": 20.5188,
18683
+ "step": 26620
18684
+ },
18685
+ {
18686
+ "epoch": 0.4941811239697794,
18687
+ "grad_norm": 34.21875,
18688
+ "learning_rate": 9.92278422176905e-06,
18689
+ "loss": 20.2447,
18690
+ "step": 26630
18691
+ },
18692
+ {
18693
+ "epoch": 0.49436669705425923,
18694
+ "grad_norm": 35.59375,
18695
+ "learning_rate": 9.922755225983008e-06,
18696
+ "loss": 20.735,
18697
+ "step": 26640
18698
+ },
18699
+ {
18700
+ "epoch": 0.4945522701387391,
18701
+ "grad_norm": 37.03125,
18702
+ "learning_rate": 9.922726230196967e-06,
18703
+ "loss": 21.0627,
18704
+ "step": 26650
18705
+ },
18706
+ {
18707
+ "epoch": 0.4947378432232189,
18708
+ "grad_norm": 35.0625,
18709
+ "learning_rate": 9.922697234410924e-06,
18710
+ "loss": 20.1211,
18711
+ "step": 26660
18712
+ },
18713
+ {
18714
+ "epoch": 0.4949234163076987,
18715
+ "grad_norm": 34.90625,
18716
+ "learning_rate": 9.922668238624882e-06,
18717
+ "loss": 20.5947,
18718
+ "step": 26670
18719
+ },
18720
+ {
18721
+ "epoch": 0.49510898939217857,
18722
+ "grad_norm": 34.3125,
18723
+ "learning_rate": 9.922639242838839e-06,
18724
+ "loss": 20.4556,
18725
+ "step": 26680
18726
+ },
18727
+ {
18728
+ "epoch": 0.4952945624766584,
18729
+ "grad_norm": 32.75,
18730
+ "learning_rate": 9.922610247052797e-06,
18731
+ "loss": 20.0388,
18732
+ "step": 26690
18733
+ },
18734
+ {
18735
+ "epoch": 0.49548013556113824,
18736
+ "grad_norm": 33.4375,
18737
+ "learning_rate": 9.922581251266754e-06,
18738
+ "loss": 20.3324,
18739
+ "step": 26700
18740
+ },
18741
+ {
18742
+ "epoch": 0.49566570864561804,
18743
+ "grad_norm": 35.09375,
18744
+ "learning_rate": 9.922552255480711e-06,
18745
+ "loss": 20.444,
18746
+ "step": 26710
18747
+ },
18748
+ {
18749
+ "epoch": 0.49585128173009785,
18750
+ "grad_norm": 35.15625,
18751
+ "learning_rate": 9.92252325969467e-06,
18752
+ "loss": 20.5939,
18753
+ "step": 26720
18754
+ },
18755
+ {
18756
+ "epoch": 0.4960368548145777,
18757
+ "grad_norm": 35.84375,
18758
+ "learning_rate": 9.922494263908628e-06,
18759
+ "loss": 20.3899,
18760
+ "step": 26730
18761
+ },
18762
+ {
18763
+ "epoch": 0.4962224278990575,
18764
+ "grad_norm": 34.8125,
18765
+ "learning_rate": 9.922465268122584e-06,
18766
+ "loss": 20.5074,
18767
+ "step": 26740
18768
+ },
18769
+ {
18770
+ "epoch": 0.4964080009835373,
18771
+ "grad_norm": 37.03125,
18772
+ "learning_rate": 9.922436272336543e-06,
18773
+ "loss": 20.4035,
18774
+ "step": 26750
18775
+ },
18776
+ {
18777
+ "epoch": 0.4965935740680172,
18778
+ "grad_norm": 34.5,
18779
+ "learning_rate": 9.9224072765505e-06,
18780
+ "loss": 21.0219,
18781
+ "step": 26760
18782
+ },
18783
+ {
18784
+ "epoch": 0.496779147152497,
18785
+ "grad_norm": 33.75,
18786
+ "learning_rate": 9.922378280764457e-06,
18787
+ "loss": 20.5443,
18788
+ "step": 26770
18789
+ },
18790
+ {
18791
+ "epoch": 0.49696472023697685,
18792
+ "grad_norm": 35.09375,
18793
+ "learning_rate": 9.922349284978415e-06,
18794
+ "loss": 20.6201,
18795
+ "step": 26780
18796
+ },
18797
+ {
18798
+ "epoch": 0.49715029332145666,
18799
+ "grad_norm": 33.40625,
18800
+ "learning_rate": 9.922320289192372e-06,
18801
+ "loss": 20.0897,
18802
+ "step": 26790
18803
+ },
18804
+ {
18805
+ "epoch": 0.49733586640593647,
18806
+ "grad_norm": 33.90625,
18807
+ "learning_rate": 9.92229129340633e-06,
18808
+ "loss": 20.9902,
18809
+ "step": 26800
18810
+ },
18811
+ {
18812
+ "epoch": 0.49752143949041633,
18813
+ "grad_norm": 34.3125,
18814
+ "learning_rate": 9.922262297620287e-06,
18815
+ "loss": 21.3532,
18816
+ "step": 26810
18817
+ },
18818
+ {
18819
+ "epoch": 0.49770701257489613,
18820
+ "grad_norm": 34.375,
18821
+ "learning_rate": 9.922233301834246e-06,
18822
+ "loss": 20.4501,
18823
+ "step": 26820
18824
+ },
18825
+ {
18826
+ "epoch": 0.49789258565937594,
18827
+ "grad_norm": 34.8125,
18828
+ "learning_rate": 9.922204306048204e-06,
18829
+ "loss": 20.4959,
18830
+ "step": 26830
18831
+ },
18832
+ {
18833
+ "epoch": 0.4980781587438558,
18834
+ "grad_norm": 35.75,
18835
+ "learning_rate": 9.922175310262161e-06,
18836
+ "loss": 20.5544,
18837
+ "step": 26840
18838
+ },
18839
+ {
18840
+ "epoch": 0.4982637318283356,
18841
+ "grad_norm": 33.0,
18842
+ "learning_rate": 9.922146314476118e-06,
18843
+ "loss": 20.6951,
18844
+ "step": 26850
18845
+ },
18846
+ {
18847
+ "epoch": 0.49844930491281547,
18848
+ "grad_norm": 34.625,
18849
+ "learning_rate": 9.922117318690076e-06,
18850
+ "loss": 20.583,
18851
+ "step": 26860
18852
+ },
18853
+ {
18854
+ "epoch": 0.4986348779972953,
18855
+ "grad_norm": 36.875,
18856
+ "learning_rate": 9.922088322904033e-06,
18857
+ "loss": 20.3859,
18858
+ "step": 26870
18859
+ },
18860
+ {
18861
+ "epoch": 0.4988204510817751,
18862
+ "grad_norm": 34.0,
18863
+ "learning_rate": 9.92205932711799e-06,
18864
+ "loss": 20.0682,
18865
+ "step": 26880
18866
+ },
18867
+ {
18868
+ "epoch": 0.49900602416625495,
18869
+ "grad_norm": 35.375,
18870
+ "learning_rate": 9.922030331331948e-06,
18871
+ "loss": 20.5781,
18872
+ "step": 26890
18873
+ },
18874
+ {
18875
+ "epoch": 0.49919159725073475,
18876
+ "grad_norm": 34.75,
18877
+ "learning_rate": 9.922001335545905e-06,
18878
+ "loss": 20.8437,
18879
+ "step": 26900
18880
+ },
18881
+ {
18882
+ "epoch": 0.49937717033521456,
18883
+ "grad_norm": 34.4375,
18884
+ "learning_rate": 9.921972339759863e-06,
18885
+ "loss": 20.4446,
18886
+ "step": 26910
18887
+ },
18888
+ {
18889
+ "epoch": 0.4995627434196944,
18890
+ "grad_norm": 34.71875,
18891
+ "learning_rate": 9.92194334397382e-06,
18892
+ "loss": 20.7822,
18893
+ "step": 26920
18894
+ },
18895
+ {
18896
+ "epoch": 0.49974831650417423,
18897
+ "grad_norm": 34.46875,
18898
+ "learning_rate": 9.92191434818778e-06,
18899
+ "loss": 20.6735,
18900
+ "step": 26930
18901
+ },
18902
+ {
18903
+ "epoch": 0.49993388958865403,
18904
+ "grad_norm": 34.21875,
18905
+ "learning_rate": 9.921885352401737e-06,
18906
+ "loss": 21.0007,
18907
+ "step": 26940
18908
+ },
18909
+ {
18910
+ "epoch": 0.5001194626731339,
18911
+ "grad_norm": 33.46875,
18912
+ "learning_rate": 9.921856356615694e-06,
18913
+ "loss": 20.2895,
18914
+ "step": 26950
18915
+ },
18916
+ {
18917
+ "epoch": 0.5003050357576138,
18918
+ "grad_norm": 36.625,
18919
+ "learning_rate": 9.921827360829652e-06,
18920
+ "loss": 20.3101,
18921
+ "step": 26960
18922
+ },
18923
+ {
18924
+ "epoch": 0.5004906088420935,
18925
+ "grad_norm": 36.53125,
18926
+ "learning_rate": 9.921798365043609e-06,
18927
+ "loss": 20.482,
18928
+ "step": 26970
18929
+ },
18930
+ {
18931
+ "epoch": 0.5006761819265734,
18932
+ "grad_norm": 33.84375,
18933
+ "learning_rate": 9.921769369257566e-06,
18934
+ "loss": 20.8419,
18935
+ "step": 26980
18936
+ },
18937
+ {
18938
+ "epoch": 0.5008617550110532,
18939
+ "grad_norm": 36.25,
18940
+ "learning_rate": 9.921740373471524e-06,
18941
+ "loss": 20.4644,
18942
+ "step": 26990
18943
+ },
18944
+ {
18945
+ "epoch": 0.501047328095533,
18946
+ "grad_norm": 34.625,
18947
+ "learning_rate": 9.921711377685483e-06,
18948
+ "loss": 20.5031,
18949
+ "step": 27000
18950
+ },
18951
+ {
18952
+ "epoch": 0.5012329011800128,
18953
+ "grad_norm": 34.65625,
18954
+ "learning_rate": 9.921682381899439e-06,
18955
+ "loss": 20.6603,
18956
+ "step": 27010
18957
+ },
18958
+ {
18959
+ "epoch": 0.5014184742644927,
18960
+ "grad_norm": 33.78125,
18961
+ "learning_rate": 9.921653386113396e-06,
18962
+ "loss": 20.7035,
18963
+ "step": 27020
18964
+ },
18965
+ {
18966
+ "epoch": 0.5016040473489725,
18967
+ "grad_norm": 34.65625,
18968
+ "learning_rate": 9.921624390327355e-06,
18969
+ "loss": 21.0303,
18970
+ "step": 27030
18971
+ },
18972
+ {
18973
+ "epoch": 0.5017896204334523,
18974
+ "grad_norm": 33.5,
18975
+ "learning_rate": 9.921595394541313e-06,
18976
+ "loss": 20.2431,
18977
+ "step": 27040
18978
+ },
18979
+ {
18980
+ "epoch": 0.5019751935179322,
18981
+ "grad_norm": 36.8125,
18982
+ "learning_rate": 9.92156639875527e-06,
18983
+ "loss": 20.9503,
18984
+ "step": 27050
18985
+ },
18986
+ {
18987
+ "epoch": 0.5021607666024119,
18988
+ "grad_norm": 36.25,
18989
+ "learning_rate": 9.921537402969227e-06,
18990
+ "loss": 20.5597,
18991
+ "step": 27060
18992
+ },
18993
+ {
18994
+ "epoch": 0.5023463396868918,
18995
+ "grad_norm": 35.53125,
18996
+ "learning_rate": 9.921508407183185e-06,
18997
+ "loss": 20.2874,
18998
+ "step": 27070
18999
+ },
19000
+ {
19001
+ "epoch": 0.5025319127713717,
19002
+ "grad_norm": 34.5625,
19003
+ "learning_rate": 9.921479411397142e-06,
19004
+ "loss": 20.2342,
19005
+ "step": 27080
19006
+ },
19007
+ {
19008
+ "epoch": 0.5027174858558515,
19009
+ "grad_norm": 35.625,
19010
+ "learning_rate": 9.9214504156111e-06,
19011
+ "loss": 20.2432,
19012
+ "step": 27090
19013
+ },
19014
+ {
19015
+ "epoch": 0.5029030589403313,
19016
+ "grad_norm": 36.71875,
19017
+ "learning_rate": 9.921421419825059e-06,
19018
+ "loss": 20.8367,
19019
+ "step": 27100
19020
+ },
19021
+ {
19022
+ "epoch": 0.5030886320248111,
19023
+ "grad_norm": 35.75,
19024
+ "learning_rate": 9.921392424039014e-06,
19025
+ "loss": 20.4609,
19026
+ "step": 27110
19027
+ },
19028
+ {
19029
+ "epoch": 0.503274205109291,
19030
+ "grad_norm": 35.5,
19031
+ "learning_rate": 9.921363428252972e-06,
19032
+ "loss": 20.4391,
19033
+ "step": 27120
19034
+ },
19035
+ {
19036
+ "epoch": 0.5034597781937707,
19037
+ "grad_norm": 34.0625,
19038
+ "learning_rate": 9.921334432466931e-06,
19039
+ "loss": 20.4591,
19040
+ "step": 27130
19041
+ },
19042
+ {
19043
+ "epoch": 0.5036453512782506,
19044
+ "grad_norm": 35.03125,
19045
+ "learning_rate": 9.921305436680888e-06,
19046
+ "loss": 20.8571,
19047
+ "step": 27140
19048
+ },
19049
+ {
19050
+ "epoch": 0.5038309243627305,
19051
+ "grad_norm": 38.15625,
19052
+ "learning_rate": 9.921276440894846e-06,
19053
+ "loss": 20.877,
19054
+ "step": 27150
19055
+ },
19056
+ {
19057
+ "epoch": 0.5040164974472102,
19058
+ "grad_norm": 35.125,
19059
+ "learning_rate": 9.921247445108803e-06,
19060
+ "loss": 20.7956,
19061
+ "step": 27160
19062
+ },
19063
+ {
19064
+ "epoch": 0.5042020705316901,
19065
+ "grad_norm": 35.46875,
19066
+ "learning_rate": 9.92121844932276e-06,
19067
+ "loss": 20.4186,
19068
+ "step": 27170
19069
+ },
19070
+ {
19071
+ "epoch": 0.50438764361617,
19072
+ "grad_norm": 35.25,
19073
+ "learning_rate": 9.921189453536718e-06,
19074
+ "loss": 20.5766,
19075
+ "step": 27180
19076
+ },
19077
+ {
19078
+ "epoch": 0.5045732167006497,
19079
+ "grad_norm": 35.40625,
19080
+ "learning_rate": 9.921160457750675e-06,
19081
+ "loss": 20.2852,
19082
+ "step": 27190
19083
+ },
19084
+ {
19085
+ "epoch": 0.5047587897851296,
19086
+ "grad_norm": 36.28125,
19087
+ "learning_rate": 9.921131461964634e-06,
19088
+ "loss": 20.5424,
19089
+ "step": 27200
19090
+ },
19091
+ {
19092
+ "epoch": 0.5049443628696094,
19093
+ "grad_norm": 35.4375,
19094
+ "learning_rate": 9.921102466178592e-06,
19095
+ "loss": 20.4194,
19096
+ "step": 27210
19097
+ },
19098
+ {
19099
+ "epoch": 0.5051299359540892,
19100
+ "grad_norm": 33.0625,
19101
+ "learning_rate": 9.921073470392548e-06,
19102
+ "loss": 20.407,
19103
+ "step": 27220
19104
+ },
19105
+ {
19106
+ "epoch": 0.505315509038569,
19107
+ "grad_norm": 35.65625,
19108
+ "learning_rate": 9.921044474606507e-06,
19109
+ "loss": 20.6512,
19110
+ "step": 27230
19111
+ },
19112
+ {
19113
+ "epoch": 0.5055010821230489,
19114
+ "grad_norm": 34.09375,
19115
+ "learning_rate": 9.921015478820464e-06,
19116
+ "loss": 20.7855,
19117
+ "step": 27240
19118
+ },
19119
+ {
19120
+ "epoch": 0.5056866552075286,
19121
+ "grad_norm": 36.6875,
19122
+ "learning_rate": 9.920986483034421e-06,
19123
+ "loss": 20.5273,
19124
+ "step": 27250
19125
+ },
19126
+ {
19127
+ "epoch": 0.5058722282920085,
19128
+ "grad_norm": 33.96875,
19129
+ "learning_rate": 9.920957487248379e-06,
19130
+ "loss": 20.1875,
19131
+ "step": 27260
19132
+ },
19133
+ {
19134
+ "epoch": 0.5060578013764884,
19135
+ "grad_norm": 32.96875,
19136
+ "learning_rate": 9.920928491462336e-06,
19137
+ "loss": 20.7281,
19138
+ "step": 27270
19139
+ },
19140
+ {
19141
+ "epoch": 0.5062433744609682,
19142
+ "grad_norm": 34.5,
19143
+ "learning_rate": 9.920899495676294e-06,
19144
+ "loss": 20.489,
19145
+ "step": 27280
19146
+ },
19147
+ {
19148
+ "epoch": 0.506428947545448,
19149
+ "grad_norm": 32.625,
19150
+ "learning_rate": 9.920870499890251e-06,
19151
+ "loss": 20.938,
19152
+ "step": 27290
19153
+ },
19154
+ {
19155
+ "epoch": 0.5066145206299278,
19156
+ "grad_norm": 34.46875,
19157
+ "learning_rate": 9.92084150410421e-06,
19158
+ "loss": 20.6521,
19159
+ "step": 27300
19160
+ },
19161
+ {
19162
+ "epoch": 0.5068000937144077,
19163
+ "grad_norm": 34.1875,
19164
+ "learning_rate": 9.920812508318168e-06,
19165
+ "loss": 20.7266,
19166
+ "step": 27310
19167
+ },
19168
+ {
19169
+ "epoch": 0.5069856667988875,
19170
+ "grad_norm": 35.125,
19171
+ "learning_rate": 9.920783512532125e-06,
19172
+ "loss": 20.4679,
19173
+ "step": 27320
19174
+ },
19175
+ {
19176
+ "epoch": 0.5071712398833673,
19177
+ "grad_norm": 34.25,
19178
+ "learning_rate": 9.920754516746082e-06,
19179
+ "loss": 20.5604,
19180
+ "step": 27330
19181
+ },
19182
+ {
19183
+ "epoch": 0.5073568129678472,
19184
+ "grad_norm": 35.90625,
19185
+ "learning_rate": 9.92072552096004e-06,
19186
+ "loss": 20.9174,
19187
+ "step": 27340
19188
+ },
19189
+ {
19190
+ "epoch": 0.5075423860523269,
19191
+ "grad_norm": 34.90625,
19192
+ "learning_rate": 9.920696525173997e-06,
19193
+ "loss": 20.5337,
19194
+ "step": 27350
19195
+ },
19196
+ {
19197
+ "epoch": 0.5077279591368068,
19198
+ "grad_norm": 35.375,
19199
+ "learning_rate": 9.920667529387955e-06,
19200
+ "loss": 20.4249,
19201
+ "step": 27360
19202
+ },
19203
+ {
19204
+ "epoch": 0.5079135322212867,
19205
+ "grad_norm": 34.125,
19206
+ "learning_rate": 9.920638533601912e-06,
19207
+ "loss": 19.9978,
19208
+ "step": 27370
19209
+ },
19210
+ {
19211
+ "epoch": 0.5080991053057664,
19212
+ "grad_norm": 32.4375,
19213
+ "learning_rate": 9.92060953781587e-06,
19214
+ "loss": 20.4063,
19215
+ "step": 27380
19216
+ },
19217
+ {
19218
+ "epoch": 0.5082846783902463,
19219
+ "grad_norm": 36.59375,
19220
+ "learning_rate": 9.920580542029827e-06,
19221
+ "loss": 20.2915,
19222
+ "step": 27390
19223
+ },
19224
+ {
19225
+ "epoch": 0.5084702514747261,
19226
+ "grad_norm": 34.90625,
19227
+ "learning_rate": 9.920551546243786e-06,
19228
+ "loss": 20.5562,
19229
+ "step": 27400
19230
+ },
19231
+ {
19232
+ "epoch": 0.5086558245592059,
19233
+ "grad_norm": 36.875,
19234
+ "learning_rate": 9.920522550457743e-06,
19235
+ "loss": 20.6528,
19236
+ "step": 27410
19237
+ },
19238
+ {
19239
+ "epoch": 0.5088413976436857,
19240
+ "grad_norm": 34.09375,
19241
+ "learning_rate": 9.9204935546717e-06,
19242
+ "loss": 20.4691,
19243
+ "step": 27420
19244
+ },
19245
+ {
19246
+ "epoch": 0.5090269707281656,
19247
+ "grad_norm": 35.09375,
19248
+ "learning_rate": 9.920464558885658e-06,
19249
+ "loss": 20.3996,
19250
+ "step": 27430
19251
+ },
19252
+ {
19253
+ "epoch": 0.5092125438126455,
19254
+ "grad_norm": 34.8125,
19255
+ "learning_rate": 9.920435563099616e-06,
19256
+ "loss": 20.4117,
19257
+ "step": 27440
19258
+ },
19259
+ {
19260
+ "epoch": 0.5093981168971252,
19261
+ "grad_norm": 36.125,
19262
+ "learning_rate": 9.920406567313573e-06,
19263
+ "loss": 20.6251,
19264
+ "step": 27450
19265
+ },
19266
+ {
19267
+ "epoch": 0.5095836899816051,
19268
+ "grad_norm": 35.84375,
19269
+ "learning_rate": 9.92037757152753e-06,
19270
+ "loss": 20.3157,
19271
+ "step": 27460
19272
+ },
19273
+ {
19274
+ "epoch": 0.5097692630660849,
19275
+ "grad_norm": 33.46875,
19276
+ "learning_rate": 9.920348575741488e-06,
19277
+ "loss": 20.4708,
19278
+ "step": 27470
19279
+ },
19280
+ {
19281
+ "epoch": 0.5099548361505647,
19282
+ "grad_norm": 34.375,
19283
+ "learning_rate": 9.920319579955447e-06,
19284
+ "loss": 20.7527,
19285
+ "step": 27480
19286
+ },
19287
+ {
19288
+ "epoch": 0.5101404092350446,
19289
+ "grad_norm": 34.25,
19290
+ "learning_rate": 9.920290584169403e-06,
19291
+ "loss": 20.7704,
19292
+ "step": 27490
19293
+ },
19294
+ {
19295
+ "epoch": 0.5103259823195244,
19296
+ "grad_norm": 35.90625,
19297
+ "learning_rate": 9.92026158838336e-06,
19298
+ "loss": 20.2382,
19299
+ "step": 27500
19300
+ },
19301
+ {
19302
+ "epoch": 0.5105115554040042,
19303
+ "grad_norm": 36.3125,
19304
+ "learning_rate": 9.920232592597319e-06,
19305
+ "loss": 20.698,
19306
+ "step": 27510
19307
+ },
19308
+ {
19309
+ "epoch": 0.510697128488484,
19310
+ "grad_norm": 33.9375,
19311
+ "learning_rate": 9.920203596811277e-06,
19312
+ "loss": 20.4556,
19313
+ "step": 27520
19314
+ },
19315
+ {
19316
+ "epoch": 0.5108827015729639,
19317
+ "grad_norm": 34.5625,
19318
+ "learning_rate": 9.920174601025234e-06,
19319
+ "loss": 20.5015,
19320
+ "step": 27530
19321
+ },
19322
+ {
19323
+ "epoch": 0.5110682746574436,
19324
+ "grad_norm": 36.5,
19325
+ "learning_rate": 9.920145605239191e-06,
19326
+ "loss": 20.179,
19327
+ "step": 27540
19328
+ },
19329
+ {
19330
+ "epoch": 0.5112538477419235,
19331
+ "grad_norm": 33.75,
19332
+ "learning_rate": 9.920116609453149e-06,
19333
+ "loss": 20.2646,
19334
+ "step": 27550
19335
+ },
19336
+ {
19337
+ "epoch": 0.5114394208264034,
19338
+ "grad_norm": 34.75,
19339
+ "learning_rate": 9.920087613667106e-06,
19340
+ "loss": 20.6251,
19341
+ "step": 27560
19342
+ },
19343
+ {
19344
+ "epoch": 0.5116249939108831,
19345
+ "grad_norm": 34.34375,
19346
+ "learning_rate": 9.920058617881064e-06,
19347
+ "loss": 19.9915,
19348
+ "step": 27570
19349
+ },
19350
+ {
19351
+ "epoch": 0.511810566995363,
19352
+ "grad_norm": 34.78125,
19353
+ "learning_rate": 9.920029622095023e-06,
19354
+ "loss": 20.6833,
19355
+ "step": 27580
19356
+ },
19357
+ {
19358
+ "epoch": 0.5119961400798428,
19359
+ "grad_norm": 32.09375,
19360
+ "learning_rate": 9.92000062630898e-06,
19361
+ "loss": 20.4652,
19362
+ "step": 27590
19363
+ },
19364
+ {
19365
+ "epoch": 0.5121817131643226,
19366
+ "grad_norm": 36.53125,
19367
+ "learning_rate": 9.919971630522936e-06,
19368
+ "loss": 20.854,
19369
+ "step": 27600
19370
+ },
19371
+ {
19372
+ "epoch": 0.5123672862488025,
19373
+ "grad_norm": 34.0,
19374
+ "learning_rate": 9.919942634736895e-06,
19375
+ "loss": 20.5599,
19376
+ "step": 27610
19377
+ },
19378
+ {
19379
+ "epoch": 0.5125528593332823,
19380
+ "grad_norm": 33.34375,
19381
+ "learning_rate": 9.919913638950852e-06,
19382
+ "loss": 20.8087,
19383
+ "step": 27620
19384
+ },
19385
+ {
19386
+ "epoch": 0.5127384324177622,
19387
+ "grad_norm": 34.59375,
19388
+ "learning_rate": 9.91988464316481e-06,
19389
+ "loss": 20.2152,
19390
+ "step": 27630
19391
+ },
19392
+ {
19393
+ "epoch": 0.5129240055022419,
19394
+ "grad_norm": 35.34375,
19395
+ "learning_rate": 9.919855647378767e-06,
19396
+ "loss": 20.4706,
19397
+ "step": 27640
19398
+ },
19399
+ {
19400
+ "epoch": 0.5131095785867218,
19401
+ "grad_norm": 33.71875,
19402
+ "learning_rate": 9.919826651592725e-06,
19403
+ "loss": 20.428,
19404
+ "step": 27650
19405
+ },
19406
+ {
19407
+ "epoch": 0.5132951516712017,
19408
+ "grad_norm": 34.25,
19409
+ "learning_rate": 9.919797655806682e-06,
19410
+ "loss": 20.24,
19411
+ "step": 27660
19412
+ },
19413
+ {
19414
+ "epoch": 0.5134807247556814,
19415
+ "grad_norm": 35.21875,
19416
+ "learning_rate": 9.91976866002064e-06,
19417
+ "loss": 20.7069,
19418
+ "step": 27670
19419
+ },
19420
+ {
19421
+ "epoch": 0.5136662978401613,
19422
+ "grad_norm": 36.5,
19423
+ "learning_rate": 9.919739664234598e-06,
19424
+ "loss": 20.4921,
19425
+ "step": 27680
19426
+ },
19427
+ {
19428
+ "epoch": 0.5138518709246411,
19429
+ "grad_norm": 34.21875,
19430
+ "learning_rate": 9.919710668448556e-06,
19431
+ "loss": 20.3458,
19432
+ "step": 27690
19433
+ },
19434
+ {
19435
+ "epoch": 0.5140374440091209,
19436
+ "grad_norm": 34.0,
19437
+ "learning_rate": 9.919681672662512e-06,
19438
+ "loss": 20.4302,
19439
+ "step": 27700
19440
+ },
19441
+ {
19442
+ "epoch": 0.5142230170936007,
19443
+ "grad_norm": 33.96875,
19444
+ "learning_rate": 9.91965267687647e-06,
19445
+ "loss": 20.6798,
19446
+ "step": 27710
19447
+ },
19448
+ {
19449
+ "epoch": 0.5144085901780806,
19450
+ "grad_norm": 36.96875,
19451
+ "learning_rate": 9.919623681090428e-06,
19452
+ "loss": 20.571,
19453
+ "step": 27720
19454
+ },
19455
+ {
19456
+ "epoch": 0.5145941632625604,
19457
+ "grad_norm": 34.59375,
19458
+ "learning_rate": 9.919594685304385e-06,
19459
+ "loss": 19.6917,
19460
+ "step": 27730
19461
+ },
19462
+ {
19463
+ "epoch": 0.5147797363470402,
19464
+ "grad_norm": 34.40625,
19465
+ "learning_rate": 9.919565689518343e-06,
19466
+ "loss": 20.6663,
19467
+ "step": 27740
19468
+ },
19469
+ {
19470
+ "epoch": 0.5149653094315201,
19471
+ "grad_norm": 35.71875,
19472
+ "learning_rate": 9.919536693732302e-06,
19473
+ "loss": 20.3554,
19474
+ "step": 27750
19475
+ },
19476
+ {
19477
+ "epoch": 0.5151508825159998,
19478
+ "grad_norm": 34.6875,
19479
+ "learning_rate": 9.919507697946258e-06,
19480
+ "loss": 20.623,
19481
+ "step": 27760
19482
+ },
19483
+ {
19484
+ "epoch": 0.5153364556004797,
19485
+ "grad_norm": 34.0625,
19486
+ "learning_rate": 9.919478702160215e-06,
19487
+ "loss": 20.3847,
19488
+ "step": 27770
19489
+ },
19490
+ {
19491
+ "epoch": 0.5155220286849596,
19492
+ "grad_norm": 38.625,
19493
+ "learning_rate": 9.919449706374174e-06,
19494
+ "loss": 20.4449,
19495
+ "step": 27780
19496
+ },
19497
+ {
19498
+ "epoch": 0.5157076017694393,
19499
+ "grad_norm": 34.03125,
19500
+ "learning_rate": 9.919420710588132e-06,
19501
+ "loss": 20.3852,
19502
+ "step": 27790
19503
+ },
19504
+ {
19505
+ "epoch": 0.5158931748539192,
19506
+ "grad_norm": 36.0625,
19507
+ "learning_rate": 9.919391714802089e-06,
19508
+ "loss": 20.4759,
19509
+ "step": 27800
19510
+ },
19511
+ {
19512
+ "epoch": 0.516078747938399,
19513
+ "grad_norm": 34.4375,
19514
+ "learning_rate": 9.919362719016046e-06,
19515
+ "loss": 20.3306,
19516
+ "step": 27810
19517
+ },
19518
+ {
19519
+ "epoch": 0.5162643210228789,
19520
+ "grad_norm": 32.90625,
19521
+ "learning_rate": 9.919333723230004e-06,
19522
+ "loss": 20.121,
19523
+ "step": 27820
19524
+ },
19525
+ {
19526
+ "epoch": 0.5164498941073586,
19527
+ "grad_norm": 35.15625,
19528
+ "learning_rate": 9.919304727443961e-06,
19529
+ "loss": 20.4499,
19530
+ "step": 27830
19531
+ },
19532
+ {
19533
+ "epoch": 0.5166354671918385,
19534
+ "grad_norm": 37.40625,
19535
+ "learning_rate": 9.919275731657919e-06,
19536
+ "loss": 20.4448,
19537
+ "step": 27840
19538
+ },
19539
+ {
19540
+ "epoch": 0.5168210402763184,
19541
+ "grad_norm": 34.96875,
19542
+ "learning_rate": 9.919246735871878e-06,
19543
+ "loss": 20.3247,
19544
+ "step": 27850
19545
+ },
19546
+ {
19547
+ "epoch": 0.5170066133607981,
19548
+ "grad_norm": 36.6875,
19549
+ "learning_rate": 9.919217740085833e-06,
19550
+ "loss": 20.2849,
19551
+ "step": 27860
19552
+ },
19553
+ {
19554
+ "epoch": 0.517192186445278,
19555
+ "grad_norm": 34.21875,
19556
+ "learning_rate": 9.919188744299791e-06,
19557
+ "loss": 20.6523,
19558
+ "step": 27870
19559
+ },
19560
+ {
19561
+ "epoch": 0.5173777595297578,
19562
+ "grad_norm": 34.59375,
19563
+ "learning_rate": 9.91915974851375e-06,
19564
+ "loss": 20.3804,
19565
+ "step": 27880
19566
+ },
19567
+ {
19568
+ "epoch": 0.5175633326142376,
19569
+ "grad_norm": 35.15625,
19570
+ "learning_rate": 9.919130752727707e-06,
19571
+ "loss": 20.2975,
19572
+ "step": 27890
19573
+ },
19574
+ {
19575
+ "epoch": 0.5177489056987175,
19576
+ "grad_norm": 35.84375,
19577
+ "learning_rate": 9.919101756941665e-06,
19578
+ "loss": 20.4741,
19579
+ "step": 27900
19580
+ },
19581
+ {
19582
+ "epoch": 0.5179344787831973,
19583
+ "grad_norm": 35.3125,
19584
+ "learning_rate": 9.919072761155622e-06,
19585
+ "loss": 20.7598,
19586
+ "step": 27910
19587
+ },
19588
+ {
19589
+ "epoch": 0.5181200518676771,
19590
+ "grad_norm": 34.9375,
19591
+ "learning_rate": 9.91904376536958e-06,
19592
+ "loss": 19.9323,
19593
+ "step": 27920
19594
+ },
19595
+ {
19596
+ "epoch": 0.5183056249521569,
19597
+ "grad_norm": 35.09375,
19598
+ "learning_rate": 9.919014769583537e-06,
19599
+ "loss": 20.2018,
19600
+ "step": 27930
19601
+ },
19602
+ {
19603
+ "epoch": 0.5184911980366368,
19604
+ "grad_norm": 38.0,
19605
+ "learning_rate": 9.918985773797494e-06,
19606
+ "loss": 20.4047,
19607
+ "step": 27940
19608
+ },
19609
+ {
19610
+ "epoch": 0.5186767711211165,
19611
+ "grad_norm": 34.34375,
19612
+ "learning_rate": 9.918956778011452e-06,
19613
+ "loss": 20.4592,
19614
+ "step": 27950
19615
+ },
19616
+ {
19617
+ "epoch": 0.5188623442055964,
19618
+ "grad_norm": 34.15625,
19619
+ "learning_rate": 9.918927782225411e-06,
19620
+ "loss": 20.2424,
19621
+ "step": 27960
19622
+ },
19623
+ {
19624
+ "epoch": 0.5190479172900763,
19625
+ "grad_norm": 36.0625,
19626
+ "learning_rate": 9.918898786439367e-06,
19627
+ "loss": 20.5012,
19628
+ "step": 27970
19629
+ },
19630
+ {
19631
+ "epoch": 0.5192334903745561,
19632
+ "grad_norm": 34.53125,
19633
+ "learning_rate": 9.918869790653324e-06,
19634
+ "loss": 20.6693,
19635
+ "step": 27980
19636
+ },
19637
+ {
19638
+ "epoch": 0.5194190634590359,
19639
+ "grad_norm": 35.78125,
19640
+ "learning_rate": 9.918840794867283e-06,
19641
+ "loss": 20.6029,
19642
+ "step": 27990
19643
+ },
19644
+ {
19645
+ "epoch": 0.5196046365435157,
19646
+ "grad_norm": 33.6875,
19647
+ "learning_rate": 9.91881179908124e-06,
19648
+ "loss": 20.3617,
19649
+ "step": 28000
19650
+ },
19651
+ {
19652
+ "epoch": 0.5197902096279956,
19653
+ "grad_norm": 32.34375,
19654
+ "learning_rate": 9.918782803295198e-06,
19655
+ "loss": 20.85,
19656
+ "step": 28010
19657
+ },
19658
+ {
19659
+ "epoch": 0.5199757827124754,
19660
+ "grad_norm": 36.375,
19661
+ "learning_rate": 9.918753807509155e-06,
19662
+ "loss": 20.1941,
19663
+ "step": 28020
19664
+ },
19665
+ {
19666
+ "epoch": 0.5201613557969552,
19667
+ "grad_norm": 35.78125,
19668
+ "learning_rate": 9.918724811723113e-06,
19669
+ "loss": 20.3447,
19670
+ "step": 28030
19671
+ },
19672
+ {
19673
+ "epoch": 0.5203469288814351,
19674
+ "grad_norm": 35.0625,
19675
+ "learning_rate": 9.91869581593707e-06,
19676
+ "loss": 20.8577,
19677
+ "step": 28040
19678
+ },
19679
+ {
19680
+ "epoch": 0.5205325019659148,
19681
+ "grad_norm": 36.0625,
19682
+ "learning_rate": 9.918666820151028e-06,
19683
+ "loss": 20.098,
19684
+ "step": 28050
19685
+ },
19686
+ {
19687
+ "epoch": 0.5207180750503947,
19688
+ "grad_norm": 34.90625,
19689
+ "learning_rate": 9.918637824364987e-06,
19690
+ "loss": 20.6016,
19691
+ "step": 28060
19692
+ },
19693
+ {
19694
+ "epoch": 0.5209036481348746,
19695
+ "grad_norm": 32.3125,
19696
+ "learning_rate": 9.918608828578944e-06,
19697
+ "loss": 20.3424,
19698
+ "step": 28070
19699
+ },
19700
+ {
19701
+ "epoch": 0.5210892212193543,
19702
+ "grad_norm": 36.46875,
19703
+ "learning_rate": 9.9185798327929e-06,
19704
+ "loss": 20.371,
19705
+ "step": 28080
19706
+ },
19707
+ {
19708
+ "epoch": 0.5212747943038342,
19709
+ "grad_norm": 35.96875,
19710
+ "learning_rate": 9.918550837006859e-06,
19711
+ "loss": 20.4307,
19712
+ "step": 28090
19713
+ },
19714
+ {
19715
+ "epoch": 0.521460367388314,
19716
+ "grad_norm": 35.34375,
19717
+ "learning_rate": 9.918521841220816e-06,
19718
+ "loss": 20.6163,
19719
+ "step": 28100
19720
+ },
19721
+ {
19722
+ "epoch": 0.5216459404727938,
19723
+ "grad_norm": 34.25,
19724
+ "learning_rate": 9.918492845434774e-06,
19725
+ "loss": 20.8442,
19726
+ "step": 28110
19727
+ },
19728
+ {
19729
+ "epoch": 0.5218315135572736,
19730
+ "grad_norm": 34.5,
19731
+ "learning_rate": 9.918463849648731e-06,
19732
+ "loss": 20.2093,
19733
+ "step": 28120
19734
+ },
19735
+ {
19736
+ "epoch": 0.5220170866417535,
19737
+ "grad_norm": 34.8125,
19738
+ "learning_rate": 9.918434853862689e-06,
19739
+ "loss": 20.2828,
19740
+ "step": 28130
19741
+ },
19742
+ {
19743
+ "epoch": 0.5222026597262333,
19744
+ "grad_norm": 37.0625,
19745
+ "learning_rate": 9.918405858076646e-06,
19746
+ "loss": 19.9799,
19747
+ "step": 28140
19748
+ },
19749
+ {
19750
+ "epoch": 0.5223882328107131,
19751
+ "grad_norm": 34.5,
19752
+ "learning_rate": 9.918376862290603e-06,
19753
+ "loss": 20.411,
19754
+ "step": 28150
19755
+ },
19756
+ {
19757
+ "epoch": 0.522573805895193,
19758
+ "grad_norm": 34.6875,
19759
+ "learning_rate": 9.918347866504562e-06,
19760
+ "loss": 20.6861,
19761
+ "step": 28160
19762
+ },
19763
+ {
19764
+ "epoch": 0.5227593789796728,
19765
+ "grad_norm": 36.09375,
19766
+ "learning_rate": 9.91831887071852e-06,
19767
+ "loss": 20.4573,
19768
+ "step": 28170
19769
+ },
19770
+ {
19771
+ "epoch": 0.5229449520641526,
19772
+ "grad_norm": 33.90625,
19773
+ "learning_rate": 9.918289874932477e-06,
19774
+ "loss": 20.773,
19775
+ "step": 28180
19776
+ },
19777
+ {
19778
+ "epoch": 0.5231305251486325,
19779
+ "grad_norm": 36.25,
19780
+ "learning_rate": 9.918260879146435e-06,
19781
+ "loss": 20.5433,
19782
+ "step": 28190
19783
+ },
19784
+ {
19785
+ "epoch": 0.5233160982331123,
19786
+ "grad_norm": 34.5625,
19787
+ "learning_rate": 9.918231883360392e-06,
19788
+ "loss": 20.2934,
19789
+ "step": 28200
19790
+ },
19791
+ {
19792
+ "epoch": 0.5235016713175921,
19793
+ "grad_norm": 38.0625,
19794
+ "learning_rate": 9.91820288757435e-06,
19795
+ "loss": 20.8898,
19796
+ "step": 28210
19797
+ },
19798
+ {
19799
+ "epoch": 0.5236872444020719,
19800
+ "grad_norm": 33.90625,
19801
+ "learning_rate": 9.918173891788307e-06,
19802
+ "loss": 20.4065,
19803
+ "step": 28220
19804
+ },
19805
+ {
19806
+ "epoch": 0.5238728174865518,
19807
+ "grad_norm": 35.375,
19808
+ "learning_rate": 9.918144896002266e-06,
19809
+ "loss": 20.3046,
19810
+ "step": 28230
19811
+ },
19812
+ {
19813
+ "epoch": 0.5240583905710315,
19814
+ "grad_norm": 33.71875,
19815
+ "learning_rate": 9.918115900216222e-06,
19816
+ "loss": 19.9315,
19817
+ "step": 28240
19818
+ },
19819
+ {
19820
+ "epoch": 0.5242439636555114,
19821
+ "grad_norm": 35.71875,
19822
+ "learning_rate": 9.918086904430179e-06,
19823
+ "loss": 20.2579,
19824
+ "step": 28250
19825
+ },
19826
+ {
19827
+ "epoch": 0.5244295367399913,
19828
+ "grad_norm": 35.09375,
19829
+ "learning_rate": 9.918057908644138e-06,
19830
+ "loss": 20.2974,
19831
+ "step": 28260
19832
+ },
19833
+ {
19834
+ "epoch": 0.524615109824471,
19835
+ "grad_norm": 34.5,
19836
+ "learning_rate": 9.918028912858096e-06,
19837
+ "loss": 20.3451,
19838
+ "step": 28270
19839
+ },
19840
+ {
19841
+ "epoch": 0.5248006829089509,
19842
+ "grad_norm": 32.625,
19843
+ "learning_rate": 9.917999917072053e-06,
19844
+ "loss": 20.5492,
19845
+ "step": 28280
19846
+ },
19847
+ {
19848
+ "epoch": 0.5249862559934307,
19849
+ "grad_norm": 35.34375,
19850
+ "learning_rate": 9.91797092128601e-06,
19851
+ "loss": 20.4729,
19852
+ "step": 28290
19853
+ },
19854
+ {
19855
+ "epoch": 0.5251718290779105,
19856
+ "grad_norm": 34.84375,
19857
+ "learning_rate": 9.917941925499968e-06,
19858
+ "loss": 20.4557,
19859
+ "step": 28300
19860
+ },
19861
+ {
19862
+ "epoch": 0.5253574021623904,
19863
+ "grad_norm": 34.4375,
19864
+ "learning_rate": 9.917912929713925e-06,
19865
+ "loss": 20.4742,
19866
+ "step": 28310
19867
+ },
19868
+ {
19869
+ "epoch": 0.5255429752468702,
19870
+ "grad_norm": 36.59375,
19871
+ "learning_rate": 9.917883933927883e-06,
19872
+ "loss": 20.2819,
19873
+ "step": 28320
19874
+ },
19875
+ {
19876
+ "epoch": 0.5257285483313501,
19877
+ "grad_norm": 31.46875,
19878
+ "learning_rate": 9.917854938141842e-06,
19879
+ "loss": 20.7438,
19880
+ "step": 28330
19881
+ },
19882
+ {
19883
+ "epoch": 0.5259141214158298,
19884
+ "grad_norm": 34.6875,
19885
+ "learning_rate": 9.9178259423558e-06,
19886
+ "loss": 20.2566,
19887
+ "step": 28340
19888
+ },
19889
+ {
19890
+ "epoch": 0.5260996945003097,
19891
+ "grad_norm": 34.375,
19892
+ "learning_rate": 9.917796946569755e-06,
19893
+ "loss": 20.168,
19894
+ "step": 28350
19895
+ },
19896
+ {
19897
+ "epoch": 0.5262852675847896,
19898
+ "grad_norm": 35.46875,
19899
+ "learning_rate": 9.917767950783714e-06,
19900
+ "loss": 19.9336,
19901
+ "step": 28360
19902
+ },
19903
+ {
19904
+ "epoch": 0.5264708406692693,
19905
+ "grad_norm": 35.15625,
19906
+ "learning_rate": 9.917738954997671e-06,
19907
+ "loss": 20.2843,
19908
+ "step": 28370
19909
+ },
19910
+ {
19911
+ "epoch": 0.5266564137537492,
19912
+ "grad_norm": 35.53125,
19913
+ "learning_rate": 9.917709959211629e-06,
19914
+ "loss": 20.5998,
19915
+ "step": 28380
19916
+ },
19917
+ {
19918
+ "epoch": 0.526841986838229,
19919
+ "grad_norm": 34.71875,
19920
+ "learning_rate": 9.917680963425586e-06,
19921
+ "loss": 20.7026,
19922
+ "step": 28390
19923
+ },
19924
+ {
19925
+ "epoch": 0.5270275599227088,
19926
+ "grad_norm": 36.125,
19927
+ "learning_rate": 9.917651967639544e-06,
19928
+ "loss": 20.4475,
19929
+ "step": 28400
19930
+ },
19931
+ {
19932
+ "epoch": 0.5272131330071886,
19933
+ "grad_norm": 34.5,
19934
+ "learning_rate": 9.917622971853501e-06,
19935
+ "loss": 20.56,
19936
+ "step": 28410
19937
+ },
19938
+ {
19939
+ "epoch": 0.5273987060916685,
19940
+ "grad_norm": 35.09375,
19941
+ "learning_rate": 9.917593976067458e-06,
19942
+ "loss": 20.4937,
19943
+ "step": 28420
19944
+ },
19945
+ {
19946
+ "epoch": 0.5275842791761483,
19947
+ "grad_norm": 33.53125,
19948
+ "learning_rate": 9.917564980281416e-06,
19949
+ "loss": 20.5536,
19950
+ "step": 28430
19951
+ },
19952
+ {
19953
+ "epoch": 0.5277698522606281,
19954
+ "grad_norm": 34.25,
19955
+ "learning_rate": 9.917535984495375e-06,
19956
+ "loss": 19.9744,
19957
+ "step": 28440
19958
+ },
19959
+ {
19960
+ "epoch": 0.527955425345108,
19961
+ "grad_norm": 33.25,
19962
+ "learning_rate": 9.91750698870933e-06,
19963
+ "loss": 20.3831,
19964
+ "step": 28450
19965
+ },
19966
+ {
19967
+ "epoch": 0.5281409984295877,
19968
+ "grad_norm": 35.6875,
19969
+ "learning_rate": 9.91747799292329e-06,
19970
+ "loss": 20.5233,
19971
+ "step": 28460
19972
+ },
19973
+ {
19974
+ "epoch": 0.5283265715140676,
19975
+ "grad_norm": 35.4375,
19976
+ "learning_rate": 9.917448997137247e-06,
19977
+ "loss": 20.7939,
19978
+ "step": 28470
19979
+ },
19980
+ {
19981
+ "epoch": 0.5285121445985475,
19982
+ "grad_norm": 34.65625,
19983
+ "learning_rate": 9.917420001351205e-06,
19984
+ "loss": 20.3844,
19985
+ "step": 28480
19986
+ },
19987
+ {
19988
+ "epoch": 0.5286977176830272,
19989
+ "grad_norm": 34.90625,
19990
+ "learning_rate": 9.917391005565162e-06,
19991
+ "loss": 20.6473,
19992
+ "step": 28490
19993
+ },
19994
+ {
19995
+ "epoch": 0.5288832907675071,
19996
+ "grad_norm": 34.65625,
19997
+ "learning_rate": 9.91736200977912e-06,
19998
+ "loss": 20.1855,
19999
+ "step": 28500
20000
+ },
20001
+ {
20002
+ "epoch": 0.5290688638519869,
20003
+ "grad_norm": 35.6875,
20004
+ "learning_rate": 9.917333013993077e-06,
20005
+ "loss": 20.6092,
20006
+ "step": 28510
20007
+ },
20008
+ {
20009
+ "epoch": 0.5292544369364668,
20010
+ "grad_norm": 34.4375,
20011
+ "learning_rate": 9.917304018207034e-06,
20012
+ "loss": 20.351,
20013
+ "step": 28520
20014
+ },
20015
+ {
20016
+ "epoch": 0.5294400100209465,
20017
+ "grad_norm": 33.59375,
20018
+ "learning_rate": 9.917275022420992e-06,
20019
+ "loss": 19.9355,
20020
+ "step": 28530
20021
+ },
20022
+ {
20023
+ "epoch": 0.5296255831054264,
20024
+ "grad_norm": 35.84375,
20025
+ "learning_rate": 9.91724602663495e-06,
20026
+ "loss": 20.3667,
20027
+ "step": 28540
20028
+ },
20029
+ {
20030
+ "epoch": 0.5298111561899063,
20031
+ "grad_norm": 33.34375,
20032
+ "learning_rate": 9.917217030848908e-06,
20033
+ "loss": 20.1995,
20034
+ "step": 28550
20035
+ },
20036
+ {
20037
+ "epoch": 0.529996729274386,
20038
+ "grad_norm": 35.65625,
20039
+ "learning_rate": 9.917188035062864e-06,
20040
+ "loss": 20.2276,
20041
+ "step": 28560
20042
+ },
20043
+ {
20044
+ "epoch": 0.5301823023588659,
20045
+ "grad_norm": 35.75,
20046
+ "learning_rate": 9.917159039276823e-06,
20047
+ "loss": 20.5546,
20048
+ "step": 28570
20049
+ },
20050
+ {
20051
+ "epoch": 0.5303678754433457,
20052
+ "grad_norm": 36.9375,
20053
+ "learning_rate": 9.91713004349078e-06,
20054
+ "loss": 20.7107,
20055
+ "step": 28580
20056
+ },
20057
+ {
20058
+ "epoch": 0.5305534485278255,
20059
+ "grad_norm": 35.875,
20060
+ "learning_rate": 9.917101047704738e-06,
20061
+ "loss": 20.6607,
20062
+ "step": 28590
20063
+ },
20064
+ {
20065
+ "epoch": 0.5307390216123054,
20066
+ "grad_norm": 33.90625,
20067
+ "learning_rate": 9.917072051918695e-06,
20068
+ "loss": 20.3141,
20069
+ "step": 28600
20070
+ },
20071
+ {
20072
+ "epoch": 0.5309245946967852,
20073
+ "grad_norm": 37.34375,
20074
+ "learning_rate": 9.917043056132653e-06,
20075
+ "loss": 20.4263,
20076
+ "step": 28610
20077
+ },
20078
+ {
20079
+ "epoch": 0.531110167781265,
20080
+ "grad_norm": 34.15625,
20081
+ "learning_rate": 9.91701406034661e-06,
20082
+ "loss": 20.6688,
20083
+ "step": 28620
20084
+ },
20085
+ {
20086
+ "epoch": 0.5312957408657448,
20087
+ "grad_norm": 33.65625,
20088
+ "learning_rate": 9.916985064560567e-06,
20089
+ "loss": 20.6133,
20090
+ "step": 28630
20091
+ },
20092
+ {
20093
+ "epoch": 0.5314813139502247,
20094
+ "grad_norm": 33.625,
20095
+ "learning_rate": 9.916956068774526e-06,
20096
+ "loss": 20.1227,
20097
+ "step": 28640
20098
+ },
20099
+ {
20100
+ "epoch": 0.5316668870347044,
20101
+ "grad_norm": 36.28125,
20102
+ "learning_rate": 9.916927072988484e-06,
20103
+ "loss": 20.2326,
20104
+ "step": 28650
20105
+ },
20106
+ {
20107
+ "epoch": 0.5318524601191843,
20108
+ "grad_norm": 35.8125,
20109
+ "learning_rate": 9.916898077202441e-06,
20110
+ "loss": 20.3055,
20111
+ "step": 28660
20112
+ },
20113
+ {
20114
+ "epoch": 0.5320380332036642,
20115
+ "grad_norm": 34.21875,
20116
+ "learning_rate": 9.916869081416399e-06,
20117
+ "loss": 20.5771,
20118
+ "step": 28670
20119
+ },
20120
+ {
20121
+ "epoch": 0.5322236062881439,
20122
+ "grad_norm": 33.96875,
20123
+ "learning_rate": 9.916840085630356e-06,
20124
+ "loss": 20.036,
20125
+ "step": 28680
20126
+ },
20127
+ {
20128
+ "epoch": 0.5324091793726238,
20129
+ "grad_norm": 34.0,
20130
+ "learning_rate": 9.916811089844314e-06,
20131
+ "loss": 20.4672,
20132
+ "step": 28690
20133
+ },
20134
+ {
20135
+ "epoch": 0.5325947524571036,
20136
+ "grad_norm": 35.90625,
20137
+ "learning_rate": 9.916782094058271e-06,
20138
+ "loss": 20.2936,
20139
+ "step": 28700
20140
+ },
20141
+ {
20142
+ "epoch": 0.5327803255415835,
20143
+ "grad_norm": 34.4375,
20144
+ "learning_rate": 9.91675309827223e-06,
20145
+ "loss": 20.5766,
20146
+ "step": 28710
20147
+ },
20148
+ {
20149
+ "epoch": 0.5329658986260633,
20150
+ "grad_norm": 32.40625,
20151
+ "learning_rate": 9.916724102486186e-06,
20152
+ "loss": 20.6091,
20153
+ "step": 28720
20154
+ },
20155
+ {
20156
+ "epoch": 0.5331514717105431,
20157
+ "grad_norm": 34.5,
20158
+ "learning_rate": 9.916695106700143e-06,
20159
+ "loss": 20.1864,
20160
+ "step": 28730
20161
+ },
20162
+ {
20163
+ "epoch": 0.533337044795023,
20164
+ "grad_norm": 37.6875,
20165
+ "learning_rate": 9.916666110914102e-06,
20166
+ "loss": 20.4298,
20167
+ "step": 28740
20168
+ },
20169
+ {
20170
+ "epoch": 0.5335226178795027,
20171
+ "grad_norm": 34.9375,
20172
+ "learning_rate": 9.91663711512806e-06,
20173
+ "loss": 20.1732,
20174
+ "step": 28750
20175
+ },
20176
+ {
20177
+ "epoch": 0.5337081909639826,
20178
+ "grad_norm": 35.5,
20179
+ "learning_rate": 9.916608119342017e-06,
20180
+ "loss": 20.4916,
20181
+ "step": 28760
20182
+ },
20183
+ {
20184
+ "epoch": 0.5338937640484624,
20185
+ "grad_norm": 35.125,
20186
+ "learning_rate": 9.916579123555974e-06,
20187
+ "loss": 20.4197,
20188
+ "step": 28770
20189
+ },
20190
+ {
20191
+ "epoch": 0.5340793371329422,
20192
+ "grad_norm": 37.21875,
20193
+ "learning_rate": 9.916550127769932e-06,
20194
+ "loss": 20.1008,
20195
+ "step": 28780
20196
+ },
20197
+ {
20198
+ "epoch": 0.5342649102174221,
20199
+ "grad_norm": 35.96875,
20200
+ "learning_rate": 9.91652113198389e-06,
20201
+ "loss": 20.4591,
20202
+ "step": 28790
20203
+ },
20204
+ {
20205
+ "epoch": 0.5344504833019019,
20206
+ "grad_norm": 35.03125,
20207
+ "learning_rate": 9.916492136197847e-06,
20208
+ "loss": 20.3487,
20209
+ "step": 28800
20210
+ },
20211
+ {
20212
+ "epoch": 0.5346360563863817,
20213
+ "grad_norm": 33.84375,
20214
+ "learning_rate": 9.916463140411806e-06,
20215
+ "loss": 20.0548,
20216
+ "step": 28810
20217
+ },
20218
+ {
20219
+ "epoch": 0.5348216294708615,
20220
+ "grad_norm": 34.375,
20221
+ "learning_rate": 9.916434144625763e-06,
20222
+ "loss": 20.0794,
20223
+ "step": 28820
20224
+ },
20225
+ {
20226
+ "epoch": 0.5350072025553414,
20227
+ "grad_norm": 36.84375,
20228
+ "learning_rate": 9.916405148839719e-06,
20229
+ "loss": 20.7802,
20230
+ "step": 28830
20231
+ },
20232
+ {
20233
+ "epoch": 0.5351927756398211,
20234
+ "grad_norm": 35.03125,
20235
+ "learning_rate": 9.916376153053678e-06,
20236
+ "loss": 20.5046,
20237
+ "step": 28840
20238
+ },
20239
+ {
20240
+ "epoch": 0.535378348724301,
20241
+ "grad_norm": 36.15625,
20242
+ "learning_rate": 9.916347157267635e-06,
20243
+ "loss": 20.5881,
20244
+ "step": 28850
20245
+ },
20246
+ {
20247
+ "epoch": 0.5355639218087809,
20248
+ "grad_norm": 36.375,
20249
+ "learning_rate": 9.916318161481593e-06,
20250
+ "loss": 20.5849,
20251
+ "step": 28860
20252
+ },
20253
+ {
20254
+ "epoch": 0.5357494948932607,
20255
+ "grad_norm": 35.71875,
20256
+ "learning_rate": 9.91628916569555e-06,
20257
+ "loss": 20.5164,
20258
+ "step": 28870
20259
+ },
20260
+ {
20261
+ "epoch": 0.5359350679777405,
20262
+ "grad_norm": 36.28125,
20263
+ "learning_rate": 9.916260169909508e-06,
20264
+ "loss": 20.0847,
20265
+ "step": 28880
20266
+ },
20267
+ {
20268
+ "epoch": 0.5361206410622203,
20269
+ "grad_norm": 34.21875,
20270
+ "learning_rate": 9.916231174123465e-06,
20271
+ "loss": 19.9122,
20272
+ "step": 28890
20273
+ },
20274
+ {
20275
+ "epoch": 0.5363062141467002,
20276
+ "grad_norm": 34.0,
20277
+ "learning_rate": 9.916202178337422e-06,
20278
+ "loss": 20.5583,
20279
+ "step": 28900
20280
+ },
20281
+ {
20282
+ "epoch": 0.53649178723118,
20283
+ "grad_norm": 34.1875,
20284
+ "learning_rate": 9.916173182551382e-06,
20285
+ "loss": 20.2385,
20286
+ "step": 28910
20287
+ },
20288
+ {
20289
+ "epoch": 0.5366773603156598,
20290
+ "grad_norm": 33.5,
20291
+ "learning_rate": 9.916144186765339e-06,
20292
+ "loss": 20.233,
20293
+ "step": 28920
20294
+ },
20295
+ {
20296
+ "epoch": 0.5368629334001397,
20297
+ "grad_norm": 35.6875,
20298
+ "learning_rate": 9.916115190979296e-06,
20299
+ "loss": 20.5665,
20300
+ "step": 28930
20301
+ },
20302
+ {
20303
+ "epoch": 0.5370485064846194,
20304
+ "grad_norm": 34.53125,
20305
+ "learning_rate": 9.916086195193254e-06,
20306
+ "loss": 20.1071,
20307
+ "step": 28940
20308
+ },
20309
+ {
20310
+ "epoch": 0.5372340795690993,
20311
+ "grad_norm": 35.34375,
20312
+ "learning_rate": 9.916057199407211e-06,
20313
+ "loss": 20.5049,
20314
+ "step": 28950
20315
+ },
20316
+ {
20317
+ "epoch": 0.5374196526535792,
20318
+ "grad_norm": 34.96875,
20319
+ "learning_rate": 9.916028203621169e-06,
20320
+ "loss": 20.1108,
20321
+ "step": 28960
20322
+ },
20323
+ {
20324
+ "epoch": 0.5376052257380589,
20325
+ "grad_norm": 37.625,
20326
+ "learning_rate": 9.915999207835126e-06,
20327
+ "loss": 20.7692,
20328
+ "step": 28970
20329
+ },
20330
+ {
20331
+ "epoch": 0.5377907988225388,
20332
+ "grad_norm": 37.0625,
20333
+ "learning_rate": 9.915970212049083e-06,
20334
+ "loss": 20.4374,
20335
+ "step": 28980
20336
+ },
20337
+ {
20338
+ "epoch": 0.5379763719070186,
20339
+ "grad_norm": 34.28125,
20340
+ "learning_rate": 9.91594121626304e-06,
20341
+ "loss": 20.5759,
20342
+ "step": 28990
20343
+ },
20344
+ {
20345
+ "epoch": 0.5381619449914984,
20346
+ "grad_norm": 34.6875,
20347
+ "learning_rate": 9.915912220476998e-06,
20348
+ "loss": 20.2427,
20349
+ "step": 29000
20350
+ },
20351
+ {
20352
+ "epoch": 0.5383475180759782,
20353
+ "grad_norm": 36.25,
20354
+ "learning_rate": 9.915883224690956e-06,
20355
+ "loss": 20.5409,
20356
+ "step": 29010
20357
+ },
20358
+ {
20359
+ "epoch": 0.5385330911604581,
20360
+ "grad_norm": 35.25,
20361
+ "learning_rate": 9.915854228904915e-06,
20362
+ "loss": 20.6585,
20363
+ "step": 29020
20364
+ },
20365
+ {
20366
+ "epoch": 0.5387186642449379,
20367
+ "grad_norm": 35.0,
20368
+ "learning_rate": 9.915825233118872e-06,
20369
+ "loss": 20.4239,
20370
+ "step": 29030
20371
+ },
20372
+ {
20373
+ "epoch": 0.5389042373294177,
20374
+ "grad_norm": 35.5,
20375
+ "learning_rate": 9.91579623733283e-06,
20376
+ "loss": 20.3294,
20377
+ "step": 29040
20378
+ },
20379
+ {
20380
+ "epoch": 0.5390898104138976,
20381
+ "grad_norm": 32.21875,
20382
+ "learning_rate": 9.915767241546787e-06,
20383
+ "loss": 20.3876,
20384
+ "step": 29050
20385
+ },
20386
+ {
20387
+ "epoch": 0.5392753834983774,
20388
+ "grad_norm": 34.3125,
20389
+ "learning_rate": 9.915738245760744e-06,
20390
+ "loss": 20.8428,
20391
+ "step": 29060
20392
+ },
20393
+ {
20394
+ "epoch": 0.5394609565828572,
20395
+ "grad_norm": 35.3125,
20396
+ "learning_rate": 9.915709249974702e-06,
20397
+ "loss": 20.0861,
20398
+ "step": 29070
20399
+ },
20400
+ {
20401
+ "epoch": 0.5396465296673371,
20402
+ "grad_norm": 35.9375,
20403
+ "learning_rate": 9.91568025418866e-06,
20404
+ "loss": 20.6351,
20405
+ "step": 29080
20406
+ },
20407
+ {
20408
+ "epoch": 0.5398321027518169,
20409
+ "grad_norm": 34.75,
20410
+ "learning_rate": 9.915651258402618e-06,
20411
+ "loss": 20.3223,
20412
+ "step": 29090
20413
+ },
20414
+ {
20415
+ "epoch": 0.5400176758362967,
20416
+ "grad_norm": 34.90625,
20417
+ "learning_rate": 9.915622262616574e-06,
20418
+ "loss": 20.0895,
20419
+ "step": 29100
20420
+ },
20421
+ {
20422
+ "epoch": 0.5402032489207765,
20423
+ "grad_norm": 33.9375,
20424
+ "learning_rate": 9.915593266830531e-06,
20425
+ "loss": 20.2776,
20426
+ "step": 29110
20427
+ },
20428
+ {
20429
+ "epoch": 0.5403888220052564,
20430
+ "grad_norm": 35.75,
20431
+ "learning_rate": 9.91556427104449e-06,
20432
+ "loss": 20.3674,
20433
+ "step": 29120
20434
+ },
20435
+ {
20436
+ "epoch": 0.5405743950897361,
20437
+ "grad_norm": 33.75,
20438
+ "learning_rate": 9.915535275258448e-06,
20439
+ "loss": 20.5777,
20440
+ "step": 29130
20441
+ },
20442
+ {
20443
+ "epoch": 0.540759968174216,
20444
+ "grad_norm": 34.84375,
20445
+ "learning_rate": 9.915506279472405e-06,
20446
+ "loss": 20.3793,
20447
+ "step": 29140
20448
+ },
20449
+ {
20450
+ "epoch": 0.5409455412586959,
20451
+ "grad_norm": 35.78125,
20452
+ "learning_rate": 9.915477283686363e-06,
20453
+ "loss": 20.5047,
20454
+ "step": 29150
20455
+ },
20456
+ {
20457
+ "epoch": 0.5411311143431756,
20458
+ "grad_norm": 35.46875,
20459
+ "learning_rate": 9.91544828790032e-06,
20460
+ "loss": 20.7663,
20461
+ "step": 29160
20462
+ },
20463
+ {
20464
+ "epoch": 0.5413166874276555,
20465
+ "grad_norm": 35.84375,
20466
+ "learning_rate": 9.915419292114278e-06,
20467
+ "loss": 20.3021,
20468
+ "step": 29170
20469
+ },
20470
+ {
20471
+ "epoch": 0.5415022605121353,
20472
+ "grad_norm": 35.21875,
20473
+ "learning_rate": 9.915390296328235e-06,
20474
+ "loss": 20.4529,
20475
+ "step": 29180
20476
+ },
20477
+ {
20478
+ "epoch": 0.5416878335966151,
20479
+ "grad_norm": 34.8125,
20480
+ "learning_rate": 9.915361300542194e-06,
20481
+ "loss": 20.3839,
20482
+ "step": 29190
20483
+ },
20484
+ {
20485
+ "epoch": 0.541873406681095,
20486
+ "grad_norm": 33.0625,
20487
+ "learning_rate": 9.91533230475615e-06,
20488
+ "loss": 20.3712,
20489
+ "step": 29200
20490
+ },
20491
+ {
20492
+ "epoch": 0.5420589797655748,
20493
+ "grad_norm": 34.1875,
20494
+ "learning_rate": 9.915303308970107e-06,
20495
+ "loss": 20.1465,
20496
+ "step": 29210
20497
+ },
20498
+ {
20499
+ "epoch": 0.5422445528500546,
20500
+ "grad_norm": 36.90625,
20501
+ "learning_rate": 9.915274313184066e-06,
20502
+ "loss": 20.0052,
20503
+ "step": 29220
20504
+ },
20505
+ {
20506
+ "epoch": 0.5424301259345344,
20507
+ "grad_norm": 36.6875,
20508
+ "learning_rate": 9.915245317398024e-06,
20509
+ "loss": 20.3256,
20510
+ "step": 29230
20511
+ },
20512
+ {
20513
+ "epoch": 0.5426156990190143,
20514
+ "grad_norm": 36.125,
20515
+ "learning_rate": 9.915216321611981e-06,
20516
+ "loss": 20.0692,
20517
+ "step": 29240
20518
+ },
20519
+ {
20520
+ "epoch": 0.5428012721034942,
20521
+ "grad_norm": 34.46875,
20522
+ "learning_rate": 9.915187325825938e-06,
20523
+ "loss": 20.5439,
20524
+ "step": 29250
20525
+ },
20526
+ {
20527
+ "epoch": 0.5429868451879739,
20528
+ "grad_norm": 34.78125,
20529
+ "learning_rate": 9.915158330039896e-06,
20530
+ "loss": 20.3697,
20531
+ "step": 29260
20532
+ },
20533
+ {
20534
+ "epoch": 0.5431724182724538,
20535
+ "grad_norm": 34.0625,
20536
+ "learning_rate": 9.915129334253853e-06,
20537
+ "loss": 20.0717,
20538
+ "step": 29270
20539
+ },
20540
+ {
20541
+ "epoch": 0.5433579913569336,
20542
+ "grad_norm": 33.84375,
20543
+ "learning_rate": 9.91510033846781e-06,
20544
+ "loss": 20.1454,
20545
+ "step": 29280
20546
+ },
20547
+ {
20548
+ "epoch": 0.5435435644414134,
20549
+ "grad_norm": 33.78125,
20550
+ "learning_rate": 9.91507134268177e-06,
20551
+ "loss": 20.4484,
20552
+ "step": 29290
20553
+ },
20554
+ {
20555
+ "epoch": 0.5437291375258932,
20556
+ "grad_norm": 34.0625,
20557
+ "learning_rate": 9.915042346895727e-06,
20558
+ "loss": 20.3169,
20559
+ "step": 29300
20560
+ },
20561
+ {
20562
+ "epoch": 0.5439147106103731,
20563
+ "grad_norm": 33.25,
20564
+ "learning_rate": 9.915013351109683e-06,
20565
+ "loss": 20.0393,
20566
+ "step": 29310
20567
+ },
20568
+ {
20569
+ "epoch": 0.5441002836948529,
20570
+ "grad_norm": 35.625,
20571
+ "learning_rate": 9.914984355323642e-06,
20572
+ "loss": 20.2945,
20573
+ "step": 29320
20574
+ },
20575
+ {
20576
+ "epoch": 0.5442858567793327,
20577
+ "grad_norm": 33.96875,
20578
+ "learning_rate": 9.9149553595376e-06,
20579
+ "loss": 20.3528,
20580
+ "step": 29330
20581
+ },
20582
+ {
20583
+ "epoch": 0.5444714298638126,
20584
+ "grad_norm": 35.03125,
20585
+ "learning_rate": 9.914926363751557e-06,
20586
+ "loss": 20.6093,
20587
+ "step": 29340
20588
+ },
20589
+ {
20590
+ "epoch": 0.5446570029482923,
20591
+ "grad_norm": 34.5,
20592
+ "learning_rate": 9.914897367965514e-06,
20593
+ "loss": 20.2775,
20594
+ "step": 29350
20595
+ },
20596
+ {
20597
+ "epoch": 0.5448425760327722,
20598
+ "grad_norm": 35.5,
20599
+ "learning_rate": 9.914868372179473e-06,
20600
+ "loss": 21.0042,
20601
+ "step": 29360
20602
+ },
20603
+ {
20604
+ "epoch": 0.5450281491172521,
20605
+ "grad_norm": 35.65625,
20606
+ "learning_rate": 9.914839376393429e-06,
20607
+ "loss": 20.6964,
20608
+ "step": 29370
20609
+ },
20610
+ {
20611
+ "epoch": 0.5452137222017318,
20612
+ "grad_norm": 35.8125,
20613
+ "learning_rate": 9.914810380607386e-06,
20614
+ "loss": 20.6275,
20615
+ "step": 29380
20616
+ },
20617
+ {
20618
+ "epoch": 0.5453992952862117,
20619
+ "grad_norm": 35.09375,
20620
+ "learning_rate": 9.914781384821346e-06,
20621
+ "loss": 19.9444,
20622
+ "step": 29390
20623
+ },
20624
+ {
20625
+ "epoch": 0.5455848683706915,
20626
+ "grad_norm": 35.21875,
20627
+ "learning_rate": 9.914752389035303e-06,
20628
+ "loss": 20.543,
20629
+ "step": 29400
20630
+ },
20631
+ {
20632
+ "epoch": 0.5457704414551714,
20633
+ "grad_norm": 37.21875,
20634
+ "learning_rate": 9.91472339324926e-06,
20635
+ "loss": 20.303,
20636
+ "step": 29410
20637
+ },
20638
+ {
20639
+ "epoch": 0.5459560145396511,
20640
+ "grad_norm": 34.9375,
20641
+ "learning_rate": 9.914694397463218e-06,
20642
+ "loss": 20.1036,
20643
+ "step": 29420
20644
+ },
20645
+ {
20646
+ "epoch": 0.546141587624131,
20647
+ "grad_norm": 33.78125,
20648
+ "learning_rate": 9.914665401677175e-06,
20649
+ "loss": 20.4787,
20650
+ "step": 29430
20651
+ },
20652
+ {
20653
+ "epoch": 0.5463271607086109,
20654
+ "grad_norm": 34.5,
20655
+ "learning_rate": 9.914636405891133e-06,
20656
+ "loss": 20.1483,
20657
+ "step": 29440
20658
+ },
20659
+ {
20660
+ "epoch": 0.5465127337930906,
20661
+ "grad_norm": 34.53125,
20662
+ "learning_rate": 9.91460741010509e-06,
20663
+ "loss": 20.1377,
20664
+ "step": 29450
20665
+ },
20666
+ {
20667
+ "epoch": 0.5466983068775705,
20668
+ "grad_norm": 32.65625,
20669
+ "learning_rate": 9.914578414319047e-06,
20670
+ "loss": 20.1915,
20671
+ "step": 29460
20672
+ },
20673
+ {
20674
+ "epoch": 0.5468838799620503,
20675
+ "grad_norm": 36.3125,
20676
+ "learning_rate": 9.914549418533005e-06,
20677
+ "loss": 20.0376,
20678
+ "step": 29470
20679
+ },
20680
+ {
20681
+ "epoch": 0.5470694530465301,
20682
+ "grad_norm": 36.34375,
20683
+ "learning_rate": 9.914520422746962e-06,
20684
+ "loss": 20.5274,
20685
+ "step": 29480
20686
+ },
20687
+ {
20688
+ "epoch": 0.54725502613101,
20689
+ "grad_norm": 37.5625,
20690
+ "learning_rate": 9.914491426960921e-06,
20691
+ "loss": 20.618,
20692
+ "step": 29490
20693
+ },
20694
+ {
20695
+ "epoch": 0.5474405992154898,
20696
+ "grad_norm": 34.78125,
20697
+ "learning_rate": 9.914462431174879e-06,
20698
+ "loss": 20.2392,
20699
+ "step": 29500
20700
+ },
20701
+ {
20702
+ "epoch": 0.5476261722999696,
20703
+ "grad_norm": 35.5,
20704
+ "learning_rate": 9.914433435388836e-06,
20705
+ "loss": 20.0363,
20706
+ "step": 29510
20707
+ },
20708
+ {
20709
+ "epoch": 0.5478117453844494,
20710
+ "grad_norm": 35.6875,
20711
+ "learning_rate": 9.914404439602794e-06,
20712
+ "loss": 19.957,
20713
+ "step": 29520
20714
+ },
20715
+ {
20716
+ "epoch": 0.5479973184689293,
20717
+ "grad_norm": 33.3125,
20718
+ "learning_rate": 9.914375443816751e-06,
20719
+ "loss": 20.0681,
20720
+ "step": 29530
20721
+ },
20722
+ {
20723
+ "epoch": 0.548182891553409,
20724
+ "grad_norm": 32.8125,
20725
+ "learning_rate": 9.914346448030708e-06,
20726
+ "loss": 20.4819,
20727
+ "step": 29540
20728
+ },
20729
+ {
20730
+ "epoch": 0.5483684646378889,
20731
+ "grad_norm": 35.1875,
20732
+ "learning_rate": 9.914317452244666e-06,
20733
+ "loss": 20.1795,
20734
+ "step": 29550
20735
+ },
20736
+ {
20737
+ "epoch": 0.5485540377223688,
20738
+ "grad_norm": 34.25,
20739
+ "learning_rate": 9.914288456458623e-06,
20740
+ "loss": 20.3919,
20741
+ "step": 29560
20742
+ },
20743
+ {
20744
+ "epoch": 0.5487396108068485,
20745
+ "grad_norm": 35.03125,
20746
+ "learning_rate": 9.914259460672582e-06,
20747
+ "loss": 20.1524,
20748
+ "step": 29570
20749
+ },
20750
+ {
20751
+ "epoch": 0.5489251838913284,
20752
+ "grad_norm": 33.4375,
20753
+ "learning_rate": 9.914230464886538e-06,
20754
+ "loss": 20.2308,
20755
+ "step": 29580
20756
+ },
20757
+ {
20758
+ "epoch": 0.5491107569758082,
20759
+ "grad_norm": 35.4375,
20760
+ "learning_rate": 9.914201469100495e-06,
20761
+ "loss": 19.9178,
20762
+ "step": 29590
20763
+ },
20764
+ {
20765
+ "epoch": 0.5492963300602881,
20766
+ "grad_norm": 34.8125,
20767
+ "learning_rate": 9.914172473314454e-06,
20768
+ "loss": 20.6476,
20769
+ "step": 29600
20770
+ },
20771
+ {
20772
+ "epoch": 0.5494819031447679,
20773
+ "grad_norm": 34.40625,
20774
+ "learning_rate": 9.914143477528412e-06,
20775
+ "loss": 20.2983,
20776
+ "step": 29610
20777
+ },
20778
+ {
20779
+ "epoch": 0.5496674762292477,
20780
+ "grad_norm": 35.09375,
20781
+ "learning_rate": 9.91411448174237e-06,
20782
+ "loss": 20.609,
20783
+ "step": 29620
20784
+ },
20785
+ {
20786
+ "epoch": 0.5498530493137276,
20787
+ "grad_norm": 35.375,
20788
+ "learning_rate": 9.914085485956327e-06,
20789
+ "loss": 20.1885,
20790
+ "step": 29630
20791
+ },
20792
+ {
20793
+ "epoch": 0.5500386223982073,
20794
+ "grad_norm": 34.1875,
20795
+ "learning_rate": 9.914056490170284e-06,
20796
+ "loss": 19.9611,
20797
+ "step": 29640
20798
+ },
20799
+ {
20800
+ "epoch": 0.5502241954826872,
20801
+ "grad_norm": 33.9375,
20802
+ "learning_rate": 9.914027494384242e-06,
20803
+ "loss": 19.8009,
20804
+ "step": 29650
20805
+ },
20806
+ {
20807
+ "epoch": 0.5504097685671671,
20808
+ "grad_norm": 34.34375,
20809
+ "learning_rate": 9.913998498598199e-06,
20810
+ "loss": 20.1612,
20811
+ "step": 29660
20812
+ },
20813
+ {
20814
+ "epoch": 0.5505953416516468,
20815
+ "grad_norm": 35.1875,
20816
+ "learning_rate": 9.913969502812158e-06,
20817
+ "loss": 20.1682,
20818
+ "step": 29670
20819
+ },
20820
+ {
20821
+ "epoch": 0.5507809147361267,
20822
+ "grad_norm": 32.9375,
20823
+ "learning_rate": 9.913940507026115e-06,
20824
+ "loss": 20.3603,
20825
+ "step": 29680
20826
+ },
20827
+ {
20828
+ "epoch": 0.5509664878206065,
20829
+ "grad_norm": 34.09375,
20830
+ "learning_rate": 9.913911511240071e-06,
20831
+ "loss": 20.2844,
20832
+ "step": 29690
20833
+ },
20834
+ {
20835
+ "epoch": 0.5511520609050863,
20836
+ "grad_norm": 35.78125,
20837
+ "learning_rate": 9.91388251545403e-06,
20838
+ "loss": 20.8106,
20839
+ "step": 29700
20840
+ },
20841
+ {
20842
+ "epoch": 0.5513376339895661,
20843
+ "grad_norm": 35.25,
20844
+ "learning_rate": 9.913853519667988e-06,
20845
+ "loss": 20.042,
20846
+ "step": 29710
20847
+ },
20848
+ {
20849
+ "epoch": 0.551523207074046,
20850
+ "grad_norm": 35.15625,
20851
+ "learning_rate": 9.913824523881945e-06,
20852
+ "loss": 20.7616,
20853
+ "step": 29720
20854
+ },
20855
+ {
20856
+ "epoch": 0.5517087801585258,
20857
+ "grad_norm": 37.375,
20858
+ "learning_rate": 9.913795528095902e-06,
20859
+ "loss": 20.4981,
20860
+ "step": 29730
20861
+ },
20862
+ {
20863
+ "epoch": 0.5518943532430056,
20864
+ "grad_norm": 34.34375,
20865
+ "learning_rate": 9.91376653230986e-06,
20866
+ "loss": 20.4069,
20867
+ "step": 29740
20868
+ },
20869
+ {
20870
+ "epoch": 0.5520799263274855,
20871
+ "grad_norm": 34.25,
20872
+ "learning_rate": 9.913737536523817e-06,
20873
+ "loss": 20.3144,
20874
+ "step": 29750
20875
+ },
20876
+ {
20877
+ "epoch": 0.5522654994119653,
20878
+ "grad_norm": 35.28125,
20879
+ "learning_rate": 9.913708540737775e-06,
20880
+ "loss": 20.3102,
20881
+ "step": 29760
20882
+ },
20883
+ {
20884
+ "epoch": 0.5524510724964451,
20885
+ "grad_norm": 34.09375,
20886
+ "learning_rate": 9.913679544951734e-06,
20887
+ "loss": 20.3573,
20888
+ "step": 29770
20889
+ },
20890
+ {
20891
+ "epoch": 0.552636645580925,
20892
+ "grad_norm": 37.34375,
20893
+ "learning_rate": 9.913650549165691e-06,
20894
+ "loss": 19.9171,
20895
+ "step": 29780
20896
+ },
20897
+ {
20898
+ "epoch": 0.5528222186654048,
20899
+ "grad_norm": 32.78125,
20900
+ "learning_rate": 9.913621553379647e-06,
20901
+ "loss": 20.3493,
20902
+ "step": 29790
20903
+ },
20904
+ {
20905
+ "epoch": 0.5530077917498846,
20906
+ "grad_norm": 35.28125,
20907
+ "learning_rate": 9.913592557593606e-06,
20908
+ "loss": 20.3546,
20909
+ "step": 29800
20910
+ },
20911
+ {
20912
+ "epoch": 0.5531933648343644,
20913
+ "grad_norm": 33.09375,
20914
+ "learning_rate": 9.913563561807563e-06,
20915
+ "loss": 20.5089,
20916
+ "step": 29810
20917
+ },
20918
+ {
20919
+ "epoch": 0.5533789379188443,
20920
+ "grad_norm": 35.03125,
20921
+ "learning_rate": 9.91353456602152e-06,
20922
+ "loss": 20.2858,
20923
+ "step": 29820
20924
+ },
20925
+ {
20926
+ "epoch": 0.553564511003324,
20927
+ "grad_norm": 33.9375,
20928
+ "learning_rate": 9.913505570235478e-06,
20929
+ "loss": 20.5666,
20930
+ "step": 29830
20931
+ },
20932
+ {
20933
+ "epoch": 0.5537500840878039,
20934
+ "grad_norm": 33.3125,
20935
+ "learning_rate": 9.913476574449437e-06,
20936
+ "loss": 20.3237,
20937
+ "step": 29840
20938
+ },
20939
+ {
20940
+ "epoch": 0.5539356571722838,
20941
+ "grad_norm": 33.4375,
20942
+ "learning_rate": 9.913447578663393e-06,
20943
+ "loss": 20.3019,
20944
+ "step": 29850
20945
+ },
20946
+ {
20947
+ "epoch": 0.5541212302567635,
20948
+ "grad_norm": 34.4375,
20949
+ "learning_rate": 9.91341858287735e-06,
20950
+ "loss": 20.2433,
20951
+ "step": 29860
20952
+ },
20953
+ {
20954
+ "epoch": 0.5543068033412434,
20955
+ "grad_norm": 37.40625,
20956
+ "learning_rate": 9.91338958709131e-06,
20957
+ "loss": 20.648,
20958
+ "step": 29870
20959
+ },
20960
+ {
20961
+ "epoch": 0.5544923764257232,
20962
+ "grad_norm": 34.28125,
20963
+ "learning_rate": 9.913360591305267e-06,
20964
+ "loss": 20.0626,
20965
+ "step": 29880
20966
+ },
20967
+ {
20968
+ "epoch": 0.554677949510203,
20969
+ "grad_norm": 37.0,
20970
+ "learning_rate": 9.913331595519224e-06,
20971
+ "loss": 19.7648,
20972
+ "step": 29890
20973
+ },
20974
+ {
20975
+ "epoch": 0.5548635225946829,
20976
+ "grad_norm": 35.71875,
20977
+ "learning_rate": 9.913302599733182e-06,
20978
+ "loss": 20.2157,
20979
+ "step": 29900
20980
+ },
20981
+ {
20982
+ "epoch": 0.5550490956791627,
20983
+ "grad_norm": 35.125,
20984
+ "learning_rate": 9.91327360394714e-06,
20985
+ "loss": 20.0799,
20986
+ "step": 29910
20987
+ },
20988
+ {
20989
+ "epoch": 0.5552346687636425,
20990
+ "grad_norm": 36.6875,
20991
+ "learning_rate": 9.913244608161097e-06,
20992
+ "loss": 20.2929,
20993
+ "step": 29920
20994
+ },
20995
+ {
20996
+ "epoch": 0.5554202418481223,
20997
+ "grad_norm": 34.25,
20998
+ "learning_rate": 9.913215612375054e-06,
20999
+ "loss": 20.0606,
21000
+ "step": 29930
21001
+ },
21002
+ {
21003
+ "epoch": 0.5556058149326022,
21004
+ "grad_norm": 35.3125,
21005
+ "learning_rate": 9.913186616589011e-06,
21006
+ "loss": 20.3643,
21007
+ "step": 29940
21008
+ },
21009
+ {
21010
+ "epoch": 0.555791388017082,
21011
+ "grad_norm": 34.84375,
21012
+ "learning_rate": 9.91315762080297e-06,
21013
+ "loss": 20.3112,
21014
+ "step": 29950
21015
+ },
21016
+ {
21017
+ "epoch": 0.5559769611015618,
21018
+ "grad_norm": 34.46875,
21019
+ "learning_rate": 9.913128625016926e-06,
21020
+ "loss": 20.1955,
21021
+ "step": 29960
21022
+ },
21023
+ {
21024
+ "epoch": 0.5561625341860417,
21025
+ "grad_norm": 34.4375,
21026
+ "learning_rate": 9.913099629230885e-06,
21027
+ "loss": 20.1614,
21028
+ "step": 29970
21029
+ },
21030
+ {
21031
+ "epoch": 0.5563481072705215,
21032
+ "grad_norm": 35.625,
21033
+ "learning_rate": 9.913070633444843e-06,
21034
+ "loss": 20.1444,
21035
+ "step": 29980
21036
+ },
21037
+ {
21038
+ "epoch": 0.5565336803550013,
21039
+ "grad_norm": 34.34375,
21040
+ "learning_rate": 9.9130416376588e-06,
21041
+ "loss": 20.4401,
21042
+ "step": 29990
21043
+ },
21044
+ {
21045
+ "epoch": 0.5567192534394811,
21046
+ "grad_norm": 35.65625,
21047
+ "learning_rate": 9.913012641872758e-06,
21048
+ "loss": 20.0201,
21049
+ "step": 30000
21050
+ },
21051
+ {
21052
+ "epoch": 0.5567192534394811,
21053
+ "eval_loss": 2.5262889862060547,
21054
+ "eval_runtime": 455.282,
21055
+ "eval_samples_per_second": 3189.489,
21056
+ "eval_steps_per_second": 49.837,
21057
+ "step": 30000
21058
  }
21059
  ],
21060
  "logging_steps": 10,
 
21074
  "attributes": {}
21075
  }
21076
  },
21077
+ "total_flos": 5.236568277123072e+18,
21078
  "train_batch_size": 8,
21079
  "trial_name": null,
21080
  "trial_params": null